def test_dataset_to_bucket(self):
    """Load ``a8`` from the dataset to the bucket and check the blob content.

    The loader is created with a bucket sub-directory and no local
    directory. After the load, the blob ``a8-000000000000.csv.gz`` is read
    back (decompressed) and compared to a one-row frame holding
    ``'a8_dataset'``.
    """
    populate_dataset()
    loader = create_loader(
        bucket_dir_path=constants.bucket_subdir_path,
        local_dir_path=None,
    )
    loader.load(source='dataset', destination='bucket', data_name='a8')

    # Read the gzip-compressed CSV blob back into a dataframe.
    exported_blob = ids.build_blob_name_2('a8-000000000000.csv.gz')
    actual = load.bucket_to_dataframe(exported_blob, decompress=True)

    wanted = pandas.DataFrame({'x': ['a8_dataset']})
    self.assert_pandas_equal(wanted, actual)
def test_raise_error_if_write_empty_and_already_exists(self):
    """Check that a ``WRITE_EMPTY`` load of ``a10`` raises ``Conflict``.

    Both the dataset and the local folder are populated first; the load
    from local to dataset is then expected to fail, and the exception
    message is compared to the exact "Already Exists" text.
    """
    populate_dataset()
    populate_local()

    load_options = {
        'source': 'local',
        'destination': 'dataset',
        'data_name': 'a10',
        'write_disposition': 'WRITE_EMPTY',
    }
    # The loader is built inside the context manager, as in the original,
    # so the scope in which Conflict is expected stays the same.
    with self.assertRaises(Conflict) as raised:
        create_loader().load(**load_options)

    wanted_message = '409 Already Exists: Table dmp-y-tests:test_gpl.a10'
    self.assertEqual(str(raised.exception), wanted_message)
def test_query_to_dataset(self):
    """Load the result of a two-row query into table ``a0`` and verify it.

    The loader is created with ``gs_client=None`` and ``bucket_name=None``.
    The table ``a0`` is read back into a dataframe and compared to the
    rows produced by the query.
    """
    populate_dataset()
    loader = create_loader(gs_client=None, bucket_name=None)

    two_row_query = "select 3 as x, 'a' as y union all select 2 as x, 'b' as y"
    loader.load(
        source='query',
        destination='dataset',
        query=two_row_query,
        data_name='a0',
    )

    actual = load.dataset_to_dataframe('a0')
    wanted = pandas.DataFrame({'x': [3, 2], 'y': ['a', 'b']})
    self.assert_pandas_equal(wanted, actual)
def test_bucket_to_dataset(self):
    """Load data named ``a`` from the bucket into the dataset and verify it.

    The load is given an explicit schema with a single STRING column ``x``.
    The resulting table ``a`` is expected to hold the values
    ``a7_bucket`` through ``a11_bucket``.
    """
    populate_dataset()
    populate_bucket()
    loader = create_loader_quick_setup(local_dir_path=None)

    string_column = bigquery.SchemaField(name='x', field_type='STRING')
    # NOTE(review): presumably data_name='a' selects every bucket blob whose
    # name starts with 'a' -- confirm against populate_bucket and the loader.
    loader.load(
        source='bucket',
        destination='dataset',
        data_name='a',
        bq_schema=[string_column],
    )

    actual = load.dataset_to_dataframe('a')
    wanted_values = [f'a{n}_bucket' for n in range(7, 12)]
    wanted = pandas.DataFrame({'x': wanted_values})
    self.assert_pandas_equal(wanted, actual)
def test_keep_source_in_dataset(self):
    """Check that table ``a7`` still exists after a dataset-to-local load."""
    populate_dataset()
    loader = create_loader(
        bucket_dir_path=constants.bucket_subdir_path,
        local_dir_path=constants.local_subdir_path,
    )
    loader.load(source='dataset', destination='local', data_name='a7')

    # The source table must not have been removed by the load.
    source_still_present = exist.table_exists('a7')
    self.assertTrue(source_still_present)
def test_exist_in_dataset(self):
    """Check ``exist_in_dataset('a8')`` before and after populating.

    The loader must report ``a8`` as absent before ``populate_dataset()``
    runs and as present afterwards.
    """
    loader = create_loader_quick_setup(
        local_dir_path=constants.local_subdir_path,
    )

    found_before = loader.exist_in_dataset('a8')
    self.assertFalse(found_before)

    populate_dataset()

    found_after = loader.exist_in_dataset('a8')
    self.assertTrue(found_after)