def test_heterogeneous_configs(self):
    """Run three differently-shaped loads through one ``multi_load`` call.

    The configs are, in order: dataframe -> dataset, query -> dataframe and
    query -> bucket. The test checks that three results come back, that
    only the second one (the load whose destination is 'dataframe')
    carries a value, and that each destination holds the expected data.
    """
    expected1 = pandas.DataFrame(data={'x': [3, 10]})
    expected2 = pandas.DataFrame(data={'y': [4]})
    expected3 = pandas.DataFrame(data={'x': ['b'], 'y': ['a']})
    # NOTE(review): populate() presumably seeds pre-existing test data;
    # its effect is not visible from this method - confirm.
    populate()
    config1 = LoadConfig(
        source='dataframe',
        destination='dataset',
        dataframe=expected1,
        data_name='a10')
    config2 = LoadConfig(
        source='query',
        destination='dataframe',
        query='select 4 as y')
    config3 = LoadConfig(
        source='query',
        destination='bucket',
        query="select 'b' as x, 'a' as y",
        data_name='a11')
    gpl = create_loader(bucket_dir_path=constants.bucket_subdir_path)
    load_results = gpl.multi_load([config1, config2, config3])

    self.assertEqual(len(load_results), 3)
    # assertIsNone shows the unexpected value when the check fails, which
    # assertTrue(x is None) does not.
    self.assertIsNone(load_results[0])
    self.assertIsNone(load_results[2])

    # First config: the dataframe must now be readable from the dataset.
    computed1 = load.dataset_to_dataframe('a10')
    self.assert_pandas_equal(expected1, computed1)

    # Second config: the dataframe is returned directly by multi_load.
    computed2 = load_results[1]
    self.assert_pandas_equal(expected2, computed2)

    # Third config: the data is read back from the bucket, decompressed.
    blob_name = ids.build_blob_name_2('a11-000000000000.csv.gz')
    computed3 = load.bucket_to_dataframe(blob_name, decompress=True)
    self.assert_pandas_equal(expected3, computed3)
def test_write_disposition_default_bucket_to_dataset(self):
    """Load the same bucket data into a dataset twice, no write disposition.

    A single-row dataframe is first written to the bucket. After two
    identical bucket -> dataset loads, the dataset must still equal that
    single-row dataframe.
    """
    frame = pandas.DataFrame(data={'x': [1]})
    load.dataframe_to_bucket(frame, ids.build_blob_name_2('s10'))

    loader = create_loader_quick_setup(
        bucket_dir_path=constants.bucket_subdir_path,
        local_dir_path=None,
    )
    # No write_disposition is passed, so the loader's default applies.
    load_kwargs = dict(source='bucket', destination='dataset', data_name='s10')
    for _ in range(2):
        loader.load(**load_kwargs)

    self.assert_pandas_equal(frame, load.dataset_to_dataframe('s10'))
def test_dataframe_to_dataset(self):
    """Send a two-column dataframe to the dataset and read it back.

    The dataframe is loaded under data name 'a1'; the content read from
    the dataset afterwards must equal the dataframe that was sent.
    """
    frame = pandas.DataFrame(data={'x': [1, 2, 3], 'y': [1, 2, 4]})
    # NOTE(review): populate() presumably seeds pre-existing test data;
    # its effect is not visible from this method - confirm.
    populate()

    loader = create_loader_quick_setup()
    loader.load(
        source='dataframe',
        destination='dataset',
        dataframe=frame,
        data_name='a1',
    )

    self.assert_pandas_equal(frame, load.dataset_to_dataframe('a1'))
def test_write_empty_local_to_dataset(self):
    """Load a local file into the dataset with ``WRITE_EMPTY``.

    A single-row dataframe is written to a local file, loaded from
    'local' to 'dataset' under data name 's12', and the dataset content
    is compared with the original dataframe.
    """
    frame = pandas.DataFrame(data={'x': [1]})
    load.dataframe_to_local(frame, ids.build_local_file_path_1('s12'))

    loader = create_loader(local_dir_path=constants.local_subdir_path)
    loader.load(
        source='local',
        destination='dataset',
        data_name='s12',
        write_disposition='WRITE_EMPTY',
    )

    self.assert_pandas_equal(frame, load.dataset_to_dataframe('s12'))
def test_write_truncate_query_to_dataset(self):
    """Run the same query -> dataset load twice with ``WRITE_TRUNCATE``.

    After both loads, the dataset content under data name 's11' must
    equal the single-row result of the query.
    """
    frame = pandas.DataFrame(data={'x': [1]})
    loader = create_loader_quick_setup(bucket_name=None, local_dir_path=None)

    load_kwargs = dict(
        source='query',
        destination='dataset',
        query='select 1 as x',
        data_name='s11',
        write_disposition='WRITE_TRUNCATE',
    )
    for _ in range(2):
        loader.load(**load_kwargs)

    self.assert_pandas_equal(frame, load.dataset_to_dataframe('s11'))
def test_query_to_dataset(self):
    """Store the result of a two-row query in the dataset and verify it.

    The loader is created with ``gs_client`` and ``bucket_name`` both set
    to None. The query result is loaded under data name 'a0' and then
    read back from the dataset for comparison.
    """
    frame = pandas.DataFrame(data={'x': [3, 2], 'y': ['a', 'b']})
    # NOTE(review): populate_dataset() presumably seeds pre-existing
    # dataset content; its effect is not visible here - confirm.
    populate_dataset()

    loader = create_loader(gs_client=None, bucket_name=None)
    sql = "select 3 as x, 'a' as y union all select 2 as x, 'b' as y"
    loader.load(
        source='query',
        destination='dataset',
        query=sql,
        data_name='a0',
    )

    self.assert_pandas_equal(frame, load.dataset_to_dataframe('a0'))
def test_bucket_to_dataset(self):
    """Load bucket data named 'a' into the dataset with an explicit schema.

    The expected dataset content is a single string column 'x' holding
    'a7_bucket' through 'a11_bucket'.
    """
    values = [f'a{i}_bucket' for i in range(7, 12)]
    frame = pandas.DataFrame(data={'x': values})
    # NOTE(review): the populate_* helpers presumably create the dataset
    # and bucket content this test relies on - confirm.
    populate_dataset()
    populate_bucket()

    loader = create_loader_quick_setup(local_dir_path=None)
    schema = [bigquery.SchemaField(name='x', field_type='STRING')]
    loader.load(
        source='bucket',
        destination='dataset',
        data_name='a',
        bq_schema=schema,
    )

    self.assert_pandas_equal(frame, load.dataset_to_dataframe('a'))
def test_download_upload(self):
    """Download a query result as a dataframe, then upload it to a dataset.

    The dataframe returned by the query -> dataframe load is passed
    straight into a dataframe -> dataset load under data name 'b1'; the
    dataset content must equal the expected two-row frame.
    """
    frame = pandas.DataFrame(data={'x': [3, 2]})
    loader = create_loader(
        bucket_dir_path=constants.bucket_dir_path,
        local_dir_path=constants.local_subdir_path,
    )

    # Download step: the query result comes back as a dataframe.
    downloaded = loader.load(
        source='query',
        destination='dataframe',
        query='select 3 as x union all select 2 as x',
    )
    # Upload step: push that same dataframe to the dataset.
    loader.load(
        source='dataframe',
        destination='dataset',
        dataframe=downloaded,
        data_name='b1',
    )

    self.assert_pandas_equal(frame, load.dataset_to_dataframe('b1'))
def test_write_append_dataframe_to_dataset(self):
    """Append a second dataframe to a dataset with ``WRITE_APPEND``.

    Two single-row dataframes are loaded one after the other under data
    name 's13', the second with ``WRITE_APPEND``. The dataset must then
    contain both rows.
    """
    combined = pandas.DataFrame(data={'x': [0, 1]})
    first_part = pandas.DataFrame(data={'x': [0]})
    second_part = pandas.DataFrame(data={'x': [1]})
    loader = create_loader(chunk_size=2**18, timeout=5)

    # First load: no write_disposition is passed.
    loader.load(
        source='dataframe',
        destination='dataset',
        dataframe=first_part,
        data_name='s13',
    )
    # Second load: explicitly appended to the same data name.
    loader.load(
        source='dataframe',
        destination='dataset',
        dataframe=second_part,
        data_name='s13',
        write_disposition='WRITE_APPEND',
    )

    self.assert_pandas_equal(combined, load.dataset_to_dataframe('s13'))