예제 #1
0
 def test_dataset_to_bucket(self):
     """Load 'a8' from the dataset to the bucket and check the blob.

     The blob 'a8-000000000000.csv.gz' is read back with decompression
     enabled and compared against a one-row, one-column frame.
     """
     populate_dataset()
     loader = create_loader(
         bucket_dir_path=constants.bucket_subdir_path,
         local_dir_path=None,
     )
     loader.load(source='dataset', destination='bucket', data_name='a8')

     # Read back the blob the load is expected to have produced.
     exported_blob = ids.build_blob_name_2('a8-000000000000.csv.gz')
     actual = load.bucket_to_dataframe(exported_blob, decompress=True)

     wanted = pandas.DataFrame(data={'x': ['a8_dataset']})
     self.assert_pandas_equal(wanted, actual)
 def test_raise_error_if_write_empty_and_already_exists(self):
     """A WRITE_EMPTY load of 'a10' after populating must raise Conflict.

     The exception text is also checked against the exact 409 message.
     """
     populate_dataset()
     populate_local()

     load_kwargs = {
         'source': 'local',
         'destination': 'dataset',
         'data_name': 'a10',
         'write_disposition': 'WRITE_EMPTY',
     }
     with self.assertRaises(Conflict) as raised:
         create_loader().load(**load_kwargs)

     expected_message = '409 Already Exists: Table dmp-y-tests:test_gpl.a10'
     self.assertEqual(str(raised.exception), expected_message)
예제 #3
0
 def test_query_to_dataset(self):
     """Load the result of a two-row query into 'a0' and verify it.

     The loader is created with no storage client and no bucket name.
     """
     populate_dataset()
     loader = create_loader(gs_client=None, bucket_name=None)

     sql = "select 3 as x, 'a' as y union all select 2 as x, 'b' as y"
     loader.load(source='query',
                 destination='dataset',
                 query=sql,
                 data_name='a0')

     actual = load.dataset_to_dataframe('a0')
     wanted = pandas.DataFrame(data={'x': [3, 2], 'y': ['a', 'b']})
     self.assert_pandas_equal(wanted, actual)
예제 #4
0
 def test_bucket_to_dataset(self):
     """Load bucket data named 'a' into the dataset and verify the rows.

     The expected frame holds 'a7_bucket' through 'a11_bucket' in a
     single STRING column 'x'.
     """
     populate_dataset()
     populate_bucket()

     loader = create_loader_quick_setup(local_dir_path=None)
     schema = [bigquery.SchemaField(name='x', field_type='STRING')]
     loader.load(source='bucket',
                 destination='dataset',
                 data_name='a',
                 bq_schema=schema)

     actual = load.dataset_to_dataframe('a')
     wanted_values = [f'a{i}_bucket' for i in range(7, 12)]
     wanted = pandas.DataFrame(data={'x': wanted_values})
     self.assert_pandas_equal(wanted, actual)
예제 #5
0
 def test_keep_source_in_dataset(self):
     """Table 'a7' must still exist after a dataset-to-local load."""
     populate_dataset()
     loader = create_loader(
         bucket_dir_path=constants.bucket_subdir_path,
         local_dir_path=constants.local_subdir_path,
     )
     loader.load(source='dataset', destination='local', data_name='a7')

     still_there = exist.table_exists('a7')
     self.assertTrue(still_there)
 def test_exist_in_dataset(self):
     """exist_in_dataset('a8') is False before populating, True after."""
     loader = create_loader_quick_setup(
         local_dir_path=constants.local_subdir_path,
     )

     # Order matters: check absence first, then populate and re-check.
     found_before = loader.exist_in_dataset('a8')
     self.assertFalse(found_before)

     populate_dataset()
     found_after = loader.exist_in_dataset('a8')
     self.assertTrue(found_after)