def test_raise_error_if_d_and_b_none_project_id_not_none(self):
    """A project_id without a dataset_name or bucket_name must be rejected."""
    expected_msg = ('At least one of dataset_name or bucket_name '
                    'must be provided if project_id is provided')
    with self.assertRaises(ValueError) as context:
        create_loader_quick_setup(
            project_id='pi', dataset_name=None, bucket_name=None)
    self.assertEqual(expected_msg, str(context.exception))
# --- Example 2 ---
 def test_keep_source_in_bucket(self):
     """Loading bucket -> dataframe must leave the source blob in place."""
     populate_bucket()
     loader = create_loader_quick_setup(
         bucket_dir_path=constants.bucket_subdir_path)
     loader.load(source='bucket', destination='dataframe', data_name='a10')
     self.assertTrue(exist.blob_exists(ids.build_blob_name_2('a10')))
# --- Example 3 ---
 def test_keep_source_in_local(self):
     """Loading local -> bucket must leave the source local file in place."""
     populate_local()
     loader = create_loader_quick_setup(
         dataset_name=None, bucket_dir_path=constants.bucket_dir_path)
     loader.load(source='local', destination='bucket', data_name='a10')
     path = ids.build_local_file_path_0('a10')
     self.assertTrue(exist.local_file_exists(path))
# --- Example 4 ---
    def test_list_blob_uris(self):
        """list_blob_uris must return the sorted URIs matching a prefix."""
        populate_bucket()

        gpl00 = create_loader()
        gpl10 = create_loader(bucket_dir_path=constants.bucket_dir_path)
        gpl20 = create_loader_quick_setup(
            bucket_dir_path=constants.bucket_subdir_path, separator='#')

        def expected_uris(build_blob_name, indexes):
            # Sorted bucket URIs for the data names a<i>, i in indexes.
            names = (build_blob_name(f'a{i}') for i in indexes)
            return sorted(ids.build_bucket_uri(name) for name in names)

        self.assertEqual(
            expected_uris(ids.build_blob_name_0, range(7, 12)),
            gpl00.list_blob_uris('a'))
        self.assertEqual(
            expected_uris(ids.build_blob_name_1, range(10, 13)),
            gpl10.list_blob_uris('a1'))
        self.assertEqual(
            expected_uris(ids.build_blob_name_2, range(9, 14)),
            gpl20.list_blob_uris('a'))

        # Prefixes matching nothing must yield empty lists.
        self.assertEqual([], gpl00.list_blob_uris('dir'))
        self.assertEqual([], gpl10.list_blob_uris('su'))
# --- Example 5 ---
 def test_local_to_bucket(self):
     """A local file must round-trip unchanged through the bucket."""
     source = pandas.DataFrame(data={'y': ['c', 'a', 'b']})
     load.dataframe_to_local(source, ids.build_local_file_path_0('b'))
     loader = create_loader_quick_setup(
         dataset_name=None, bucket_dir_path=constants.bucket_subdir_path)
     loader.load(source='local', destination='bucket', data_name='b')
     result = load.bucket_to_dataframe(
         ids.build_blob_name_2('b'), decompress=False)
     self.assert_pandas_equal(source, result)
# --- Example 6 ---
 def test_write_disposition_default_bucket_to_dataset(self):
     """Loading the same blob twice must still yield the one-row table."""
     expected = pandas.DataFrame(data={'x': [1]})
     load.dataframe_to_bucket(expected, ids.build_blob_name_2('s10'))
     loader = create_loader_quick_setup(
         bucket_dir_path=constants.bucket_subdir_path, local_dir_path=None)
     # Two loads with the default write disposition must not duplicate rows.
     for _ in range(2):
         loader.load(source='bucket', destination='dataset', data_name='s10')
     self.assert_pandas_equal(expected, load.dataset_to_dataframe('s10'))
# --- Example 7 ---
 def test_dataframe_to_dataset(self):
     """A dataframe loaded to the dataset must read back identically."""
     populate()
     frame = pandas.DataFrame(data={'x': [1, 2, 3], 'y': [1, 2, 4]})
     loader = create_loader_quick_setup()
     loader.load(
         source='dataframe',
         destination='dataset',
         dataframe=frame,
         data_name='a1')
     self.assert_pandas_equal(frame, load.dataset_to_dataframe('a1'))
# --- Example 8 ---
 def test_write_truncate_query_to_dataset(self):
     """WRITE_TRUNCATE must leave a single row after repeated loads."""
     loader = create_loader_quick_setup(bucket_name=None, local_dir_path=None)
     for _ in range(2):
         loader.load(source='query',
                     destination='dataset',
                     query='select 1 as x',
                     data_name='s11',
                     write_disposition='WRITE_TRUNCATE')
     expected = pandas.DataFrame(data={'x': [1]})
     self.assert_pandas_equal(expected, load.dataset_to_dataframe('s11'))
# --- Example 9 ---
 def test_config_repeated(self):
     """multi_load must accept the same config object several times."""
     populate()
     expected = pandas.DataFrame(data={'x': [3]})
     loader = create_loader_quick_setup(
         local_dir_path=constants.local_subdir_path)
     config = LoadConfig(source='query',
                         destination='dataframe',
                         query='select 3 as x')
     # Three identical configs must each produce the same dataframe.
     for result in loader.multi_load(configs=[config] * 3):
         self.assert_pandas_equal(expected, result)
# --- Example 10 ---
 def test_local_to_bucket(self):
     """A local -> bucket load must emit the expected DEBUG start log."""
     populate_local()
     with self.assertLogs('google_pandas_load.loader', level='DEBUG') as cm:
         loader = create_loader_quick_setup(
             dataset_name=None, bucket_dir_path=constants.bucket_dir_path)
         loader.load(source='local', destination='bucket', data_name='a9')
         # Exactly two records: only the first (the start message) is checked.
         self.assertEqual(2, len(cm.records))
         first_log = formatter.format(cm.records[0])
         self.assertEqual(
             'google_pandas_load.loader # DEBUG # '
             'Starting local to bucket...', first_log)
# --- Example 11 ---
 def test_bucket_to_dataset(self):
     """Bucket blobs loaded with an explicit bq_schema must read back as rows."""
     populate_dataset()
     populate_bucket()
     expected = pandas.DataFrame(
         data={'x': [f'a{i}_bucket' for i in range(7, 12)]})
     loader = create_loader_quick_setup(local_dir_path=None)
     schema = [bigquery.SchemaField(name='x', field_type='STRING')]
     loader.load(
         source='bucket',
         destination='dataset',
         data_name='a',
         bq_schema=schema)
     self.assert_pandas_equal(expected, load.dataset_to_dataframe('a'))
 def test_compress_dataframe_to_local(self):
     """A dataframe written to local storage must come out gzip-compressed."""
     loader = create_loader_quick_setup(
         project_id=None,
         dataset_name=None,
         bucket_name=None,
         local_dir_path=constants.local_subdir_path)
     frame = pandas.DataFrame(data={'x': [1]})
     loader.load(
         source='dataframe',
         destination='local',
         dataframe=frame,
         data_name='b100')
     path = ids.build_local_file_path_1('b100.csv.gz')
     self.assertTrue(is_gz_file(path))
# --- Example 13 ---
 def test_post_clear_dataframe_to_dataset(self):
     """Loading to the dataset must clear leftover bucket and local copies."""
     populate()
     blob_name = ids.build_blob_name_2('a10')
     file_path = ids.build_local_file_path_0('a10')
     # Both intermediate copies exist before the load...
     self.assertTrue(exist.blob_exists(blob_name))
     self.assertTrue(exist.local_file_exists(file_path))
     loader = create_loader_quick_setup(
         bucket_dir_path=constants.bucket_subdir_path)
     loader.load(source='dataframe',
                 destination='dataset',
                 dataframe=pandas.DataFrame(data={'x': [1]}),
                 data_name='a10')
     # ...and are gone afterwards.
     self.assertFalse(exist.blob_exists(blob_name))
     self.assertFalse(exist.local_file_exists(file_path))
 def test_exist_in_bucket(self):
     """exist_in_bucket must flip from False to True once blobs are created."""
     gpl01 = create_loader(local_dir_path=constants.local_subdir_path)
     gpl11 = create_loader(bucket_dir_path=constants.bucket_dir_path,
                           local_dir_path=constants.local_subdir_path)
     gpl21 = create_loader_quick_setup(
         dataset_name=None,
         bucket_dir_path=constants.bucket_subdir_path,
         local_dir_path=constants.local_subdir_path)
     checks = ((gpl01, 'a1'), (gpl11, 'a10'), (gpl21, 'a'))
     for loader, data_name in checks:
         self.assertFalse(loader.exist_in_bucket(data_name))
     populate_bucket()
     for loader, data_name in checks:
         self.assertTrue(loader.exist_in_bucket(data_name))
# --- Example 15 ---
 def test_local_to_dataframe(self):
     """A local -> dataframe load must emit a timed 'Ended' DEBUG log."""
     populate_local()
     with self.assertLogs('google_pandas_load.loader', level='DEBUG') as cm:
         loader = create_loader_quick_setup(
             project_id=None,
             dataset_name=None,
             bucket_name=None,
             bucket_dir_path=constants.bucket_subdir_path)
         loader.load(source='local', destination='dataframe', data_name='a9')
         # Exactly two records; the second is the timed end message.
         self.assertEqual(2, len(cm.records))
         end_log = formatter.format(cm.records[1])
         pattern = re.compile(
             r'^google_pandas_load.loader # DEBUG # '
             r'Ended local to dataframe \[[0-9]+s\]$')
         self.assertIsNotNone(pattern.search(end_log))
 def test_call_loader_getters(self):
     """Every loader getter must reflect its construction arguments."""
     gpl00 = create_loader()
     gpl10 = create_loader(bucket_dir_path=bucket_dir_path)
     gpl20 = create_loader(bucket_dir_path=bucket_subdir_path)
     gpl01 = create_loader_quick_setup(project_id=None,
                                       dataset_name=None,
                                       bucket_name=None,
                                       local_dir_path=local_subdir_path)
     # Clients and the bucket handle are always built.
     for handle in (gpl00.bq_client, gpl00.gs_client, gpl00.bucket):
         self.assertIsNotNone(handle)
     self.assertEqual(dataset_id, gpl00.dataset_id)
     self.assertEqual(dataset_name, gpl00.dataset_name)
     self.assertEqual(bucket_name, gpl00.bucket_name)
     # bucket_dir_path defaults to None and otherwise echoes the argument.
     self.assertIsNone(gpl00.bucket_dir_path)
     self.assertEqual(bucket_dir_path, gpl10.bucket_dir_path)
     self.assertEqual(bucket_subdir_path, gpl20.bucket_dir_path)
     self.assertEqual(local_dir_path, gpl00.local_dir_path)
     self.assertEqual(local_subdir_path, gpl01.local_dir_path)
# --- Example 17 ---
    def test_list_blobs(self):
        """list_blobs must return blobs sorted by name for a given prefix."""
        populate_bucket()

        gpl00 = create_loader_quick_setup()
        gpl10 = create_loader(bucket_dir_path=constants.bucket_dir_path)
        gpl20 = create_loader(bucket_dir_path=constants.bucket_subdir_path)

        def listed_names(loader, prefix):
            # Names of the blobs returned for the prefix, in returned order.
            return [blob.name for blob in loader.list_blobs(prefix)]

        self.assertEqual(
            sorted(ids.build_blob_name_0(f'a{i}') for i in range(7, 12)),
            listed_names(gpl00, 'a'))
        self.assertEqual(
            sorted(ids.build_blob_name_1(f'a{i}') for i in range(10, 13)),
            listed_names(gpl10, 'a1'))
        self.assertEqual(
            sorted(ids.build_blob_name_2(f'a{i}') for i in range(9, 14)),
            listed_names(gpl20, ''))

        # Non-matching prefixes must yield empty lists.
        self.assertEqual([], gpl00.list_blobs('dir'))
        self.assertEqual([], gpl10.list_blobs('su'))
 def test_exist_in_dataset(self):
     """exist_in_dataset must turn True once the table is created."""
     loader = create_loader_quick_setup(
         local_dir_path=constants.local_subdir_path)
     self.assertFalse(loader.exist_in_dataset('a8'))
     populate_dataset()
     self.assertTrue(loader.exist_in_dataset('a8'))
 def test_call_loader_quick_setup_getters(self):
     """The quick-setup loader must expose the project_id it was given."""
     loader = create_loader_quick_setup(bucket_name=None)
     self.assertEqual(project_id, loader.project_id)
 def test_raise_error_if_project_id_none_bucket_name_not_none(self):
     """Omitting project_id while a bucket_name applies must be rejected."""
     with self.assertRaises(ValueError) as context:
         create_loader_quick_setup(project_id=None, dataset_name=None)
     # NOTE(review): the expected message matches the library's wording
     # verbatim, including the missing 'be' ('must provided') — confirm
     # upstream before changing it here.
     expected_msg = 'project_id must provided if bucket_name is provided'
     self.assertEqual(expected_msg, str(context.exception))