Example #1
0
    def test_download_file_sliced(self):
        """Export a table loaded in two steps and download the exported file.

        The table is created from one CSV file and then incrementally loaded
        from a second one. The export is downloaded through
        ``self.files.download`` and the test asserts that the local file
        holds exactly the two data rows (the expected lines contain no
        header row).
        """
        def write_csv(row):
            # Reuse the descriptor returned by mkstemp instead of opening the
            # path a second time, so it is closed even when writing fails.
            descriptor, csv_path = tempfile.mkstemp(prefix='sapi-test')
            # Remove the temporary file once the test has finished.
            self.addCleanup(os.remove, csv_path)
            # newline='' is what the csv module asks for; it keeps the
            # explicit '\n' line terminator untouched on every platform.
            with os.fdopen(descriptor, 'w', newline='') as csv_file:
                writer = csv.DictWriter(csv_file,
                                        fieldnames=['col1', 'col2'],
                                        lineterminator='\n',
                                        delimiter=',',
                                        quotechar='"')
                writer.writeheader()
                writer.writerow(row)
            return csv_path

        api_url = os.getenv('KBC_TEST_API_URL')
        api_token = os.getenv('KBC_TEST_TOKEN')

        # Start from a fresh bucket; a 404 only means there was nothing to
        # delete.
        buckets = Buckets(api_url, api_token)
        try:
            buckets.delete('in.c-py-test-files', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        buckets.create(name='py-test-files', stage='in')

        tables = Tables(api_url, api_token)
        table_id = tables.create(
            name='some-table',
            file_path=write_csv({'col1': 'ping', 'col2': 'pong'}),
            bucket_id='in.c-py-test-files')
        tables.load(table_id=table_id,
                    file_path=write_csv({'col1': 'foo', 'col2': 'bar'}),
                    is_incremental=True)
        file_id = tables.export(table_id=table_id)

        # The context manager removes the download directory deterministically
        # instead of leaving it to the garbage collector.
        with tempfile.TemporaryDirectory() as temp_dir:
            local_path = self.files.download(file_id, temp_dir)
            with open(local_path, mode='rt') as downloaded:
                lines = downloaded.readlines()
        self.assertEqual(['"foo","bar"\n', '"ping","pong"\n'], sorted(lines))
class TestTables(unittest.TestCase):
    """Functional tests for the ``Tables`` client.

    The clients are configured from the ``KBC_TEST_API_URL`` and
    ``KBC_TEST_TOKEN`` environment variables. Every test works inside the
    ``in.c-py-test-tables`` bucket, which ``setUp`` recreates and
    ``tearDown`` removes again.
    """

    # Rows shared by most of the tests below.
    PING_ROW = {'col1': 'ping', 'col2': 'pong'}
    FOO_ROW = {'col1': 'foo', 'col2': 'bar'}

    def setUp(self):
        """Create the API clients and start from an empty test bucket."""
        self.tables = Tables(os.getenv('KBC_TEST_API_URL'),
                             os.getenv('KBC_TEST_TOKEN'))
        self.buckets = Buckets(os.getenv('KBC_TEST_API_URL'),
                               os.getenv('KBC_TEST_TOKEN'))
        self._drop_test_bucket()
        self.buckets.create(name='py-test-tables', stage='in')
        # https://github.com/boto/boto3/issues/454
        warnings.simplefilter("ignore", ResourceWarning)

    def tearDown(self):
        """Remove the test bucket created in ``setUp``."""
        self._drop_test_bucket()

    def _drop_test_bucket(self):
        """Force-delete the test bucket, ignoring a 404 response."""
        try:
            self.buckets.delete('in.c-py-test-tables', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise

    def _write_csv(self, rows, fieldnames=('col1', 'col2')):
        """Write ``rows`` (dicts) with a header to a temp CSV; return its path.

        The file is removed automatically when the test finishes.
        """
        descriptor, path = tempfile.mkstemp(prefix='sapi-test')
        self.addCleanup(os.remove, path)
        # Wrapping the mkstemp descriptor guarantees it is closed (the
        # original tests sometimes forgot os.close). newline='' is what the
        # csv module asks for, so the '\n' terminator is written verbatim.
        with os.fdopen(descriptor, 'w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=fieldnames,
                                    lineterminator='\n',
                                    delimiter=',',
                                    quotechar='"')
            writer.writeheader()
            writer.writerows(rows)
        return path

    def _create_table(self, rows, fieldnames=('col1', 'col2'),
                      **create_args):
        """Create ``some-table`` in the test bucket from ``rows``.

        Extra keyword arguments are forwarded to ``self.tables.create``.
        Returns whatever ``self.tables.create`` returns (used as table id).
        """
        return self.tables.create(name='some-table',
                                  file_path=self._write_csv(rows, fieldnames),
                                  bucket_id='in.c-py-test-tables',
                                  **create_args)

    def _check_table(self, table_id, rows_count):
        """Assert the id and ``rowsCount`` reported by the table detail."""
        table_info = self.tables.detail(table_id)
        with self.subTest():
            self.assertEqual(table_id, table_info['id'])
        with self.subTest():
            self.assertEqual(rows_count, table_info['rowsCount'])

    def _export_sorted_lines(self, table_id, **export_args):
        """Export the table to a temp directory; return its sorted lines.

        Extra keyword arguments are forwarded to
        ``self.tables.export_to_file``. The directory is removed before
        returning.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            local_path = self.tables.export_to_file(table_id=table_id,
                                                    path_name=temp_dir,
                                                    **export_args)
            with open(local_path, mode='rt') as exported:
                return sorted(exported.readlines())

    def test_create_table_minimal(self):
        """A table created with only the required arguments is retrievable."""
        table_id = self._create_table([self.PING_ROW])
        table_info = self.tables.detail(table_id)
        with self.subTest():
            self.assertEqual(table_id, table_info['id'])
        with self.subTest():
            self.assertEqual('in.c-py-test-tables', table_info['bucket']['id'])

    def test_create_table_primary_key(self):
        """The primary key passed to ``create`` shows up in the detail."""
        table_id = self._create_table(
            [self.PING_ROW, {'col1': 'pong', 'col2': 'ping'}],
            primary_key=['col1', 'col2'])
        table_info = self.tables.detail(table_id)
        with self.subTest():
            self.assertEqual(table_id, table_info['id'])
        with self.subTest():
            self.assertEqual('in.c-py-test-tables', table_info['bucket']['id'])
        with self.subTest():
            self.assertEqual(['col1', 'col2'], table_info['primaryKey'])

    def test_table_detail(self):
        """The table detail contains the expected values and keys."""
        table_id = self._create_table([self.PING_ROW])
        table_info = self.tables.detail(table_id)
        with self.subTest():
            self.assertEqual(table_id, table_info['id'])
        with self.subTest():
            self.assertEqual('some-table', table_info['name'])
        with self.subTest():
            self.assertIn('in.c-py-test-tables.some-table', table_info['uri'])
        with self.subTest():
            self.assertEqual([], table_info['primaryKey'])
        with self.subTest():
            self.assertEqual(['col1', 'col2'], table_info['columns'])
        expected_keys = ('created', 'lastImportDate', 'lastChangeDate',
                         'rowsCount', 'metadata', 'bucket', 'columnMetadata')
        for key in expected_keys:
            with self.subTest(key=key):
                self.assertIn(key, table_info)

    def test_delete_table(self):
        """A deleted table can no longer be retrieved."""
        table_id = self._create_table([self.PING_ROW])
        table_info = self.tables.detail(table_id)
        self.assertEqual(table_id, table_info['id'])
        self.tables.delete(table_id)
        # Query the table that was just deleted and require the 404; the
        # previous version probed an unrelated name and also passed when no
        # error was raised at all.
        with self.assertRaises(exceptions.HTTPError) as context:
            self.tables.detail(table_id)
        self.assertEqual(404, context.exception.response.status_code)

    def test_invalid_create(self):
        """Requesting the detail of an unknown table fails with a 404."""
        # assertRaises makes the test fail when no error is raised.
        with self.assertRaises(exceptions.HTTPError) as context:
            self.tables.detail('some-totally-non-existent-table')
        self.assertEqual(404, context.exception.response.status_code)

    def test_import_table_incremental(self):
        """An incremental load adds rows to the existing ones."""
        table_id = self._create_table([self.PING_ROW])
        self._check_table(table_id, rows_count=1)

        self.tables.load(table_id=table_id,
                         file_path=self._write_csv([self.FOO_ROW]),
                         is_incremental=True)
        self._check_table(table_id, rows_count=2)

    def test_import_table_no_incremental(self):
        """A non-incremental load replaces the existing rows."""
        table_id = self._create_table([self.PING_ROW])
        self._check_table(table_id, rows_count=1)

        self.tables.load(table_id=table_id,
                         file_path=self._write_csv([self.FOO_ROW]),
                         is_incremental=False)
        self._check_table(table_id, rows_count=1)

    def test_table_preview(self):
        """The preview returns the header and both rows as CSV text."""
        table_id = self._create_table([self.PING_ROW, self.FOO_ROW])
        contents = self.tables.preview(table_id=table_id)
        lines = contents.split('\n')
        self.assertEqual(['', '"col1","col2"', '"foo","bar"', '"ping","pong"'],
                         sorted(lines))

    def test_table_export(self):
        """Exporting a table returns a non-``None`` result."""
        table_id = self._create_table([self.PING_ROW, self.FOO_ROW])
        result = self.tables.export(table_id=table_id)
        self.assertIsNotNone(result)

    def test_table_export_file_plain(self):
        """Export to a local file with ``is_gzip=False``."""
        table_id = self._create_table([self.PING_ROW, self.FOO_ROW])
        self.assertEqual(
            ['"col1","col2"\n', '"foo","bar"\n', '"ping","pong"\n'],
            self._export_sorted_lines(table_id, is_gzip=False))

    def test_table_export_file_gzip(self):
        """Export to a local file with ``is_gzip=True``."""
        table_id = self._create_table([self.PING_ROW, self.FOO_ROW])
        self.assertEqual(
            ['"col1","col2"\n', '"foo","bar"\n', '"ping","pong"\n'],
            self._export_sorted_lines(table_id, is_gzip=True))

    def test_table_export_sliced(self):
        """Export a table that was filled by a create plus incremental load."""
        table_id = self._create_table([self.PING_ROW])
        self._check_table(table_id, rows_count=1)

        self.tables.load(table_id=table_id,
                         file_path=self._write_csv([self.FOO_ROW]),
                         is_incremental=True)
        self.assertEqual(
            ['"col1","col2"\n', '"foo","bar"\n', '"ping","pong"\n'],
            self._export_sorted_lines(table_id))

    def test_table_columns(self):
        """Only the requested columns are exported, in the requested order."""
        table_id = self._create_table(
            [{
                'col1': 'ping',
                'col2': 'pong',
                'col3': 'king',
                'col4': 'kong'
            }],
            fieldnames=['col1', 'col2', 'col3', 'col4'])
        self.assertEqual(
            ['"col3","col2"\n', '"king","pong"\n'],
            self._export_sorted_lines(table_id,
                                      is_gzip=False,
                                      columns=['col3', 'col2']))
Example #3
0
class TestWorkspaces(unittest.TestCase):
    """Functional tests for the ``Workspaces`` client.

    The clients are configured from the ``KBC_TEST_API_URL`` and
    ``KBC_TEST_TOKEN`` environment variables. Tests that create a workspace
    store its id in ``self.workspace_id`` so ``tearDown`` can delete it.
    """

    def setUp(self):
        """Create the API clients and remove leftovers of earlier runs."""
        api_url = os.getenv('KBC_TEST_API_URL')
        api_token = os.getenv('KBC_TEST_TOKEN')
        self.workspaces = Workspaces(api_url, api_token)
        self.buckets = Buckets(api_url, api_token)
        self.jobs = Jobs(api_url, api_token)
        self.tables = Tables(api_url, api_token)
        self.files = Files(api_url, api_token)
        try:
            stale_files = self.files.list(tags=['sapi-client-python-tests'])
            for stale_file in stale_files:
                self.files.delete(stale_file['id'])
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        # Drop the bucket these tests actually create ('py-test-tables');
        # previously a different bucket was deleted here, so a bucket left
        # behind by an aborted run was never cleaned up before the next run.
        self._drop_test_bucket()
        # https://github.com/boto/boto3/issues/454
        warnings.simplefilter("ignore", ResourceWarning)

    def tearDown(self):
        """Delete the workspace (if one was created) and the test bucket."""
        try:
            if hasattr(self, 'workspace_id'):
                try:
                    self.workspaces.delete(self.workspace_id)
                except exceptions.HTTPError as e:
                    if e.response.status_code != 404:
                        raise
        finally:
            # Remove the bucket even when deleting the workspace failed.
            self._drop_test_bucket()

    def _drop_test_bucket(self):
        """Force-delete the test bucket, ignoring a 404 response."""
        try:
            self.buckets.delete('in.c-py-test-tables', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise

    def test_create_workspace(self):
        """A new workspace description contains the expected keys."""
        workspace = self.workspaces.create()
        self.workspace_id = workspace['id']
        with self.subTest():
            self.assertIn('id', workspace)
        with self.subTest():
            self.assertIn('type', workspace)
            self.assertIn(workspace['type'], ['table', 'file'])
        for key in ('name', 'component', 'configurationId', 'created',
                    'connection'):
            with self.subTest(key=key):
                self.assertIn(key, workspace)
        with self.subTest():
            self.assertIn('backend', workspace['connection'])
        with self.subTest():
            self.assertIn('creatorToken', workspace)

    def test_load_tables_to_workspace(self):
        """Load two tables into a workspace and create a table back from it."""
        bucket_id = self.buckets.create('py-test-tables')['id']
        table1_id = self.__create_table(bucket_id, 'test-table-1', {
            'col1': 'ping',
            'col2': 'pong'
        })
        table2_id = self.__create_table(bucket_id, 'test-table-2', {
            'col1': 'king',
            'col2': 'kong'
        })
        workspace = self.workspaces.create()
        self.workspace_id = workspace['id']
        job = self.workspaces.load_tables(workspace['id'], {
            table1_id: 'destination_1',
            table2_id: 'destination_2'
        })
        self.jobs.block_until_completed(job['id'])

        job = self.tables.create_raw(bucket_id,
                                     'back-and-forth-table',
                                     data_workspace_id=workspace['id'],
                                     data_table_name='destination_1')
        self.jobs.block_until_completed(job['id'])

        new_table = self.tables.detail(bucket_id + '.back-and-forth-table')
        self.assertEqual('back-and-forth-table', new_table['name'])

    # test load files into an abs workspace
    def test_load_files_to_workspace(self):
        """Load tagged files into an ``abs`` workspace and read them back."""
        if os.getenv('SKIP_ABS_TEST'):
            # The message now names the variable that is actually checked.
            self.skipTest(
                'Skipping ABS test because env var SKIP_ABS_TEST was set')
        # put a test file to storage
        descriptor, path = tempfile.mkstemp(prefix='sapi-test')
        self.addCleanup(os.remove, path)
        with os.fdopen(descriptor, 'wb') as test_file:
            test_file.write(bytes('fooBar', 'utf-8'))

        # We'll put 2 files with the same tag to test multiple results
        file1_id = self.files.upload_file(
            path, tags=['sapi-client-python-tests', 'file1'])
        file2_id = self.files.upload_file(
            path, tags=['sapi-client-python-tests', 'file2'])

        file1 = self.files.detail(file1_id)
        file2 = self.files.detail(file2_id)
        # create a workspace and load the file to it
        workspace = self.workspaces.create('abs')
        self.workspace_id = workspace['id']
        self.workspaces.load_files(workspace['id'], {
            'tags': ['sapi-client-python-tests'],
            'destination': 'data/in/files'
        })

        blob_service_client = BlobServiceClient.from_connection_string(
            workspace['connection']['connectionString'])

        def read_blob(destination, file_info):
            # Blobs are looked up as <destination>/<file name>/<file id>.
            blob_client = blob_service_client.get_blob_client(
                container=workspace['connection']['container'],
                blob='%s/%s/%s' % (destination, file_info['name'],
                                   str(file_info['id'])))
            return blob_client.download_blob().readall().decode('utf-8')

        # assert that both files were loaded to the workspace
        self.assertEqual('fooBar', read_blob('data/in/files', file1))
        self.assertEqual('fooBar', read_blob('data/in/files', file2))

        # now let's test that we can use the 'and' operator.
        # in this case file2 should not get loaded
        self.workspaces.load_files(
            workspace['id'], {
                'tags': ['sapi-client-python-tests', 'file1'],
                'operator': 'and',
                'destination': 'data/in/and_files'
            })
        # file 1 should be there
        self.assertEqual('fooBar', read_blob('data/in/and_files', file1))

        # file 2 should not
        with self.assertRaises(ResourceNotFoundError) as context:
            read_blob('data/in/and_files', file2)

        self.assertIn('The specified blob does not exist',
                      str(context.exception))

    def __create_table(self, bucket_id, table_name, row):
        """Create ``table_name`` in ``bucket_id`` from a single-row CSV.

        ``row`` is a dict with the keys ``col1`` and ``col2``. Returns
        whatever ``self.tables.create`` returns (used as the table id).
        """
        descriptor, path = tempfile.mkstemp(prefix='sapi-test')
        self.addCleanup(os.remove, path)
        # Wrapping the mkstemp descriptor closes it together with the file;
        # it used to be left open. newline='' is what the csv module asks for.
        with os.fdopen(descriptor, 'w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=['col1', 'col2'],
                                    lineterminator='\n',
                                    delimiter=',',
                                    quotechar='"')
            writer.writeheader()
            writer.writerow(row)
        return self.tables.create(name=table_name,
                                  file_path=path,
                                  bucket_id=bucket_id)