def load_files(self, workspace_id, file_mapping):
    """
    Load files from file storage into a workspace.

    * only supports ABS workspaces
    * writes the matching files to "{destination}/file_name/file_id"

    Args:
        workspace_id (int or str): The id of the workspace into which to
            load the files.
        file_mapping (:obj:`dict`):
            tags: [],
            operator: enum('or', 'and'), default 'or',
            destination: string path without trailing /

    Raises:
        requests.HTTPError: If the API request fails.
    """
    workspace = self.detail(workspace_id)
    if (workspace['type'] != 'file'
            and workspace['connection']['backend'] != 'abs'):
        raise Exception(
            'Loading files to workspace is only available for ABS workspaces'
        )
    files = Files(self.root_url, self.token)
    if 'operator' in file_mapping and file_mapping['operator'] == 'and':
        # with the 'and' operator, build a query requiring all tags at once
        query = ' AND '.join(
            map(lambda tag: 'tags:"' + tag + '"', file_mapping['tags']))
        file_list = files.list(q=query)
    else:
        file_list = files.list(tags=file_mapping['tags'])
    jobs = Jobs(self.root_url, self.token)
    jobs_list = []
    for file in file_list:
        inputs = {
            file['id']: "%s/%s" % (file_mapping['destination'], file['name'])
        }
        body = _make_body(inputs, source_key='dataFileId')
        # always preserve the workspace so that each load adds to the
        # previously loaded files instead of wiping them
        body['preserve'] = 1
        url = '{}/{}/load'.format(self.base_url, workspace['id'])
        job = self._post(url, data=body)
        jobs_list.append(job)
    for job in jobs_list:
        if not jobs.block_for_success(job['id']):
            try:
                print("Failed to load a file with error: %s"
                      % job['results']['message'])
            except (KeyError, TypeError):
                print("An unknown error occurred loading data. Job ID %s"
                      % job['id'])
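A minimal usage sketch for load_files (not from the source): it assumes the kbcstorage.workspaces import path, valid credentials in the KBC_TEST_API_URL / KBC_TEST_TOKEN env vars, and a hypothetical ABS workspace id.

import os

from kbcstorage.workspaces import Workspaces  # assumed import path

workspaces = Workspaces(os.getenv('KBC_TEST_API_URL'),
                        os.getenv('KBC_TEST_TOKEN'))
# 123456 is a hypothetical workspace id; every file tagged 'my-tag' ends
# up under data/in/files/<file_name>/<file_id> in the workspace container
workspaces.load_files(123456, {
    'tags': ['my-tag'],
    'destination': 'data/in/files'
})

With 'operator': 'and' added to the mapping, only files carrying all of the listed tags would be loaded.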
def export(self, table_id, limit=None, file_format='rfc',
           changed_since=None, changed_until=None, columns=None,
           where_column=None, where_values=None, where_operator='eq',
           is_gzip=False):
    """
    Export data from a table to a Storage file.

    Args:
        table_id (str): Table id
        limit (int): Number of rows to export.
        file_format (str): 'rfc', 'escaped' or 'raw'
        changed_since (str): Filter by import date. Both since and until
            values can be a unix timestamp or any date accepted by
            strtotime.
        changed_until (str): Filter by import date. Both since and until
            values can be a unix timestamp or any date accepted by
            strtotime.
        columns (list): List of columns to export.
        where_column (str): Column for exporting only matching rows.
        where_operator (str): 'eq' or 'neq'
        where_values (list): Values for exporting only matching rows.
        is_gzip (bool): Result will be gzipped.

    Returns:
        file_id (str): Id of the Storage file containing the exported
            data.

    Raises:
        requests.HTTPError: If the API request fails.
        RuntimeError: If the export job finishes with an error.
    """
    job = self.export_raw(table_id=table_id, limit=limit,
                          file_format=file_format,
                          changed_since=changed_since,
                          changed_until=changed_until, columns=columns,
                          where_column=where_column,
                          where_values=where_values,
                          where_operator=where_operator, is_gzip=is_gzip)
    jobs = Jobs(self.root_url, self.token)
    job = jobs.block_until_completed(job['id'])
    if job['status'] == 'error':
        raise RuntimeError(job['error']['message'])
    return job['results']['file']['id']
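A usage sketch, assuming the kbcstorage.tables import path and a hypothetical table id; the call blocks until the export job finishes and returns the id of the resulting Storage file.

import os

from kbcstorage.tables import Tables  # assumed import path

tables = Tables(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
# 'in.c-demo.my-table' is a hypothetical table id; export at most 100
# gzipped rows where the 'status' column equals 'active'
file_id = tables.export(table_id='in.c-demo.my-table', limit=100,
                        where_column='status', where_values=['active'],
                        is_gzip=True)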
def load(self, table_id, file_path, is_incremental=False, delimiter=',',
         enclosure='"', escaped_by='', columns=None,
         without_headers=False):
    """
    Load data into an existing table.

    Args:
        table_id (str): Table id
        file_path (str): Path to local CSV file.
        is_incremental (bool): Load incrementally (do not truncate the
            table).
        delimiter (str): Field delimiter used in the CSV file.
        enclosure (str): Field enclosure used in the CSV file.
        escaped_by (str): Escape character used in the CSV file.
        columns (list): List of columns in the CSV file.
        without_headers (bool): The CSV file does not contain headers.

    Returns:
        response_body: The parsed json from the HTTP response containing
            the write results.

    Raises:
        requests.HTTPError: If the API request fails.
        RuntimeError: If the import job finishes with an error.
    """
    files = Files(self.root_url, self.token)
    file_id = files.upload_file(file_path=file_path, tags=['file-import'],
                                do_notify=False, is_public=False)
    job = self.load_raw(table_id=table_id, data_file_id=file_id,
                        delimiter=delimiter, enclosure=enclosure,
                        escaped_by=escaped_by,
                        is_incremental=is_incremental, columns=columns,
                        without_headers=without_headers)
    jobs = Jobs(self.root_url, self.token)
    job = jobs.block_until_completed(job['id'])
    if job['status'] == 'error':
        raise RuntimeError(job['error']['message'])
    return job['results']
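A sketch of an incremental load, under the same assumed import path; the table id and CSV file are hypothetical.

import os

from kbcstorage.tables import Tables  # assumed import path

tables = Tables(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
# append the rows of a local CSV instead of truncating the table first
results = tables.load(table_id='in.c-demo.my-table',  # hypothetical id
                      file_path='new-rows.csv',       # hypothetical file
                      is_incremental=True)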
def create(self, bucket_id, name, file_path, delimiter=',', enclosure='"',
           escaped_by='', primary_key=None):
    """
    Create a new table from a CSV file.

    Args:
        bucket_id (str): Id of the bucket in which the table is created.
        name (str): The new table name (only alphanumeric characters and
            underscores).
        file_path (str): Path to local CSV file.
        delimiter (str): Field delimiter used in the CSV file.
        enclosure (str): Field enclosure used in the CSV file.
        escaped_by (str): Escape character used in the CSV file.
        primary_key (list): Primary key of the table.

    Returns:
        table_id (str): Id of the created table.

    Raises:
        requests.HTTPError: If the API request fails.
        RuntimeError: If the create job finishes with an error.
    """
    files = Files(self.root_url, self.token)
    file_id = files.upload_file(file_path=file_path, tags=['file-import'],
                                do_notify=False, is_public=False)
    job = self.create_raw(bucket_id=bucket_id, name=name,
                          data_file_id=file_id, delimiter=delimiter,
                          enclosure=enclosure, escaped_by=escaped_by,
                          primary_key=primary_key)
    jobs = Jobs(self.root_url, self.token)
    job = jobs.block_until_completed(job['id'])
    if job['status'] == 'error':
        raise RuntimeError(job['error']['message'])
    return job['results']['id']
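A sketch of creating a table, under the same assumed import path; the bucket id, table name, and CSV file are hypothetical.

import os

from kbcstorage.tables import Tables  # assumed import path

tables = Tables(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
table_id = tables.create(bucket_id='in.c-demo',  # hypothetical bucket
                         name='my_table',
                         file_path='data.csv',   # hypothetical file
                         primary_key=['id'])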
def __init__(self, api_domain, token):
    """
    Initialise a client.

    Args:
        api_domain (str): The domain on which the API sits,
            e.g. "https://connection.keboola.com".
        token (str): A Storage API token.
    """
    self.root_url = api_domain
    self._token = token
    self.buckets = Buckets(self.root_url, self.token)
    self.files = Files(self.root_url, self.token)
    self.jobs = Jobs(self.root_url, self.token)
    self.tables = Tables(self.root_url, self.token)
    self.workspaces = Workspaces(self.root_url, self.token)
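A construction sketch, assuming the client class is exported as kbcstorage.client.Client; once built, the endpoint attributes wired up in __init__ are used directly.

import os

from kbcstorage.client import Client  # assumed import path

client = Client(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
# each attribute is a ready-made endpoint sharing the same URL and token
recent_jobs = client.jobs.list()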
def export_to_file(self, table_id, path_name, limit=None,
                   file_format='rfc', changed_since=None,
                   changed_until=None, columns=None, where_column=None,
                   where_values=None, where_operator='eq', is_gzip=True):
    """
    Export data from a table to a local file.

    Args:
        table_id (str): Table id
        path_name (str): Destination directory for the exported file.
        limit (int): Number of rows to export.
        file_format (str): 'rfc', 'escaped' or 'raw'
        changed_since (str): Filter by import date. Both since and until
            values can be a unix timestamp or any date accepted by
            strtotime.
        changed_until (str): Filter by import date. Both since and until
            values can be a unix timestamp or any date accepted by
            strtotime.
        columns (list): List of columns to export.
        where_column (str): Column for exporting only matching rows.
        where_operator (str): 'eq' or 'neq'
        where_values (list): Values for exporting only matching rows.
        is_gzip (bool): Download the export gzipped.

    Returns:
        destination_file (str): Path to the local file with the exported
            data, named after the table.

    Raises:
        requests.HTTPError: If the API request fails.
        RuntimeError: If the export job finishes with an error.
    """
    table_detail = self.detail(table_id)
    job = self.export_raw(table_id=table_id, limit=limit,
                          file_format=file_format,
                          changed_since=changed_since,
                          changed_until=changed_until, columns=columns,
                          where_column=where_column,
                          where_values=where_values,
                          where_operator=where_operator, is_gzip=is_gzip)
    jobs = Jobs(self.root_url, self.token)
    job = jobs.block_until_completed(job['id'])
    if job['status'] == 'error':
        raise RuntimeError(job['error']['message'])
    files = Files(self.root_url, self.token)
    temp_path = tempfile.TemporaryDirectory()
    local_file = files.download(file_id=job['results']['file']['id'],
                                local_path=temp_path.name)
    destination_file = os.path.join(path_name, table_detail['name'])
    # the file containing the table export never has headers (it is always
    # sliced on Snowflake and Redshift), so a header row is written below
    if is_gzip:
        import gzip
        import shutil
        # decompress to a sibling file, then swap it in as the source
        with gzip.open(local_file, 'rb') as f_in, \
                open(local_file + '.un', 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.remove(local_file)
        local_file = local_file + '.un'
    with open(local_file, mode='rb') as in_file, \
            open(destination_file, mode='wb') as out_file:
        if columns is None:
            columns = table_detail['columns']
        columns = ['"{}"'.format(col) for col in columns]
        header = ",".join(columns) + '\n'
        out_file.write(header.encode('utf-8'))
        for line in in_file:
            out_file.write(line)
    return destination_file
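A sketch for export_to_file, under the same assumed import path; since the method prepends a header row itself, the result is a plain CSV named after the table inside path_name.

import os

from kbcstorage.tables import Tables  # assumed import path

tables = Tables(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
# 'in.c-demo.my-table' is a hypothetical table id; the returned path is
# <path_name>/<table name>
local_csv = tables.export_to_file(table_id='in.c-demo.my-table',
                                  path_name='/tmp')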
class TestJobsEndpointWithMocks(unittest.TestCase):
    """
    Test the methods of a Jobs endpoint instance with mock HTTP responses.
    """
    def setUp(self):
        token = 'dummy_token'
        base_url = 'https://connection.keboola.com/'
        self.jobs = Jobs(base_url, token)

    @responses.activate
    def test_list(self):
        """
        Jobs mocks list correctly.
        """
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs',
                json=list_response
            )
        )
        jobs_list = self.jobs.list()
        assert isinstance(jobs_list, list)

    @responses.activate
    def test_detail_by_id(self):
        """
        Jobs endpoint can mock detail by integer id.
        """
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json=detail_response
            )
        )
        job_id = 22077337
        job_detail = self.jobs.detail(job_id)
        assert job_detail['id'] == 22077337

    @responses.activate
    def test_job_status(self):
        """
        Jobs mock status works correctly.
        """
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json=detail_response
            )
        )
        job_id = 22077337
        job_status = self.jobs.status(job_id)
        assert job_status == 'success'

    @responses.activate
    def test_job_completion(self):
        """
        Jobs mock completion check works correctly.
        """
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json=detail_response
            )
        )
        job_id = 22077337
        job_completed = self.jobs.completed(job_id)
        assert job_completed is True

    @responses.activate
    def test_job_blocking(self):
        """
        Jobs mock blocking polls until completion.
        """
        # two 'processing' responses are consumed first, then the final one
        for _ in range(2):
            responses.add(
                responses.Response(
                    method='GET',
                    url=('https://connection.keboola.com/v2/storage/jobs/'
                         '22077337'),
                    json={'status': 'processing'}
                )
            )
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json=detail_response
            )
        )
        job_id = '22077337'
        self.jobs.block_until_completed(job_id)
        assert True

    @responses.activate
    def test_success_blocking_if_success(self):
        """
        Jobs mock blocking returns True when the job succeeds.
        """
        for _ in range(2):
            responses.add(
                responses.Response(
                    method='GET',
                    url=('https://connection.keboola.com/v2/storage/jobs/'
                         '22077337'),
                    json={'status': 'processing'}
                )
            )
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json={'status': 'success'}
            )
        )
        job_id = '22077337'
        success = self.jobs.block_for_success(job_id)
        assert success is True

    @responses.activate
    def test_success_blocking_if_error(self):
        """
        Jobs mock blocking returns False when the job ends in error.
        """
        for _ in range(2):
            responses.add(
                responses.Response(
                    method='GET',
                    url=('https://connection.keboola.com/v2/storage/jobs/'
                         '22077337'),
                    json={'status': 'processing'}
                )
            )
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json={'status': 'error'}
            )
        )
        job_id = '22077337'
        success = self.jobs.block_for_success(job_id)
        assert success is False
class TestWorkspaces(unittest.TestCase):
    def setUp(self):
        self.workspaces = Workspaces(os.getenv('KBC_TEST_API_URL'),
                                     os.getenv('KBC_TEST_TOKEN'))
        self.buckets = Buckets(os.getenv('KBC_TEST_API_URL'),
                               os.getenv('KBC_TEST_TOKEN'))
        self.jobs = Jobs(os.getenv('KBC_TEST_API_URL'),
                         os.getenv('KBC_TEST_TOKEN'))
        self.tables = Tables(os.getenv('KBC_TEST_API_URL'),
                             os.getenv('KBC_TEST_TOKEN'))
        self.files = Files(os.getenv('KBC_TEST_API_URL'),
                           os.getenv('KBC_TEST_TOKEN'))
        try:
            file_list = self.files.list(tags=['sapi-client-python-tests'])
            for file in file_list:
                self.files.delete(file['id'])
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        try:
            self.buckets.delete('in.c-py-test-buckets', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        # https://github.com/boto/boto3/issues/454
        warnings.simplefilter("ignore", ResourceWarning)

    def tearDown(self):
        try:
            if hasattr(self, 'workspace_id'):
                self.workspaces.delete(self.workspace_id)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        try:
            self.buckets.delete('in.c-py-test-tables', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise

    def test_create_workspace(self):
        workspace = self.workspaces.create()
        self.workspace_id = workspace['id']
        with self.subTest():
            self.assertTrue('id' in workspace)
        with self.subTest():
            self.assertTrue('type' in workspace)
            self.assertTrue(workspace['type'] in ['table', 'file'])
        with self.subTest():
            self.assertTrue('name' in workspace)
        with self.subTest():
            self.assertTrue('component' in workspace)
        with self.subTest():
            self.assertTrue('configurationId' in workspace)
        with self.subTest():
            self.assertTrue('created' in workspace)
        with self.subTest():
            self.assertTrue('connection' in workspace)
        with self.subTest():
            self.assertTrue('backend' in workspace['connection'])
        with self.subTest():
            self.assertTrue('creatorToken' in workspace)

    def test_load_tables_to_workspace(self):
        bucket_id = self.buckets.create('py-test-tables')['id']
        table1_id = self.__create_table(bucket_id, 'test-table-1', {
            'col1': 'ping', 'col2': 'pong'
        })
        table2_id = self.__create_table(bucket_id, 'test-table-2', {
            'col1': 'king', 'col2': 'kong'
        })
        workspace = self.workspaces.create()
        self.workspace_id = workspace['id']
        job = self.workspaces.load_tables(workspace['id'], {
            table1_id: 'destination_1',
            table2_id: 'destination_2'
        })
        self.jobs.block_until_completed(job['id'])
        job = self.tables.create_raw(bucket_id, 'back-and-forth-table',
                                     data_workspace_id=workspace['id'],
                                     data_table_name='destination_1')
        self.jobs.block_until_completed(job['id'])
        new_table = self.tables.detail(bucket_id + '.back-and-forth-table')
        self.assertEqual('back-and-forth-table', new_table['name'])

    # test loading files into an ABS workspace
    def test_load_files_to_workspace(self):
        if os.getenv('SKIP_ABS_TEST'):
            self.skipTest(
                'Skipping ABS test because env var SKIP_ABS_TEST was set')
        # put a test file to storage
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.write(file, bytes('fooBar', 'utf-8'))
        os.close(file)
        # we'll put 2 files with the same tag to test multiple results
        file1_id = self.files.upload_file(
            path, tags=['sapi-client-python-tests', 'file1'])
        file2_id = self.files.upload_file(
            path, tags=['sapi-client-python-tests', 'file2'])
        file1 = self.files.detail(file1_id)
        file2 = self.files.detail(file2_id)
        # create a workspace and load the files into it
        workspace = self.workspaces.create('abs')
        self.workspace_id = workspace['id']
        self.workspaces.load_files(workspace['id'], {
            'tags': ['sapi-client-python-tests'],
            'destination': 'data/in/files'
        })
        # assert that the files were loaded to the workspace
        blob_service_client = BlobServiceClient.from_connection_string(
            workspace['connection']['connectionString'])
        blob_client_1 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/files/%s/%s' % (file1['name'], str(file1['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_1.download_blob().readall().decode('utf-8'))
        blob_client_2 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/files/%s/%s' % (file2['name'], str(file2['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_2.download_blob().readall().decode('utf-8'))
        # now test the 'and' operator; in this case file2 should not get
        # loaded
        self.workspaces.load_files(workspace['id'], {
            'tags': ['sapi-client-python-tests', 'file1'],
            'operator': 'and',
            'destination': 'data/in/and_files'
        })
        # file 1 should be there
        blob_client_1 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/and_files/%s/%s' % (file1['name'],
                                              str(file1['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_1.download_blob().readall().decode('utf-8'))
        # file 2 should not
        blob_client_2 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/and_files/%s/%s' % (file2['name'],
                                              str(file2['id'])))
        with self.assertRaises(ResourceNotFoundError) as context:
            blob_client_2.download_blob().readall().decode('utf-8')
        self.assertTrue(
            'The specified blob does not exist' in str(context.exception))

    def __create_table(self, bucket_id, table_name, row):
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.close(file)  # mkstemp returns an open fd; close it before writing
        with open(path, 'w') as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=['col1', 'col2'],
                                    lineterminator='\n', delimiter=',',
                                    quotechar='"')
            writer.writeheader()
            writer.writerow(row)
        return self.tables.create(name=table_name, file_path=path,
                                  bucket_id=bucket_id)