Example #1
    def setUp(self):
        self.workspaces = Workspaces(os.getenv('KBC_TEST_API_URL'),
                                     os.getenv('KBC_TEST_TOKEN'))
        self.buckets = Buckets(os.getenv('KBC_TEST_API_URL'),
                               os.getenv('KBC_TEST_TOKEN'))
        self.jobs = Jobs(os.getenv('KBC_TEST_API_URL'),
                         os.getenv('KBC_TEST_TOKEN'))
        self.tables = Tables(os.getenv('KBC_TEST_API_URL'),
                             os.getenv('KBC_TEST_TOKEN'))
        self.files = Files(os.getenv('KBC_TEST_API_URL'),
                           os.getenv('KBC_TEST_TOKEN'))
        try:
            file_list = self.files.list(tags=['sapi-client-python-tests'])
            for file in file_list:
                self.files.delete(file['id'])
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        try:
            self.buckets.delete('in.c-py-test-buckets', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        # https://github.com/boto/boto3/issues/454
        warnings.simplefilter("ignore", ResourceWarning)
Example #2
    def load_files(self, workspace_id, file_mapping):
        """
        Load files from file storage into a workspace.
        Only ABS (Azure Blob Storage) workspaces are supported.
        Each matching file is written to "{destination}/file_name/file_id".

        Args:
            workspace_id (int or str): The id of the workspace into which to
                load the files.
            file_mapping (:obj:`dict`): Mapping with the keys:
                tags (list): File tags to match.
                operator (str): 'or' or 'and'; defaults to 'or'.
                destination (str): Destination path without a trailing '/'.

        Raises:
            requests.HTTPError: If the API request fails.
        """
        workspace = self.detail(workspace_id)
        if (workspace['type'] != 'file'
                and workspace['connection']['backend'] != 'abs'):
            raise Exception(
                'Loading files to workspace is only available for ABS workspaces'
            )
        files = Files(self.root_url, self.token)
        if ('operator' in file_mapping and file_mapping['operator'] == 'and'):
            query = ' AND '.join(
                map(lambda tag: 'tags:"' + tag + '"', file_mapping['tags']))
            file_list = files.list(q=query)
        else:
            file_list = files.list(tags=file_mapping['tags'])

        jobs = Jobs(self.root_url, self.token)
        jobs_list = []
        for file in file_list:
            inputs = {
                file['id']:
                "%s/%s" % (file_mapping['destination'], file['name'])
            }
            body = _make_body(inputs, source_key='dataFileId')
            # always preserve the workspace so each load keeps previously loaded files
            body['preserve'] = 1
            url = '{}/{}/load'.format(self.base_url, workspace['id'])
            job = self._post(url, data=body)
            jobs_list.append(job)

        for job in jobs_list:
            if not jobs.block_for_success(job['id']):
                try:
                    print("Failed to load a file with error: %s" %
                          job['results']['message'])
                except KeyError:
                    print(
                        "An unknown error occurred loading data.  Job ID %s" %
                        job['id'])
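
A minimal usage sketch for load_files above, assuming the kbcstorage package import paths and the same environment variables the tests use; the tag, operator, and destination values are purely illustrative:

import os
from kbcstorage.workspaces import Workspaces

workspaces = Workspaces(os.getenv('KBC_TEST_API_URL'),
                        os.getenv('KBC_TEST_TOKEN'))
workspace = workspaces.create('abs')  # load_files requires an ABS file workspace
workspaces.load_files(workspace['id'], {
    'tags': ['my-tag'],              # match files carrying this tag
    'operator': 'and',               # require all listed tags; defaults to 'or'
    'destination': 'data/in/files'   # destination path, no trailing slash
})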
Example #3
    def setUp(self):

        # brief wait so files from previous tests show up in the listing
        time.sleep(1)
        self.files = Files(os.getenv('KBC_TEST_API_URL'),
                           os.getenv('KBC_TEST_TOKEN'))
        files = self.files.list(tags=['py-test'])
        for file in files:
            self.files.delete(file['id'])
        # https://github.com/boto/boto3/issues/454
        warnings.simplefilter("ignore", ResourceWarning)
Example #4
    def load(self,
             table_id,
             file_path,
             is_incremental=False,
             delimiter=',',
             enclosure='"',
             escaped_by='',
             columns=None,
             without_headers=False):
        """
        Load data into an existing table

        Args:
            table_id (str): Table id
            file_path (str): Path to local CSV file.
            is_incremental (bool): Load incrementally (do not truncate table).
            delimiter (str): Field delimiter used in the CSV file.
            enclosure (str): Field enclosure used in the CSV file.
            escaped_by (str): Escape character used in the CSV file.
            columns (list): List of columns
            without_headers (bool): CSV does not contain headers

        Returns:
            response_body: The parsed json from the HTTP response
                containing write results

        Raises:
            requests.HTTPError: If the API request fails.
        """
        files = Files(self.root_url, self.token)
        file_id = files.upload_file(file_path=file_path,
                                    tags=['file-import'],
                                    do_notify=False,
                                    is_public=False)
        job = self.load_raw(table_id=table_id,
                            data_file_id=file_id,
                            delimiter=delimiter,
                            enclosure=enclosure,
                            escaped_by=escaped_by,
                            is_incremental=is_incremental,
                            columns=columns,
                            without_headers=without_headers)
        jobs = Jobs(self.root_url, self.token)
        job = jobs.block_until_completed(job['id'])
        if job['status'] == 'error':
            raise RuntimeError(job['error']['message'])
        return job['results']
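
A minimal usage sketch for the load method above, assuming the kbcstorage import path; the table id and CSV path are illustrative:

import os
from kbcstorage.tables import Tables

tables = Tables(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
write_results = tables.load(table_id='in.c-my-bucket.my-table',  # existing table
                            file_path='/tmp/rows.csv',           # local CSV with a header row
                            is_incremental=True)                 # append instead of truncating
print(write_results)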
Example #5
    def create(self,
               bucket_id,
               name,
               file_path,
               delimiter=',',
               enclosure='"',
               escaped_by='',
               primary_key=None):
        """
        Create a new table from CSV file.

        Args:
            bucket_id (str): Bucket id where table is created
            name (str): The new table name (only alphanumeric and underscores)
            file_path (str): Path to local CSV file.
            delimiter (str): Field delimiter used in the CSV file.
            enclosure (str): Field enclosure used in the CSV file.
            escaped_by (str): Escape character used in the CSV file.
            primary_key (list): Primary key of a table.

        Returns:
            table_id (str): Id of the created table.

        Raises:
            requests.HTTPError: If the API request fails.
        """
        files = Files(self.root_url, self.token)
        file_id = files.upload_file(file_path=file_path,
                                    tags=['file-import'],
                                    do_notify=False,
                                    is_public=False)
        job = self.create_raw(bucket_id=bucket_id,
                              name=name,
                              data_file_id=file_id,
                              delimiter=delimiter,
                              enclosure=enclosure,
                              escaped_by=escaped_by,
                              primary_key=primary_key)
        jobs = Jobs(self.root_url, self.token)
        job = jobs.block_until_completed(job['id'])
        if job['status'] == 'error':
            raise RuntimeError(job['error']['message'])
        return job['results']['id']
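
A minimal usage sketch for create above, under the same assumptions; the bucket id, table name, and primary key are illustrative:

import os
from kbcstorage.tables import Tables

tables = Tables(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
table_id = tables.create(bucket_id='in.c-my-bucket',
                         name='my_table',
                         file_path='/tmp/rows.csv',
                         primary_key=['id'])
print(table_id)  # id of the newly created table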
Example #6
    def __init__(self, api_domain, token):
        """
        Initialise a client.

        Args:
            api_domain (str): The domain on which the API sits. eg.
                "https://connection.keboola.com".
            token (str): A storage API key.
        """
        self.root_url = api_domain
        self._token = token

        self.buckets = Buckets(self.root_url, self.token)
        self.files = Files(self.root_url, self.token)
        self.jobs = Jobs(self.root_url, self.token)
        self.tables = Tables(self.root_url, self.token)
        self.workspaces = Workspaces(self.root_url, self.token)
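
A minimal sketch of constructing the client above and reaching one of the sub-clients it wires up; the bucket listing call assumes Buckets exposes a list() method and is only an illustration:

import os
from kbcstorage.client import Client

client = Client(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
for bucket in client.buckets.list():  # illustrative call on the Buckets sub-client
    print(bucket['id'])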
Example #7
    def export_to_file(self,
                       table_id,
                       path_name,
                       limit=None,
                       file_format='rfc',
                       changed_since=None,
                       changed_until=None,
                       columns=None,
                       where_column=None,
                       where_values=None,
                       where_operator='eq',
                       is_gzip=True):
        """
        Export data from a table to a local file

        Args:
            table_id (str): Table id
            path_name (str): Destination path for file.
            limit (int): Number of rows to export.
            file_format (str): 'rfc', 'escaped' or 'raw'
            changed_until (str): Filtering by import date
                Both until and since values can be a unix timestamp or any
                date accepted by strtotime.
            changed_since (str): Filtering by import date
                Both until and since values can be a unix timestamp or any
                date accepted by strtotime.
            where_column (str): Column for exporting only matching rows
            where_operator (str): 'eq' or 'neq'
            where_values (list): Values for exporting only matching rows
            columns (list): List of columns to display
            is_gzip (bool): Result will be gzipped

        Returns:
            destination_file: Local file with exported data

        Raises:
            requests.HTTPError: If the API request fails.
        """

        table_detail = self.detail(table_id)
        job = self.export_raw(table_id=table_id,
                              limit=limit,
                              file_format=file_format,
                              changed_since=changed_since,
                              changed_until=changed_until,
                              columns=columns,
                              where_column=where_column,
                              where_values=where_values,
                              where_operator=where_operator,
                              is_gzip=is_gzip)
        jobs = Jobs(self.root_url, self.token)
        job = jobs.block_until_completed(job['id'])
        if job['status'] == 'error':
            raise RuntimeError(job['error']['message'])
        files = Files(self.root_url, self.token)
        temp_path = tempfile.TemporaryDirectory()
        local_file = files.download(file_id=job['results']['file']['id'],
                                    local_path=temp_path.name)
        destination_file = os.path.join(path_name, table_detail['name'])
        # the file containing the table export is always without headers (it is
        # always sliced on Snowflake and Redshift)
        if is_gzip:
            import gzip
            import shutil
            with gzip.open(local_file, 'rb') as f_in, \
                    open(local_file + '.un', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(local_file)
            local_file = local_file + '.un'

        with open(local_file, mode='rb') as in_file, \
                open(destination_file, mode='wb') as out_file:
            if columns is None:
                columns = table_detail['columns']
            columns = ['"{}"'.format(col) for col in columns]
            header = ",".join(columns) + '\n'
            out_file.write(header.encode('utf-8'))
            for line in in_file:
                out_file.write(line)
        return destination_file
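
A minimal usage sketch for export_to_file above, assuming the kbcstorage import path; the table id is illustrative and the export lands in a temporary directory:

import os
import tempfile
from kbcstorage.tables import Tables

tables = Tables(os.getenv('KBC_TEST_API_URL'), os.getenv('KBC_TEST_TOKEN'))
out_dir = tempfile.mkdtemp()
local_csv = tables.export_to_file(table_id='in.c-my-bucket.my-table',
                                  path_name=out_dir)
with open(local_csv) as f:
    print(f.readline())  # header row rebuilt from the table's column list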
Example #8
class TestWorkspaces(unittest.TestCase):
    def setUp(self):
        self.workspaces = Workspaces(os.getenv('KBC_TEST_API_URL'),
                                     os.getenv('KBC_TEST_TOKEN'))
        self.buckets = Buckets(os.getenv('KBC_TEST_API_URL'),
                               os.getenv('KBC_TEST_TOKEN'))
        self.jobs = Jobs(os.getenv('KBC_TEST_API_URL'),
                         os.getenv('KBC_TEST_TOKEN'))
        self.tables = Tables(os.getenv('KBC_TEST_API_URL'),
                             os.getenv('KBC_TEST_TOKEN'))
        self.files = Files(os.getenv('KBC_TEST_API_URL'),
                           os.getenv('KBC_TEST_TOKEN'))
        try:
            file_list = self.files.list(tags=['sapi-client-python-tests'])
            for file in file_list:
                self.files.delete(file['id'])
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        try:
            self.buckets.delete('in.c-py-test-buckets', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        # https://github.com/boto/boto3/issues/454
        warnings.simplefilter("ignore", ResourceWarning)

    def tearDown(self):
        try:
            if hasattr(self, 'workspace_id'):
                self.workspaces.delete(self.workspace_id)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        try:
            self.buckets.delete('in.c-py-test-tables', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise

    def test_create_workspace(self):
        workspace = self.workspaces.create()
        self.workspace_id = workspace['id']
        with self.subTest():
            self.assertTrue('id' in workspace)
        with self.subTest():
            self.assertTrue('type' in workspace)
            self.assertTrue(workspace['type'] in ['table', 'file'])
        with self.subTest():
            self.assertTrue('name' in workspace)
        with self.subTest():
            self.assertTrue('component' in workspace)
        with self.subTest():
            self.assertTrue('configurationId' in workspace)
        with self.subTest():
            self.assertTrue('created' in workspace)
        with self.subTest():
            self.assertTrue('connection' in workspace)
        with self.subTest():
            self.assertTrue('backend' in workspace['connection'])
        with self.subTest():
            self.assertTrue('creatorToken' in workspace)

    def test_load_tables_to_workspace(self):
        bucket_id = self.buckets.create('py-test-tables')['id']
        table1_id = self.__create_table(bucket_id, 'test-table-1', {
            'col1': 'ping',
            'col2': 'pong'
        })
        table2_id = self.__create_table(bucket_id, 'test-table-2', {
            'col1': 'king',
            'col2': 'kong'
        })
        workspace = self.workspaces.create()
        self.workspace_id = workspace['id']
        job = self.workspaces.load_tables(workspace['id'], {
            table1_id: 'destination_1',
            table2_id: 'destination_2'
        })
        self.jobs.block_until_completed(job['id'])

        job = self.tables.create_raw(bucket_id,
                                     'back-and-forth-table',
                                     data_workspace_id=workspace['id'],
                                     data_table_name='destination_1')
        self.jobs.block_until_completed(job['id'])

        new_table = self.tables.detail(bucket_id + '.back-and-forth-table')
        self.assertEqual('back-and-forth-table', new_table['name'])

    # test loading files into an ABS workspace
    def test_load_files_to_workspace(self):
        if os.getenv('SKIP_ABS_TEST'):
            self.skipTest(
                'Skipping ABS test because env var SKIP_ABS_TEST was set')
        # put a test file to storage
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.write(file, bytes('fooBar', 'utf-8'))
        os.close(file)

        # We'll put 2 files with the same tag to test multiple results
        file1_id = self.files.upload_file(
            path, tags=['sapi-client-python-tests', 'file1'])
        file2_id = self.files.upload_file(
            path, tags=['sapi-client-python-tests', 'file2'])

        file1 = self.files.detail(file1_id)
        file2 = self.files.detail(file2_id)
        # create a workspace and load the file to it
        workspace = self.workspaces.create('abs')
        self.workspace_id = workspace['id']
        self.workspaces.load_files(workspace['id'], {
            'tags': ['sapi-client-python-tests'],
            'destination': 'data/in/files'
        })

        # assert that the file was loaded to the workspace
        blob_service_client = BlobServiceClient.from_connection_string(
            workspace['connection']['connectionString'])
        blob_client_1 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/files/%s/%s' % (file1['name'], str(file1['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_1.download_blob().readall().decode('utf-8'))

        blob_client_2 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/files/%s/%s' % (file2['name'], str(file2['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_2.download_blob().readall().decode('utf-8'))

        # now test the 'and' operator; in this case file2 should not get loaded
        self.workspaces.load_files(
            workspace['id'], {
                'tags': ['sapi-client-python-tests', 'file1'],
                'operator': 'and',
                'destination': 'data/in/and_files'
            })
        # file 1 should be there
        blob_client_1 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/and_files/%s/%s' % (file1['name'], str(file1['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_1.download_blob().readall().decode('utf-8'))

        # file 2 should not
        blob_client_2 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/and_files/%s/%s' % (file2['name'], str(file2['id'])))
        with self.assertRaises(ResourceNotFoundError) as context:
            blob_client_2.download_blob().readall().decode('utf-8')

        self.assertTrue(
            'The specified blob does not exist' in str(context.exception))

    def __create_table(self, bucket_id, table_name, row):
        file, path = tempfile.mkstemp(prefix='sapi-test')
        with open(path, 'w') as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=['col1', 'col2'],
                                    lineterminator='\n',
                                    delimiter=',',
                                    quotechar='"')
            writer.writeheader()
            writer.writerow(row)
        return self.tables.create(name=table_name,
                                  file_path=path,
                                  bucket_id=bucket_id)
Example #9
class TestFiles(unittest.TestCase):
    def setUp(self):

        # brief wait so files from previous tests show up in the listing
        time.sleep(1)
        self.files = Files(os.getenv('KBC_TEST_API_URL'),
                           os.getenv('KBC_TEST_TOKEN'))
        files = self.files.list(tags=['py-test'])
        for file in files:
            self.files.delete(file['id'])
        # https://github.com/boto/boto3/issues/454
        warnings.simplefilter("ignore", ResourceWarning)

    def tearDown(self):
        # brief wait so files created by this test show up in the listing
        time.sleep(1)
        files = self.files.list(tags=['py-test'])
        for file in files:
            self.files.delete(file['id'])

    def test_create_file(self):
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.write(file, bytes('fooBar', 'utf-8'))
        file_id = self.files.upload_file(path, tags=['py-test', 'file1'])
        os.close(file)
        file_info = self.files.detail(file_id)
        with self.subTest():
            self.assertEqual(file_id, file_info['id'])
        with self.subTest():
            self.assertEqual(6, file_info['sizeBytes'])
        with self.subTest():
            self.assertFalse(file_info['isPublic'])
        with self.subTest():
            self.assertFalse(file_info['isSliced'])
        with self.subTest():
            self.assertTrue(file_info['isEncrypted'])
        with self.subTest():
            self.assertTrue('name' in file_info)
        with self.subTest():
            self.assertTrue('created' in file_info)
        with self.subTest():
            self.assertTrue('url' in file_info)
        with self.subTest():
            self.assertTrue('region' in file_info)
        with self.subTest():
            self.assertTrue('creatorToken' in file_info)
        with self.subTest():
            self.assertTrue('tags' in file_info)
        with self.subTest():
            self.assertTrue('py-test' in file_info['tags'])
        with self.subTest():
            self.assertTrue('file1' in file_info['tags'])
        with self.subTest():
            self.assertFalse('credentials' in file_info)

    def test_create_file_compress(self):
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.write(file, bytes('fooBar', 'utf-8'))
        file_id = self.files.upload_file(path,
                                         tags=['py-test', 'file1'],
                                         compress=True)
        os.close(file)
        time.sleep(1)
        file_info = self.files.detail(file_id)
        with self.subTest():
            self.assertEqual(file_id, file_info['id'])

    def test_delete_file(self):
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.write(file, bytes('fooBar', 'utf-8'))
        file_id = self.files.upload_file(path, tags=['py-test', 'file1'])
        os.close(file)
        time.sleep(1)
        with self.subTest():
            self.assertEqual(file_id, self.files.detail(file_id)['id'])
        self.files.delete(file_id)
        try:
            self.assertEqual(file_id, self.files.detail(file_id)['id'])
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise

    def test_download_file_credentials(self):
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.write(file, bytes('fooBar', 'utf-8'))
        file_id = self.files.upload_file(path, tags=['py-test', 'file1'])
        os.close(file)
        time.sleep(1)
        file_info = self.files.detail(file_id, federation_token=True)
        with self.subTest():
            self.assertEqual(file_id, file_info['id'])
        with self.subTest():
            self.assertTrue(file_info['provider'] in ['aws', 'azure'])
        if file_info['provider'] == 'aws':
            with self.subTest():
                self.assertTrue('credentials' in file_info)
            with self.subTest():
                self.assertTrue('AccessKeyId' in file_info['credentials'])
            with self.subTest():
                self.assertTrue('SecretAccessKey' in file_info['credentials'])
            with self.subTest():
                self.assertTrue('SessionToken' in file_info['credentials'])
        elif file_info['provider'] == 'azure':
            with self.subTest():
                self.assertTrue('absCredentials' in file_info)
            with self.subTest():
                self.assertTrue(
                    'SASConnectionString' in file_info['absCredentials'])
            with self.subTest():
                self.assertTrue('expiration' in file_info['absCredentials'])
            with self.subTest():
                self.assertTrue('absPath' in file_info)
            with self.subTest():
                self.assertTrue('container' in file_info['absPath'])
            with self.subTest():
                self.assertTrue('name' in file_info['absPath'])

    def test_download_file(self):
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.write(file, bytes('fooBar', 'utf-8'))
        file_id = self.files.upload_file(path, tags=['py-test', 'file1'])
        os.close(file)
        tmp = tempfile.TemporaryDirectory()
        local_path = self.files.download(file_id, tmp.name)
        with open(local_path, mode='rb') as file:
            data = file.read()
        self.assertEqual('fooBar', data.decode('utf-8'))

    def test_download_file_sliced(self):
        buckets = Buckets(os.getenv('KBC_TEST_API_URL'),
                          os.getenv('KBC_TEST_TOKEN'))
        try:
            buckets.delete('in.c-py-test-files', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        buckets.create(name='py-test-files', stage='in')

        tables = Tables(os.getenv('KBC_TEST_API_URL'),
                        os.getenv('KBC_TEST_TOKEN'))
        file, path = tempfile.mkstemp(prefix='sapi-test')
        with open(path, 'w') as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=['col1', 'col2'],
                                    lineterminator='\n',
                                    delimiter=',',
                                    quotechar='"')
            writer.writeheader()
            writer.writerow({'col1': 'ping', 'col2': 'pong'})
        os.close(file)
        table_id = tables.create(name='some-table',
                                 file_path=path,
                                 bucket_id='in.c-py-test-files')
        file, path = tempfile.mkstemp(prefix='sapi-test')
        with open(path, 'w') as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=['col1', 'col2'],
                                    lineterminator='\n',
                                    delimiter=',',
                                    quotechar='"')
            writer.writeheader()
            writer.writerow({'col1': 'foo', 'col2': 'bar'})
        os.close(file)
        tables.load(table_id=table_id, file_path=path, is_incremental=True)
        file_id = tables.export(table_id=table_id)
        temp_path = tempfile.TemporaryDirectory()
        local_path = self.files.download(file_id, temp_path.name)
        with open(local_path, mode='rt') as file:
            lines = file.readlines()
        self.assertEqual(['"foo","bar"\n', '"ping","pong"\n'], sorted(lines))