Example No. 1
    def export(self,
               table_id,
               limit=None,
               file_format='rfc',
               changed_since=None,
               changed_until=None,
               columns=None,
               where_column=None,
               where_values=None,
               where_operator='eq',
               is_gzip=False):
        """
        Export data from a table to a Storage file

        Args:
            table_id (str): Table id
            limit (int): Number of rows to export.
            file_format (str): 'rfc', 'escaped' or 'raw'
            changed_since (str): Filter by import date; accepts a unix
                timestamp or any date accepted by strtotime.
            changed_until (str): Filter by import date; accepts a unix
                timestamp or any date accepted by strtotime.
            columns (list): List of columns to export
            where_column (str): Column for exporting only matching rows
            where_values (list): Values for exporting only matching rows
            where_operator (str): 'eq' or 'neq'
            is_gzip (bool): Result will be gzipped

        Returns:
            file_id (str): Id of the Storage file containing the exported
                data.

        Raises:
            requests.HTTPError: If the API request fails.
            RuntimeError: If the export job finishes with an error.
        """

        job = self.export_raw(table_id=table_id,
                              limit=limit,
                              file_format=file_format,
                              changed_since=changed_since,
                              changed_until=changed_until,
                              columns=columns,
                              where_column=where_column,
                              where_values=where_values,
                              where_operator=where_operator,
                              is_gzip=is_gzip)
        jobs = Jobs(self.root_url, self.token)
        job = jobs.block_until_completed(job['id'])
        if job['status'] == 'error':
            raise RuntimeError(job['error']['message'])
        return job['results']['file']['id']
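
A minimal usage sketch for export(), assuming the Tables endpoint is constructed with a root URL and token as in the tests further below; the import path, env var names and table id are illustrative, not prescribed by this client.

import os

from kbcstorage.tables import Tables  # import path assumed

# Hypothetical configuration: point these at your own project URL and token.
tables = Tables(os.environ['KBC_URL'], os.environ['KBC_TOKEN'])

# Export at most 100 rows; the call blocks until the export job finishes
# and returns the id of the Storage file holding the exported data.
file_id = tables.export(table_id='in.c-my-bucket.my-table', limit=100)
print(file_id)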
Example No. 2
    def load(self,
             table_id,
             file_path,
             is_incremental=False,
             delimiter=',',
             enclosure='"',
             escaped_by='',
             columns=None,
             without_headers=False):
        """
        Load data into an existing table

        Args:
            table_id (str): Table id
            file_path (str): Path to local CSV file.
            is_incremental (bool): Load incrementally (do not truncate table).
            delimiter (str): Field delimiter used in the CSV file.
            enclosure (str): Field enclosure used in the CSV file.
            escaped_by (str): Escape character used in the CSV file.
            columns (list): List of columns of the imported data
            without_headers (bool): CSV does not contain headers

        Returns:
            results (dict): The results of the completed load job, parsed
                from the JSON response.

        Raises:
            requests.HTTPError: If the API request fails.
            RuntimeError: If the load job finishes with an error.
        """
        files = Files(self.root_url, self.token)
        file_id = files.upload_file(file_path=file_path,
                                    tags=['file-import'],
                                    do_notify=False,
                                    is_public=False)
        job = self.load_raw(table_id=table_id,
                            data_file_id=file_id,
                            delimiter=delimiter,
                            enclosure=enclosure,
                            escaped_by=escaped_by,
                            is_incremental=is_incremental,
                            columns=columns,
                            without_headers=without_headers)
        jobs = Jobs(self.root_url, self.token)
        job = jobs.block_until_completed(job['id'])
        if job['status'] == 'error':
            raise RuntimeError(job['error']['message'])
        return job['results']
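
A similar hedged sketch for load(): push a local CSV into an existing table without truncating it first. Paths, ids and env var names are placeholders.

import os

from kbcstorage.tables import Tables  # import path assumed

tables = Tables(os.environ['KBC_URL'], os.environ['KBC_TOKEN'])  # hypothetical env vars

# Append rows from a local CSV; the call blocks until the import job
# finishes and returns its results.
results = tables.load(table_id='in.c-my-bucket.my-table',
                      file_path='/tmp/new-rows.csv',
                      is_incremental=True)
print(results)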
Example No. 3
    def create(self,
               bucket_id,
               name,
               file_path,
               delimiter=',',
               enclosure='"',
               escaped_by='',
               primary_key=None):
        """
        Create a new table from CSV file.

        Args:
            bucket_id (str): Bucket id where table is created
            name (str): The new table name (only alphanumeric and underscores)
            file_path (str): Path to local CSV file.
            delimiter (str): Field delimiter used in the CSV file.
            enclosure (str): Field enclosure used in the CSV file.
            escaped_by (str): Escape character used in the CSV file.
            primary_key (list): Primary key of a table.

        Returns:
            table_id (str): Id of the created table.

        Raises:
            requests.HTTPError: If the API request fails.
            RuntimeError: If the table creation job finishes with an error.
        """
        files = Files(self.root_url, self.token)
        file_id = files.upload_file(file_path=file_path,
                                    tags=['file-import'],
                                    do_notify=False,
                                    is_public=False)
        job = self.create_raw(bucket_id=bucket_id,
                              name=name,
                              data_file_id=file_id,
                              delimiter=delimiter,
                              enclosure=enclosure,
                              escaped_by=escaped_by,
                              primary_key=primary_key)
        jobs = Jobs(self.root_url, self.token)
        job = jobs.block_until_completed(job['id'])
        if job['status'] == 'error':
            raise RuntimeError(job['error']['message'])
        return job['results']['id']
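
A short sketch of create(): write a small CSV and turn it into a new table, much like the __create_table helper in the workspace tests further below. The bucket id, file path and columns are placeholders.

import csv
import os

from kbcstorage.tables import Tables  # import path assumed

tables = Tables(os.environ['KBC_URL'], os.environ['KBC_TOKEN'])  # hypothetical env vars

# Write a tiny CSV, then create a table from it; create() blocks until the
# job finishes and returns the new table id.
with open('/tmp/cities.csv', 'w', newline='') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=['city', 'country'])
    writer.writeheader()
    writer.writerow({'city': 'Prague', 'country': 'CZ'})

table_id = tables.create(bucket_id='in.c-my-bucket',
                         name='cities',
                         file_path='/tmp/cities.csv',
                         primary_key=['city'])
print(table_id)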
Example No. 4
    def export_to_file(self,
                       table_id,
                       path_name,
                       limit=None,
                       file_format='rfc',
                       changed_since=None,
                       changed_until=None,
                       columns=None,
                       where_column=None,
                       where_values=None,
                       where_operator='eq',
                       is_gzip=True):
        """
        Export data from a table to a local file

        Args:
            table_id (str): Table id
            path_name (str): Destination directory for the exported file.
            limit (int): Number of rows to export.
            file_format (str): 'rfc', 'escaped' or 'raw'
            changed_since (str): Filter by import date; accepts a unix
                timestamp or any date accepted by strtotime.
            changed_until (str): Filter by import date; accepts a unix
                timestamp or any date accepted by strtotime.
            columns (list): List of columns to export
            where_column (str): Column for exporting only matching rows
            where_values (list): Values for exporting only matching rows
            where_operator (str): 'eq' or 'neq'
            is_gzip (bool): Result will be gzipped

        Returns:
            destination_file (str): Path to the local file containing the
                exported data.

        Raises:
            requests.HTTPError: If the API request fails.
            RuntimeError: If the export job finishes with an error.
        """

        table_detail = self.detail(table_id)
        job = self.export_raw(table_id=table_id,
                              limit=limit,
                              file_format=file_format,
                              changed_since=changed_since,
                              changed_until=changed_until,
                              columns=columns,
                              where_column=where_column,
                              where_values=where_values,
                              where_operator=where_operator,
                              is_gzip=is_gzip)
        jobs = Jobs(self.root_url, self.token)
        job = jobs.block_until_completed(job['id'])
        if job['status'] == 'error':
            raise RuntimeError(job['error']['message'])
        files = Files(self.root_url, self.token)
        temp_path = tempfile.TemporaryDirectory()
        local_file = files.download(file_id=job['results']['file']['id'],
                                    local_path=temp_path.name)
        destination_file = os.path.join(path_name, table_detail['name'])
        # the file containing the table export is always without headers (it
        # is always sliced on Snowflake and Redshift)
        if is_gzip:
            import gzip
            import shutil
            with gzip.open(local_file, 'rb') as f_in, \
                    open(local_file + '.un', 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
            os.remove(local_file)
            local_file = local_file + '.un'

        with open(local_file, mode='rb') as in_file, \
                open(destination_file, mode='wb') as out_file:
            if columns is None:
                columns = table_detail['columns']
            columns = ['"{}"'.format(col) for col in columns]
            header = ",".join(columns) + '\n'
            out_file.write(header.encode('utf-8'))
            for line in in_file:
                out_file.write(line)
        return destination_file
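
And a sketch of the one-call variant export_to_file(), which downloads the export and re-adds the header row that the headerless server-side slices omit. The destination directory, table id and env var names are placeholders.

import os

from kbcstorage.tables import Tables  # import path assumed

tables = Tables(os.environ['KBC_URL'], os.environ['KBC_TOKEN'])  # hypothetical env vars

# Writes a file named after the table into /tmp and returns its path.
destination = tables.export_to_file(table_id='in.c-my-bucket.my-table',
                                    path_name='/tmp')
print(destination)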
Example No. 5
class TestJobsEndpointWithMocks(unittest.TestCase):
    """
    Test the methods of a Jobs endpoint instance with mock HTTP responses
    """
    def setUp(self):
        token = 'dummy_token'
        base_url = 'https://connection.keboola.com/'
        self.jobs = Jobs(base_url, token)

    @responses.activate
    def test_list(self):
        """
        Jobs endpoint lists jobs correctly against a mocked response
        """
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs',
                json=list_response
            )
        )
        jobs_list = self.jobs.list()
        assert isinstance(jobs_list, list)

    @responses.activate
    def test_detail_by_id(self):
        """
        Jobs Endpoint can mock detail by integer id
        """
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json=detail_response
            )
        )
        job_id = 22077337
        job_detail = self.jobs.detail(job_id)
        assert job_detail['id'] == 22077337

    @responses.activate
    def test_job_status(self):
        """
        Jobs mock status works correctly.
        """
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json=detail_response
            )
        )
        job_id = 22077337
        job_status = self.jobs.status(job_id)
        assert job_status == 'success'

    @responses.activate
    def test_job_completion(self):
        """
        Jobs mock completion check works correctly.
        """
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json=detail_response
            )
        )
        job_id = 22077337
        job_completed = self.jobs.completed(job_id)
        assert job_completed is True

    @responses.activate
    def test_job_blocking(self):
        """
        Jobs mock blocking polls until completion.
        """
        for _ in range(2):
            responses.add(
                responses.Response(
                    method='GET',
                    url=('https://connection.keboola.com/v2/storage/jobs/'
                         '22077337'),
                    json={'status': 'processing'}
                )
            )
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json=detail_response
            )
        )
        job_id = '22077337'
        job = self.jobs.block_until_completed(job_id)
        assert job['status'] == 'success'

    @responses.activate
    def test_success_blocking_if_success(self):
        """
        Jobs mock block_for_success returns True when the job succeeds.
        """
        for _ in range(2):
            responses.add(
                responses.Response(
                    method='GET',
                    url=('https://connection.keboola.com/v2/storage/jobs/'
                         '22077337'),
                    json={'status': 'processing'}
                )
            )
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json={'status': 'success'}
            )
        )
        job_id = '22077337'
        success = self.jobs.block_for_success(job_id)
        assert success is True

    @responses.activate
    def test_success_blocking_if_error(self):
        """
        Jobs mock block_for_success returns False when the job errors.
        """
        for _ in range(2):
            responses.add(
                responses.Response(
                    method='GET',
                    url=('https://connection.keboola.com/v2/storage/jobs/'
                         '22077337'),
                    json={'status': 'processing'}
                )
            )
        responses.add(
            responses.Response(
                method='GET',
                url='https://connection.keboola.com/v2/storage/jobs/22077337',
                json={'status': 'error'}
            )
        )
        job_id = '22077337'
        success = self.jobs.block_for_success(job_id)
        assert success is False
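
The polling tests above work because the responses library serves mocks registered for the same URL in registration order and keeps repeating the last one once the list is exhausted. A standalone sketch of that behaviour with plain requests:

import requests
import responses


@responses.activate
def demo_ordered_mocks():
    url = 'https://connection.keboola.com/v2/storage/jobs/22077337'
    # Registered in order: two 'processing' responses, then 'success'.
    responses.add(responses.GET, url, json={'status': 'processing'})
    responses.add(responses.GET, url, json={'status': 'processing'})
    responses.add(responses.GET, url, json={'status': 'success'})
    statuses = [requests.get(url).json()['status'] for _ in range(3)]
    assert statuses == ['processing', 'processing', 'success']


demo_ordered_mocks()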
Example No. 6
class TestWorkspaces(unittest.TestCase):
    def setUp(self):
        self.workspaces = Workspaces(os.getenv('KBC_TEST_API_URL'),
                                     os.getenv('KBC_TEST_TOKEN'))
        self.buckets = Buckets(os.getenv('KBC_TEST_API_URL'),
                               os.getenv('KBC_TEST_TOKEN'))
        self.jobs = Jobs(os.getenv('KBC_TEST_API_URL'),
                         os.getenv('KBC_TEST_TOKEN'))
        self.tables = Tables(os.getenv('KBC_TEST_API_URL'),
                             os.getenv('KBC_TEST_TOKEN'))
        self.files = Files(os.getenv('KBC_TEST_API_URL'),
                           os.getenv('KBC_TEST_TOKEN'))
        try:
            file_list = self.files.list(tags=['sapi-client-python-tests'])
            for file in file_list:
                self.files.delete(file['id'])
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        try:
            self.buckets.delete('in.c-py-test-buckets', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        # https://github.com/boto/boto3/issues/454
        warnings.simplefilter("ignore", ResourceWarning)

    def tearDown(self):
        try:
            if hasattr(self, 'workspace_id'):
                self.workspaces.delete(self.workspace_id)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
        try:
            self.buckets.delete('in.c-py-test-tables', force=True)
        except exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise

    def test_create_workspace(self):
        workspace = self.workspaces.create()
        self.workspace_id = workspace['id']
        with self.subTest():
            self.assertTrue('id' in workspace)
        with self.subTest():
            self.assertTrue('type' in workspace)
            self.assertTrue(workspace['type'] in ['table', 'file'])
        with self.subTest():
            self.assertTrue('name' in workspace)
        with self.subTest():
            self.assertTrue('component' in workspace)
        with self.subTest():
            self.assertTrue('configurationId' in workspace)
        with self.subTest():
            self.assertTrue('created' in workspace)
        with self.subTest():
            self.assertTrue('connection' in workspace)
        with self.subTest():
            self.assertTrue('backend' in workspace['connection'])
        with self.subTest():
            self.assertTrue('creatorToken' in workspace)

    def test_load_tables_to_workspace(self):
        bucket_id = self.buckets.create('py-test-tables')['id']
        table1_id = self.__create_table(bucket_id, 'test-table-1', {
            'col1': 'ping',
            'col2': 'pong'
        })
        table2_id = self.__create_table(bucket_id, 'test-table-2', {
            'col1': 'king',
            'col2': 'kong'
        })
        workspace = self.workspaces.create()
        self.workspace_id = workspace['id']
        job = self.workspaces.load_tables(workspace['id'], {
            table1_id: 'destination_1',
            table2_id: 'destination_2'
        })
        self.jobs.block_until_completed(job['id'])

        job = self.tables.create_raw(bucket_id,
                                     'back-and-forth-table',
                                     data_workspace_id=workspace['id'],
                                     data_table_name='destination_1')
        self.jobs.block_until_completed(job['id'])

        new_table = self.tables.detail(bucket_id + '.back-and-forth-table')
        self.assertEqual('back-and-forth-table', new_table['name'])

    # Test loading files into an ABS (Azure Blob Storage) workspace
    def test_load_files_to_workspace(self):
        if os.getenv('SKIP_ABS_TEST'):
            self.skipTest(
                'Skipping ABS test because env var SKIP_ABS_TEST was set')
        # put a test file to storage
        file, path = tempfile.mkstemp(prefix='sapi-test')
        os.write(file, bytes('fooBar', 'utf-8'))
        os.close(file)

        # We'll put 2 files with the same tag to test multiple results
        file1_id = self.files.upload_file(
            path, tags=['sapi-client-python-tests', 'file1'])
        file2_id = self.files.upload_file(
            path, tags=['sapi-client-python-tests', 'file2'])

        file1 = self.files.detail(file1_id)
        file2 = self.files.detail(file2_id)
        # create a workspace and load the file to it
        workspace = self.workspaces.create('abs')
        self.workspace_id = workspace['id']
        self.workspaces.load_files(workspace['id'], {
            'tags': ['sapi-client-python-tests'],
            'destination': 'data/in/files'
        })

        # assert that the file was loaded to the workspace
        blob_service_client = BlobServiceClient.from_connection_string(
            workspace['connection']['connectionString'])
        blob_client_1 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/files/%s/%s' % (file1['name'], str(file1['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_1.download_blob().readall().decode('utf-8'))

        blob_client_2 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/files/%s/%s' % (file2['name'], str(file2['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_2.download_blob().readall().decode('utf-8'))

        # Now test that the 'and' operator can be used; in this case file2
        # should not get loaded.
        self.workspaces.load_files(
            workspace['id'], {
                'tags': ['sapi-client-python-tests', 'file1'],
                'operator': 'and',
                'destination': 'data/in/and_files'
            })
        # file 1 should be there
        blob_client_1 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/and_files/%s/%s' % (file1['name'], str(file1['id'])))
        self.assertEqual(
            'fooBar',
            blob_client_1.download_blob().readall().decode('utf-8'))

        # file 2 should not
        blob_client_2 = blob_service_client.get_blob_client(
            container=workspace['connection']['container'],
            blob='data/in/and_files/%s/%s' % (file2['name'], str(file2['id'])))
        with self.assertRaises(ResourceNotFoundError) as context:
            blob_client_2.download_blob().readall().decode('utf-8')

        self.assertTrue(
            'The specified blob does not exist' in str(context.exception))

    def __create_table(self, bucket_id, table_name, row):
        file, path = tempfile.mkstemp(prefix='sapi-test')
        with open(path, 'w') as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=['col1', 'col2'],
                                    lineterminator='\n',
                                    delimiter=',',
                                    quotechar='"')
            writer.writeheader()
            writer.writerow(row)
        return self.tables.create(name=table_name,
                                  file_path=path,
                                  bucket_id=bucket_id)
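
Unlike the mocked Jobs tests, these workspace tests run against a real project through the env vars read in setUp. A hedged sketch of the table-to-workspace round trip outside the test harness; the bucket and table ids are placeholders and the import paths are assumed.

import os

from kbcstorage.jobs import Jobs              # import paths assumed
from kbcstorage.workspaces import Workspaces

url = os.environ['KBC_TEST_API_URL']
token = os.environ['KBC_TEST_TOKEN']
workspaces = Workspaces(url, token)
jobs = Jobs(url, token)

workspace = workspaces.create()
try:
    # Load an existing Storage table into the workspace under a new name.
    job = workspaces.load_tables(workspace['id'],
                                 {'in.c-my-bucket.my-table': 'destination_1'})
    jobs.block_until_completed(job['id'])
finally:
    workspaces.delete(workspace['id'])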