Example #1
    def test_copy_job_creation(self, _):
        # given
        copy_job_request = CopyJobRequest(
            task_name_suffix='task-name-suffix',
            copy_job_type_id="backups",
            source_big_query_table=BigQueryTable('source_project',
                                                 'source_dataset',
                                                 'source_table'),
            target_big_query_table=BigQueryTable('target_project',
                                                 'target_dataset',
                                                 'target_table'),
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            post_copy_action_request=PostCopyActionRequest(url="/my/url", data={
                "key1": "value1"})
        )
        # when
        TaskCreator.create_copy_job(
            copy_job_request=copy_job_request
        )

        # then
        expected_queue_name = 'backups-copy-job'
        executed_tasks = self.taskqueue_stub.get_filtered_tasks(
            queue_names=expected_queue_name
        )

        self.assertEqual(len(executed_tasks), 1,
                         "Should create one task in queue")
        executed_task = executed_tasks[0]
        self.assertEqual(json.dumps(copy_job_request, cls=RequestEncoder),
                         executed_task.extract_params()['copyJobRequest'])
        self.assertEqual('POST', executed_task.method)
        self.assertEqual('task_name', executed_task.name)
        self.assertEqual(executed_task.url, '/tasks/copy_job_async/copy_job')
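Example #1 serializes the request with json.dumps(..., cls=RequestEncoder). The encoder itself is not part of these examples; a minimal sketch of such a json.JSONEncoder subclass (an assumption about its shape, not the project's actual class) could look like this:

import json

class RequestEncoder(json.JSONEncoder):
    def default(self, o):
        # Assumption: request/value objects expose their state via __dict__.
        if hasattr(o, '__dict__'):
            return o.__dict__
        return json.JSONEncoder.default(self, o)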
Example #2
 def __init__(self, project_id, dataset_id, table_id):
     self.big_query_table = BigQueryTable(project_id, dataset_id, table_id)
     self.service = googleapiclient.discovery.build(
         'bigquery',
         'v2',
         credentials=self._create_credentials(),
         http=self._create_http()
     )
Example #3
 def from_bq_table(bq_table):
     table_id, partition_id = BigQueryTable.split_table_and_partition_id(
         bq_table.table_id)
     return TableReference(project_id=bq_table.project_id,
                           dataset_id=bq_table.dataset_id,
                           table_id=table_id,
                           partition_id=partition_id)
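BigQueryTable.split_table_and_partition_id is not shown on this page. Judging by the "<table_id>$<partition_id>" convention used elsewhere in these examples, a plausible sketch (an assumption, not the actual implementation) is:

@staticmethod
def split_table_and_partition_id(table_id):
    # Assumption: partitioned tables are addressed as "<table_id>$<partition_id>".
    if '$' in table_id:
        base_table_id, partition_id = table_id.split('$', 1)
        return base_table_id, partition_id
    return table_id, None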
Example #4
    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        ndb.get_context().clear_cache()
        patch('googleapiclient.discovery.build').start()
        patch(
            'oauth2client.client.GoogleCredentials.get_application_default') \
            .start()
        self._create_http = patch.object(BigQuery, '_create_http').start()

        self.example_source_bq_table = BigQueryTable('source_project_id_1',
                                                     'source_dataset_id_1',
                                                     'source_table_id_1')
        self.example_target_bq_table = BigQueryTable('target_project_id_1',
                                                     'target_dataset_id_1',
                                                     'target_table_id_1')
Example #5
    def test_should_disable_partition_expiration_if_backup_table_has_it(
            self, disable_partition_expiration, _, _1, _2, _3, _4, _5):
        # given
        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps({
            "data": data,
            "jobJson": JobResultExample.DONE
        },
                             cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)

        # then
        self.assertEqual(response.status_int, 200)
        disable_partition_expiration.assert_called_once()
Example #6
class DataStreamer(object):

    def __init__(self, project_id, dataset_id, table_id):
        self.big_query_table = BigQueryTable(project_id, dataset_id, table_id)
        self.service = googleapiclient.discovery.build(
            'bigquery',
            'v2',
            credentials=self._create_credentials(),
            http=self._create_http()
        )

    @staticmethod
    def _create_credentials():
        return GoogleCredentials.get_application_default()

    @staticmethod
    def _create_http():
        return None

    def stream_stats(self, rows):
        insert_all_data = {
            'rows': [{
                'json': data
            } for data in rows]
        }
        logging.info("Streaming data to table %s", self.big_query_table)
        insert_all_response = self._stream_metadata(insert_all_data)
        if 'insertErrors' in insert_all_response:
            logging.debug("Sent json: \n%s", json.dumps(insert_all_data))
            error_message = "Error during streaming metadata to BigQuery: \n{}"\
                .format(json.dumps(insert_all_response['insertErrors']))
            logging.error(error_message)
            ErrorReporting().report(error_message)
        else:
            logging.debug("Stats have been sent successfully to %s table",
                          self.big_query_table)

    @retry(Error, tries=2, delay=2, backoff=2)
    def _stream_metadata(self, insert_all_data):
        partition = datetime.datetime.now().strftime("%Y%m%d")
        return self.service.tabledata().insertAll(
            projectId=self.big_query_table.get_project_id(),
            datasetId=self.big_query_table.get_dataset_id(),
            tableId='{}${}'.format(self.big_query_table.get_table_id(),
                                   partition),
            body=insert_all_data).execute(num_retries=3)
Example #7
    def from_json(cls, json):
        source_big_query_table = BigQueryTable.from_json(
            json["source_big_query_table"])
        target_big_query_table = BigQueryTable.from_json(
            json["target_big_query_table"])
        post_copy_action_request = PostCopyActionRequest.from_json(
            json["post_copy_action_request"])

        return CopyJobRequest(
            task_name_suffix=json["task_name_suffix"],
            copy_job_type_id=json["copy_job_type_id"],
            source_big_query_table=source_big_query_table,
            target_big_query_table=target_big_query_table,
            create_disposition=json["create_disposition"],
            write_disposition=json["write_disposition"],
            retry_count=json["retry_count"],
            post_copy_action_request=post_copy_action_request)
Example #8
 def test_source_table_reference(self):
     # given
     copy_job_result = CopyJobResult(JobResultExample.DONE)
     # when
     source_bq_table = copy_job_result.source_bq_table
     # then
     self.assertEqual(
         source_bq_table,
         BigQueryTable(project_id="source_project_id",
                       dataset_id="source_dataset_id",
                       table_id="source_table_id$123"))
 def test_target_table_reference(self):
     # given
     copy_job_result = CopyJobResult(JobResultExample.DONE)
     # when
     target_bq_table = copy_job_result.target_bq_table
     # then
     self.assertEqual(
         target_bq_table,
         BigQueryTable(project_id="target_project_id",
                       dataset_id="target_dataset_id",
                       table_id="target_table_id"))
    def test_that_async_copy_job_is_called_with_correct_parameters_when_creating_new_backup(  # pylint: disable=C0301
            self, async_copy):

        # given
        table_to_backup = Table(project_id="src_project",
                                dataset_id="src_dataset",
                                table_id="src_table",
                                partition_id="20180416")
        source_bq_table = BigQueryTable("src_project", "src_dataset",
                                        "src_table$20180416")
        destination_bq_table = BigQueryTable(
            "bkup_storage_project", "2018_16_US_src_project",
            "20180416_000000_src_project_src_dataset_src_table_partition_20180416"
        )  # pylint: disable=C0301
        under_test = BackupCreator(datetime.datetime.utcnow())

        # when
        under_test.create_backup(table_to_backup, BigQueryTableMetadata({}))

        # then
        async_copy.assert_called_with(source_bq_table, destination_bq_table)
Example #11
    def test_happy_path(self, copy_table_mock):
        # given
        source_big_query_table = BigQueryTable("source_project_id",
                                               "source_dataset_id",
                                               "source_table_id")
        target_big_query_table = BigQueryTable("target_project_id",
                                               "target_dataset_id",
                                               "target_table_id")

        post_copy_action_request = PostCopyActionRequest(
            url="/my/url", data={"key1": "value1"})

        url = '/tasks/copy_job_async/copy_job'
        # when
        self.under_test.post(
            url=url,
            params={
                "copyJobRequest":
                json.dumps(CopyJobRequest(
                    task_name_suffix=None,
                    copy_job_type_id=None,
                    source_big_query_table=source_big_query_table,
                    target_big_query_table=target_big_query_table,
                    create_disposition="CREATE_IF_NEEDED",
                    write_disposition="WRITE_EMPTY",
                    retry_count=0,
                    post_copy_action_request=post_copy_action_request),
                           cls=RequestEncoder)
            })

        # then
        copy_table_mock.assert_called_with(
            CopyJobRequest(task_name_suffix=None,
                           copy_job_type_id=None,
                           source_big_query_table=source_big_query_table,
                           target_big_query_table=target_big_query_table,
                           create_disposition="CREATE_IF_NEEDED",
                           write_disposition="WRITE_EMPTY",
                           retry_count=0,
                           post_copy_action_request=post_copy_action_request))
Example #12
    def test_return_none_if_calculated_name_is_too_long(self):
        # given
        task_name_suffix = "x" * 501

        copy_job_request = CopyJobRequest(
            task_name_suffix=task_name_suffix,
            copy_job_type_id="unknown-copying",
            source_big_query_table=BigQueryTable('source_project',
                                                 'source_dataset',
                                                 'source_table'),
            target_big_query_table=BigQueryTable('target_project',
                                                 'target_dataset',
                                                 'target_table'),
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0)

        # when
        copy_job_task_name = CopyJobTaskName(copy_job_request).create()

        # then
        self.assertIsNone(copy_job_task_name)
Example #13
    def create_backup(self, source_table_entity, bq_table_metadata):
        logging.info(
            'Scheduling copy job for backup, request correlation id:'
            ' %s', request_correlation_id.get())

        target_project_id = configuration.backup_project_id
        target_dataset_id = DatasetIdCreator.create(
            self.now, bq_table_metadata.get_location(),
            source_table_entity.project_id)
        target_table_id = self.__create_table_id(source_table_entity)

        source_table_id_with_partition_id = BigQueryTableMetadata\
            .get_table_id_with_partition_id(source_table_entity.table_id, source_table_entity.partition_id)

        source_bq_table = BigQueryTable(source_table_entity.project_id,
                                        source_table_entity.dataset_id,
                                        source_table_id_with_partition_id)
        destination_bq_table = BigQueryTable(target_project_id,
                                             target_dataset_id,
                                             target_table_id)

        self.__copy_table_async(source_bq_table, destination_bq_table)
Example #14
    def test_copy_job_creation_throws_error_on_unknown_queue(self, _):
        # when
        with self.assertRaises(UnknownQueueError) as error:
            TaskCreator.create_copy_job(
                copy_job_request=CopyJobRequest(
                    task_name_suffix=None,
                    copy_job_type_id="unknown-copying",
                    source_big_query_table=BigQueryTable('source_project',
                                                         'source_dataset',
                                                         'source_table'),
                    target_big_query_table=BigQueryTable('target_project',
                                                         'target_dataset',
                                                         'target_table'),
                    create_disposition="CREATE_IF_NEEDED",
                    write_disposition="WRITE_EMPTY"
                )
            )

        self.assertEqual(
            error.exception.message, "There is no queue "
                                     "'unknown-copying-copy-job'. Please add "
                                     "it to your queue.yaml definition.")
    def test_creating_task_name(self):
        # given
        copy_job_request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id="unknown-copying",
            source_big_query_table=BigQueryTable('source_project',
                                                 'source_dataset',
                                                 'source_table'),
            target_big_query_table=BigQueryTable('target_project',
                                                 'target_dataset',
                                                 'target_table'),
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0)

        # when
        copy_job_task_name = CopyJobTaskName(copy_job_request).create()

        # then
        self.assertEqual(
            copy_job_task_name,
            '2017-12-06_source_project_source_dataset_source_table_0_task_name_suffix'
        )
Example #16
    def test_should_create_datastore_backup_entity(self, _create_http, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({
                'status': '200'
            }, content('tests/json_samples/bigquery_v2_test_schema.json')),
            ({
                'status': '200'
            },
             content('tests/json_samples/table_get/'
                     'bigquery_partitioned_table_get.json'))
        ])

        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps({
            "data": data,
            "jobJson": JobResultExample.DONE
        },
                             cls=RequestEncoder)
        copy_job_result = CopyJobResult(json.loads(payload).get('jobJson'))

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertEqual(backup.dataset_id, "target_dataset_id")
        self.assertEqual(backup.table_id, "target_table_id")
        self.assertTrue(isinstance(backup.created, datetime))
        self.assertEqual(backup.created, copy_job_result.end_time)

        self.assertTrue(isinstance(backup.last_modified, datetime))
        self.assertEqual(backup.last_modified, copy_job_result.start_time)
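The content helper used to build the HttpMockSequence responses above is not shown on this page; a plausible sketch (an assumption) is a small function that returns the raw text of a JSON sample file:

def content(filename):
    # Assumed helper: load a JSON sample used as a mocked HTTP response body.
    with open(filename, 'r') as f:
        return f.read()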
Example #17
    def create_table(self, projectId, datasetId, body):
        table = BigQueryTable(projectId, datasetId,
                              body.get("tableReference").get("tableId"))

        logging.info("Creating table %s", table)
        logging.info("BODY: %s", json.dumps(body))

        try:
            self.service.tables().insert(projectId=projectId,
                                         datasetId=datasetId,
                                         body=body).execute()
        except HttpError as error:
            if error.resp.status == 409:
                logging.info('Table already exists %s', table)
            else:
                raise
Example #18
    def test_should_not_create_backups_entity_if_backup_table_doesnt_exist(
            self, _create_http, error_reporting, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({
                'status': '200'
            }, content('tests/json_samples/bigquery_v2_test_schema.json')),
            (
                {
                    'status': '404'
                },  # Table not found
                content('tests/json_samples/table_get/'
                        'bigquery_partitioned_table_get.json'))
        ])

        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps({
            "data": data,
            "jobJson": JobResultExample.DONE
        },
                             cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertIsNone(backup)
        error_reporting.assert_called_once()
Example #19
    def get_table(self, project_id, dataset_id, table_id, log_table=True):
        logging.info("getting table %s",
                     BigQueryTable(project_id, dataset_id, table_id))
        try:
            table = self.service.tables().get(
                projectId=project_id, datasetId=dataset_id,
                tableId=table_id).execute(num_retries=3)

            if log_table and table:
                self.__log_table(table)

            return table

        except HttpError as ex:
            if ex.resp.status == 404:
                logging.info("Table '%s' Not Found",
                             TableReference(project_id, dataset_id, table_id))
                return None
            raise ex
Example #20
 def target_bq_table(self):
     return BigQueryTable(self.target_project_id, self.target_dataset_id,
                          self.target_table_id)
Example #21
 def create_big_query_table(self):
     return BigQueryTable(self.project_id,
                          self.dataset_id,
                          self.get_table_id_with_partition_id())
Example #22
class TestCopyJobService(unittest.TestCase):
    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        ndb.get_context().clear_cache()
        patch('googleapiclient.discovery.build').start()
        patch(
            'oauth2client.client.GoogleCredentials.get_application_default') \
            .start()
        self._create_http = patch.object(BigQuery, '_create_http').start()

        self.example_source_bq_table = BigQueryTable('source_project_id_1',
                                                     'source_dataset_id_1',
                                                     'source_table_id_1')
        self.example_target_bq_table = BigQueryTable('target_project_id_1',
                                                     'target_dataset_id_1',
                                                     'target_table_id_1')

    def tearDown(self):
        patch.stopall()
        self.testbed.deactivate()

    @patch.object(BigQuery,
                  'insert_job',
                  return_value=BigQueryJobReference(project_id='test_project',
                                                    job_id='job123',
                                                    location='EU'))
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_post_copy_action_request_is_passed(
            self, create_copy_job_result_check, _):
        # given
        post_copy_action_request = \
            PostCopyActionRequest(url='/my/url', data={'key1': 'value1'})

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition="CREATE_IF_NEEDED",
                           write_disposition="WRITE_EMPTY",
                           retry_count=0,
                           post_copy_action_request=post_copy_action_request))

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(project_id='test_project',
                                                   job_id='job123',
                                                   location='EU'),
                retry_count=0,
                post_copy_action_request=post_copy_action_request))

    @patch.object(BigQuery,
                  'insert_job',
                  return_value=BigQueryJobReference(project_id='test_project',
                                                    job_id='job123',
                                                    location='EU'))
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_create_and_write_disposition_are_passed_to_result_check(
            self, create_copy_job_result_check, _):
        # given
        create_disposition = "SOME_CREATE_DISPOSITION"
        write_disposition = "SOME_WRITE_DISPOSITION"

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition=create_disposition,
                           write_disposition=write_disposition,
                           retry_count=0,
                           post_copy_action_request=None))

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(task_name_suffix='task_name_suffix',
                               copy_job_type_id='test-process',
                               job_reference=BigQueryJobReference(
                                   project_id='test_project',
                                   job_id='job123',
                                   location='EU'),
                               retry_count=0,
                               post_copy_action_request=None))

    @patch.object(BigQuery, 'insert_job')
    @patch('time.sleep', side_effect=lambda _: None)
    def test_that_copy_table_should_throw_error_after_exception_not_being_http_error_thrown_on_copy_job_creation(
            self, _, insert_job):
        # given
        error_message = 'test exception'
        insert_job.side_effect = Exception(error_message)
        request = CopyJobRequest(
            task_name_suffix=None,
            copy_job_type_id=None,
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY")

        # when
        with self.assertRaises(Exception) as context:
            CopyJobService().run_copy_job_request(request)

        # then
        self.assertTrue(error_message in context.exception)

    @patch.object(BigQuery, 'insert_job')
    @patch('time.sleep', side_effect=lambda _: None)
    def test_that_copy_table_should_throw_unhandled_errors(
            self, _, insert_job):
        # given
        exception = HttpError(Mock(status=500), 'internal error')
        exception._get_reason = Mock(return_value='internal error')

        insert_job.side_effect = exception
        request = CopyJobRequest(
            task_name_suffix=None,
            copy_job_type_id=None,
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY")

        # when
        with self.assertRaises(HttpError) as context:
            CopyJobService().run_copy_job_request(request)

        # then
        self.assertEqual(context.exception, exception)

    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_post_copy_action')
    def test_that_copy_table_should_create_correct_post_copy_action_if_404_http_error_thrown_on_copy_job_creation(
            self, create_post_copy_action, insert_job):
        # given
        error = HttpError(Mock(status=404), 'not found')
        error._get_reason = Mock(return_value='not found')

        insert_job.side_effect = error
        post_copy_action_request = PostCopyActionRequest(
            url='/my/url', data={'key1': 'value1'})
        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=post_copy_action_request)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_post_copy_action.assert_called_once_with(
            copy_job_type_id='test-process',
            post_copy_action_request=post_copy_action_request,
            job_json={
                'status': {
                    'state':
                    'DONE',
                    'errors': [{
                        'reason':
                        'Invalid',
                        'message':
                        ("404 while creating Copy Job from {} to {}".format(
                            self.example_source_bq_table,
                            self.example_target_bq_table))
                    }]
                },
                'configuration': {
                    'copy': {
                        'sourceTable': {
                            'projectId':
                            self.example_source_bq_table.get_project_id(),
                            'tableId':
                            self.example_source_bq_table.get_table_id(),
                            'datasetId':
                            self.example_source_bq_table.get_dataset_id()
                        },
                        'destinationTable': {
                            'projectId':
                            self.example_target_bq_table.get_project_id(),
                            'tableId':
                            self.example_target_bq_table.get_table_id(),
                            'datasetId':
                            self.example_target_bq_table.get_dataset_id()
                        }
                    }
                }
            })

    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_post_copy_action')
    def test_that_copy_table_should_create_correct_post_copy_action_if_access_denied_http_error_thrown_on_copy_job_creation(
            self, create_post_copy_action, insert_job):
        # given
        http_error_content = "{\"error\": " \
                             "  {\"errors\": [" \
                             "    {\"reason\": \"Access Denied\"," \
                             "     \"message\": \"Access Denied\"," \
                             "     \"location\": \"US\"" \
                             "  }]," \
                             "  \"code\": 403," \
                             "  \"message\": \"Access Denied\"}}"
        insert_job.side_effect = HttpError(Mock(status=403),
                                           http_error_content)
        post_copy_action_request = PostCopyActionRequest(
            url='/my/url', data={'key1': 'value1'})
        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=post_copy_action_request)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_post_copy_action.assert_called_once_with(
            copy_job_type_id='test-process',
            post_copy_action_request=post_copy_action_request,
            job_json={
                'status': {
                    'state':
                    'DONE',
                    'errors': [{
                        'reason':
                        'Invalid',
                        'message':
                        ("Access Denied while creating Copy Job from {} to {}".
                         format(self.example_source_bq_table,
                                self.example_target_bq_table))
                    }]
                },
                'configuration': {
                    'copy': {
                        'sourceTable': {
                            'projectId':
                            self.example_source_bq_table.get_project_id(),
                            'tableId':
                            self.example_source_bq_table.get_table_id(),
                            'datasetId':
                            self.example_source_bq_table.get_dataset_id()
                        },
                        'destinationTable': {
                            'projectId':
                            self.example_target_bq_table.get_project_id(),
                            'tableId':
                            self.example_target_bq_table.get_table_id(),
                            'datasetId':
                            self.example_target_bq_table.get_dataset_id()
                        }
                    }
                }
            })

    @patch.object(BigQuery, 'get_job')
    @patch.object(BigQuery, 'insert_job')
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    def test_that_copy_table_will_try_to_wait_if_deadline_exceeded(
            self, create_copy_job_result_check, insert_job, get_job):
        # given
        http_error_content = "{\"error\": " \
                             "  {\"errors\": [" \
                             "    {\"reason\": \"Deadline exceeded\"," \
                             "     \"message\": \"Deadline exceeded\"," \
                             "     \"location\": \"US\"" \
                             "  }]," \
                             "  \"code\": 500," \
                             "  \"message\": \"Deadline exceeded\"}}"
        successful_job_json = {
            'status': {
                'state': 'DONE'
            },
            'jobReference': {
                'projectId': self.example_target_bq_table.get_project_id(),
                'location': 'EU',
                'jobId': 'job123',
            },
            'configuration': {
                'copy': {
                    'sourceTable': {
                        'projectId':
                        self.example_source_bq_table.get_project_id(),
                        'tableId': self.example_source_bq_table.get_table_id(),
                        'datasetId':
                        self.example_source_bq_table.get_dataset_id()
                    },
                    'destinationTable': {
                        'projectId':
                        self.example_target_bq_table.get_project_id(),
                        'tableId': self.example_target_bq_table.get_table_id(),
                        'datasetId':
                        self.example_target_bq_table.get_dataset_id()
                    }
                }
            }
        }

        insert_job.side_effect = HttpError(Mock(status=500),
                                           http_error_content)
        get_job.return_value = successful_job_json

        request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id='test-process',
            source_big_query_table=self.example_source_bq_table,
            target_big_query_table=self.example_target_bq_table,
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0,
            post_copy_action_request=None)

        # when
        CopyJobService().run_copy_job_request(request)

        # then
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(
                    project_id=self.example_target_bq_table.get_project_id(),
                    job_id='job123',
                    location='EU'),
                retry_count=0,
                post_copy_action_request=None))

    @patch(
        'src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata')
    @patch.object(TaskCreator, 'create_copy_job_result_check')
    @patch.object(CopyJobService,
                  '_create_random_job_id',
                  return_value='random_job_123')
    @patch.object(BigQuery,
                  'insert_job',
                  side_effect=[
                      HttpError(Mock(status=503), 'internal error'),
                      HttpError(Mock(status=409), 'job exists')
                  ])
    @patch('time.sleep', side_effect=lambda _: None)
    def test_bug_regression_job_already_exists_after_internal_error(
            self, _, insert_job, _create_random_job_id,
            create_copy_job_result_check, table_metadata):
        # given
        post_copy_action_request = \
            PostCopyActionRequest(url='/my/url', data={'key1': 'value1'})
        table_metadata._BigQueryTableMetadata__get_table_or_partition.return_value.get_location.return_value = 'EU'

        # when
        CopyJobService().run_copy_job_request(
            CopyJobRequest(task_name_suffix='task_name_suffix',
                           copy_job_type_id='test-process',
                           source_big_query_table=self.example_source_bq_table,
                           target_big_query_table=self.example_target_bq_table,
                           create_disposition="CREATE_IF_NEEDED",
                           write_disposition="WRITE_EMPTY",
                           retry_count=0,
                           post_copy_action_request=post_copy_action_request))

        # then
        self.assertEqual(insert_job.call_count, 2)
        create_copy_job_result_check.assert_called_once_with(
            ResultCheckRequest(
                task_name_suffix='task_name_suffix',
                copy_job_type_id='test-process',
                job_reference=BigQueryJobReference(
                    project_id='target_project_id_1',
                    job_id='random_job_123',
                    location='EU'),
                retry_count=0,
                post_copy_action_request=post_copy_action_request))
Example #23
 def create_example_target_bq_table(self):
     return BigQueryTable("target_project_id_1", "target_dataset_id_1",
                          "target_table_id_1")
 def create_example_source_bq_table(self):
     return BigQueryTable("source_project_id_1", "source_dataset_id_1",
                          "source_table_id_1")
Example #25
 def source_bq_table(self):
     return BigQueryTable(self.source_project_id, self.source_dataset_id,
                          self.source_table_id)
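All of the examples above treat BigQueryTable as a small value object: it is constructed from project, dataset and table ids (positionally or by keyword), exposes get_project_id/get_dataset_id/get_table_id, compares by value in assertEqual, and is formatted into log messages. A minimal sketch consistent with that usage (an assumption; the from_json classmethod seen above is omitted because its JSON keys are not shown) could be:

class BigQueryTable(object):
    def __init__(self, project_id, dataset_id, table_id):
        self.project_id = project_id
        self.dataset_id = dataset_id
        self.table_id = table_id

    def get_project_id(self):
        return self.project_id

    def get_dataset_id(self):
        return self.dataset_id

    def get_table_id(self):
        return self.table_id

    def __eq__(self, other):
        # Value equality, so tests can compare instances with assertEqual.
        return (isinstance(other, BigQueryTable)
                and self.__dict__ == other.__dict__)

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        # Assumed log format; the examples only show that instances are logged.
        return '{}:{}.{}'.format(self.project_id, self.dataset_id,
                                 self.table_id)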