Code example #1
    def test_copy_job_creation(self, _):
        # given
        copy_job_request = CopyJobRequest(
            task_name_suffix='task-name-suffix',
            copy_job_type_id="backups",
            source_big_query_table=BigQueryTable('source_project',
                                                 'source_dataset',
                                                 'source_table'),
            target_big_query_table=BigQueryTable('target_project',
                                                 'target_dataset',
                                                 'target_table'),
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            post_copy_action_request=PostCopyActionRequest(
                url="/my/url", data={"key1": "value1"})
        )
        # when
        TaskCreator.create_copy_job(
            copy_job_request=copy_job_request
        )

        # then
        expected_queue_name = 'backups-copy-job'
        executed_tasks = self.taskqueue_stub.get_filtered_tasks(
            queue_names=expected_queue_name
        )

        self.assertEqual(len(executed_tasks), 1,
                         "Should create one task in queue")
        executed_task = executed_tasks[0]
        self.assertEqual(json.dumps(copy_job_request, cls=RequestEncoder),
                         executed_task.extract_params()['copyJobRequest'])
        self.assertEqual('POST', executed_task.method)
        self.assertEqual('task_name', executed_task.name)
        self.assertEqual(executed_task.url, '/tasks/copy_job_async/copy_job')
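This test and code example #11 further down imply that TaskCreator derives the queue name as copy_job_type_id plus a '-copy-job' suffix and enqueues a POST task to /tasks/copy_job_async/copy_job. A speculative sketch under those assumptions, using the App Engine taskqueue API (the actual TaskCreator body, including task naming and queue validation, is not shown in these examples):

    @staticmethod
    def create_copy_job(copy_job_request):
        # Assumed naming rule: 'backups' -> 'backups-copy-job' (example #1),
        # 'unknown-copying' -> 'unknown-copying-copy-job' (example #11).
        queue_name = '{}-copy-job'.format(copy_job_request.copy_job_type_id)
        task = taskqueue.Task(
            method='POST',
            url='/tasks/copy_job_async/copy_job',
            params={'copyJobRequest': json.dumps(copy_job_request,
                                                 cls=RequestEncoder)})
        # Adding to a queue missing from queue.yaml fails; example #11
        # expects that case surfaced as UnknownQueueError.
        taskqueue.Queue(queue_name).add(task)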
Code example #2
    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        ndb.get_context().clear_cache()
        patch('googleapiclient.discovery.build').start()
        patch('oauth2client.client.GoogleCredentials'
              '.get_application_default').start()
        self._create_http = patch.object(BigQuery, '_create_http').start()

        self.example_source_bq_table = BigQueryTable('source_project_id_1',
                                                     'source_dataset_id_1',
                                                     'source_table_id_1')
        self.example_target_bq_table = BigQueryTable('target_project_id_1',
                                                     'target_dataset_id_1',
                                                     'target_table_id_1')
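The setUp above activates an App Engine testbed and starts several patches, so a matching tearDown is needed to keep state from leaking between tests. A minimal sketch, assuming the standard testbed and mock APIs (a tearDown is not shown in the original example):

    def tearDown(self):
        # Stop every patcher started via patch(...).start() in setUp
        # and release the App Engine testbed.
        patch.stopall()
        self.testbed.deactivate()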
Code example #3
    def test_should_disable_partition_expiration_if_backup_table_has_it(
            self, disable_partition_expiration, _, _1, _2, _3, _4, _5):
        # given
        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps(
            {"data": data, "jobJson": JobResultExample.DONE},
            cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)

        # then
        self.assertEqual(response.status_int, 200)
        disable_partition_expiration.assert_called_once()
Code example #4
    def __init__(self, project_id, dataset_id, table_id):
        self.big_query_table = BigQueryTable(project_id, dataset_id, table_id)
        self.service = googleapiclient.discovery.build(
            'bigquery',
            'v2',
            credentials=self._create_credentials(),
            http=self._create_http()
        )
Code example #5
    def test_source_table_reference(self):
        # given
        copy_job_result = CopyJobResult(JobResultExample.DONE)
        # when
        source_bq_table = copy_job_result.source_bq_table
        # then
        self.assertEqual(
            source_bq_table,
            BigQueryTable(project_id="source_project_id",
                          dataset_id="source_dataset_id",
                          table_id="source_table_id$123"))
Code example #6
    def test_target_table_reference(self):
        # given
        copy_job_result = CopyJobResult(JobResultExample.DONE)
        # when
        target_bq_table = copy_job_result.target_bq_table
        # then
        self.assertEqual(
            target_bq_table,
            BigQueryTable(project_id="target_project_id",
                          dataset_id="target_dataset_id",
                          table_id="target_table_id"))
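Code examples #5 and #6 compare BigQueryTable instances with assertEqual, which only works if the class defines value equality. The class body does not appear in this listing; the following is a hypothetical reconstruction of such a value object, assuming nothing beyond the three-argument constructor used throughout:

    class BigQueryTable(object):
        # Hypothetical sketch: a value object identified by
        # (project_id, dataset_id, table_id).
        def __init__(self, project_id, dataset_id, table_id):
            self.project_id = project_id
            self.dataset_id = dataset_id
            self.table_id = table_id

        def __eq__(self, other):
            return isinstance(other, BigQueryTable) and \
                (self.project_id, self.dataset_id, self.table_id) == \
                (other.project_id, other.dataset_id, other.table_id)

        def __ne__(self, other):
            return not self == other

        def __str__(self):
            # Supports the '%s' logging seen in code examples #14 and #16.
            return '{}:{}.{}'.format(self.project_id, self.dataset_id,
                                     self.table_id)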
Code example #7
    def test_that_async_copy_job_is_called_with_correct_parameters_when_creating_new_backup(  # pylint: disable=C0301
            self, async_copy):

        # given
        table_to_backup = Table(project_id="src_project",
                                dataset_id="src_dataset",
                                table_id="src_table",
                                partition_id="20180416")
        source_bq_table = BigQueryTable("src_project", "src_dataset",
                                        "src_table$20180416")
        destination_bq_table = BigQueryTable(
            "bkup_storage_project", "2018_16_US_src_project",
            "20180416_000000_src_project_src_dataset_src_table_partition_20180416"
        )  # pylint: disable=C0301
        under_test = BackupCreator(datetime.datetime.utcnow())

        # when
        under_test.create_backup(table_to_backup, BigQueryTableMetadata({}))

        # then
        async_copy.assert_called_with(source_bq_table, destination_bq_table)
Code example #8
    def test_happy_path(self, copy_table_mock):
        # given
        source_big_query_table = BigQueryTable("source_project_id",
                                               "source_dataset_id",
                                               "source_table_id")
        target_big_query_table = BigQueryTable("target_project_id",
                                               "target_dataset_id",
                                               "target_table_id")

        post_copy_action_request = PostCopyActionRequest(
            url="/my/url", data={"key1": "value1"})

        url = '/tasks/copy_job_async/copy_job'
        # when
        self.under_test.post(
            url=url,
            params={
                "copyJobRequest": json.dumps(
                    CopyJobRequest(
                        task_name_suffix=None,
                        copy_job_type_id=None,
                        source_big_query_table=source_big_query_table,
                        target_big_query_table=target_big_query_table,
                        create_disposition="CREATE_IF_NEEDED",
                        write_disposition="WRITE_EMPTY",
                        retry_count=0,
                        post_copy_action_request=post_copy_action_request),
                    cls=RequestEncoder)
            })

        # then
        copy_table_mock.assert_called_with(
            CopyJobRequest(task_name_suffix=None,
                           copy_job_type_id=None,
                           source_big_query_table=source_big_query_table,
                           target_big_query_table=target_big_query_table,
                           create_disposition="CREATE_IF_NEEDED",
                           write_disposition="WRITE_EMPTY",
                           retry_count=0,
                           post_copy_action_request=post_copy_action_request))
Code example #9
    def create_backup(self, source_table_entity, bq_table_metadata):
        logging.info(
            'Scheduling copy job for backup, request correlation id:'
            ' %s', request_correlation_id.get())

        target_project_id = configuration.backup_project_id
        target_dataset_id = DatasetIdCreator.create(
            self.now, bq_table_metadata.get_location(),
            source_table_entity.project_id)
        target_table_id = self.__create_table_id(source_table_entity)

        source_table_id_with_partition_id = \
            BigQueryTableMetadata.get_table_id_with_partition_id(
                source_table_entity.table_id, source_table_entity.partition_id)

        source_bq_table = BigQueryTable(source_table_entity.project_id,
                                        source_table_entity.dataset_id,
                                        source_table_id_with_partition_id)
        destination_bq_table = BigQueryTable(target_project_id,
                                             target_dataset_id,
                                             target_table_id)

        self.__copy_table_async(source_bq_table, destination_bq_table)
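create_backup derives the target dataset id with DatasetIdCreator.create(self.now, location, project_id). The expected value '2018_16_US_src_project' in code example #7 suggests a year, ISO week, location, project layout; a speculative sketch of that composition (the real implementation is not shown in this listing):

    class DatasetIdCreator(object):

        @staticmethod
        def create(now, location, project_id):
            # Hypothetical layout inferred from '2018_16_US_src_project':
            # ISO year, ISO week number, dataset location, source project id.
            year, week, _ = now.isocalendar()
            return '{}_{}_{}_{}'.format(year, week, location, project_id)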
Code example #10
    def test_return_none_if_calculated_name_is_too_long(self):
        # given
        task_name_suffix = "x" * 501

        copy_job_request = CopyJobRequest(
            task_name_suffix=task_name_suffix,
            copy_job_type_id="unknown-copying",
            source_big_query_table=BigQueryTable('source_project',
                                                 'source_dataset',
                                                 'source_table'),
            target_big_query_table=BigQueryTable('target_project',
                                                 'target_dataset',
                                                 'target_table'),
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0)

        # when
        copy_job_task_name = CopyJobTaskName(copy_job_request).create()

        # then
        self.assertIsNone(copy_job_task_name)
Code example #11
    def test_copy_job_creation_throws_error_on_unknown_queue(self, _):
        # when
        with self.assertRaises(UnknownQueueError) as error:
            TaskCreator.create_copy_job(
                copy_job_request=CopyJobRequest(
                    task_name_suffix=None,
                    copy_job_type_id="unknown-copying",
                    source_big_query_table=BigQueryTable('source_project',
                                                         'source_dataset',
                                                         'source_table'),
                    target_big_query_table=BigQueryTable('target_project',
                                                         'target_dataset',
                                                         'target_table'),
                    create_disposition="CREATE_IF_NEEDED",
                    write_disposition="WRITE_EMPTY"
                )
            )

        self.assertEqual(
            error.exception.message, "There is no queue "
                                     "'unknown-copying-copy-job'. Please add "
                                     "it to your queue.yaml definition.")
Code example #12
    def test_creating_task_name(self):
        # given
        copy_job_request = CopyJobRequest(
            task_name_suffix='task_name_suffix',
            copy_job_type_id="unknown-copying",
            source_big_query_table=BigQueryTable('source_project',
                                                 'source_dataset',
                                                 'source_table'),
            target_big_query_table=BigQueryTable('target_project',
                                                 'target_dataset',
                                                 'target_table'),
            create_disposition="CREATE_IF_NEEDED",
            write_disposition="WRITE_EMPTY",
            retry_count=0)

        # when
        copy_job_task_name = CopyJobTaskName(copy_job_request).create()

        # then
        self.assertEqual(
            copy_job_task_name,
            '2017-12-06_source_project_source_dataset_source_table_0_task_name_suffix'
        )
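Code examples #10 and #12 pin down the task name format: date, source project, dataset, table, retry count, and suffix joined by underscores, with None returned once the name exceeds App Engine's 500-character task name limit. A speculative reconstruction, assuming example #12 runs against a clock frozen at 2017-12-06:

    class CopyJobTaskName(object):

        MAX_LENGTH = 500  # App Engine's task name limit

        def __init__(self, copy_job_request):
            self.request = copy_job_request

        def create(self):
            source = self.request.source_big_query_table
            name = '{}_{}_{}_{}_{}_{}'.format(
                datetime.date.today().isoformat(), source.project_id,
                source.dataset_id, source.table_id, self.request.retry_count,
                self.request.task_name_suffix)
            return name if len(name) <= self.MAX_LENGTH else None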
Code example #13
    def test_should_create_datastore_backup_entity(self, _create_http, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({'status': '200'},
             content('tests/json_samples/bigquery_v2_test_schema.json')),
            ({'status': '200'},
             content('tests/json_samples/table_get/'
                     'bigquery_partitioned_table_get.json'))
        ])

        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps(
            {"data": data, "jobJson": JobResultExample.DONE},
            cls=RequestEncoder)
        copy_job_result = CopyJobResult(json.loads(payload).get('jobJson'))

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertEqual(backup.dataset_id, "target_dataset_id")
        self.assertEqual(backup.table_id, "target_table_id")
        self.assertIsInstance(backup.created, datetime)
        self.assertEqual(backup.created, copy_job_result.end_time)

        self.assertIsInstance(backup.last_modified, datetime)
        self.assertEqual(backup.last_modified, copy_job_result.start_time)
Code example #14
    def create_table(self, projectId, datasetId, body):
        table = BigQueryTable(projectId, datasetId,
                              body.get("tableReference").get("tableId"))

        logging.info("Creating table %s", table)
        logging.info("BODY: %s", json.dumps(body))

        try:
            self.service.tables().insert(projectId=projectId,
                                         datasetId=datasetId,
                                         body=body).execute()
        except HttpError as error:
            if error.resp.status == 409:
                logging.info('Table already exists %s', table)
            else:
                raise
Code example #15
    def test_should_not_create_backups_entity_if_backup_table_doesnt_exist(
            self, _create_http, error_reporting, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({'status': '200'},
             content('tests/json_samples/bigquery_v2_test_schema.json')),
            ({'status': '404'},  # Table not found
             content('tests/json_samples/table_get/'
                     'bigquery_partitioned_table_get.json'))
        ])

        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps(
            {"data": data, "jobJson": JobResultExample.DONE},
            cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertIsNone(backup)
        error_reporting.assert_called_once()
Code example #16
    def get_table(self, project_id, dataset_id, table_id, log_table=True):
        logging.info("getting table %s",
                     BigQueryTable(project_id, dataset_id, table_id))
        try:
            table = self.service.tables().get(
                projectId=project_id, datasetId=dataset_id,
                tableId=table_id).execute(num_retries=3)

            if log_table and table:
                self.__log_table(table)

            return table

        except HttpError as ex:
            if ex.resp.status == 404:
                logging.info("Table '%s' Not Found",
                             TableReference(project_id, dataset_id, table_id))
                return None
            raise ex
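Because get_table maps a 404 to None (and create_table in code example #14 likewise treats a 409 as success), callers can handle missing or pre-existing tables as ordinary conditions rather than exceptions. A brief usage sketch; big_query and the ids here are placeholders:

    table = big_query.get_table('my_project', 'my_dataset', 'my_table')
    if table is None:
        logging.info('Table is gone, nothing to do')
    else:
        schema = table.get('schema')  # tables().get() returns a dict resource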
Code example #17
    @property
    def target_bq_table(self):
        # Accessed as an attribute in code example #6, which implies a
        # property.
        return BigQueryTable(self.target_project_id, self.target_dataset_id,
                             self.target_table_id)
Code example #18
    @property
    def source_bq_table(self):
        # Accessed as an attribute in code example #5, which implies a
        # property.
        return BigQueryTable(self.source_project_id, self.source_dataset_id,
                             self.source_table_id)
Code example #19
    def create_example_target_bq_table(self):
        return BigQueryTable("target_project_id_1", "target_dataset_id_1",
                             "target_table_id_1")
Code example #20
    def create_example_source_bq_table(self):
        return BigQueryTable("source_project_id_1", "source_dataset_id_1",
                             "source_table_id_1")
Code example #21
    def create_big_query_table(self):
        return BigQueryTable(self.project_id,
                             self.dataset_id,
                             self.get_table_id_with_partition_id())
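The partitioned ids in code examples #5 ('source_table_id$123') and #7 ('src_table$20180416') match BigQuery's partition decorator syntax, where a single partition is addressed as table$partition. A plausible sketch of the helper; example #9 calls a static variant on BigQueryTableMetadata, and the instance method above presumably applies the same rule:

    @staticmethod
    def get_table_id_with_partition_id(table_id, partition_id):
        # BigQuery addresses one partition as '<table_id>$<partition_id>'.
        if partition_id:
            return '{}${}'.format(table_id, partition_id)
        return table_id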