    def exists(self, sli_table):
        table_reference = self.query_specification.to_table_reference(
            sli_table)
        table = self.big_query.get_table(project_id=table_reference.project_id,
                                         dataset_id=table_reference.dataset_id,
                                         table_id=table_reference.table_id)

        table_metadata = BigQueryTableMetadata(table)

        if not table_metadata.table_exists():
            logging.info("Table doesn't exist anymore: %s", table_reference)
            return False

        if not table_metadata.is_schema_defined():
            logging.info("Table doesn't have schema. Ignoring table: %s",
                         table_reference)
            return False

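        # A non-partition reference, or a table without time partitioning,
        # needs only the table-level existence check above.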
        if not table_reference.is_partition() or \
           not table_metadata.has_time_partitioning():
            logging.info("Non-partitioned table exist: %s", table_reference)
            return True

        if self.__is_partition_exists(table_reference):
            logging.info("Table partition exist: %s", table_reference)
            return True

        logging.info("Partition doesn't exist anymore: %s", table_reference)
        return False
class TestOnDemandTableBackup(unittest.TestCase):
    @patch.object(Table, "get_table", return_value=None)
    @patch(
        'src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata.get_table_by_reference',
        return_value=BigQueryTableMetadata({
            "tableReference": {
                "projectId": "test-project",
                "datasetId": "test-dataset",
                "tableId": "test-table-without-partition"
            },
            "timePartitioning": {
                "type": "DAY"
            }
        }))
    def test_should_throw_parameter_validation_exception_if_table_is_partitioned_but_partition_number_was_not_given(
            self, _1, _2):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id="")

        # when-then
        with self.assertRaises(ParameterValidationException):
            OnDemandTableBackup.start(table_reference)
    def is_empty(self, sli_table_entry):
        table_reference = LatencyQuerySpecification.to_table_reference(
            sli_table_entry)
        table = self.big_query.get_table(
            project_id=table_reference.project_id,
            dataset_id=table_reference.dataset_id,
            table_id=table_reference.table_id)
        return BigQueryTableMetadata(table).is_empty()
Example #4
    def __process(self, request_body_json):
        copy_job_results = CopyJobResult(request_body_json.get('jobJson'))
        data = request_body_json.get('data')

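        # A copy job that finished with errors means no backup was created,
        # so report the failure and stop processing this message.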
        if copy_job_results.has_errors():
            error_message = "Copy job failed with errors: {} ." \
                            "Backup for source: {}, target: {} " \
                            "has not been done. " \
                .format(copy_job_results.error_message,
                        data["sourceBqTable"], data["targetBqTable"])
            ErrorReporting().report(error_message)
            return

        backup_table_metadata = BigQueryTableMetadata.get_table_by_big_query_table(
            copy_job_results.target_bq_table)

        if backup_table_metadata.table_exists():
            self.__create_backup(backup_table_metadata, copy_job_results)
            if backup_table_metadata.has_partition_expiration():
                self.__disable_partition_expiration(
                    TableReference.from_bq_table(
                        copy_job_results.target_bq_table))
        else:
            ErrorReporting().report(
                "Backup table {0} does not exist. Backup entity is not created"
                .format(copy_job_results.target_bq_table))
Example #5
    def is_recreated(self, sli_table_entry):
        table_reference = LatencyQuerySpecification.to_table_reference(
            sli_table_entry)
        table = self.big_query.get_table(project_id=table_reference.project_id,
                                         dataset_id=table_reference.dataset_id,
                                         table_id=table_reference.table_id)
        table_metadata = BigQueryTableMetadata(table)

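        # The table counts as recreated when its current creation time is
        # newer than the creation time recorded in the last census snapshot.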
        is_table_recreated = table_metadata.get_creation_time(
        ) > datetime.datetime.utcfromtimestamp(sli_table_entry["creationTime"])

        if is_table_recreated:
            logging.info("Table was recreated since the last census snapshot")
            return True
        else:
            logging.info(
                "Table was not recreated since the last census snapshot")
            return False
Example #6
    def start(table_reference):
        big_query_table_metadata = BigQueryTableMetadata.get_table_by_reference(
            table_reference)

        BackupProcess(
            table_reference=table_reference,
            big_query=BigQuery(),
            big_query_table_metadata=big_query_table_metadata,
            should_backup_predicate=OnDemandBackupPredicate()).start()
Example #7
    def __source_table_exists(self, table_reference):
        try:
            return BigQueryTableMetadata.get_table_by_reference(
                table_reference).table_exists()
        except HttpError as error:
            # A partition reference to a table that is no longer partitioned
            # returns a 400 error; treat that case as "source does not exist".
            if self.__is_getting_partition_from_non_partitioned_error(
                    table_reference, error):
                return False
            raise error
    def is_modified_since_last_census_snapshot(self, sli_table_entry):
        table_reference = QualityQuerySpecification.to_table_reference(
            sli_table_entry)
        table = self.big_query.get_table(
            project_id=table_reference.get_project_id(),
            dataset_id=table_reference.get_dataset_id(),
            table_id=table_reference.get_table_id_with_partition_id())
        table_metadata = BigQueryTableMetadata(table)

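        # The table counts as modified when its current last-modified time is
        # newer than the value recorded in the last census snapshot.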
        is_table_modified = table_metadata.get_last_modified_datetime(
        ) > datetime.datetime.utcfromtimestamp(
            sli_table_entry["lastModifiedTime"])

        if is_table_modified:
            logging.info("Table was modified since the last census snapshot")
        else:
            logging.info(
                "Table wasn't modified since the last census snapshot")

        return is_table_modified
    def is_not_seen_by_census(self, sli_table):
        backup_table_reference = self.query_specification.to_backup_table_reference(sli_table)
        backup_table_metadata = BigQueryTableMetadata(
            self.big_query.get_table(
                project_id=backup_table_reference.project_id,
                dataset_id=backup_table_reference.dataset_id,
                table_id=backup_table_reference.table_id)
        )

        if not backup_table_metadata.table_exists():
            logging.info("Backup table doesn't exist: %s",
                         backup_table_reference)
            return False

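        # Census has not recorded the backup yet (no last-modified time in the
        # entry); accept it if its size matches the bytes saved in Datastore.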
        if not sli_table['backupLastModifiedTime']:
            if backup_table_metadata.table_size_in_bytes() == sli_table['backupEntityNumBytes']:
                logging.info(
                    "Backup table: %s exists although Census doesn't see it yet. "
                    "Backup table have the same number of bytes as saved in datastore.",
                    backup_table_reference)
                return True

        return False
Example #10
    def start(table_reference):
        big_query_table_metadata = BigQueryTableMetadata.get_table_by_reference(
            table_reference)

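        # On-demand backups of daily-partitioned tables must target a specific
        # partition, so a bare table reference is rejected here.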
        if big_query_table_metadata.is_daily_partitioned(
        ) and not big_query_table_metadata.is_partition():
            raise ParameterValidationException(
                "Partition id is required for partitioned table in on-demand mode"
            )

        BackupProcess(
            table_reference=table_reference,
            big_query=BigQuery(),
            big_query_table_metadata=big_query_table_metadata,
            should_backup_predicate=OnDemandBackupPredicate()).start()
Example #11
    def setUp(self):
        self.initTestBedForDatastore()

        self.table = Table(
            project_id="p1",
            dataset_id="d1",
            table_id="t1"
        )

        self.big_query_table_metadata = BigQueryTableMetadata({})
        patch('src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata.is_empty',
              return_value=False).start()
        patch('src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata.'
              'is_external_or_view_type', return_value=False).start()
        patch('src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata.'
              'is_schema_defined', return_value=True).start()
Example #12
    def start(table_reference):
        big_query = BigQuery()

        big_query_table_metadata = BigQueryTableMetadata.get_table_by_reference(
            table_reference)

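        # A daily-partitioned table referenced without a partition id is backed
        # up partition by partition; anything else runs a single backup process.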
        if big_query_table_metadata.is_daily_partitioned() and \
                not big_query_table_metadata.is_partition():
            logging.info('Table (%s/%s/%s) is partitioned',
                         table_reference.get_project_id(),
                         table_reference.get_dataset_id(),
                         table_reference.get_table_id())
            TablePartitionsBackupScheduler(table_reference, big_query).start()
        else:
            BackupProcess(
                table_reference=table_reference,
                big_query=big_query,
                big_query_table_metadata=big_query_table_metadata).start()
Example #13
    def start(table_reference):
        big_query = BigQuery()

        big_query_table_metadata = BigQueryTableMetadata.get_table_by_reference(
            table_reference)

        if big_query_table_metadata.is_daily_partitioned() and \
            not big_query_table_metadata.is_partition():
            logging.info(u'Table %s:%s.%s is partitioned',
                         table_reference.get_project_id(),
                         table_reference.get_dataset_id(),
                         table_reference.get_table_id())
            TableBackup._schedule_partitioned_table_backup_scheduler_task(
                table_reference)

        else:
            BackupProcess(
                table_reference=table_reference,
                big_query=big_query,
                big_query_table_metadata=big_query_table_metadata).start()
Example #14
    def test_that_async_copy_job_is_called_with_correct_parameters_when_creating_new_backup(  # pylint: disable=C0301
            self, async_copy):

        # given
        table_to_backup = Table(project_id="src_project",
                                dataset_id="src_dataset",
                                table_id="src_table",
                                partition_id="20180416")
        source_bq_table = BigQueryTable("src_project", "src_dataset",
                                        "src_table$20180416")
        destination_bq_table = BigQueryTable(
            "bkup_storage_project", "2018_16_US_src_project",
            "20180416_000000_src_project_src_dataset_src_table_partition_20180416"
        )  # pylint: disable=C0301
        under_test = BackupCreator(datetime.datetime.utcnow())

        # when
        under_test.create_backup(table_to_backup, BigQueryTableMetadata({}))

        # then
        async_copy.assert_called_with(source_bq_table, destination_bq_table)
Example #15
class TestTableBackup(unittest.TestCase):
    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_datastore_v3_stub()
        self.testbed.init_memcache_stub()
        self.testbed.init_app_identity_stub()
        self.testbed.init_taskqueue_stub()
        ndb.get_context().clear_cache()

    def tearDown(self):
        self.testbed.deactivate()

    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BackupProcess, 'start')
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata,
                  'is_daily_partitioned',
                  return_value=True)
    @patch.object(BigQueryTableMetadata, 'is_partition', return_value=True)
    @patch.object(BigQueryTableMetadata, 'is_empty', return_value=False)
    def test_that_backup_are_scheduled_for_non_empty_single_partition(
            self, _, _1, _2, _3, backup_start):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id="20170303")

        # when
        TableBackup.start(table_reference)

        # then
        backup_start.assert_called_once()

    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata,
                  'is_daily_partitioned',
                  return_value=False)
    @patch.object(BackupProcess, 'start')
    def test_that_table_backup_is_scheduled_for_not_partitioned_table(
            self, backup_start, _, _1):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id=None)
        # when
        TableBackup.start(table_reference)

        # then
        backup_start.assert_called_once()

    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata,
                  'is_daily_partitioned',
                  return_value=True)
    @patch.object(BigQueryTableMetadata, 'is_partition', return_value=False)
    @patch.object(BigQueryTableMetadata, 'is_empty', return_value=False)
    @patch.object(TablePartitionsBackupScheduler, 'start')
    def test_that_backup_for_partitions_is_scheduled_for_partitioned_table(
            self, table_partitions_backup_scheduler, _, _1, _2, _3):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id=None)

        # when
        TableBackup.start(table_reference)

        # then
        table_partitions_backup_scheduler.assert_called_once()

    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata,
                  'is_daily_partitioned',
                  return_value=True)
    @patch.object(BigQueryTableMetadata, 'is_partition', return_value=False)
    @patch.object(BigQueryTableMetadata, 'is_empty', return_value=True)
    @patch.object(TablePartitionsBackupScheduler, 'start')
    def test_that_backup_for_partitions_is_scheduled_for_empty_partitioned_table(
            self, table_partitions_backup_scheduler, _, _1, _2, _3):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id=None)

        # when
        TableBackup.start(table_reference)

        # then
        table_partitions_backup_scheduler.assert_called_once()
class TestRestoreWorkspaceCreator(TestCase):
    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_app_identity_stub()
        self.testbed.init_memcache_stub()
        ndb.get_context().clear_cache()
        self.BQ = patch(
            'src.restore.async_batch_restore_service.BigQuery').start()
        patch.object(BigQueryTableMetadata,
                     "get_table_by_reference_cached").start()

    def tearDown(self):
        patch.stopall()
        self.testbed.deactivate()

    def test_should_create_dataset_if_not_exist(self):
        # given
        source, target = self.__create_partitioned_table_references()
        self.BQ.get_dataset_cached.return_value = None

        # when
        RestoreWorkspaceCreator(self.BQ).create_workspace(source, target)

        # then
        self.BQ.create_dataset.assert_called_once()

    def test_should_create_dataset_based_on_target_table_reference(self):
        # given
        source, target = self.__create_partitioned_table_references()
        self.BQ.get_dataset_cached.return_value = None

        # when
        RestoreWorkspaceCreator(self.BQ).create_workspace(source, target)

        # then
        self.BQ.create_dataset.assert_called_with(project_id=target.project_id,
                                                  dataset_id=target.dataset_id,
                                                  location=ANY)

    def test_should_create_dataset_with_location_of_source_table(self):
        # given
        source, target = self.__create_partitioned_table_references()
        self.BQ.get_dataset_cached.return_value = None
        self.BQ.get_dataset_location.return_value = 'UK'

        # when
        RestoreWorkspaceCreator(self.BQ).create_workspace(source, target)

        # then
        self.BQ.get_dataset_location.assert_called_with(
            project_id=source.project_id, dataset_id=source.dataset_id)
        self.BQ.create_dataset.assert_called_with(project_id=ANY,
                                                  dataset_id=ANY,
                                                  location='UK')

    def test_should_not_create_dataset_if_already_exist(self):
        # given
        enforcer = RestoreWorkspaceCreator(self.BQ)
        source, target = self.__create_partitioned_table_references()
        self.BQ.get_dataset_cached.return_value = "<DATASET METADATA RETURNED>"

        # when
        enforcer.create_workspace(source, target)

        # then
        self.BQ.create_dataset.assert_not_called()

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=False)
    @patch.object(BigQueryTableMetadata, 'create_the_same_empty_table')
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference_cached',
                  return_value=BigQueryTableMetadata({}))
    def test_should_create_table_if_is_partitioned_and_not_exist(
            self, _, create_the_same_empty_table, _1):
        # given
        enforcer = RestoreWorkspaceCreator(self.BQ)
        source, target = self.__create_partitioned_table_references()

        # when
        enforcer.create_workspace(source, target)

        # then
        create_the_same_empty_table.assert_called_with(target)

    @patch.object(BigQueryTableMetadata, 'create_the_same_empty_table')
    @patch.object(BigQueryTableMetadata, 'get_table_by_reference_cached')
    def test_should_not_create_table_if_is_not_partitioned(
            self, _, create_the_same_empty_table):
        # given
        enforcer = RestoreWorkspaceCreator(self.BQ)
        source, target = self.__create_non_partitioned_table_references()

        # when
        enforcer.create_workspace(source, target)

        # then
        create_the_same_empty_table.assert_not_called()

    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference_cached',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    def test_should_not_create_table_if_table_already_exist(self, _, _1):
        # given
        enforcer = RestoreWorkspaceCreator(self.BQ)
        source, target = self.__create_partitioned_table_references()

        # when
        enforcer.create_workspace(source, target)

        # then
        self.BQ.create_empty_partitioned_table.assert_not_called()

    @patch.object(BigQueryTableMetadata, 'get_table_by_reference_cached')
    def test_create_workspace_should_take_care_about_dataset_and_table(
            self, _):
        # given
        enforcer = RestoreWorkspaceCreator(self.BQ)
        source, target = self.__create_partitioned_table_references()
        self.BQ.get_dataset_cached.return_value = "<DATASET METADATA RETURNED>"

        # when
        enforcer.create_workspace(source, target)

        # then
        self.BQ.get_dataset_cached.assert_called_once()
        BigQueryTableMetadata.get_table_by_reference_cached.assert_called_once(
        )

    def __create_partitioned_table_references(self):
        source_table_reference = TableReference(SOURCE_PROJECT_ID,
                                                SOURCE_DATASET_ID,
                                                SOURCE_TABLE_ID,
                                                SOURCE_PARTITION_ID)
        target_table_reference = TableReference(TARGET_PROJECT_ID,
                                                TARGET_DATASET_ID,
                                                TARGET_TABLE_ID,
                                                TARGET_PARTITION_ID)
        return source_table_reference, target_table_reference

    def __create_non_partitioned_table_references(self):
        source_table_reference = TableReference(SOURCE_PROJECT_ID,
                                                SOURCE_DATASET_ID,
                                                SOURCE_TABLE_ID, None)
        target_table_reference = TableReference(TARGET_PROJECT_ID,
                                                TARGET_DATASET_ID,
                                                TARGET_TABLE_ID, None)
        return source_table_reference, target_table_reference
Example #17
    def __create_empty_partitioned_table_if_not_exists(self, source_table_reference, target_table_reference):
        if target_table_reference.is_partition():
            target_table_metadata = BigQueryTableMetadata.get_table_by_reference_cached(
                target_table_reference)
            if not target_table_metadata.table_exists():
                source_table_metadata = BigQueryTableMetadata.get_table_by_reference_cached(
                    source_table_reference)
                source_table_metadata.create_the_same_empty_table(
                    target_table_reference)
Example #18
class TestRetentionPolicy(unittest.TestCase):
    def setUp(self):
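        # Patch Google API client construction so the policy under test can be
        # created without real credentials or network access.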
        patch('googleapiclient.discovery.build').start()
        patch('oauth2client.client.GoogleCredentials.get_application_default') \
            .start()
        self.under_test = RetentionPolicy()

    def tearDown(self):
        patch.stopall()

    @freeze_time("2017-08-20")
    def test_should_not_delete_if_single_young_backup(self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        single_young_backup = create_backup(datetime(2017, 8, 1))
        all_backups = list([single_young_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(all_backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([], eligible_for_deletion)

    @freeze_time("2017-08-20")
    def test_should_not_delete_if_less_than_10_young_backups(self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backups = create_backup_daily_sequence(3,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(young_backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([], eligible_for_deletion)

    @freeze_time("2017-08-20")
    def test_should_not_delete_if_10_young_backups(self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backups = create_backup_daily_sequence(10,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(young_backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([], eligible_for_deletion)

    @freeze_time("2017-08-20")
    def test_should_delete_oldest_if_11_young_backups(self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backups = create_backup_daily_sequence(11,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(young_backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([young_backups[10]], eligible_for_deletion)

    @freeze_time("2017-08-20")
    def test_should_delete_same_day_duplicates_backups(self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')

        first_5_backups = create_backup_daily_sequence(5,
                                                       start_date=datetime(
                                                           2017, 6, 1, 12))
        second_5_backups = create_backup_daily_sequence(5,
                                                        start_date=datetime(
                                                            2017, 6, 6, 12))
        first_5_backups_duplicated = create_backup_daily_sequence(
            5, start_date=datetime(2017, 6, 1, 14))

        backups = list(first_5_backups + second_5_backups +
                       first_5_backups_duplicated)
        backups_expected_for_deletion = list(first_5_backups)

        # when
        eligible_for_deletion = \
            self.under_test.get_backups_eligible_for_deletion(
                backups=list(backups),
                table_reference=reference)
        # then
        self.sortAndAssertListEqual(backups_expected_for_deletion,
                                    eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_delete_many_today_duplicates_and_11th_young_version_after_deduplication_and_retain_old_backup(
            self, _1, _2, _3):
        #given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')

        young_backups = create_backup_daily_sequence(10,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        newest_duplicated_backup = create_backup(datetime(2017, 8, 19, 10))

        today_duplicated_backups = [
            newest_duplicated_backup,
            create_backup(datetime(2017, 8, 19, 9)),
            create_backup(datetime(2017, 8, 19, 8)),
            create_backup(datetime(2017, 8, 19, 7))
        ]

        old_backup = create_backup(datetime(2016, 8, 19, 10))

        backups = list(young_backups + today_duplicated_backups + [old_backup])
        backups_expected_for_deletion = list([young_backups[9]] +
                                             today_duplicated_backups[1:])

        # when
        eligible_for_deletion = \
            self.under_test.get_backups_eligible_for_deletion(
                backups=list(backups),
                table_reference=reference)

        # then
        self.sortAndAssertListEqual(backups_expected_for_deletion,
                                    eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_not_delete_if_single_old_backup(self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')

        single_old_backup = create_backup(datetime(2016, 8, 1))

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list([single_old_backup]),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_delete_older_backup_if_two_old_backups(self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        older_old_backup = create_backup(datetime(2016, 7, 31))
        old_backup = create_backup(datetime(2016, 8, 1))
        all_backups = list([older_old_backup, old_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=all_backups,
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([older_old_backup], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_not_delete_if_single_young_backup_and_single_old_backup(
            self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backup = create_backup(datetime(2017, 8, 1))
        old_backup = create_backup(datetime(2016, 8, 1))
        all_backups = [young_backup, old_backup]

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(all_backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_not_delete_if_less_than_10_young_backup_and_single_old_backup(
            self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backups = create_backup_daily_sequence(3,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        old_backup = create_backup(datetime(2016, 8, 1))
        all_backups = list(young_backups + [old_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(all_backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_not_delete_if_10_young_backup_and_single_old_backup(
            self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backups = create_backup_daily_sequence(10,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        old_backup = create_backup(datetime(2016, 8, 1))
        all_backups = list(young_backups + [old_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(all_backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_delete_oldest_young_backup_if_11_young_backup_and_single_old_backup(
            self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backups = create_backup_daily_sequence(11,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        old_backup = create_backup(datetime(2016, 8, 1))
        all_backups = list(young_backups + [old_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(all_backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([young_backups[10]], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_delete_older_old_backup_if_two_old_and_single_young_backups(
            self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        older_old_backup = create_backup(datetime(2016, 7, 31))
        old_backup = create_backup(datetime(2016, 8, 1))
        young_backup = create_backup(datetime(2017, 8, 1))
        all_backups = list([older_old_backup, old_backup, young_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=all_backups,
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([older_old_backup], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_delete_older_old_backup_if_two_old_and_less_than_10_young_backups(
            self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        older_old_backup = create_backup(datetime(2016, 7, 31))
        old_backup = create_backup(datetime(2016, 8, 1))
        young_backups = create_backup_daily_sequence(3,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        all_backups = list(young_backups + [older_old_backup, old_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=all_backups,
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([older_old_backup], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_delete_older_old_backup_if_two_old_and_10_young_backups(
            self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        older_old_backup = create_backup(datetime(2016, 7, 31))
        old_backup = create_backup(datetime(2016, 8, 1))
        young_backups = create_backup_daily_sequence(10,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        all_backups = list(young_backups + [older_old_backup, old_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=all_backups,
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([older_old_backup], eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_delete_older_old_and_oldest_young_backup_if_two_old_and_11_young_backups(
            self, _1, _2, _3):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        older_old_backup = create_backup(datetime(2016, 7, 31))
        old_backup = create_backup(datetime(2016, 8, 1))
        young_backups = create_backup_daily_sequence(11,
                                                     start_date=datetime(
                                                         2017, 8, 1))
        all_backups = list(young_backups + [older_old_backup, old_backup])

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=all_backups,
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual([older_old_backup, young_backups[10]],
                                    eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 19)))
    @freeze_time("2017-08-20")
    def test_should_leave_youngest_backup_from_the_same_day_when_source_data_exists(
            self, _, _1, _2):  # nopep8 pylint: disable=C0301
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        b1 = create_backup(datetime(2017, 1, 1, hour=13, minute=15))
        b2 = create_backup(datetime(2017, 1, 1, hour=16, minute=30))
        backups = [b1, b2]
        backups_expected_for_deletion = [b1]

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual(backups_expected_for_deletion,
                                    eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=False)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 1, 19)))
    @freeze_time("2017-08-20")
    def test_remove_all_backups_if_source_table_doesnt_exists_for_min_7_months(
            self, _, _1, _2):  # nopep8 pylint: disable=C0301
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        b1 = create_backup(datetime(2017, 1, 17))
        b2 = create_backup(datetime(2017, 1, 18))
        backups = [b1, b2]
        backups_expected_for_deletion = [b1, b2]

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual(backups_expected_for_deletion,
                                    eligible_for_deletion)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=False)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 8, 1)))
    @freeze_time("2017-08-20")
    def test_should_not_remove_any_backups_if_source_table_was_deleted_less_than_seven_months_ago(
            self, _, _1, _2):  # nopep8 pylint: disable=C0301
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backup = create_backup(datetime(2017, 8, 1))
        old_backup = create_backup(datetime(2016, 1, 17))

        backups = [young_backup, old_backup]
        backups_expected_for_deletion = []

        # when
        eligible_for_deletion = self.under_test \
            .get_backups_eligible_for_deletion(backups=list(backups),
                                               table_reference=reference)
        # then
        self.sortAndAssertListEqual(backups_expected_for_deletion,
                                    eligible_for_deletion)

    @freeze_time("2017-08-20")
    def test_should_remove_above_last_10_young_backups(self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        oldest_backup = create_backup(datetime(2017, 6, 1))
        _10_last_backups = create_backup_daily_sequence(10,
                                                        start_date=datetime(
                                                            2017, 6, 2))

        backups = list(_10_last_backups)
        backups.append(oldest_backup)

        backups_expected_for_deletion = [oldest_backup]

        # when
        eligible_for_deletion = \
            self.under_test.get_backups_eligible_for_deletion(
                backups=list(backups),
                table_reference=reference)
        # then
        self.sortAndAssertListEqual(backups_expected_for_deletion,
                                    eligible_for_deletion)

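    # Helper: order-insensitive comparison of backup lists, sorting both sides
    # by creation time before asserting equality.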
    def sortAndAssertListEqual(self, backup_list1, backup_list2):
        sorted_list1 = Backup.sort_backups_by_create_time_desc(backup_list1)
        sorted_list2 = Backup.sort_backups_by_create_time_desc(backup_list2)
        self.assertListEqual(sorted_list1, sorted_list2)
Example #19
class TestTableBackup(unittest.TestCase):
    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_datastore_v3_stub()
        self.testbed.init_memcache_stub()
        self.testbed.init_app_identity_stub()
        self.testbed.init_taskqueue_stub(
            root_path=os.path.join(os.path.dirname(__file__), 'resources'))
        self.task_queue_stub = utils.init_testbed_queue_stub(self.testbed)
        ndb.get_context().clear_cache()

    def tearDown(self):
        self.testbed.deactivate()

    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BackupProcess, 'start')
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata,
                  'is_daily_partitioned',
                  return_value=True)
    @patch.object(BigQueryTableMetadata, 'is_partition', return_value=True)
    @patch.object(BigQueryTableMetadata, 'is_empty', return_value=False)
    def test_that_backup_are_scheduled_for_non_empty_single_partition(
            self, _, _1, _2, _3, backup_start):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id="20170303")

        # when
        TableBackup.start(table_reference)

        # then
        backup_start.assert_called_once()

    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata,
                  'is_daily_partitioned',
                  return_value=False)
    @patch.object(BackupProcess, 'start')
    def test_that_table_backup_is_scheduled_for_not_partitioned_table(
            self, backup_start, _, _1):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id=None)
        # when
        TableBackup.start(table_reference)

        # then
        backup_start.assert_called_once()

    @patch.object(request_correlation_id, 'get', return_value='correlation-id')
    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata,
                  'is_daily_partitioned',
                  return_value=True)
    @patch.object(BigQueryTableMetadata, 'is_partition', return_value=False)
    @patch.object(BigQueryTableMetadata, 'is_empty', return_value=False)
    def test_that_backup_for_partitions_is_scheduled_for_partitioned_table(
            self, _, _1, _2, _3, _4):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id=None)

        # when
        TableBackup.start(table_reference)

        # then
        tasks = self.task_queue_stub.get_filtered_tasks(
            queue_names='backup-scheduler')

        self.assertEqual(len(tasks), 1)
        self.assertEqual(
            tasks[0].payload,
            'projectId=test-project&tableId=test-table&datasetId=test-dataset')

    @patch.object(request_correlation_id, 'get', return_value='correlation-id')
    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata,
                  'is_daily_partitioned',
                  return_value=True)
    @patch.object(BigQueryTableMetadata, 'is_partition', return_value=False)
    @patch.object(BigQueryTableMetadata, 'is_empty', return_value=True)
    def test_that_backup_for_partitions_is_scheduled_for_empty_partitioned_table(
            self, _, _1, _2, _3, _4):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id=None)

        # when
        TableBackup.start(table_reference)

        # then
        tasks = self.task_queue_stub.get_filtered_tasks(
            queue_names='backup-scheduler')

        self.assertEqual(len(tasks), 1)
        self.assertEqual(
            tasks[0].payload,
            'projectId=test-project&tableId=test-table&datasetId=test-dataset')
Example #20
class TestGracePeriodAfterDeletionFilter(unittest.TestCase):
    def setUp(self):
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_datastore_v3_stub()
        self.testbed.init_memcache_stub()
        ndb.get_context().clear_cache()
        patch('googleapiclient.discovery.build').start()
        patch('oauth2client.client.GoogleCredentials.get_application_default') \
            .start()
        self.under_test = GracePeriodAfterDeletionFilter()

    def tearDown(self):
        self.testbed.deactivate()
        patch.stopall()

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=False)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 7, 1)))
    @freeze_time("2017-08-20")
    def test_should_keep_old_backup_when_source_table_was_deleted_only_recently(
            self, _, _1, _2):  # nopep8 pylint: disable=C0301
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        b1 = create_backup(datetime(2015, 6, 1))
        # when
        backups_to_retain = self.under_test.filter(backups=[b1],
                                                   table_reference=reference)
        # then
        self.assertListEqual([b1], backups_to_retain)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=False)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2017, 7, 1)))
    @freeze_time("2017-08-20")
    def test_should_keep_young_backups_even_if_source_table_is_deleted(
            self, _, _1, _2):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        b1 = create_backup(datetime(2017, 6, 1))
        # when
        backups_to_retain = self.under_test.filter(backups=[b1],
                                                   table_reference=reference)
        # then
        self.assertListEqual([b1], backups_to_retain)

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=False)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2015, 7, 1)))
    @freeze_time("2017-08-20")
    def test_should_delete_old_backups_if_source_table_is_gone_for_long(
            self, _, _1, _2):  # nopep8 pylint: disable=C0301
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        b1 = create_backup(datetime(2015, 6, 1))
        # when
        backups_to_retain = self.under_test.filter(backups=[b1],
                                                   table_reference=reference)
        # then
        self.assertFalse(backups_to_retain)

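    # Sample BigQuery error returned when partition information is requested
    # from a table that is not partitioned; used by the tests below.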
    is_not_partitioned_http_error_response = '''{
                             "error": {
                              "code": 400,
                              "message": "Cannot read partition information from a table that is not partitioned: sit-cymes-euw1-mlservices:MarketingRecommendations_Derived_Restricted.DeliveredOrder$2420180805",
                              "errors": [
                               {
                                "message": "Cannot read partition information from a table that is not partitioned: sit-cymes-euw1-mlservices:MarketingRecommendations_Derived_Restricted.DeliveredOrder$2420180805",
                                "domain": "global",
                                "reason": "invalid"
                               }
                              ],
                              "status": "INVALID_ARGUMENT"
                             }
                            } '''

    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  side_effect=HttpError(
                      Mock(status=400),
                      is_not_partitioned_http_error_response))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2015, 7, 1)))
    @freeze_time("2017-08-20")
    def test_should_delete_old_backups_if_source_partitioned_table_is_gone_for_long_and_new_table_with_the_same_name_is_not_partitioned(
            self, _, _1):  # nopep8 pylint: disable=C0301
        # given
        partitioned_reference = TableReference('example-project-id',
                                               'example-dataset-id',
                                               'example-table-id',
                                               'example-partition-id')
        b1 = create_backup(datetime(2015, 6, 1))
        # when
        backups_to_retain = self.under_test.filter(
            backups=[b1], table_reference=partitioned_reference)
        # then
        self.assertFalse(backups_to_retain)

    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  side_effect=HttpError(
                      Mock(status=400),
                      is_not_partitioned_http_error_response))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2015, 7, 1)))
    @freeze_time("2017-08-20")
    def test_should_raise_exception_if_source_non_partitioned_table_is_gone_for_long_and_new_table_with_the_same_name_is_not_partitioned(
            self, _, _1):  # nopep8 pylint: disable=C0301
        # given
        non_partitioned_reference = TableReference('example-project-id',
                                                   'example-dataset-id',
                                                   'example-table-id')
        b1 = create_backup(datetime(2015, 6, 1))

        # when-then
        self.assertRaises(HttpError, self.under_test.filter, [b1],
                          non_partitioned_reference)
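
    # Together, the two HttpError tests above describe how the filter reacts to
    # BigQuery's 400 "not partitioned" response when a table with the same name
    # has reappeared without partitioning: for a reference that carries a
    # partition_id the error is treated as "the partitioned source is gone" and
    # old backups become eligible for deletion, while for a plain table
    # reference the same error is re-raised. A rough sketch of that branch; the
    # _is_not_partitioned_error helper is an assumed name, not this project's
    # API:
    #
    #     try:
    #         metadata = BigQueryTableMetadata.get_table_by_reference(reference)
    #     except HttpError as error:
    #         if reference.is_partition() and _is_not_partitioned_error(error):
    #             metadata = None  # behave as if the source no longer exists
    #         else:
    #             raise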

    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_reference',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(Backup,
                  'get_table',
                  return_value=Table(last_checked=datetime(2015, 7, 1)))
    @freeze_time("2017-08-20")
    def test_should_retain_backups_if_source_table_still_exists(
            self, _, _1, _2):  # nopep8 pylint: disable=C0301
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        b1 = create_backup(datetime(2015, 6, 1))
        # when
        backups_to_retain = self.under_test.filter(backups=[b1],
                                                   table_reference=reference)
        # then
        self.assertListEqual([b1], backups_to_retain)

    def test_should_gracefully_deal_with_empty_backup_list(self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        # when
        backups_to_retain = self.under_test.filter(backups=[],
                                                   table_reference=reference)
        # then
        self.assertFalse(backups_to_retain)


class TestAfterBackupActionHandler(unittest.TestCase):
    def setUp(self):
        self.under_test = webtest.TestApp(after_backup_action_handler.app)
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_memcache_stub()
        self.testbed.init_datastore_v3_stub()
        self.testbed.init_app_identity_stub()
        self.taskqueue_stub = utils.init_testbed_queue_stub(self.testbed)

    def tearDown(self):
        self.testbed.deactivate()

    @patch.object(BigQuery, '_create_credentials', return_value=None)
    @patch.object(BigQuery, '_create_http')
    def test_should_create_datastore_backup_entity(self, _create_http, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({
                'status': '200'
            }, content('tests/json_samples/bigquery_v2_test_schema.json')),
            ({
                'status': '200'
            },
             content('tests/json_samples/table_get/'
                     'bigquery_partitioned_table_get.json'))
        ])

        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps({
            "data": data,
            "jobJson": JobResultExample.DONE
        },
                             cls=RequestEncoder)
        copy_job_result = CopyJobResult(json.loads(payload).get('jobJson'))

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertEqual(backup.dataset_id, "target_dataset_id")
        self.assertEqual(backup.table_id, "target_table_id")
        self.assertTrue(isinstance(backup.created, datetime))
        self.assertEqual(backup.created, copy_job_result.end_time)

        self.assertTrue(isinstance(backup.last_modified, datetime))
        self.assertEqual(backup.last_modified, copy_job_result.start_time)
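
    # For reference, the payload accepted by the /callback/backup-created/...
    # endpoint has the shape assembled above (tables serialised via
    # RequestEncoder; the jobJson is a copy-job result such as
    # JobResultExample.DONE):
    #
    #     {
    #       "data": {"sourceBqTable": <serialised BigQueryTable>,
    #                "targetBqTable": <serialised BigQueryTable>},
    #       "jobJson": <copy-job result JSON>
    #     }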

    @patch.object(BigQuery, '_create_credentials', return_value=None)
    @patch('src.backup.after_backup_action_handler.ErrorReporting')
    @patch.object(BigQuery, '_create_http')
    def test_should_not_create_backups_entity_if_copy_job_failed(
            self, _create_http, error_reporting, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({
                'status': '200'
            }, content('tests/json_samples/bigquery_v2_test_schema.json')),
        ])
        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps(
            {
                "data": data,
                "jobJson": JobResultExample.DONE_WITH_NOT_REPETITIVE_ERRORS
            },
            cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertIsNone(backup)
        error_reporting.assert_called_once()

    @patch.object(BigQuery, '_create_credentials', return_value=None)
    @patch('src.backup.after_backup_action_handler.ErrorReporting')
    @patch.object(BigQuery, '_create_http')
    def test_should_not_create_backups_entity_if_backup_table_doesnt_exist(
            self, _create_http, error_reporting, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({
                'status': '200'
            }, content('tests/json_samples/bigquery_v2_test_schema.json')),
            (
                {
                    'status': '404'
                },  # Table not found
                content('tests/json_samples/table_get/'
                        'bigquery_partitioned_table_get.json'))
        ])

        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps({
            "data": data,
            "jobJson": JobResultExample.DONE
        },
                             cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertIsNone(backup)
        error_reporting.assert_called_once()
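
    # The two failure tests above share a contract: the callback still answers
    # HTTP 200 (presumably so the originating task is not retried), but no
    # Backup entity is written and the problem is routed to ErrorReporting
    # instead.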

    @patch('src.commons.big_query.big_query.BigQuery.__init__',
           Mock(return_value=None))
    @patch.object(BigQueryTableMetadata,
                  'get_table_by_big_query_table',
                  return_value=BigQueryTableMetadata(None))
    @patch.object(BigQueryTableMetadata, 'table_exists', return_value=True)
    @patch.object(BigQueryTableMetadata,
                  'get_last_modified_datetime',
                  return_value=datetime.utcnow())
    @patch.object(BigQueryTableMetadata,
                  'table_size_in_bytes',
                  return_value=123)
    @patch.object(BigQueryTableMetadata,
                  'has_partition_expiration',
                  return_value=True)
    @patch.object(BigQuery, '_create_credentials', return_value=None)
    @patch.object(BigQuery, 'disable_partition_expiration')
    def test_should_disable_partition_expiration_if_backup_table_has_it(
            self, disable_partition_expiration, _, _1, _2, _3, _4, _5):
        # given
        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps({
            "data": data,
            "jobJson": JobResultExample.DONE
        },
                             cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)

        # then
        self.assertEqual(response.status_int, 200)
        disable_partition_expiration.assert_called_once()
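
    # This test fixes the post-backup behaviour for partitioned targets: once
    # the backup table exists and reports a partition expiration, the handler
    # is expected to switch that expiration off so backup partitions are not
    # expired automatically. Roughly (a sketch of the expected flow, not the
    # handler's literal code; the call arguments are not asserted here):
    #
    #     if backup_table_metadata.table_exists():
    #         # create the Backup datastore entity ...
    #         if backup_table_metadata.has_partition_expiration():
    #             big_query.disable_partition_expiration(...)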

    @patch.object(BigQuery, '_create_credentials', return_value=None)
    @patch.object(BigQuery, '_create_http')
    def test_should_return_400_for_wrong_data(self, _create_http, _):
        # given
        _create_http.return_value = test_utils.create_bq_generic_mock()

        payload = '{"data": <ILikeXML/>, "jobJson": {"state": "DONE"}}'
        expected_error = "{\"status\": \"failed\", \"message\": \"No JSON " \
                         "object could be decoded\", \"httpStatus\": 400}"

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table',
            params=payload,
            expect_errors=True)

        # then
        self.assertEqual(400, response.status_int)
        self.assertEqual(response.body, expected_error)

    @patch.object(BigQuery, '_create_credentials', return_value=None)
    @patch.object(BigQuery, '_create_http')
    def test_should_return_400_for_incomplete_data_json(self, _create_http, _):
        # given
        _create_http.return_value = test_utils.create_bq_generic_mock()
        payload = '{"data": {}, "jobJson": {"state": "DONE"}}'
        expected_error = \
            "{\"status\": \"failed\", \"message\": " \
            "\"JSON has no sourceBqTable or targetBqTable parameters\", " \
            "\"httpStatus\": 400}"

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table',
            params=payload,
            expect_errors=True)

        # then
        self.assertEqual(400, response.status_int)
        self.assertEqual(response.body, expected_error)