class TestTenYoungBackupVersionsFilter(unittest.TestCase):
    def setUp(self):
        self.under_test = TenYoungBackupVersionsFilter()

    @parameterized.expand([[0], [1], [2], [5], [7], [10]])
    @freeze_time("2019-08-01")
    def test_should_not_filter_out_if_there_are_10_or_fewer_young_table_backups(
            self, count):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        backups = backup_utils.create_backup_daily_sequence(
            count, start_date=datetime(2019, 7, 1))

        # when
        backups_to_retain = self.under_test.filter(list(backups), reference)
        # then
        self.assertListEqual(backups_to_retain, backups)

    @parameterized.expand([
        [TableReference('example-project-id', 'example-dataset-id',
                        'example-table-id')],
        [TableReference('example-project-id', 'example-dataset-id',
                        'example-table-id', '20170601')]
    ])
    @freeze_time("2019-08-01")
    def test_should_filter_out_young_backups_above_10_version(self, reference):
        # given
        backups = backup_utils.create_backup_daily_sequence(
            14, start_date=datetime(2019, 7, 1))
        expected_retained_backups = backups[:10]
        shuffle(backups)

        # when
        backups_to_retain = self.under_test.filter(backups, reference)
        # then
        self.assertListEqual(backups_to_retain, expected_retained_backups)

    @freeze_time("2019-09-02")
    def test_should_filter_out_young_backups_above_10_version_but_retain_old_backups(
            self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backups = backup_utils.create_backup_daily_sequence(
            14, start_date=datetime(2019, 8, 15))
        old_backups = backup_utils.create_backup_daily_sequence(
            3, start_date=datetime(2019, 1, 1))
        all_backups = list(young_backups + old_backups)

        expected_retained_backups = list(young_backups[:10] + old_backups)
        shuffle(all_backups)

        # when
        backups_to_retain = self.under_test.filter(all_backups, reference)
        # then
        self.assertListEqual(backups_to_retain, expected_retained_backups)
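
The tests above only pin down the filter's contract: at most ten of the "young" backups are retained, while older backups are always kept. A minimal sketch of a filter with that behaviour follows; the 60-day age cut-off, the `created` attribute, and the newest-first sort order are assumptions chosen for illustration, not the project's actual implementation.

from datetime import datetime, timedelta


class TenYoungBackupVersionsFilterSketch(object):
    YOUNG_BACKUP_MAX_AGE = timedelta(days=60)  # assumed cut-off
    VERSIONS_TO_KEEP = 10

    def filter(self, backups, table_reference):
        # Keep the 10 newest backups younger than the cut-off plus every
        # backup older than the cut-off; `created` is an assumed attribute.
        age_border = datetime.utcnow() - self.YOUNG_BACKUP_MAX_AGE
        ordered = sorted(backups, key=lambda b: b.created, reverse=True)
        young = [b for b in ordered if b.created >= age_border]
        old = [b for b in ordered if b.created < age_border]
        return young[:self.VERSIONS_TO_KEEP] + old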
 def __create_non_partitioned_table_references(self):
     source_table_reference = TableReference(SOURCE_PROJECT_ID,
                                             SOURCE_DATASET_ID,
                                             SOURCE_TABLE_ID, None)
     target_table_reference = TableReference(TARGET_PROJECT_ID,
                                             TARGET_DATASET_ID,
                                             TARGET_TABLE_ID, None)
     return source_table_reference, target_table_reference
Example 3
    def test_parse_tab_ref(self):
        #when
        actual_table_ref = TableReference.parse_tab_ref(
            "proj321:dataset123.tableabc")

        #then
        self.assertEqual(TableReference("proj321", "dataset123", "tableabc"),
                         actual_table_ref)
Example 4
    def test_parse_tab_ref_for_partitioned_table(self):
        #when
        actual_table_ref = TableReference.parse_tab_ref(
            "proj321:dataset123.tableabc$20180226")

        #then
        self.assertEqual(
            TableReference("proj321", "dataset123", "tableabc", "20180226"),
            actual_table_ref)
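
Both parse tests exercise the textual form `project:dataset.table` with an optional `$partition` suffix. A hedged sketch of a parser for that format is shown below as a standalone helper; the helper name is hypothetical and the project's real `TableReference.parse_tab_ref` may be implemented differently.

def parse_table_reference_text(text):
    # Split "project:dataset.table" and the optional "$partition" suffix.
    project_id, rest = text.split(':', 1)
    dataset_id, table_part = rest.split('.', 1)
    partition_id = None
    if '$' in table_part:
        table_part, partition_id = table_part.split('$', 1)
    return TableReference(project_id, dataset_id, table_part, partition_id)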
 def table_reference(self):
     table_reference = self.table_metadata['tableReference']
     if self.is_partition():
         return TableReference(table_reference['projectId'],
                               table_reference['datasetId'],
                               self.get_table_id(), self.get_partition_id())
     else:
         return TableReference(table_reference['projectId'],
                               table_reference['datasetId'],
                               table_reference['tableId'])
Example 6
    def test_restore_item_default_state_is_in_progress(self):
        # given
        source_table = TableReference(project_id='source_project_id',
                                      dataset_id='source_dataset_id',
                                      table_id='source_table_id')
        target_table = TableReference(project_id='target_project_id',
                                      dataset_id='target_dataset_id',
                                      table_id='target_table_id')

        # when
        result = RestoreItem.create(source_table, target_table)

        # then
        self.assertEqual(RestoreItem.STATUS_IN_PROGRESS, result.status)
Example 7
 def __create_restore_items(count=1):
     result = []
     for i in range(0, count):
         source_table_reference = TableReference(
             "source_project_id_" + str(i), "source_dataset_id_" + str(i),
             "source_table_id_" + str(i), "source_partition_id_" + str(i))
         target_table_reference = TableReference(
             "target_project_id_" + str(i), "target_dataset_id_" + str(i),
             "target_table_id_" + str(i), "target_partition_id_" + str(i))
         restore_item = RestoreItem.create(source_table_reference,
                                           target_table_reference)
         result.append(
             (restore_item, source_table_reference, target_table_reference))
     return result
    def test_on_demand_request_for_non_partitioned_table_is_properly_parsed(
            self, on_demand_table_backup_start):
        # given
        table_reference = TableReference('example-proj-name',
                                         'example-dataset-name',
                                         'example-table-name')
        url = '/tasks/backups/on_demand/table/{}/{}/{}'.format(
            table_reference.get_project_id(), table_reference.get_dataset_id(),
            table_reference.get_table_id())

        # when
        self.under_test.get(url)

        # then
        on_demand_table_backup_start.assert_called_with(table_reference)
    def test_on_demand_request_for_partitioned_but_without_passing_partition_should_cause_400(
            self, on_demand_table_backup_start):
        # given
        table_reference = TableReference('example-proj-name',
                                         'example-dataset-name',
                                         'example-table-name')
        url = '/tasks/backups/on_demand/table/{}/{}/{}'.format(
            table_reference.get_project_id(), table_reference.get_dataset_id(),
            table_reference.get_table_id())

        # when
        response = self.under_test.get(url, expect_errors=True)

        # then
        self.assertEqual(400, response.status_int)
Example 10
 def test_table_str(self):
     # given
     table = TableReference("project1", "dataset1", "table1")
     # when
     table_string = str(table)
     # then
     self.assertEqual(table_string, "project1:dataset1.table1")
    def test_should_disable_partition_expiration_if_backup_table_has_it(
            self, disable_partition_expiration, _, _1, _2, _3, _4, _5):
        # given
        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps({
            "data": data,
            "jobJson": JobResultExample.DONE
        },
                             cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)

        # then
        self.assertEqual(response.status_int, 200)
        disable_partition_expiration.assert_called_once()
Example 12
    def test_schedule(self, schedule_tasks_for_partition_backup):
        # given
        project_id = "test-project"
        dataset_id = "test-dataset"
        table_id = "test-table"
        partition_id_1 = "20170330"
        partition_id_2 = "20170331"

        table_reference = TableReference(project_id=project_id,
                                         dataset_id=dataset_id,
                                         table_id=table_id,
                                         partition_id=None)

        big_query = Mock()

        big_query.list_table_partitions.return_value = [
            {"partitionId": partition_id_1},
            {"partitionId": partition_id_2}
        ]

        # when
        TablePartitionsBackupScheduler(table_reference, big_query).start()

        # then
        schedule_tasks_for_partition_backup.assert_has_calls([
            call(project_id, dataset_id, table_id,
                 [partition_id_1, partition_id_2])
        ])
    def get(self, project_id, dataset_id, table_id):
        partition_id = self.request.get('partitionId', None)
        is_restore_to_source_project = self.request.get(
            'isRestoreToSourceProject', None)
        target_dataset_id = self.request.get('targetDatasetId', None)
        create_disposition = self.request.get('createDisposition', None)
        write_disposition = self.request.get('writeDisposition', None)

        target_project_id = None if is_restore_to_source_project \
            else configuration.default_restoration_project_id

        validators.validate_restore_request_params(
            target_project_id=target_project_id,
            target_dataset_id=target_dataset_id,
            create_disposition=create_disposition,
            write_disposition=write_disposition)

        restoration_datetime = self.__get_restoration_datetime()

        table_reference = TableReference(project_id, dataset_id, table_id,
                                         partition_id)

        restore_data = TableRestoreService.restore(
            table_reference, target_project_id, target_dataset_id,
            create_disposition, write_disposition, restoration_datetime)
        self._finish_with_success(restore_data)
Example 14
    def __process(self, request_body_json):
        copy_job_results = CopyJobResult(request_body_json.get('jobJson'))
        data = request_body_json.get('data')

        if copy_job_results.has_errors():
            error_message = "Copy job failed with errors: {} ." \
                            "Backup for source: {}, target: {} " \
                            "has not been done. " \
                .format(copy_job_results.error_message,
                        data["sourceBqTable"], data["targetBqTable"])
            ErrorReporting().report(error_message)
            return

        backup_table_metadata = BigQueryTableMetadata.get_table_by_big_query_table(
            copy_job_results.target_bq_table)

        if backup_table_metadata.table_exists():
            self.__create_backup(backup_table_metadata, copy_job_results)
            if backup_table_metadata.has_partition_expiration():
                self.__disable_partition_expiration(
                    TableReference.from_bq_table(
                        copy_job_results.target_bq_table))
        else:
            ErrorReporting().report(
                "Backup table {0} does not exist. Backup entity is not created"
                .format(copy_job_results.target_bq_table))
Example 15
    def test_should_delete_same_day_duplicates_backups(self):
        #given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')

        first_5_backups = create_backup_daily_sequence(
            5, start_date=datetime(2017, 6, 1, 12))
        second_5_backups = create_backup_daily_sequence(
            5, start_date=datetime(2017, 6, 6, 12))
        first_5_backups_duplicated = create_backup_daily_sequence(
            5, start_date=datetime(2017, 6, 1, 14))

        backups = list(first_5_backups + second_5_backups +
                       first_5_backups_duplicated)
        backups_expected_for_deletion = list(first_5_backups)

        #when
        eligible_for_deletion = \
            self.under_test.get_backups_eligible_for_deletion(
                backups=list(backups),
                table_reference=reference)
        #then
        self.sortAndAssertListEqual(backups_expected_for_deletion,
                                    eligible_for_deletion)
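
The test above only fixes the expected outcome: when several backups of a table share the same creation day, all but the newest backup of that day become eligible for deletion. A minimal sketch of such a deduplication step follows; the helper name and the `created` attribute are assumptions, not the project's actual policy code.

from collections import defaultdict


def same_day_duplicates(backups):
    # Group backups by creation day and keep only the newest per day;
    # the rest become eligible for deletion. `created` is an assumed attribute.
    by_day = defaultdict(list)
    for backup in backups:
        by_day[backup.created.date()].append(backup)
    eligible_for_deletion = []
    for day_backups in by_day.values():
        day_backups.sort(key=lambda b: b.created, reverse=True)
        eligible_for_deletion.extend(day_backups[1:])
    return eligible_for_deletion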
    def generate_restore_items(cls, project_id, dataset_id, target_project_id,
                               target_dataset_id, max_partition_days):
        if max_partition_days:
            table_entities = Table \
                .get_tables_with_max_partition_days(project_id, dataset_id,
                                                    max_partition_days)
        else:
            table_entities = Table.get_tables(project_id, dataset_id)

        for table_entity_sublist in paginated(1000, table_entities):
            restore_items = []
            for table_entity, backup_entity in Table.get_last_backup_for_tables(
                    table_entity_sublist):
                if backup_entity is not None:
                    source_table_reference = \
                        RestoreTableReference.backup_table_reference(
                            table_entity, backup_entity)

                    target_table_reference = TableReference(
                        target_project_id,
                        target_dataset_id,
                        table_entity.table_id,
                        table_entity.partition_id
                    )

                    restore_item = RestoreItem.create(source_table_reference,
                                                      target_table_reference)
                    restore_items.append(restore_item)

            logging.info("Restore items generator yields %s restore items",
                         len(restore_items))
            yield restore_items
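
generate_restore_items batches table entities through a `paginated(page_size, iterable)` helper. Assuming that helper simply yields lists of at most page_size items, a minimal sketch could look like the following; the real implementation may differ.

def paginated(page_size, iterable):
    # Yield successive lists containing at most `page_size` items.
    page = []
    for item in iterable:
        page.append(item)
        if len(page) == page_size:
            yield page
            page = []
    if page:
        yield page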
Example 17
    def test_should_fill_deleted_field_in_backup_entity_if_table_not_found_error_during_deletion(
            self, _):
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime.datetime(2017, 2, 1, 16, 30))
        table.put()
        reference = TableReference.from_table_entity(table)
        backup1 = backup_utils.create_backup(
            datetime.datetime(2017, 2, 1, 16, 30), table, table_id="backup1")
        backup2 = backup_utils.create_backup(
            datetime.datetime(2017, 2, 2, 16, 30), table, table_id="backup2")
        ndb.put_multi([backup1, backup2])
        self.policy.get_backups_eligible_for_deletion = Mock(
            return_value=[backup1, backup2])

        # when
        self.under_test.perform_retention(reference, table.key.urlsafe())

        # then
        self.assertIsNotNone(Backup.get_by_key(backup1.key).deleted)
        self.assertIsNotNone(Backup.get_by_key(backup2.key).deleted)
Example 18
 def __update_last_check(self, table_entity):
     logging.info(
         "Updating last_checked in entity for %s",
         TableReference(self.project_id, self.dataset_id, self.table_id,
                        self.partition_id))
     table_entity.last_checked = self.now
     table_entity.put()
Example 19
 def target_table_reference(self):
     return TableReference(
         project_id=self.target_table.project_id,
         dataset_id=self.target_table.dataset_id,
         table_id=self.target_table.table_id,
         partition_id=self.target_table.partition_id
     )
Example 20
    def __delete_backup_in_bq_and_update_datastore(self, backup):
        try:
            table_reference = TableReference(configuration.backup_project_id,
                                             backup.dataset_id,
                                             backup.table_id)

            self.big_query_service.delete_table(table_reference)
            logging.debug(
                u"Table %s deleted from BigQuery. "
                u"Updating datastore. Retention policy used: '%s'",
                table_reference,
                type(self.policy).__name__)
            Backup.mark_backup_deleted(backup.key)
        except TableNotFoundException:
            Backup.mark_backup_deleted(backup.key)
            logging.warning(
                u"Table '%s' was not found. But we updated datastore anyway",
                backup.table_id)
        except HttpError as ex:
            error_message = u"Unexpected HttpError occurred while deleting " \
                            u"table '{}', error: {}: {}"\
                .format(backup.table_id, type(ex), ex)
            logging.exception(error_message)
        except Exception as ex:
            error_message = u"Could not delete backup '{}' error: {}: {}"\
                .format(backup.table_id, type(ex), ex)
            logging.exception(error_message)
Example 21
    def test_should_delete_many_today_duplicates_and_11th_young_version_after_deduplication_and_retain_old_backup(
            self, _1, _2, _3):
        #given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')

        young_backups = create_backup_daily_sequence(
            10, start_date=datetime(2017, 8, 1))
        newest_duplicated_backup = create_backup(datetime(2017, 8, 19, 10))

        today_duplicated_backups = [
            newest_duplicated_backup,
            create_backup(datetime(2017, 8, 19, 9)),
            create_backup(datetime(2017, 8, 19, 8)),
            create_backup(datetime(2017, 8, 19, 7))
        ]

        old_backup = create_backup(datetime(2016, 8, 19, 10))

        backups = list(young_backups + today_duplicated_backups + [old_backup])
        backups_expected_for_deletion = list([young_backups[9]] +
                                             today_duplicated_backups[1:])

        #when
        eligible_for_deletion = \
            self.under_test.get_backups_eligible_for_deletion(
                backups=list(backups),
                table_reference=reference)

        #then
        self.sortAndAssertListEqual(backups_expected_for_deletion,
                                    eligible_for_deletion)
Example 22
    def test_that_last_checked_date_is_updated_even_if_table_should_not_be_backed_up( # nopep8 pylint: disable=C0301
            self, copy_table, _1, _2):
        # given
        table = Table(project_id="test-project",
                      dataset_id="test-dataset",
                      table_id="test-table",
                      last_checked=datetime.datetime(2017, 3, 3))

        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table")

        # when
        table.put()

        BackupProcess(table_reference, self.big_query,
                      self.big_query_table_metadata).start()

        table_entity = Table.get_table("test-project", "test-dataset",
                                       "test-table")

        # then
        self.assertEqual(table_entity.last_checked,
                         datetime.datetime(2017, 4, 4))
        copy_table.assert_not_called()
 def to_table_reference(table):
     partition_id = table['partitionId'] \
         if table['partitionId'] != "None" else None
     return TableReference(project_id=table['projectId'],
                           dataset_id=table['datasetId'],
                           table_id=table['tableId'],
                           partition_id=partition_id)
Example 24
 def test_partition_str(self):
     # given
     table_partition = TableReference("project1", "dataset1", "table1",
                                      "partition1")
     # when
     table_partition_string = str(table_partition)
     # then
     self.assertEqual(table_partition_string,
                      "project1:dataset1.table1$partition1")
Example 25
 def test_should_gracefully_deal_with_empty_backup_list(self):
     # given
     reference = TableReference('example-project-id', 'example-dataset-id',
                                'example-table-id')
     # when
     backups_to_retain = self.under_test.filter(backups=[],
                                                table_reference=reference)
     # then
     self.assertFalse(backups_to_retain)
    def test_default_parameters_for_table_restoration(self, restore):
        # given & when
        self.under_test.get(RESTORE_TABLE_URL + '?')

        # then
        expected_table_reference = \
            TableReference('project-id', 'dataset_id', 'table_id')
        restore.assert_called_once_with(expected_table_reference, '', None,
                                        None, None, None)
    def __generate_expected_restore_item(
            table,
            target_project_id=RESTORATION_PROJECT_ID,
            custom_target_dataset=None):
        expected_source = TableReference(
            project_id=BACKUP_PROJECT_ID,
            dataset_id=table.last_backup.dataset_id,
            table_id=table.last_backup.table_id,
            partition_id=table.partition_id)

        target_dataset = TestDatasetRestoreItemsGenerator.__create_target_dataset(
            custom_target_dataset)
        expected_target = TableReference(project_id=target_project_id,
                                         dataset_id=target_dataset,
                                         table_id=table.table_id,
                                         partition_id=table.partition_id)
        expected_restore_item = RestoreItem.create(expected_source,
                                                   expected_target)
        return expected_restore_item
Example 28
    def __create_target_table_reference(restore_request, source_entity):
        target_project_id = restore_request.target_project_id
        target_dataset_id = restore_request.target_dataset_id

        if target_project_id is None:
            target_project_id = source_entity.project_id
        if target_dataset_id is None:
            target_dataset_id = source_entity.dataset_id
        return TableReference(target_project_id, target_dataset_id,
                              source_entity.table_id,
                              source_entity.partition_id)
    def test_should_throw_parameter_validation_exception_if_table_is_partitioned_but_partition_number_was_not_given(
            self, _1, _2):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id="")

        # when-then
        with self.assertRaises(ParameterValidationException):
            OnDemandTableBackup.start(table_reference)
Example 30
    def test_that_dataset_will_not_be_unnecessary_created_twice(self,
                                                                _, _1, _2, _3):
        # given
        table_reference_1 = TableReference(project_id="test-project",
                                           dataset_id="test-dataset",
                                           table_id="test-table-1")
        table_reference_2 = TableReference(project_id="test-project",
                                           dataset_id="test-dataset",
                                           table_id="test-table-2")

        # when
        self.big_query.create_dataset = MagicMock()

        BackupProcess(table_reference_1, self.big_query,
                      self.big_query_table_metadata).start()
        BackupProcess(table_reference_2, self.big_query,
                      self.big_query_table_metadata).start()

        # then
        self.big_query.create_dataset.assert_called_once()