Example #1
0
    def test_should_return_the_same_parent_table_for_child_backups(self):
        """Two backups created under one Table key must both resolve to
        that same parent Table entity via Backup.get_table().

        Fix: replaced Python-2-only leading-zero int literals (02, 03, 00)
        with plain ints; the old form is a SyntaxError on Python 3.
        """
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime(2017, 2, 1, 16, 30))
        table.put()
        backup_one = Backup(parent=table.key,
                            last_modified=datetime(2017, 2, 1, 16, 30),
                            created=datetime(2017, 2, 1, 16, 30),
                            dataset_id='targetDatasetId',
                            table_id='targetTableId',
                            numBytes=1234)
        backup_two = Backup(parent=table.key,
                            last_modified=datetime(2018, 3, 2, 0, 0),
                            created=datetime(2018, 3, 2, 0, 0),
                            dataset_id='targetDatasetId',
                            table_id='targetTableId',
                            numBytes=1234)

        # when
        table1 = Backup.get_table(backup_one)
        table2 = Backup.get_table(backup_two)

        # then
        self.assertEqual(table, table1)
        self.assertEqual(table1, table2)
    def __create_valid_backups():
        """Build four distinct Backup entities in the same dataset."""
        dataset = 'dataset_id_1'
        return [
            Backup(table_id='table_id_1', dataset_id=dataset),
            Backup(table_id='table_id_2', dataset_id=dataset),
            Backup(table_id='table_id_3', dataset_id=dataset),
            Backup(table_id='table_id_4', dataset_id=dataset),
        ]
    def __create_backups_with_part_of_referencing_same_table_in_bq():
        """Build four backups where the last two reference the same BQ table
        ('table_id_3'); returned deliberately out of creation order."""
        dataset = 'dataset_id_1'
        first = Backup(table_id='table_id_1', dataset_id=dataset)
        second = Backup(table_id='table_id_2', dataset_id=dataset)
        third = Backup(table_id='table_id_3', dataset_id=dataset)
        duplicate_of_third = Backup(table_id='table_id_3', dataset_id=dataset)

        return [duplicate_of_third, second, third, first]
    def test_should_fill_deleted_field_in_backup_entity_if_table_not_found_error_during_deletion(
            self, _):
        """Retention must mark backups deleted in datastore even when the
        BigQuery table is already gone (TableNotFoundException path)."""
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime.datetime(2017, 2, 1, 16, 30))
        table.put()
        reference = TableReference.from_table_entity(table)
        first_backup = backup_utils.create_backup(
            datetime.datetime(2017, 2, 1, 16, 30), table, table_id="backup1")
        second_backup = backup_utils.create_backup(
            datetime.datetime(2017, 2, 2, 16, 30), table, table_id="backup2")
        ndb.put_multi([first_backup, second_backup])
        self.policy.get_backups_eligible_for_deletion = Mock(
            return_value=[first_backup, second_backup])

        # when
        self.under_test.perform_retention(reference, table.key.urlsafe())

        # then: both entities got a 'deleted' timestamp
        self.assertIsNotNone(Backup.get_by_key(first_backup.key).deleted)
        self.assertIsNotNone(Backup.get_by_key(second_backup.key).deleted)
Example #5
0
    def __delete_backup_in_bq_and_update_datastore(self, backup):
        """Delete one backup table from BigQuery and mark the matching
        datastore Backup entity as deleted.

        Error handling (handler order matters):
          * TableNotFoundException - the BQ table is already gone; the
            datastore entity is still marked deleted so state converges.
          * HttpError - logged with traceback, entity left untouched.
          * any other Exception - logged, entity left untouched.
        """
        try:
            # Backup tables live in the dedicated backup project, not the
            # source table's project.
            table_reference = TableReference(configuration.backup_project_id,
                                             backup.dataset_id,
                                             backup.table_id)

            self.big_query_service.delete_table(table_reference)
            logging.debug(
                u"Table %s deleted from BigQuery. "
                u"Updating datastore. Retention policy used: '%s'",
                table_reference,
                type(self.policy).__name__)
            Backup.mark_backup_deleted(backup.key)
        except TableNotFoundException:
            # Mark deleted anyway, so the entity does not remain eligible
            # for deletion on every subsequent retention run.
            Backup.mark_backup_deleted(backup.key)
            logging.warning(
                u"Table '%s' was not found. But we updated datastore anyway",
                backup.table_id)
        except HttpError as ex:
            error_message = u"Unexpected HttpError occurred while deleting " \
                            u"table '{}', error: {}: {}"\
                .format(backup.table_id, type(ex), ex)
            logging.exception(error_message)
        except Exception as ex:
            # Deliberate catch-all: retention of other backups must not be
            # aborted by a single failing deletion.
            error_message = u"Could not delete backup '{}' error: {}: {}"\
                .format(backup.table_id, type(ex), ex)
            logging.exception(error_message)
Example #6
0
 def test_that_get_all_backups_sorted_will_return_only_these_with_null_deleted_column(
         self):  # nopep8 pylint: disable=C0301, W0613
     """get_all_backups_sorted() must return only live backups (deleted is
     None) and skip entities carrying a 'deleted' timestamp.

     Fix: replaced Python-2-only leading-zero int literals (02, 010-style)
     with plain ints; the old form is a SyntaxError on Python 3.
     """
     # given
     table = Table(project_id='example-proj-name',
                   dataset_id='example-dataset-name',
                   table_id='example-table-name',
                   last_checked=datetime(2017, 2, 1, 16, 30))
     table.put()
     backup1 = Backup(
         parent=table.key,
         last_modified=datetime(2017, 2, 1, 16, 30),
         created=datetime(2017, 2, 1, 16, 30),
         dataset_id='backup_dataset',
         table_id='backup1',
         numBytes=1234,
     )
     backup1.put()
     backup2 = Backup(parent=table.key,
                      last_modified=datetime(2017, 2, 1, 16, 30),
                      created=datetime(2017, 2, 1, 16, 30),
                      dataset_id='backup_dataset',
                      table_id='backup2',
                      numBytes=1234,
                      deleted=datetime(2017, 2, 10, 16, 30))
     backup2.put()
     # when
     existing_backups = Backup.get_all_backups_sorted(table.key)
     # then
     self.assertTrue(backup1 in existing_backups)
     self.assertTrue(backup2 not in existing_backups)
Example #7
0
    def test_should_not_sort_in_place(self):
        """sort_backups_by_create_time_desc must not mutate its argument."""
        # given
        newest = backup_utils.create_backup(datetime(2017, 2, 3, 16, 30))
        middle = backup_utils.create_backup(datetime(2017, 2, 2, 16, 30))
        oldest = backup_utils.create_backup(datetime(2017, 2, 1, 16, 30))
        original_order = [newest, oldest, middle]
        passed_in = [newest, oldest, middle]

        # when
        Backup.sort_backups_by_create_time_desc(passed_in)

        # then: the list we passed in is still in its original order
        self.assertListEqual(original_order, passed_in)
Example #8
0
    def test_should_return_true_if_table_was_changed_after_last_backup(self, _):
        """The on-demand predicate requests a backup when the table changed
        after the most recent backup."""
        # given
        existing_backup = Backup(
            parent=self.table.key,
            last_modified=datetime(2016, 11, 13, 15, 00)
        )
        existing_backup.put()
        under_test = OnDemandBackupPredicate()

        # when
        outcome = under_test.test(self.big_query_table_metadata, self.table)
        # then
        self.assertTrue(outcome, "OnDemandShouldBackupPredicate should return TRUE "
                                 "if table was changed after last backup")
Example #9
0
    def test_should_return_true_if_table_was_changed_before_last_backup(self, _):  # nopep8 pylint: disable=C0301
        """On-demand backups are unconditional: the predicate returns True
        even when the table changed before the last backup was taken.

        Fix: the failure message previously claimed the predicate "should
        return FALSE", contradicting both the test name and the assertTrue.
        """
        # given
        backup = Backup(
            parent=self.table.key,
            last_modified=datetime(2016, 11, 13, 15, 00)
        )
        backup.put()
        predicate = OnDemandBackupPredicate()

        # when
        result = predicate.test(self.big_query_table_metadata, self.table)
        # then
        self.assertTrue(result, "OnDemandShouldBackupPredicate should return TRUE "
                                "even if table was changed before "
                                "last backup was made")
Example #10
0
    def test_should_return_false_if_table_was_changed_at_the_same_time_when_last_backup(
            self, _1, _2):  # nopep8 pylint: disable=C0301
        """No new backup is needed when the table's last change coincides
        with the last backup's timestamp (and sizes match)."""
        # given
        existing_backup = Backup(parent=self.table.key,
                                 last_modified=datetime(2016, 11, 13, 15, 00),
                                 numBytes=123)
        existing_backup.put()
        under_test = DefaultBackupPredicate()

        # when
        outcome = under_test.test(self.big_query_table_metadata, self.table)
        # then
        self.assertFalse(
            outcome, "ShouldBackupPredicate should return False "
            "if table was change at the same time when "
            "last backup was made")
Example #11
0
    def test_should_return_true_if_changed_table_is_empty_and_last_backup_is_also_empty(
            self, _1, _2):  # nopep8 pylint: disable=C0301
        """An empty source table with an equally empty last backup still
        triggers a new backup when the table changed afterwards."""
        # given
        empty_backup = Backup(parent=self.table.key,
                              last_modified=datetime(2016, 11, 13, 15, 00),
                              numBytes=0)
        empty_backup.put()
        under_test = DefaultBackupPredicate()

        # when
        outcome = under_test.test(self.big_query_table_metadata, self.table)

        # then
        self.assertTrue(
            outcome, "ShouldBackupPredicate should return True "
            "if table was changed after last backup was made,"
            "but source table is empty and bbq has also empty last backup")
Example #12
0
    def test_should_return_true_if_table_was_changed_before_last_backup_but_table_size_is_different(
            self, _1, _2):  # nopep8 pylint: disable=C0301
        """A size mismatch between source table and last backup forces a new
        backup even when the table changed before the backup was taken."""
        # given
        stale_backup = Backup(parent=self.table.key,
                              last_modified=datetime(2016, 11, 13, 15, 00),
                              numBytes=123)
        stale_backup.put()
        under_test = DefaultBackupPredicate()

        # when
        outcome = under_test.test(self.big_query_table_metadata, self.table)
        # then
        self.assertTrue(
            outcome, "ShouldBackupPredicate should return TRUE "
            "if table was changed before "
            "last backup was made but "
            "backup has different size than source table")
Example #13
0
    def test_should_not_insert_two_backup_entities_for_the_same_backup_table(
            self):  # nopep8 pylint: disable=C0301
        """insert_if_absent() must be idempotent per target backup table: a
        second entity for the same dataset/table is dropped and the first
        entity's data (e.g. 'created') is preserved.

        Fix: replaced Python-2-only leading-zero int literals (02, 03, 00)
        with plain ints; the old form is a SyntaxError on Python 3.
        """
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime(2017, 2, 1, 16, 30))
        table.put()
        backup_one = Backup(parent=table.key,
                            last_modified=datetime(2017, 2, 1, 16, 30),
                            created=datetime(2017, 2, 1, 16, 30),
                            dataset_id='targetDatasetId',
                            table_id='targetTableId',
                            numBytes=1234)
        backup_two = Backup(parent=table.key,
                            last_modified=datetime(2018, 3, 2, 0, 0),
                            created=datetime(2018, 3, 2, 0, 0),
                            dataset_id='targetDatasetId',
                            table_id='targetTableId',
                            numBytes=1234)

        # when
        Backup.insert_if_absent(backup_one)
        Backup.insert_if_absent(backup_two)
        backups = list(Backup.get_all())

        # then
        self.assertEqual(len(backups), 1)
        self.assertEqual(backup_one.created, backups[0].created)
Example #14
0
 def test_that_not_deleted_backup_doesnt_have_created_field(self):
     """A freshly stored backup keeps its 'created' value and has no
     'deleted' timestamp.

     NOTE(review): the test name says 'created' where the assertions show
     it means 'deleted'; name kept to avoid breaking test selection.
     Fix: replaced Python-2-only leading-zero int literals (02) with plain
     ints; the old form is a SyntaxError on Python 3.
     """
     # given
     table = Table(project_id='example-proj-name',
                   dataset_id='example-dataset-name',
                   table_id='example-table-name',
                   last_checked=datetime(2017, 2, 1, 16, 30))
     backup = Backup(parent=table.key,
                     last_modified=datetime(2017, 2, 1, 16, 30),
                     created=datetime(2017, 2, 1, 16, 30),
                     dataset_id='targetDatasetId',
                     table_id='targetTableId',
                     numBytes=1234)
     backup.put()
     # then
     backup_to_check = Backup.get_by_key(backup.key)
     self.assertEqual(backup_to_check.deleted, None)
     self.assertEqual(backup_to_check.created,
                      datetime(2017, 2, 1, 16, 30))
Example #15
0
 def test_should_retrieve_table_using_backup(self):
     """Table.get_table_from_backup() must resolve a Backup entity back to
     its parent Table entity.

     Fix: replaced Python-2-only leading-zero int literals (02) with plain
     ints; the old form is a SyntaxError on Python 3.
     """
     # given
     table = Table(project_id='example-proj-name',
                   dataset_id='example-dataset-name',
                   table_id='example-table-name',
                   last_checked=datetime(2017, 2, 1, 16, 30))
     table.put()
     backup = Backup(parent=table.key,
                     last_modified=datetime(2017, 2, 1, 16, 30),
                     created=datetime(2017, 2, 1, 16, 30),
                     dataset_id='targetDatasetId',
                     table_id='targetTableId',
                     numBytes=1234)
     backup.put()
     # then
     backup_entity = Backup.get_by_key(backup.key)
     table_entity = Table.get_table_from_backup(backup_entity)
     self.assertEqual(table_entity, table)
Example #16
0
def create_backup(created_datetime, table=None, table_id=None):
    """Build (without persisting) a Backup entity for tests.

    Args:
        created_datetime: value for the backup's 'created' field.
        table: parent Table entity; a fresh Table() is built when omitted.
        table_id: target table id; a random 'targetTable<N>' when falsy.

    Fix: the original default ``table=Table()`` was evaluated once at
    definition time, so every default call silently shared one Table
    instance (mutable-default pitfall); a None sentinel avoids that.
    """
    if table is None:
        table = Table()
    if not table_id:
        table_id = 'targetTable' + str(random.randint(1, 1000000))
    backup_size = random.randint(1, 1000)
    return Backup(parent=table.key,
                  created=created_datetime,
                  dataset_id='targetDataset',
                  table_id=table_id,
                  numBytes=backup_size)
Example #17
0
def create_backup_daily_sequence(count,
                                 table=None,
                                 start_date=datetime(2017, 2, 1, 16, 30)):
    """Create `count` backups one day apart, sorted newest-first.

    Args:
        count: number of backups to generate.
        table: parent Table entity; a fresh Table() is built when omitted.
        start_date: creation time of the first backup (immutable datetime
            default is safe; it is only rebound, never mutated).

    Fix: the original default ``table=Table()`` was evaluated once at
    definition time, so every default call shared one Table instance
    (mutable-default pitfall); a None sentinel avoids that.
    """
    if table is None:
        table = Table()
    backups = []
    for _ in range(count):
        backups.append(create_backup(start_date, table))
        start_date += timedelta(days=1)

    return Backup.sort_backups_by_create_time_desc(backups)
Example #18
0
    def __create_backup(backup_table_metadata, copy_job_results):
        """Persist a Backup entity describing a finished copy job.

        Raises DatastoreTableGetRetriableException when the source Table
        entity is not (yet) visible in datastore, so the caller can retry.
        """
        source_reference = TableReference.from_bq_table(
            copy_job_results.source_bq_table)
        source_table = Table.get_table_by_reference(source_reference)
        if source_table is None:
            raise DatastoreTableGetRetriableException()

        new_backup = Backup(
            parent=source_table.key,
            last_modified=copy_job_results.start_time,
            created=copy_job_results.end_time,
            dataset_id=copy_job_results.target_dataset_id,
            table_id=copy_job_results.target_table_id,
            numBytes=backup_table_metadata.table_size_in_bytes())
        logging.debug(
            "Saving backup to datastore, source:{0}, target:{1}".format(
                copy_job_results.source_bq_table,
                copy_job_results.target_bq_table))

        new_backup.insert_if_absent(new_backup)
    def filter(self, backups, table_reference):
        """Return the backups to retain: every 'young' backup plus, when any
        exist, the single most recent 'old' one."""
        ordered = Backup.sort_backups_by_create_time_desc(backups)
        young, old = BackupAgeDivider.divide_backups_by_age(ordered)

        # old[:1] is empty when there are no old backups, otherwise the
        # newest of them (ordered is already newest-first).
        return young + old[:1]
Example #20
0
    def test_should_sort_backups_by_create_time_desc(self):
        """Sorting places the most recently created backup first."""
        # given
        newest = backup_utils.create_backup(datetime(2017, 2, 3, 16, 30))
        middle = backup_utils.create_backup(datetime(2017, 2, 2, 16, 30))
        oldest = backup_utils.create_backup(datetime(2017, 2, 1, 16, 30))

        # when
        result = Backup.sort_backups_by_create_time_desc(
            [newest, oldest, middle])

        # then
        self.assertListEqual([newest, middle, oldest], result)
Example #21
0
    def perform_retention(self, table_reference, table_key):
        """Apply the configured retention policy to all live backups of the
        table identified by `table_key`, deleting the ineligible ones."""
        backups = Backup.get_all_backups_sorted(ndb.Key(urlsafe=table_key))
        logging.debug("Fetched %s backups for the table: %s", len(backups),
                      table_reference)

        if not ShouldPerformRetentionPredicate.test(backups):
            return

        logging.info("Retention policy used for table '%s': '%s'",
                     table_reference, type(self.policy).__name__)

        eligible = self.policy.get_backups_eligible_for_deletion(
            backups=backups, table_reference=table_reference)
        for doomed in eligible:
            self.__delete_backup_in_bq_and_update_datastore(doomed)
Example #22
0
 def test_deleting_backup_is_adding_current_timestamp_in_deleted_field(
         self):
     """mark_backup_deleted() must stamp 'deleted' with the current time
     rather than removing the entity.

     Fix: replaced Python-2-only leading-zero int literals (02) with plain
     ints; the old form is a SyntaxError on Python 3.
     """
     # given
     table = Table(project_id='example-proj-name',
                   dataset_id='example-dataset-name',
                   table_id='example-table-name',
                   last_checked=datetime(2017, 2, 1, 16, 30))
     backup = Backup(parent=table.key,
                     last_modified=datetime(2017, 2, 1, 16, 30),
                     created=datetime(2017, 2, 1, 16, 30),
                     dataset_id='targetDatasetId',
                     table_id='targetTableId',
                     numBytes=1234)
     backup.put()
     # when
     Backup.mark_backup_deleted(backup.key)
     # then
     deleted_backup = Backup.get_by_key(backup.key)
     # presumably 'now' is frozen to 2017-02-03 16:30 by a fixture - TODO confirm
     self.assertEqual(deleted_backup.deleted, datetime(2017, 2, 3, 16, 30))
Example #23
0
 def sortAndAssertListEqual(self, backup_list1, backup_list2):
     """Assert both backup lists are equal after sorting each newest-first."""
     self.assertListEqual(
         Backup.sort_backups_by_create_time_desc(backup_list1),
         Backup.sort_backups_by_create_time_desc(backup_list2))
Example #24
0
 def __create_backup_entity(source_table_entity, dataset_id, table_id):
     """Build a Backup entity as a datastore child of the source table."""
     return Backup(parent=source_table_entity.key,
                   dataset_id=dataset_id,
                   table_id=table_id)
Example #25
0
 def __last_backup_async(self):
     """Start an async query for the newest non-deleted Backup under this
     entity; caller resolves the returned future."""
     query = Backup.query(ancestor=self.key)
     # ndb overloads '==' to build filter nodes; 'is None' would not work here
     query = query.filter(Backup.deleted == None)
     return query.order(-Backup.created).get_async()
Example #26
0
 def last_backup_not_newer_than(self, datetime):
     """Return the newest non-deleted Backup created at or before the given
     moment, or None when there is none.

     NOTE(review): the parameter name 'datetime' shadows the datetime
     module/class inside this method; renaming would change the keyword
     call interface, so it is only flagged here.
     """
     # ndb overloads '==' to build filter nodes; 'is None' would not work here.
     return Backup.query(ancestor=self.key) \
         .filter(Backup.deleted == None) \
         .filter(Backup.created <= datetime) \
         .order(-Backup.created).get()