Example #1
    def test_should_fill_deleted_field_in_backup_entity_if_table_not_found_error_during_deletion(
            self, _):
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime.datetime(2017, 2, 1, 16, 30))
        table.put()
        reference = TableReference.from_table_entity(table)
        backup1 = backup_utils.create_backup(
            datetime.datetime(2017, 2, 1, 16, 30), table, table_id="backup1")
        backup2 = backup_utils.create_backup(
            datetime.datetime(2017, 2, 2, 16, 30), table, table_id="backup2")
        ndb.put_multi([backup1, backup2])
        self.policy.get_backups_eligible_for_deletion = Mock(
            return_value=[backup1, backup2])

        # when
        self.under_test.perform_retention(reference, table.key.urlsafe())

        # then
        self.assertIsNotNone(Backup.get_by_key(backup1.key).deleted)
        self.assertIsNotNone(Backup.get_by_key(backup2.key).deleted)
Example #2
    def test_should_not_insert_two_backup_entities_for_the_same_backup_table(
            self):  # nopep8 pylint: disable=C0301
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime(2017, 2, 1, 16, 30))
        table.put()
        backup_one = Backup(parent=table.key,
                            last_modified=datetime(2017, 2, 1, 16, 30),
                            created=datetime(2017, 2, 1, 16, 30),
                            dataset_id='targetDatasetId',
                            table_id='targetTableId',
                            numBytes=1234)
        backup_two = Backup(parent=table.key,
                            last_modified=datetime(2018, 3, 2, 0, 0),
                            created=datetime(2018, 3, 2, 0, 0),
                            dataset_id='targetDatasetId',
                            table_id='targetTableId',
                            numBytes=1234)

        # when
        Backup.insert_if_absent(backup_one)
        Backup.insert_if_absent(backup_two)
        backups = list(Backup.get_all())

        # then
        self.assertEqual(len(backups), 1)
        self.assertEqual(backup_one.created, backups[0].created)
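Example #2 shows that Backup.insert_if_absent() keeps only the first entity
describing a given backup table. A minimal sketch of one way to get that
behavior with ndb (the deterministic-key scheme below is an assumption for
illustration, not the project's actual implementation):

    from google.appengine.ext import ndb

    @classmethod
    @ndb.transactional
    def insert_if_absent(cls, backup):
        # hypothetical: key the entity by its target table id under the same
        # parent, so a second insert for the same backup table is a no-op
        key = ndb.Key(cls, backup.table_id, parent=backup.key.parent())
        if key.get() is None:
            backup.key = key
            backup.put()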
Example #3
    @classmethod
    def generate_restore_items(cls, project_id, dataset_id, target_project_id,
                               target_dataset_id, max_partition_days):
        if max_partition_days:
            table_entities = Table \
                .get_tables_with_max_partition_days(project_id, dataset_id,
                                                    max_partition_days)
        else:
            table_entities = Table.get_tables(project_id, dataset_id)

        for table_entity_sublist in paginated(1000, table_entities):
            restore_items = []
            for table_entity, backup_entity in Table.get_last_backup_for_tables(
                    table_entity_sublist):
                if backup_entity is not None:
                    source_table_reference = \
                        RestoreTableReference.backup_table_reference(
                            table_entity, backup_entity)

                    target_table_reference = TableReference(
                        target_project_id,
                        target_dataset_id,
                        table_entity.table_id,
                        table_entity.partition_id
                    )

                    restore_item = RestoreItem.create(source_table_reference,
                                                      target_table_reference)
                    restore_items.append(restore_item)

            logging.info("Restore items generator yields %s restore items",
                         len(restore_items))
            yield restore_items
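Example #3 yields restore items in batches of at most 1000 source tables, so
a caller can process each batch as its own unit of work. A minimal
consumption sketch (RestoreService and enqueue_restore_tasks are hypothetical
names standing in for the owning class and its task helper):

    for restore_items in RestoreService.generate_restore_items(
            'source-project', 'source-dataset',
            'target-project', 'target-dataset',
            max_partition_days=None):
        # tables without any backup yield no restore item and are skipped
        enqueue_restore_tasks(restore_items)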
Example #4
    def test_should_return_the_same_parent_table_for_child_backups(self):
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime(2017, 2, 1, 16, 30))
        table.put()
        backup_one = Backup(parent=table.key,
                            last_modified=datetime(2017, 2, 1, 16, 30),
                            created=datetime(2017, 2, 1, 16, 30),
                            dataset_id='targetDatasetId',
                            table_id='targetTableId',
                            numBytes=1234)
        backup_two = Backup(parent=table.key,
                            last_modified=datetime(2018, 3, 2, 0, 0),
                            created=datetime(2018, 3, 2, 0, 0),
                            dataset_id='targetDatasetId',
                            table_id='targetTableId',
                            numBytes=1234)

        # when
        table1 = Backup.get_table(backup_one)
        table2 = Backup.get_table(backup_two)

        # then
        self.assertEqual(table, table1)
        self.assertEqual(table1, table2)
Example #5
    def test_should_disable_partition_expiration_if_backup_table_has_it(
            self, disable_partition_expiration, _, _1, _2, _3, _4, _5):
        # given
        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps(
            {"data": data, "jobJson": JobResultExample.DONE},
            cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)

        # then
        self.assertEqual(response.status_int, 200)
        disable_partition_expiration.assert_called_once()
Example #6
    def test_that_get_all_backups_sorted_will_return_only_these_with_null_deleted_column(
            self):  # nopep8 pylint: disable=C0301, W0613
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime(2017, 2, 1, 16, 30))
        table.put()
        backup1 = Backup(
            parent=table.key,
            last_modified=datetime(2017, 2, 1, 16, 30),
            created=datetime(2017, 2, 1, 16, 30),
            dataset_id='backup_dataset',
            table_id='backup1',
            numBytes=1234,
        )
        backup1.put()
        backup2 = Backup(parent=table.key,
                         last_modified=datetime(2017, 2, 1, 16, 30),
                         created=datetime(2017, 2, 1, 16, 30),
                         dataset_id='backup_dataset',
                         table_id='backup2',
                         numBytes=1234,
                         deleted=datetime(2017, 2, 10, 16, 30))
        backup2.put()

        # when
        existing_backups = Backup.get_all_backups_sorted(table.key)

        # then
        self.assertIn(backup1, existing_backups)
        self.assertNotIn(backup2, existing_backups)
Example #7
    def test_that_last_checked_date_is_updated_even_if_table_should_not_be_backed_up( # nopep8 pylint: disable=C0301
            self, copy_table, _1, _2):
        # given
        table = Table(project_id="test-project",
                      dataset_id="test-dataset",
                      table_id="test-table",
                      last_checked=datetime.datetime(2017, 3, 3))

        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table")

        # when
        table.put()

        BackupProcess(table_reference, self.big_query,
                      self.big_query_table_metadata).start()

        table_entity = Table.get_table("test-project", "test-dataset",
                                       "test-table")

        # then
        self.assertEqual(table_entity.last_checked,
                         datetime.datetime(2017, 4, 4))
        copy_table.assert_not_called()
Example #8
    def __create_table_without_backup(project_id, dataset_id):
        table_without_backup = Table(project_id=project_id,
                                     dataset_id=dataset_id,
                                     table_id='table_id_without_backup',
                                     partition_id=None,
                                     last_checked=datetime.now())
        table_without_backup.put()
Example #9
    def __get_tables(self, project_id, dataset_id, max_partition_days):
        if max_partition_days is None:
            return Table.get_tables(project_id, dataset_id, page_size=20)
        else:
            return Table.get_tables_with_max_partition_days(project_id,
                                                            dataset_id,
                                                            max_partition_days,
                                                            page_size=20)
Example #10
    def __create_table_without_backups():
        partition_id = "partitionIdWithoutBackup"
        table = Table(project_id=PROJECT_ID,
                      dataset_id=DATASET_ID,
                      table_id=TABLE_ID,
                      partition_id=partition_id,
                      last_checked=NOW)
        table.put()
Example #11
    def _create_table_entity(table_id,
                             partition_id=None,
                             last_checked=None):
        # default computed per call; a datetime.now() default argument would
        # be evaluated once at definition time and reused for every entity
        non_partitioned_table = Table(
            project_id='example-proj-name',
            dataset_id='example-dataset-name',
            table_id=table_id,
            partition_id=partition_id,
            last_checked=last_checked or datetime.datetime.now())
        non_partitioned_table.put()
Example #12
    def __create_table_with_two_backups():
        table = Table(project_id=PROJECT_ID,
                      dataset_id=DATASET_ID,
                      table_id=TABLE_ID,
                      partition_id=PARTITION_ID,
                      last_checked=NOW)
        table.put()

        backup_utils.create_backup(NOW, table, BACKUP_TABLE_ID_FROM_NOW).put()
        backup_utils.create_backup(OLD_TIME, table,
                                   BACKUP_TABLE_ID_FROM_OLD_TIME).put()
Example #13
    def __create_table_entity(self):
        logging.info(
            "Creating table entity for %s",
            TableReference(self.project_id, self.dataset_id, self.table_id,
                           self.partition_id))
        table_entity = Table(project_id=self.project_id,
                             dataset_id=self.dataset_id,
                             table_id=self.table_id,
                             partition_id=self.partition_id,
                             last_checked=self.now)
        table_entity.put()
        return table_entity
Example #14
    def test_should_not_perform_retention_if_no_backups(self, delete_table):
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime.datetime(2017, 2, 1, 16, 30))
        table.put()
        reference = TableReference.from_table_entity(table)

        # when
        self.under_test.perform_retention(reference, table.key.urlsafe())

        # then
        delete_table.assert_not_called()
Example #15
    def test_should_create_datastore_backup_entity(self, _create_http, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({'status': '200'},
             content('tests/json_samples/bigquery_v2_test_schema.json')),
            ({'status': '200'},
             content('tests/json_samples/table_get/'
                     'bigquery_partitioned_table_get.json'))
        ])

        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps(
            {"data": data, "jobJson": JobResultExample.DONE},
            cls=RequestEncoder)
        copy_job_result = CopyJobResult(json.loads(payload).get('jobJson'))

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertEqual(backup.dataset_id, "target_dataset_id")
        self.assertEqual(backup.table_id, "target_table_id")
        self.assertIsInstance(backup.created, datetime)
        self.assertEqual(backup.created, copy_job_result.end_time)

        self.assertIsInstance(backup.last_modified, datetime)
        self.assertEqual(backup.last_modified, copy_job_result.start_time)
Example #16
def create_and_insert_table_with_one_backup(project_id,
                                            dataset_id,
                                            table_id,
                                            date,
                                            partition_id=None):
    table = Table(project_id=project_id,
                  dataset_id=dataset_id,
                  table_id=table_id,
                  partition_id=partition_id,
                  last_checked=date)

    table.put()
    backup_utils.create_backup(date, table,
                               table_id + date.strftime('%Y%m%d')).put()

    return table
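A quick usage sketch for Example #16 (the identifiers below are placeholder
values, not taken from the source):

    # creates a Table entity and a single Backup whose table id carries the
    # date suffix, e.g. 'my_table20170201'
    table = create_and_insert_table_with_one_backup(
        'example-proj-name', 'example-dataset-name', 'my_table',
        datetime(2017, 2, 1))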
Example #17
    def test_should_schedule_using_cursor(self):
        # given
        self._create_table_entity('non_partitioned_table1')
        self._create_table_entity('non_partitioned_table2')

        age_threshold_datetime = datetime.datetime.today() - relativedelta(
            months=(configuration.
                    grace_period_after_source_table_deletion_in_months + 1))

        _, cursor, _1 = Table.query() \
            .filter(Table.last_checked >= age_threshold_datetime) \
            .order(Table.last_checked, Table.key) \
            .fetch_page(page_size=1)

        # when
        OrganizationRetention.schedule_retention_tasks_starting_from_cursor(
            cursor)
        # then
        tasks = self.taskqueue_stub.get_filtered_tasks()
        self.assertEqual(len(tasks), 1)
        self.assertTrue(tasks[0].url.startswith(
            '/tasks/retention/table'
            '?projectId=example-proj-name'),
                        msg='Actual url: {}'.format(tasks[0].url))
Example #18
    def __get_source_table_entity(backup_entity):
        source_table_entity = Table.get_table_from_backup(backup_entity)
        if not source_table_entity:
            error_message = "Backup ancestor doesn't exist: '{}:{}'. " \
                .format(backup_entity.dataset_id,
                        backup_entity.table_id)
            raise ParameterValidationException(error_message)
        return source_table_entity
Example #19
    def test_should_not_create_backups_entity_if_backup_table_doesnt_exist(
            self, _create_http, error_reporting, _):
        # given
        _create_http.return_value = HttpMockSequence([
            ({'status': '200'},
             content('tests/json_samples/bigquery_v2_test_schema.json')),
            ({'status': '404'},  # Table not found
             content('tests/json_samples/table_get/'
                     'bigquery_partitioned_table_get.json'))
        ])

        table_entity = Table(project_id="source_project_id",
                             dataset_id="source_dataset_id",
                             table_id="source_table_id",
                             partition_id="123")
        table_entity.put()

        source_bq_table = TableReference.from_table_entity(
            table_entity).create_big_query_table()
        destination_bq_table = BigQueryTable("target_project_id",
                                             "target_dataset_id",
                                             "target_table_id")
        data = {
            "sourceBqTable": source_bq_table,
            "targetBqTable": destination_bq_table
        }
        payload = json.dumps(
            {"data": data, "jobJson": JobResultExample.DONE},
            cls=RequestEncoder)

        # when
        response = self.under_test.post(
            '/callback/backup-created/project/dataset/table', params=payload)
        backup = table_entity.last_backup

        # then
        self.assertEqual(response.status_int, 200)
        self.assertIsNone(backup)
        error_reporting.assert_called_once()
Example #20
    def test_should_retrieve_table_using_backup(self):
        # given
        table = Table(project_id='example-proj-name',
                      dataset_id='example-dataset-name',
                      table_id='example-table-name',
                      last_checked=datetime(2017, 2, 1, 16, 30))
        table.put()
        backup = Backup(parent=table.key,
                        last_modified=datetime(2017, 2, 1, 16, 30),
                        created=datetime(2017, 2, 1, 16, 30),
                        dataset_id='targetDatasetId',
                        table_id='targetTableId',
                        numBytes=1234)
        backup.put()

        # then
        backup_entity = Backup.get_by_key(backup.key)
        table_entity = Table.get_table_from_backup(backup_entity)
        self.assertEqual(table_entity, table)
Example #21
def create_backup(created_datetime, table=None, table_id=None):
    # a Table() default argument would be evaluated once at import time and
    # shared across calls; create a fresh parent entity per call instead
    if table is None:
        table = Table()
    if not table_id:
        table_id = 'targetTable' + str(random.randint(1, 1000000))
    backup_size = random.randint(1, 1000)
    backup = Backup(parent=table.key,
                    created=created_datetime,
                    dataset_id='targetDataset',
                    table_id=table_id,
                    numBytes=backup_size)
    return backup
Example #22
def create_backup_daily_sequence(count,
                                 table=None,
                                 start_date=datetime(2017, 2, 1, 16, 30)):
    # same pitfall as in Example #21: avoid a shared Table() default argument
    if table is None:
        table = Table()
    backups = []
    for _ in range(count):
        backup = create_backup(start_date, table)
        backups.append(backup)
        start_date += timedelta(days=1)

    return Backup.sort_backups_by_create_time_desc(backups)
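A short usage sketch for Examples #21 and #22 (the parent table below is an
assumption for illustration):

    table = Table(project_id='example-proj-name',
                  dataset_id='example-dataset-name',
                  table_id='example-table-name')
    table.put()
    # three backups, one day apart, returned newest-first
    backups = create_backup_daily_sequence(3, table=table)
    ndb.put_multi(backups)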
Example #23
    def __get_table_entity(table_reference):
        table = Table.get_table(table_reference.project_id,
                                table_reference.dataset_id,
                                table_reference.table_id,
                                table_reference.partition_id)
        if table is None:
            raise NotFoundException(
                'Table not found in datastore: {}'.format(table_reference))
        logging.info("Datastore table: %s", table)
        return table
Example #24
    def test_copy_job_and_entity_in_datastore_for_single_partition_of_a_table(
            self, _, _1, _2):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id="20170330")

        # when
        BackupProcess(table_reference, self.big_query,
                      self.big_query_table_metadata).start()

        ancestor_of_partition = Table.get_table("test-project", "test-dataset",
                                                "test-table")
        partition = Table.get_table("test-project", "test-dataset",
                                    "test-table", "20170330")

        # then
        self.assertIsNotNone(partition)
        self.assertIsNone(ancestor_of_partition)
Example #25
def create_multiple_table_entities(quantity,
                                   project_id,
                                   partition_id,
                                   dataset_id='example-dataset-name'):
    tables = []
    for i in range(1, quantity + 1):
        table = Table(project_id=project_id,
                      dataset_id=dataset_id,
                      table_id='example-table-name-{}'.format(i),
                      partition_id=partition_id,
                      last_checked=datetime(2017, 12, 5))
        tables.append(table)
    ndb.put_multi(tables)
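A usage sketch for Example #25 (the count assertion assumes a test-case
context with a datastore testbed):

    create_multiple_table_entities(3, 'example-proj-name', partition_id=None)
    # all three entities share the default dataset 'example-dataset-name'
    self.assertEqual(Table.query().count(), 3)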
Example #26
    def start(self):
        self.now = datetime.datetime.utcnow()

        table_entity = Table.get_table(self.project_id, self.dataset_id,
                                       self.table_id, self.partition_id)

        if self.__backup_ever_done(table_entity):
            # table is already tracked in datastore: refresh last_checked,
            # then create a fresh backup only if one is needed
            self.__update_last_check(table_entity)
            if self.__should_backup(table_entity):
                self.__create_backup(table_entity)
        else:
            # table seen for the first time: create its datastore entity
            # only when it actually qualifies for a backup
            if self.__should_backup(table_entity):
                table_entity = self.__create_table_entity()
                self.__create_backup(table_entity)
Example #27
    def test_that_last_checked_date_is_updated_when_backup_is_processed(
            self, _, _1, _2):
        # given
        table = Table(project_id="test-project",
                      dataset_id="test-dataset",
                      table_id="test-table",
                      last_checked=datetime.datetime(2017, 3, 3))

        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table")

        # when
        table.put()

        BackupProcess(table_reference, self.big_query,
                      self.big_query_table_metadata).start()

        table_entity = Table.get_table("test-project", "test-dataset",
                                       "test-table")

        # then
        self.assertEqual(table_entity.last_checked,
                         datetime.datetime(2017, 4, 4))
Example #28
    def setUp(self):
        self.initTestBedForDatastore()

        self.table = Table(
            project_id="p1",
            dataset_id="d1",
            table_id="t1"
        )

        self.big_query_table_metadata = BigQueryTableMetadata({})
        patch('src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata.is_empty',
              return_value=False).start()
        patch('src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata.'
              'is_external_or_view_type', return_value=False).start()
        patch('src.commons.big_query.big_query_table_metadata.BigQueryTableMetadata.'
              'is_schema_defined', return_value=True).start()
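The patches in Example #28 are started but never stopped within the snippet;
a matching tearDown (an assumption about the surrounding test class, not
shown above) keeps them from leaking into other tests:

    def tearDown(self):
        patch.stopall()  # stops every patcher started with .start()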
Example #29
    def test_copy_job_and_entity_in_datastore_for_not_partitioned_table(
            self, _, _1, _2):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table")

        # when
        BackupProcess(table_reference, self.big_query,
                      self.big_query_table_metadata).start()

        table_entity = Table.get_table("test-project", "test-dataset",
                                       "test-table")

        # then
        self.assertIsNotNone(table_entity)
Example #30
    def test_that_copy_job_and_entity_in_datastore_is_created_if_empty_partitioned_table( # nopep8 pylint: disable=C0301
            self, create_backup, _, _1):
        # given
        table_reference = TableReference(project_id="test-project",
                                         dataset_id="test-dataset",
                                         table_id="test-table",
                                         partition_id=None)

        # when
        BackupProcess(table_reference, self.big_query,
                      self.big_query_table_metadata).start()
        table_in_datastore = Table.get_table("test-project", "test-dataset",
                                             "test-table")

        # then
        create_backup.assert_called_once()
        self.assertIsNotNone(table_in_datastore)