def test_should_fill_deleted_field_in_backup_entity_if_table_not_found_error_during_deletion(
        self, _):
    # given
    table = Table(project_id='example-proj-name',
                  dataset_id='example-dataset-name',
                  table_id='example-table-name',
                  last_checked=datetime.datetime(2017, 2, 1, 16, 30))
    table.put()
    reference = TableReference.from_table_entity(table)
    backup1 = backup_utils.create_backup(
        datetime.datetime(2017, 2, 1, 16, 30), table, table_id="backup1")
    backup2 = backup_utils.create_backup(
        datetime.datetime(2017, 2, 2, 16, 30), table, table_id="backup2")
    ndb.put_multi([backup1, backup2])
    self.policy.get_backups_eligible_for_deletion = Mock(
        return_value=[backup1, backup2])

    # when
    self.under_test.perform_retention(reference, table.key.urlsafe())

    # then
    self.assertIsNotNone(Backup.get_by_key(backup1.key).deleted)
    self.assertIsNotNone(Backup.get_by_key(backup2.key).deleted)
def test_should_not_insert_two_backup_entities_for_the_same_backup_table(
        self):  # nopep8 pylint: disable=C0301
    # given
    table = Table(project_id='example-proj-name',
                  dataset_id='example-dataset-name',
                  table_id='example-table-name',
                  last_checked=datetime(2017, 2, 1, 16, 30))
    table.put()
    backup_one = Backup(parent=table.key,
                        last_modified=datetime(2017, 2, 1, 16, 30),
                        created=datetime(2017, 2, 1, 16, 30),
                        dataset_id='targetDatasetId',
                        table_id='targetTableId',
                        numBytes=1234)
    backup_two = Backup(parent=table.key,
                        last_modified=datetime(2018, 3, 2, 0, 0),
                        created=datetime(2018, 3, 2, 0, 0),
                        dataset_id='targetDatasetId',
                        table_id='targetTableId',
                        numBytes=1234)

    # when
    Backup.insert_if_absent(backup_one)
    Backup.insert_if_absent(backup_two)
    backups = list(Backup.get_all())

    # then
    self.assertEqual(len(backups), 1)
    self.assertEqual(backup_one.created, backups[0].created)
def generate_restore_items(cls, project_id, dataset_id, target_project_id,
                           target_dataset_id, max_partition_days):
    if max_partition_days:
        table_entities = Table \
            .get_tables_with_max_partition_days(project_id, dataset_id,
                                                max_partition_days)
    else:
        table_entities = Table.get_tables(project_id, dataset_id)

    for table_entity_sublist in paginated(1000, table_entities):
        restore_items = []
        for table_entity, backup_entity in Table.get_last_backup_for_tables(
                table_entity_sublist):
            if backup_entity is not None:
                source_table_reference = \
                    RestoreTableReference.backup_table_reference(
                        table_entity, backup_entity)
                target_table_reference = TableReference(
                    target_project_id,
                    target_dataset_id,
                    table_entity.table_id,
                    table_entity.partition_id
                )
                restore_item = RestoreItem.create(source_table_reference,
                                                  target_table_reference)
                restore_items.append(restore_item)
        logging.info("Restore items generator yields %s restore items",
                     len(restore_items))
        yield restore_items
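# Note: `paginated` is not defined in this section. Below is a minimal sketch
# of such a helper, assuming it only splits an iterable into consecutive
# chunks of at most `page_size` items; the project's actual implementation
# may differ.
def paginated(page_size, iterable):
    """Yield lists of up to `page_size` items taken from `iterable`."""
    import itertools
    iterator = iter(iterable)
    while True:
        chunk = list(itertools.islice(iterator, page_size))
        if not chunk:
            return
        yield chunk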
def test_should_return_the_same_parent_table_for_child_backups(self):
    # given
    table = Table(project_id='example-proj-name',
                  dataset_id='example-dataset-name',
                  table_id='example-table-name',
                  last_checked=datetime(2017, 2, 1, 16, 30))
    table.put()
    backup_one = Backup(parent=table.key,
                        last_modified=datetime(2017, 2, 1, 16, 30),
                        created=datetime(2017, 2, 1, 16, 30),
                        dataset_id='targetDatasetId',
                        table_id='targetTableId',
                        numBytes=1234)
    backup_two = Backup(parent=table.key,
                        last_modified=datetime(2018, 3, 2, 0, 0),
                        created=datetime(2018, 3, 2, 0, 0),
                        dataset_id='targetDatasetId',
                        table_id='targetTableId',
                        numBytes=1234)

    # when
    table1 = Backup.get_table(backup_one)
    table2 = Backup.get_table(backup_two)

    # then
    self.assertEqual(table, table1)
    self.assertEqual(table1, table2)
def test_should_disable_partition_expiration_if_backup_table_has_it(
        self, disable_partition_expiration, _, _1, _2, _3, _4, _5):
    # given
    table_entity = Table(project_id="source_project_id",
                         dataset_id="source_dataset_id",
                         table_id="source_table_id",
                         partition_id="123")
    table_entity.put()

    source_bq_table = TableReference.from_table_entity(
        table_entity).create_big_query_table()
    destination_bq_table = BigQueryTable("target_project_id",
                                         "target_dataset_id",
                                         "target_table_id")
    data = {
        "sourceBqTable": source_bq_table,
        "targetBqTable": destination_bq_table
    }
    payload = json.dumps({
        "data": data,
        "jobJson": JobResultExample.DONE
    }, cls=RequestEncoder)

    # when
    response = self.under_test.post(
        '/callback/backup-created/project/dataset/table', params=payload)

    # then
    self.assertEqual(response.status_int, 200)
    disable_partition_expiration.assert_called_once()
def test_that_get_all_backups_sorted_will_return_only_these_with_null_deleted_column(
        self):  # nopep8 pylint: disable=C0301, W0613
    # given
    table = Table(project_id='example-proj-name',
                  dataset_id='example-dataset-name',
                  table_id='example-table-name',
                  last_checked=datetime(2017, 2, 1, 16, 30))
    table.put()
    backup1 = Backup(parent=table.key,
                     last_modified=datetime(2017, 2, 1, 16, 30),
                     created=datetime(2017, 2, 1, 16, 30),
                     dataset_id='backup_dataset',
                     table_id='backup1',
                     numBytes=1234)
    backup1.put()
    backup2 = Backup(parent=table.key,
                     last_modified=datetime(2017, 2, 1, 16, 30),
                     created=datetime(2017, 2, 1, 16, 30),
                     dataset_id='backup_dataset',
                     table_id='backup2',
                     numBytes=1234,
                     deleted=datetime(2017, 2, 10, 16, 30))
    backup2.put()

    # when
    existing_backups = Backup.get_all_backups_sorted(table.key)

    # then
    self.assertIn(backup1, existing_backups)
    self.assertNotIn(backup2, existing_backups)
def test_that_last_checked_date_is_updated_even_if_table_should_not_be_backed_up(  # nopep8 pylint: disable=C0301
        self, copy_table, _1, _2):
    # given
    table = Table(project_id="test-project",
                  dataset_id="test-dataset",
                  table_id="test-table",
                  last_checked=datetime.datetime(2017, 3, 3))
    table_reference = TableReference(project_id="test-project",
                                     dataset_id="test-dataset",
                                     table_id="test-table")

    # when
    table.put()
    BackupProcess(table_reference, self.big_query,
                  self.big_query_table_metadata).start()
    table_entity = Table.get_table("test-project", "test-dataset",
                                   "test-table")

    # then
    self.assertEqual(table_entity.last_checked,
                     datetime.datetime(2017, 4, 4))
    copy_table.assert_not_called()
def __create_table_without_backup(project_id, dataset_id):
    table_without_backup = Table(project_id=project_id,
                                 dataset_id=dataset_id,
                                 table_id='table_id_without_backup',
                                 partition_id=None,
                                 last_checked=datetime.now())
    table_without_backup.put()
def __get_tables(self, project_id, dataset_id, max_partition_days):
    if max_partition_days is None:
        return Table.get_tables(project_id, dataset_id, page_size=20)
    else:
        return Table.get_tables_with_max_partition_days(project_id,
                                                        dataset_id,
                                                        max_partition_days,
                                                        page_size=20)
def __create_table_without_backups():
    partition_id = "partitionIdWithoutBackup"
    table = Table(project_id=PROJECT_ID,
                  dataset_id=DATASET_ID,
                  table_id=TABLE_ID,
                  partition_id=partition_id,
                  last_checked=NOW)
    table.put()
def _create_table_entity(table_id,
                         partition_id=None,
                         last_checked=datetime.datetime.now()):
    # Note: the default for `last_checked` is evaluated once, at function
    # definition time, so all calls that omit it share the same timestamp.
    non_partitioned_table = Table(project_id='example-proj-name',
                                  dataset_id='example-dataset-name',
                                  table_id=table_id,
                                  partition_id=partition_id,
                                  last_checked=last_checked)
    non_partitioned_table.put()
def __create_table_with_two_backups():
    table = Table(project_id=PROJECT_ID,
                  dataset_id=DATASET_ID,
                  table_id=TABLE_ID,
                  partition_id=PARTITION_ID,
                  last_checked=NOW)
    table.put()
    backup_utils.create_backup(NOW, table, BACKUP_TABLE_ID_FROM_NOW).put()
    backup_utils.create_backup(OLD_TIME, table,
                               BACKUP_TABLE_ID_FROM_OLD_TIME).put()
def __create_table_entity(self):
    logging.info(
        "Creating table entity for %s",
        TableReference(self.project_id, self.dataset_id, self.table_id,
                       self.partition_id))
    table_entity = Table(project_id=self.project_id,
                         dataset_id=self.dataset_id,
                         table_id=self.table_id,
                         partition_id=self.partition_id,
                         last_checked=self.now)
    table_entity.put()
    return table_entity
def test_should_not_perform_retention_if_no_backups(self, delete_table):
    # given
    table = Table(project_id='example-proj-name',
                  dataset_id='example-dataset-name',
                  table_id='example-table-name',
                  last_checked=datetime.datetime(2017, 2, 1, 16, 30))
    table.put()
    reference = TableReference.from_table_entity(table)

    # when
    self.under_test.perform_retention(reference, table.key.urlsafe())

    # then
    delete_table.assert_not_called()
def test_should_create_datastore_backup_entity(self, _create_http, _):
    # given
    _create_http.return_value = HttpMockSequence([
        ({
            'status': '200'
        }, content('tests/json_samples/bigquery_v2_test_schema.json')),
        ({
            'status': '200'
        }, content('tests/json_samples/table_get/'
                   'bigquery_partitioned_table_get.json'))
    ])
    table_entity = Table(project_id="source_project_id",
                         dataset_id="source_dataset_id",
                         table_id="source_table_id",
                         partition_id="123")
    table_entity.put()

    source_bq_table = TableReference.from_table_entity(
        table_entity).create_big_query_table()
    destination_bq_table = BigQueryTable("target_project_id",
                                         "target_dataset_id",
                                         "target_table_id")
    data = {
        "sourceBqTable": source_bq_table,
        "targetBqTable": destination_bq_table
    }
    payload = json.dumps({
        "data": data,
        "jobJson": JobResultExample.DONE
    }, cls=RequestEncoder)
    copy_job_result = CopyJobResult(json.loads(payload).get('jobJson'))

    # when
    response = self.under_test.post(
        '/callback/backup-created/project/dataset/table', params=payload)
    backup = table_entity.last_backup

    # then
    self.assertEqual(response.status_int, 200)
    self.assertEqual(backup.dataset_id, "target_dataset_id")
    self.assertEqual(backup.table_id, "target_table_id")
    self.assertIsInstance(backup.created, datetime)
    self.assertEqual(backup.created, copy_job_result.end_time)
    self.assertIsInstance(backup.last_modified, datetime)
    self.assertEqual(backup.last_modified, copy_job_result.start_time)
def create_and_insert_table_with_one_backup(project_id, dataset_id, table_id,
                                            date, partition_id=None):
    table = Table(project_id=project_id,
                  dataset_id=dataset_id,
                  table_id=table_id,
                  partition_id=partition_id,
                  last_checked=date)
    table.put()
    backup_utils.create_backup(date, table,
                               table_id + date.strftime('%Y%m%d')).put()
    return table
def test_should_schedule_using_cursor(self):
    # given
    self._create_table_entity('non_partitioned_table1')
    self._create_table_entity('non_partitioned_table2')
    age_threshold_datetime = datetime.datetime.today() - relativedelta(
        months=(configuration.
                grace_period_after_source_table_deletion_in_months + 1))

    _, cursor, _1 = Table.query() \
        .filter(Table.last_checked >= age_threshold_datetime) \
        .order(Table.last_checked, Table.key) \
        .fetch_page(page_size=1)

    # when
    OrganizationRetention.schedule_retention_tasks_starting_from_cursor(
        cursor)

    # then
    tasks = self.taskqueue_stub.get_filtered_tasks()
    self.assertEqual(len(tasks), 1)
    self.assertTrue(tasks[0].url.startswith(
        '/tasks/retention/table'
        '?projectId=example-proj-name'),
        msg='Actual url: {}'.format(tasks[0].url))
def __get_source_table_entity(backup_entity):
    source_table_entity = Table.get_table_from_backup(backup_entity)
    if not source_table_entity:
        error_message = "Backup ancestor doesn't exist: '{}:{}'. " \
            .format(backup_entity.dataset_id, backup_entity.table_id)
        raise ParameterValidationException(error_message)
    return source_table_entity
def test_should_not_create_backups_entity_if_backup_table_doesnt_exist(
        self, _create_http, error_reporting, _):
    # given
    _create_http.return_value = HttpMockSequence([
        ({
            'status': '200'
        }, content('tests/json_samples/bigquery_v2_test_schema.json')),
        ({
            'status': '404'  # Table not found
        }, content('tests/json_samples/table_get/'
                   'bigquery_partitioned_table_get.json'))
    ])
    table_entity = Table(project_id="source_project_id",
                         dataset_id="source_dataset_id",
                         table_id="source_table_id",
                         partition_id="123")
    table_entity.put()

    source_bq_table = TableReference.from_table_entity(
        table_entity).create_big_query_table()
    destination_bq_table = BigQueryTable("target_project_id",
                                         "target_dataset_id",
                                         "target_table_id")
    data = {
        "sourceBqTable": source_bq_table,
        "targetBqTable": destination_bq_table
    }
    payload = json.dumps({
        "data": data,
        "jobJson": JobResultExample.DONE
    }, cls=RequestEncoder)

    # when
    response = self.under_test.post(
        '/callback/backup-created/project/dataset/table', params=payload)
    backup = table_entity.last_backup

    # then
    self.assertEqual(response.status_int, 200)
    self.assertIsNone(backup)
    error_reporting.assert_called_once()
def test_should_retrieve_table_using_backup(self):
    # given
    table = Table(project_id='example-proj-name',
                  dataset_id='example-dataset-name',
                  table_id='example-table-name',
                  last_checked=datetime(2017, 2, 1, 16, 30))
    table.put()
    backup = Backup(parent=table.key,
                    last_modified=datetime(2017, 2, 1, 16, 30),
                    created=datetime(2017, 2, 1, 16, 30),
                    dataset_id='targetDatasetId',
                    table_id='targetTableId',
                    numBytes=1234)
    backup.put()

    # then
    backup_entity = Backup.get_by_key(backup.key)
    table_entity = Table.get_table_from_backup(backup_entity)
    self.assertEqual(table_entity, table)
def create_backup(created_datetime, table=Table(), table_id=None):
    if not table_id:
        table_id = 'targetTable' + str(random.randint(1, 1000000))
    backup_size = random.randint(1, 1000)
    backup = Backup(parent=table.key,
                    created=created_datetime,
                    dataset_id='targetDataset',
                    table_id=table_id,
                    numBytes=backup_size)
    return backup
def create_backup_daily_sequence(count,
                                 table=Table(),
                                 start_date=datetime(2017, 2, 1, 16, 30)):
    backups = []
    for _ in range(count):
        backup = create_backup(start_date, table)
        backups.append(backup)
        start_date += timedelta(days=1)
    return Backup.sort_backups_by_create_time_desc(backups)
def __get_table_entity(table_reference):
    table = Table.get_table(table_reference.project_id,
                            table_reference.dataset_id,
                            table_reference.table_id,
                            table_reference.partition_id)
    if table is None:
        raise NotFoundException(
            'Table not found in datastore: {}'.format(table_reference))
    logging.info("Datastore table: %s", table)
    return table
def test_copy_job_and_entity_in_datastore_for_single_partition_of_a_table(
        self, _, _1, _2):
    # given
    table_reference = TableReference(project_id="test-project",
                                     dataset_id="test-dataset",
                                     table_id="test-table",
                                     partition_id="20170330")

    # when
    BackupProcess(table_reference, self.big_query,
                  self.big_query_table_metadata).start()
    ancestor_of_partition = Table.get_table("test-project", "test-dataset",
                                            "test-table")
    partition = Table.get_table("test-project", "test-dataset", "test-table",
                                "20170330")

    # then
    self.assertIsNotNone(partition)
    self.assertIsNone(ancestor_of_partition)
def create_multiple_table_entities(quantity,
                                   project_id,
                                   partition_id,
                                   dataset_id='example-dataset-name'):
    tables = []
    for i in range(1, quantity + 1):
        table = Table(project_id=project_id,
                      dataset_id=dataset_id,
                      table_id='example-table-name-{}'.format(i),
                      partition_id=partition_id,
                      last_checked=datetime(2017, 12, 5))
        tables.append(table)
    ndb.put_multi(tables)
def start(self):
    self.now = datetime.datetime.utcnow()
    table_entity = Table.get_table(self.project_id, self.dataset_id,
                                   self.table_id, self.partition_id)
    if self.__backup_ever_done(table_entity):
        # Table already known: refresh the check timestamp and back it up
        # again only if needed.
        self.__update_last_check(table_entity)
        if self.__should_backup(table_entity):
            self.__create_backup(table_entity)
    else:
        # Table seen for the first time: create its entity only when a
        # backup is actually going to be made.
        if self.__should_backup(table_entity):
            table_entity = self.__create_table_entity()
            self.__create_backup(table_entity)
def test_that_last_checked_date_is_updated_when_backup_is_processed(
        self, _, _1, _2):
    # given
    table = Table(project_id="test-project",
                  dataset_id="test-dataset",
                  table_id="test-table",
                  last_checked=datetime.datetime(2017, 3, 3))
    table_reference = TableReference(project_id="test-project",
                                     dataset_id="test-dataset",
                                     table_id="test-table")

    # when
    table.put()
    BackupProcess(table_reference, self.big_query,
                  self.big_query_table_metadata).start()
    table_entity = Table.get_table("test-project", "test-dataset",
                                   "test-table")

    # then
    self.assertEqual(table_entity.last_checked,
                     datetime.datetime(2017, 4, 4))
def setUp(self):
    self.initTestBedForDatastore()
    self.table = Table(project_id="p1", dataset_id="d1", table_id="t1")
    self.big_query_table_metadata = BigQueryTableMetadata({})
    patch('src.commons.big_query.big_query_table_metadata.'
          'BigQueryTableMetadata.is_empty',
          return_value=False).start()
    patch('src.commons.big_query.big_query_table_metadata.'
          'BigQueryTableMetadata.is_external_or_view_type',
          return_value=False).start()
    patch('src.commons.big_query.big_query_table_metadata.'
          'BigQueryTableMetadata.is_schema_defined',
          return_value=True).start()
def test_copy_job_and_entity_in_datastore_for_not_partitioned_table(
        self, _, _1, _2):
    # given
    table_reference = TableReference(project_id="test-project",
                                     dataset_id="test-dataset",
                                     table_id="test-table")

    # when
    BackupProcess(table_reference, self.big_query,
                  self.big_query_table_metadata).start()
    table_entity = Table.get_table("test-project", "test-dataset",
                                   "test-table")

    # then
    self.assertIsNotNone(table_entity)
def test_that_copy_job_and_entity_in_datastore_is_created_if_empty_partitioned_table(  # nopep8 pylint: disable=C0301
        self, create_backup, _, _1):
    # given
    table_reference = TableReference(project_id="test-project",
                                     dataset_id="test-dataset",
                                     table_id="test-table",
                                     partition_id=None)

    # when
    BackupProcess(table_reference, self.big_query,
                  self.big_query_table_metadata).start()
    table_in_datastore = Table.get_table("test-project", "test-dataset",
                                         "test-table")

    # then
    create_backup.assert_called_once()
    self.assertIsNotNone(table_in_datastore)