class TestTenYoungBackupVersionsFilter(unittest.TestCase):

    def setUp(self):
        self.under_test = TenYoungBackupVersionsFilter()

    @parameterized.expand([[0], [1], [2], [5], [7], [10]])
    @freeze_time("2019-08-01")
    def test_should_not_filter_out_if_there_is_10_or_less_young_table_backups(
            self, count):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        backups = backup_utils.create_backup_daily_sequence(
            count, start_date=datetime(2019, 7, 1))

        # when
        backups_to_retain = self.under_test.filter(list(backups), reference)

        # then
        self.assertListEqual(backups_to_retain, backups)

    @parameterized.expand([
        [TableReference('example-project-id', 'example-dataset-id',
                        'example-table-id')],
        [TableReference('example-project-id', 'example-dataset-id',
                        'example-table-id', '20170601')]
    ])
    @freeze_time("2019-08-01")
    def test_should_filter_out_young_backups_above_10_version(self, reference):
        # given
        backups = backup_utils.create_backup_daily_sequence(
            14, start_date=datetime(2019, 7, 1))
        expected_retained_backups = backups[:10]
        shuffle(backups)

        # when
        backups_to_retain = self.under_test.filter(backups, reference)

        # then
        self.assertListEqual(backups_to_retain, expected_retained_backups)

    @freeze_time("2019-09-02")
    def test_should_filter_out_young_backups_above_10_version_but_retain_old_backups(
            self):
        # given
        reference = TableReference('example-project-id', 'example-dataset-id',
                                   'example-table-id')
        young_backups = backup_utils.create_backup_daily_sequence(
            14, start_date=datetime(2019, 8, 15))
        old_backups = backup_utils.create_backup_daily_sequence(
            3, start_date=datetime(2019, 1, 1))
        all_backups = list(young_backups + old_backups)
        expected_retained_backups = list(young_backups[:10] + old_backups)
        shuffle(all_backups)

        # when
        backups_to_retain = self.under_test.filter(all_backups, reference)

        # then
        self.assertListEqual(backups_to_retain, expected_retained_backups)
def __create_non_partitioned_table_references(self):
    source_table_reference = TableReference(SOURCE_PROJECT_ID,
                                            SOURCE_DATASET_ID,
                                            SOURCE_TABLE_ID, None)
    target_table_reference = TableReference(TARGET_PROJECT_ID,
                                            TARGET_DATASET_ID,
                                            TARGET_TABLE_ID, None)
    return source_table_reference, target_table_reference
def test_parse_tab_ref(self):
    # when
    actual_table_ref = TableReference.parse_tab_ref(
        "proj321:dataset123.tableabc")

    # then
    self.assertEqual(TableReference("proj321", "dataset123", "tableabc"),
                     actual_table_ref)
def test_parse_tab_ref_for_partitioned_table(self):
    # when
    actual_table_ref = TableReference.parse_tab_ref(
        "proj321:dataset123.tableabc$20180226")

    # then
    self.assertEqual(
        TableReference("proj321", "dataset123", "tableabc", "20180226"),
        actual_table_ref)
def table_reference(self):
    table_reference = self.table_metadata['tableReference']
    if self.is_partition():
        return TableReference(table_reference['projectId'],
                              table_reference['datasetId'],
                              self.get_table_id(),
                              self.get_partition_id())
    else:
        return TableReference(table_reference['projectId'],
                              table_reference['datasetId'],
                              table_reference['tableId'])
def test_restore_item_default_state_is_in_progress(self):
    # given
    source_table = TableReference(project_id='source_project_id',
                                  dataset_id='source_dataset_id',
                                  table_id='source_table_id')
    target_table = TableReference(project_id='target_project_id',
                                  dataset_id='target_dataset_id',
                                  table_id='target_table_id')

    # when
    result = RestoreItem.create(source_table, target_table)

    # then
    self.assertEqual(RestoreItem.STATUS_IN_PROGRESS, result.status)
def __create_restore_items(count=1):
    result = []
    for i in range(0, count):
        source_table_reference = TableReference(
            "source_project_id_" + str(i), "source_dataset_id_" + str(i),
            "source_table_id_" + str(i), "source_partition_id_" + str(i))
        target_table_reference = TableReference(
            "target_project_id_" + str(i), "target_dataset_id_" + str(i),
            "target_table_id_" + str(i), "target_partition_id_" + str(i))
        restore_item = RestoreItem.create(source_table_reference,
                                          target_table_reference)
        result.append(
            (restore_item, source_table_reference, target_table_reference))
    return result
def test_on_demand_request_for_non_partitioned_table_is_properly_parsed(
        self, on_demand_table_backup_start):
    # given
    table_reference = TableReference('example-proj-name',
                                     'example-dataset-name',
                                     'example-table-name')
    url = '/tasks/backups/on_demand/table/{}/{}/{}'.format(
        table_reference.get_project_id(),
        table_reference.get_dataset_id(),
        table_reference.get_table_id())

    # when
    self.under_test.get(url)

    # then
    on_demand_table_backup_start.assert_called_with(table_reference)
def test_on_demand_request_for_partitioned_but_without_passing_partition_should_cause_400(
        self, on_demand_table_backup_start):
    # given
    table_reference = TableReference('example-proj-name',
                                     'example-dataset-name',
                                     'example-table-name')
    url = '/tasks/backups/on_demand/table/{}/{}/{}'.format(
        table_reference.get_project_id(),
        table_reference.get_dataset_id(),
        table_reference.get_table_id())

    # when
    response = self.under_test.get(url, expect_errors=True)

    # then
    self.assertEqual(400, response.status_int)
def test_table_str(self):
    # given
    table = TableReference("project1", "dataset1", "table1")

    # when
    table_string = str(table)

    # then
    self.assertEqual(table_string, "project1:dataset1.table1")
def test_should_disable_partition_expiration_if_backup_table_has_it(
        self, disable_partition_expiration, _, _1, _2, _3, _4, _5):
    # given
    table_entity = Table(project_id="source_project_id",
                         dataset_id="source_dataset_id",
                         table_id="source_table_id",
                         partition_id="123")
    table_entity.put()

    source_bq_table = TableReference.from_table_entity(
        table_entity).create_big_query_table()
    destination_bq_table = BigQueryTable("target_project_id",
                                         "target_dataset_id",
                                         "target_table_id")
    data = {
        "sourceBqTable": source_bq_table,
        "targetBqTable": destination_bq_table
    }
    payload = json.dumps({
        "data": data,
        "jobJson": JobResultExample.DONE
    }, cls=RequestEncoder)

    # when
    response = self.under_test.post(
        '/callback/backup-created/project/dataset/table', params=payload)

    # then
    self.assertEqual(response.status_int, 200)
    disable_partition_expiration.assert_called_once()
def test_schedule(self, schedule_tasks_for_partition_backup):
    # given
    project_id = "test-project"
    dataset_id = "test-dataset"
    table_id = "test-table"
    partition_id_1 = "20170330"
    partition_id_2 = "20170331"
    table_reference = TableReference(project_id=project_id,
                                     dataset_id=dataset_id,
                                     table_id=table_id,
                                     partition_id=None)
    big_query = Mock()
    big_query.list_table_partitions.return_value = [{
        "partitionId": partition_id_1
    }, {
        "partitionId": partition_id_2
    }]

    # when
    TablePartitionsBackupScheduler(table_reference, big_query).start()

    # then
    schedule_tasks_for_partition_backup.assert_has_calls([
        call(project_id, dataset_id, table_id,
             [partition_id_1, partition_id_2])
    ])
def get(self, project_id, dataset_id, table_id):
    partition_id = self.request.get('partitionId', None)
    is_restore_to_source_project = self.request.get(
        'isRestoreToSourceProject', None)
    target_dataset_id = self.request.get('targetDatasetId', None)
    create_disposition = self.request.get('createDisposition', None)
    write_disposition = self.request.get('writeDisposition', None)

    target_project_id = None if is_restore_to_source_project \
        else configuration.default_restoration_project_id

    validators.validate_restore_request_params(
        target_project_id=target_project_id,
        target_dataset_id=target_dataset_id,
        create_disposition=create_disposition,
        write_disposition=write_disposition)

    restoration_datetime = self.__get_restoration_datetime()
    table_reference = TableReference(project_id, dataset_id, table_id,
                                     partition_id)
    restore_data = TableRestoreService.restore(
        table_reference, target_project_id, target_dataset_id,
        create_disposition, write_disposition, restoration_datetime)
    self._finish_with_success(restore_data)
def __process(self, request_body_json):
    copy_job_results = CopyJobResult(request_body_json.get('jobJson'))
    data = request_body_json.get('data')

    if copy_job_results.has_errors():
        error_message = "Copy job failed with errors: {} ." \
                        "Backup for source: {}, target: {} " \
                        "has not been done. " \
            .format(copy_job_results.error_message, data["sourceBqTable"],
                    data["targetBqTable"])
        ErrorReporting().report(error_message)
        return

    backup_table_metadata = BigQueryTableMetadata.get_table_by_big_query_table(
        copy_job_results.target_bq_table)
    if backup_table_metadata.table_exists():
        self.__create_backup(backup_table_metadata, copy_job_results)
        if backup_table_metadata.has_partition_expiration():
            self.__disable_partition_expiration(
                TableReference.from_bq_table(copy_job_results.target_bq_table))
    else:
        ErrorReporting().report(
            "Backup table {0} not exist. Backup entity is not created".format(
                copy_job_results.target_bq_table))
def test_should_delete_same_day_duplicates_backups(self):
    # given
    reference = TableReference('example-project-id', 'example-dataset-id',
                               'example-table-id')
    first_5_backups = create_backup_daily_sequence(
        5, start_date=datetime(2017, 6, 1, 12))
    second_5_backups = create_backup_daily_sequence(
        5, start_date=datetime(2017, 6, 6, 12))
    first_5_backups_duplicated = create_backup_daily_sequence(
        5, start_date=datetime(2017, 6, 1, 14))
    backups = list(first_5_backups + second_5_backups +
                   first_5_backups_duplicated)
    backups_expected_for_deletion = list(first_5_backups)

    # when
    eligible_for_deletion = \
        self.under_test.get_backups_eligible_for_deletion(
            backups=list(backups), table_reference=reference)

    # then
    self.sortAndAssertListEqual(backups_expected_for_deletion,
                                eligible_for_deletion)
def generate_restore_items(cls, project_id, dataset_id, target_project_id,
                           target_dataset_id, max_partition_days):
    if max_partition_days:
        table_entities = Table \
            .get_tables_with_max_partition_days(project_id, dataset_id,
                                                max_partition_days)
    else:
        table_entities = Table.get_tables(project_id, dataset_id)

    for table_entity_sublist in paginated(1000, table_entities):
        restore_items = []
        for table_entity, backup_entity in Table.get_last_backup_for_tables(
                table_entity_sublist):
            if backup_entity is not None:
                source_table_reference = \
                    RestoreTableReference.backup_table_reference(
                        table_entity, backup_entity)
                target_table_reference = TableReference(
                    target_project_id,
                    target_dataset_id,
                    table_entity.table_id,
                    table_entity.partition_id
                )
                restore_item = RestoreItem.create(source_table_reference,
                                                  target_table_reference)
                restore_items.append(restore_item)
        logging.info("Restore items generator yields %s restore items",
                     len(restore_items))
        yield restore_items
def test_should_fill_deleted_field_in_backup_entity_if_table_not_found_error_during_deletion(
        self, _):
    # given
    table = Table(project_id='example-proj-name',
                  dataset_id='example-dataset-name',
                  table_id='example-table-name',
                  last_checked=datetime.datetime(2017, 2, 1, 16, 30))
    table.put()
    reference = TableReference.from_table_entity(table)
    backup1 = backup_utils.create_backup(
        datetime.datetime(2017, 2, 1, 16, 30), table, table_id="backup1")
    backup2 = backup_utils.create_backup(
        datetime.datetime(2017, 2, 2, 16, 30), table, table_id="backup2")
    ndb.put_multi([backup1, backup2])
    self.policy.get_backups_eligible_for_deletion = Mock(
        return_value=[backup1, backup2])

    # when
    self.under_test.perform_retention(reference, table.key.urlsafe())

    # then
    self.assertTrue(Backup.get_by_key(backup1.key).deleted is not None)
    self.assertTrue(Backup.get_by_key(backup2.key).deleted is not None)
def __update_last_check(self, table_entity):
    logging.info(
        "Updating last_check in entity for %s",
        TableReference(self.project_id, self.dataset_id, self.table_id,
                       self.partition_id))
    table_entity.last_checked = self.now
    table_entity.put()
def target_table_reference(self):
    return TableReference(
        project_id=self.target_table.project_id,
        dataset_id=self.target_table.dataset_id,
        table_id=self.target_table.table_id,
        partition_id=self.target_table.partition_id
    )
def __delete_backup_in_bq_and_update_datastore(self, backup):
    try:
        table_reference = TableReference(configuration.backup_project_id,
                                         backup.dataset_id,
                                         backup.table_id)
        self.big_query_service.delete_table(table_reference)
        logging.debug(
            u"Table %s deleted from BigQuery. "
            u"Updating datastore. Retention policy used: '%s'",
            table_reference, type(self.policy).__name__)
        Backup.mark_backup_deleted(backup.key)
    except TableNotFoundException:
        Backup.mark_backup_deleted(backup.key)
        logging.warning(
            u"Table '%s' was not found. But we updated datastore anyway",
            backup.table_id)
    except HttpError as ex:
        error_message = u"Unexpected HttpError occurred while deleting " \
                        u"table '{}', error: {}: {}" \
            .format(backup.table_id, type(ex), ex)
        logging.exception(error_message)
    except Exception as ex:
        error_message = u"Could not delete backup '{}' error: {}: {}" \
            .format(backup.table_id, type(ex), ex)
        logging.exception(error_message)
def test_should_delete_many_today_duplicates_and_11th_young_version_after_deduplication_and_retain_old_backup(
        self, _1, _2, _3):
    # given
    reference = TableReference('example-project-id', 'example-dataset-id',
                               'example-table-id')
    young_backups = create_backup_daily_sequence(
        10, start_date=datetime(2017, 8, 1))
    newest_duplicated_backup = create_backup(datetime(2017, 8, 19, 10))
    today_duplicated_backups = [
        newest_duplicated_backup,
        create_backup(datetime(2017, 8, 19, 9)),
        create_backup(datetime(2017, 8, 19, 8)),
        create_backup(datetime(2017, 8, 19, 7))
    ]
    old_backup = create_backup(datetime(2016, 8, 19, 10))
    backups = list(young_backups + today_duplicated_backups + [old_backup])
    backups_expected_for_deletion = list([young_backups[9]] +
                                         today_duplicated_backups[1:])

    # when
    eligible_for_deletion = \
        self.under_test.get_backups_eligible_for_deletion(
            backups=list(backups), table_reference=reference)

    # then
    self.sortAndAssertListEqual(backups_expected_for_deletion,
                                eligible_for_deletion)
def test_that_last_checked_date_is_updated_even_if_table_should_not_be_backed_up(  # nopep8 pylint: disable=C0301
        self, copy_table, _1, _2):
    # given
    table = Table(project_id="test-project",
                  dataset_id="test-dataset",
                  table_id="test-table",
                  last_checked=datetime.datetime(2017, 3, 3))
    table_reference = TableReference(project_id="test-project",
                                     dataset_id="test-dataset",
                                     table_id="test-table")

    # when
    table.put()
    BackupProcess(table_reference, self.big_query,
                  self.big_query_table_metadata).start()
    table_entity = Table.get_table("test-project", "test-dataset",
                                   "test-table")

    # then
    self.assertEqual(table_entity.last_checked,
                     datetime.datetime(2017, 4, 4))
    copy_table.assert_not_called()
def to_table_reference(table):
    partition_id = table['partitionId'] \
        if table['partitionId'] != "None" else None
    return TableReference(project_id=table['projectId'],
                          dataset_id=table['datasetId'],
                          table_id=table['tableId'],
                          partition_id=partition_id)
def test_partition_str(self):
    # given
    table_partition = TableReference("project1", "dataset1", "table1",
                                     "partition1")

    # when
    table_partition_string = str(table_partition)

    # then
    self.assertEqual(table_partition_string,
                     "project1:dataset1.table1$partition1")
def test_should_gracefully_deal_with_empty_backup_list(self):
    # given
    reference = TableReference('example-project-id', 'example-dataset-id',
                               'example-table-id')

    # when
    backups_to_retain = self.under_test.filter(backups=[],
                                               table_reference=reference)

    # then
    self.assertFalse(backups_to_retain)
def test_default_parameters_for_table_restoration(self, restore):
    # given & when
    self.under_test.get(RESTORE_TABLE_URL + '?')

    # then
    expected_table_reference = \
        TableReference('project-id', 'dataset_id', 'table_id')
    restore.assert_called_once_with(expected_table_reference, '', None, None,
                                    None, None)
def __generate_expected_restore_item(
        table, target_project_id=RESTORATION_PROJECT_ID,
        custom_target_dataset=None):
    expected_source = TableReference(
        project_id=BACKUP_PROJECT_ID,
        dataset_id=table.last_backup.dataset_id,
        table_id=table.last_backup.table_id,
        partition_id=table.partition_id)
    target_dataset = TestDatasetRestoreItemsGenerator.__create_target_dataset(
        custom_target_dataset)
    expected_target = TableReference(project_id=target_project_id,
                                     dataset_id=target_dataset,
                                     table_id=table.table_id,
                                     partition_id=table.partition_id)
    expected_restore_item = RestoreItem.create(expected_source,
                                               expected_target)
    return expected_restore_item
def __create_target_table_reference(restore_request, source_entity):
    target_project_id = restore_request.target_project_id
    target_dataset_id = restore_request.target_dataset_id

    if target_project_id is None:
        target_project_id = source_entity.project_id
    if target_dataset_id is None:
        target_dataset_id = source_entity.dataset_id

    return TableReference(target_project_id,
                          target_dataset_id,
                          source_entity.table_id,
                          source_entity.partition_id)
def test_should_throw_parameter_validation_exception_if_table_is_partitioned_but_partition_number_was_not_given(
        self, _1, _2):
    # given
    table_reference = TableReference(project_id="test-project",
                                     dataset_id="test-dataset",
                                     table_id="test-table",
                                     partition_id="")

    # when-then
    with self.assertRaises(ParameterValidationException):
        OnDemandTableBackup.start(table_reference)
def test_that_dataset_will_not_be_unnecessary_created_twice(
        self, _, _1, _2, _3):
    # given
    table_reference_1 = TableReference(project_id="test-project",
                                       dataset_id="test-dataset",
                                       table_id="test-table-1")
    table_reference_2 = TableReference(project_id="test-project",
                                       dataset_id="test-dataset",
                                       table_id="test-table-2")

    # when
    self.big_query.create_dataset = MagicMock()
    BackupProcess(table_reference_1, self.big_query,
                  self.big_query_table_metadata).start()
    BackupProcess(table_reference_2, self.big_query,
                  self.big_query_table_metadata).start()

    # then
    self.big_query.create_dataset.assert_called_once()