def test_iterating_datasets(self, _):
    # given
    self._create_http.return_value = self.__create_dataset_list_responses()
    bq = BigQuery()

    # when
    dataset_ids, next_page_token = bq.list_dataset_ids("project123")

    # then
    self.assertEqual(self.count(dataset_ids), 2)
    self.assertEqual(next_page_token, 'FMLMpsxvgM')

    # when
    dataset_ids, next_page_token = bq.list_dataset_ids(
        "project123", page_token=next_page_token)

    # then
    self.assertEqual(self.count(dataset_ids), 1)
    self.assertEqual(next_page_token, None)
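The __create_dataset_list_responses helper referenced above is not shown in this section. A minimal sketch of what it could look like, assuming the test stubs the HTTP layer with an HttpMockSequence from googleapiclient.http (the dataset names below are made up; only the page sizes and the 'FMLMpsxvgM' token follow from the assertions):

import json

from googleapiclient.http import HttpMockSequence


def __create_dataset_list_responses(self):
    # Hypothetical sketch, not the real helper: first page returns two
    # datasets plus a nextPageToken, second page returns one dataset and
    # no token, matching the assertions in the test above.
    first_page = json.dumps({
        'datasets': [
            {'datasetReference': {'projectId': 'project123', 'datasetId': 'dataset1'}},
            {'datasetReference': {'projectId': 'project123', 'datasetId': 'dataset2'}}
        ],
        'nextPageToken': 'FMLMpsxvgM'
    })
    second_page = json.dumps({
        'datasets': [
            {'datasetReference': {'projectId': 'project123', 'datasetId': 'dataset3'}}
        ]
    })
    return HttpMockSequence([({'status': '200'}, first_page),
                             ({'status': '200'}, second_page)])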
class BackupScheduler(object):

    def __init__(self):
        self.big_query = BigQuery()
        self.request_correlation_id = str(uuid.uuid4())

    def iterate_over_all_datasets_and_schedule_backups(self):
        custom_project_list = configuration.backup_settings_custom_project_list
        if custom_project_list:
            project_ids = custom_project_list
            logging.info(
                'Only projects specified in the configuration will'
                ' be backed up: %s', project_ids)
        else:
            project_ids = list(self.big_query.list_project_ids())

        logging.info('Scheduling backups of %s projects', len(project_ids))

        for project_id in project_ids:
            try:
                self.__list_and_backup_datasets(project_id)
            except Exception as ex:
                error_message = 'Failed to list and backup datasets: ' + str(ex)
                ErrorReporting().report(error_message)

    def __list_and_backup_datasets(self, project_id):
        if project_id in configuration.projects_to_skip:
            logging.info('Skipping project: %s', project_id)
            return

        logging.info('Backing up project: %s, request_correlation_id: %s',
                     project_id, self.request_correlation_id)

        for dataset_id in self.big_query.list_dataset_ids(project_id):
            try:
                self.__backup_dataset(project_id, dataset_id)
            except Exception as ex:
                error_message = 'Failed to backup dataset: ' + str(ex)
                ErrorReporting().report(error_message)

    def __backup_dataset(self, project_id, dataset_id):
        logging.info('Backing up dataset: %s', dataset_id)
        task = Tasks.create(
            url='/tasks/backups/dataset',
            params={
                'projectId': project_id,
                'datasetId': dataset_id
            },
            headers={
                request_correlation_id.HEADER_NAME:
                    self.request_correlation_id
            })
        Tasks.schedule('backup-scheduler', task)
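Tasks here is the project's thin wrapper around the task queue; its internals are not part of this snippet. A rough sketch of how such a wrapper could look on top of App Engine's google.appengine.api.taskqueue (the method names create and schedule are taken from the calls above, everything else is an assumption):

from google.appengine.api import taskqueue


class Tasks(object):
    # Hypothetical wrapper, sketched only to illustrate the calls used above.

    @classmethod
    def create(cls, url, params=None, headers=None):
        # Build a push task that will POST to the given handler URL.
        return taskqueue.Task(url=url, params=params, headers=headers)

    @classmethod
    def schedule(cls, queue_name, tasks):
        # Queue.add accepts a single Task or a list of Tasks.
        taskqueue.Queue(queue_name).add(tasks)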
class ProjectBackupScheduler(object):

    def __init__(self):
        self.big_query = BigQuery()

    def schedule_backup(self, project_id, page_token=None):
        dataset_ids_to_backup, next_page_token = self.big_query.list_dataset_ids(
            project_id=project_id, page_token=page_token)

        self._schedule_dataset_backup_scheduler_tasks(project_id,
                                                      dataset_ids_to_backup)

        if next_page_token:
            logging.info(
                u'Scheduling Project Backup Scheduler task for %s, page_token: %s',
                project_id, next_page_token)
            Tasks.schedule('backup-scheduler',
                           TaskCreator.create_project_backup_scheduler_task(
                               project_id, next_page_token))

    @staticmethod
    def _schedule_dataset_backup_scheduler_tasks(project_id, dataset_ids):
        logging.info(
            u'Scheduling Dataset Backup Scheduler tasks for %s %s project datasets: %s.',
            len(dataset_ids), project_id, dataset_ids)
        tasks = []
        for dataset_id in dataset_ids:
            tasks.append(
                TaskCreator.create_dataset_backup_scheduler_task(
                    project_id=project_id, dataset_id=dataset_id))
        Tasks.schedule('backup-scheduler', tasks)
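The paginated list_dataset_ids that both the test and ProjectBackupScheduler rely on is not shown here. Assuming the BigQuery wrapper sits on top of the google-api-python-client BigQuery v2 service, fetching one page of dataset ids together with the next page token could look roughly like this (the self.service attribute and the page size are assumptions of this sketch):

def list_dataset_ids(self, project_id, page_token=None):
    # Hypothetical sketch: fetch a single page via the BigQuery v2
    # datasets.list call and return (dataset_ids, next_page_token).
    response = self.service.datasets().list(
        projectId=project_id,
        pageToken=page_token,
        maxResults=50
    ).execute()
    dataset_ids = [dataset['datasetReference']['datasetId']
                   for dataset in response.get('datasets', [])]
    return dataset_ids, response.get('nextPageToken')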