예제 #1
0
    def test_iterating_datasets(self, _):
        """Check that dataset ids can be listed one page at a time.

        The first ``list_dataset_ids`` call is expected to give two dataset
        ids together with the page token ``'FMLMpsxvgM'``; calling again with
        that token is expected to give one more dataset id and no token.
        """
        # given: `_create_http` (presumably a patched mock) returns the
        # prepared dataset list responses
        self._create_http.return_value = self.__create_dataset_list_responses()
        big_query = BigQuery()

        # when: the first page is requested
        first_page_ids, first_page_token = big_query.list_dataset_ids(
            "project123")

        # then
        self.assertEqual(self.count(first_page_ids), 2)
        self.assertEqual(first_page_token, 'FMLMpsxvgM')

        # when: the following page is requested with the returned token
        last_page_ids, last_page_token = big_query.list_dataset_ids(
            "project123", page_token=first_page_token)

        # then
        self.assertEqual(self.count(last_page_ids), 1)
        self.assertEqual(last_page_token, None)
예제 #2
0
class BackupScheduler(object):
    """Creates one dataset backup task for every dataset of every project.

    The projects come either from the
    ``configuration.backup_settings_custom_project_list`` setting or, when
    that setting is empty, from ``BigQuery.list_project_ids()``.
    """

    def __init__(self):
        self.big_query = BigQuery()
        # Generated once per scheduler instance; it is logged and attached
        # as a header to every task this instance creates.
        self.request_correlation_id = str(uuid.uuid4())

    def iterate_over_all_datasets_and_schedule_backups(self):
        """Schedule dataset backups for all projects that should be backed up.

        A failure while handling one project is reported through
        ``ErrorReporting`` and does not stop the remaining projects.
        """
        configured_projects = configuration.backup_settings_custom_project_list
        if not configured_projects:
            project_ids = list(self.big_query.list_project_ids())
        else:
            project_ids = configured_projects
            logging.info(
                'Only projects specified in the configuration will'
                ' be backed up: %s', project_ids)

        logging.info('Scheduling backups of %s projects', len(project_ids))
        for current_project in project_ids:
            try:
                self.__list_and_backup_datasets(current_project)
            except Exception as ex:
                # Best effort: report the problem and move on to the next
                # project instead of aborting the whole run.
                failure = 'Failed to list and backup datasets: ' + str(ex)
                ErrorReporting().report(failure)

    def __list_and_backup_datasets(self, project_id):
        """Schedule a backup task for each dataset of ``project_id``.

        Projects listed in ``configuration.projects_to_skip`` are only
        logged. A failure for a single dataset is reported through
        ``ErrorReporting`` and the loop continues with the next dataset.
        """
        if project_id in configuration.projects_to_skip:
            logging.info('Skipping project: %s', project_id)
            return

        logging.info('Backing up project: %s, request_correlation_id: %s',
                     project_id, self.request_correlation_id)
        # NOTE(review): every item produced by list_dataset_ids is treated as
        # a dataset id here - confirm the BigQuery client in use does not
        # return an (ids, page_token) pair.
        for current_dataset in self.big_query.list_dataset_ids(project_id):
            try:
                self.__backup_dataset(project_id, current_dataset)
            except Exception as ex:
                failure = 'Failed to backup dataset: ' + str(ex)
                ErrorReporting().report(failure)

    def __backup_dataset(self, project_id, dataset_id):
        """Create the backup task for one dataset and schedule it."""
        logging.info('Backing up dataset: %s', dataset_id)
        task_params = {
            'projectId': project_id,
            'datasetId': dataset_id
        }
        # The bare name `request_correlation_id` is resolved outside the
        # class (it supplies HEADER_NAME); the header value is this
        # instance's own correlation id.
        task_headers = {
            request_correlation_id.HEADER_NAME: self.request_correlation_id
        }
        backup_task = Tasks.create(url='/tasks/backups/dataset',
                                   params=task_params,
                                   headers=task_headers)
        Tasks.schedule('backup-scheduler', backup_task)
예제 #3
0
class ProjectBackupScheduler(object):
    """Schedules dataset backup scheduler tasks for a project, page by page.

    Each ``schedule_backup`` call handles a single page of dataset ids and,
    when more pages remain, schedules a follow-up task for the next page.
    """

    def __init__(self):
        self.big_query = BigQuery()

    def schedule_backup(self, project_id, page_token=None):
        """Schedule tasks for one page of the project's datasets.

        Args:
            project_id: id of the project whose datasets are listed.
            page_token: token passed to ``BigQuery.list_dataset_ids`` to
                select the page; ``None`` (the default) is passed through
                unchanged.

        If the listing returns a truthy next page token, a project backup
        scheduler task carrying that token is scheduled as well.
        """
        dataset_ids_to_backup, next_page_token = (
            self.big_query.list_dataset_ids(
                project_id=project_id,
                page_token=page_token))

        self._schedule_dataset_backup_scheduler_tasks(project_id,
                                                      dataset_ids_to_backup)

        if next_page_token:
            logging.info(
                u'Scheduling Project Backup Scheduler task for %s, page_token: %s',
                project_id, next_page_token)
            Tasks.schedule('backup-scheduler',
                           TaskCreator.create_project_backup_scheduler_task(
                               project_id,
                               next_page_token)
                           )

    @staticmethod
    def _schedule_dataset_backup_scheduler_tasks(project_id, dataset_ids):
        """Create one dataset backup scheduler task per id and schedule them.

        Args:
            project_id: id of the project owning the datasets.
            dataset_ids: iterable of dataset ids; it may be lazy (for example
                a generator), it is consumed exactly once.
        """
        # Materialize once so that len() and the log line below also work
        # when a lazy iterable is passed in; a list argument behaves as
        # before.
        dataset_ids = list(dataset_ids)

        logging.info(
            u'Scheduling Dataset Backup Scheduler tasks for %s %s project datasets: %s.',
            len(dataset_ids), project_id, dataset_ids)

        tasks = [
            TaskCreator.create_dataset_backup_scheduler_task(
                project_id=project_id,
                dataset_id=dataset_id)
            for dataset_id in dataset_ids
        ]

        Tasks.schedule('backup-scheduler', tasks)