def __validate_restoration_date(date):
    # strptime both checks the format and rejects impossible dates.
    try:
        datetime.strptime(date, '%Y-%m-%d')
    except ValueError:
        raise ParameterValidationException(
            "Wrong date value format for parameter 'restoration_date'. "
            "Should be 'YYYY-mm-dd'")
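A minimal usage sketch of the validator above (at module level the double underscore does not trigger name mangling, so it is callable as written):

__validate_restoration_date('2017-12-01')  # passes
__validate_restoration_date('01-12-2017')  # raises ParameterValidationException
__validate_restoration_date('2017-13-01')  # raises too: there is no month 13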
Example no. 2
def __get_backup_entities(backup_items):
    try:
        backup_keys = [i.backup_key for i in backup_items]
        # Fetch all entities in one batch, bypassing both caches so a
        # stale cache entry cannot mask a missing backup.
        for key, entity in zip(backup_keys,
                               ndb.get_multi(backup_keys,
                                             use_cache=False,
                                             use_memcache=False)):
            if not entity:
                error_message = "Backup entity (key={}) doesn't exist " \
                                "in datastore.".format(key)
                raise ParameterValidationException(error_message)
            yield entity
    except BadRequestError as e:
        error_message = "Couldn't obtain backup entity in datastore. " \
                        "Error: \n{}".format(e.message)
        raise ParameterValidationException(error_message)
Example no. 3
def __get_source_table_entity(backup_entity):
    source_table_entity = Table.get_table_from_backup(backup_entity)
    if not source_table_entity:
        error_message = "Backup ancestor doesn't exist: '{}:{}'." \
            .format(backup_entity.dataset_id,
                    backup_entity.table_id)
        raise ParameterValidationException(error_message)
    return source_table_entity
Example no. 4
def parse_url_safe_key(url_safe_key):
    try:
        return ndb.Key(urlsafe=url_safe_key)
    except (TypeError, ProtocolBufferDecodeError) as e:
        raise ParameterValidationException(
            "Unable to parse url safe key: {}, error type: {}, "
            "error message: {}".format(url_safe_key,
                                       type(e).__name__, e.message))
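A round-trip sketch, assuming a Google App Engine runtime where ndb is importable:

from google.appengine.ext import ndb

key = ndb.Key('Table', 123, 'Backup', 456)
assert parse_url_safe_key(key.urlsafe()) == key  # round-trips cleanly
parse_url_safe_key('not-a-key')  # raises ParameterValidationException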
    def test(self, big_query_table_metadata, table_entity):
        if big_query_table_metadata.is_daily_partitioned() \
                and not big_query_table_metadata.is_partition():
            raise ParameterValidationException(
                "Partition id is required for partitioned table "
                "in on-demand mode")

        table_validation_status, table_validation_message = \
            self._is_possible_to_copy_table(big_query_table_metadata)

        if not table_validation_status:
            if table_validation_message == "Table not found":
                raise NotFoundException(table_validation_message)
            else:
                raise ParameterValidationException(table_validation_message)

        logging.info("Performing on-demand backup for %s."
                     "It is performed without checking "
                     "if table already has up to date backup",
                     big_query_table_metadata.table_reference())

        return True
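A usage sketch for the predicate method above, assuming it belongs to OnDemandBackupPredicate (wired up in Example no. 11 below); table_entity is unused in the snippet shown, so None is passed:

metadata = BigQueryTableMetadata.get_table_by_reference(table_reference)
should_backup = OnDemandBackupPredicate().test(metadata, table_entity=None)
# returns True, or raises ParameterValidationException / NotFoundException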
Example no. 6
def parse_bq_key(backup_bq_key):
    try:
        # The key is a base64-encoded, comma-separated list of exactly
        # four parts: table kind, table id, backup kind, backup id.
        key_parts = backup_bq_key.decode('base64') \
            .replace("\"", "").replace(" ", "").split(",")
        if len(key_parts) != 4:
            raise ParameterValidationException(
                "Unable to parse backup BQ key: {}, "
                "key doesn't consist of 4 parts".format(backup_bq_key))
        table_kind = key_parts[0]
        table_id = int(key_parts[1])
        backup_kind = key_parts[2]
        backup_id = int(key_parts[3])
        return ndb.Key(backup_kind, backup_id,
                       parent=ndb.Key(table_kind, table_id))
    except (Error, ValueError) as e:
        raise ParameterValidationException(
            "Unable to parse backup BQ key: {}, error type: {}, "
            "error message: {}".format(backup_bq_key,
                                       type(e).__name__, e.message))
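A sketch of the expected input format, assuming the key arrives as a base64-encoded, comma-separated list of kind/id pairs (e.g. from a BigQuery export of Datastore keys):

raw = '"Table", 123, "Backup", 456'
key = parse_bq_key(raw.encode('base64'))  # Python 2 base64 codec, as above
assert key.pairs() == (('Table', 123), ('Backup', 456))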
    def validate_parameters(self, project_id, dataset_id, target_project_id,
                            target_dataset_id, max_partition_days):

        if target_project_id is None:
            raise ParameterValidationException(
                "Required target project id parameter is None")

        any_backup = self.__get_backup(project_id, dataset_id,
                                       max_partition_days)

        self.__validate_locations(any_backup, target_project_id,
                                  target_dataset_id)
    def __validate_locations(self, any_backup, target_project_id,
                             target_dataset_id):
        target_location = self.__get_target_dataset_location(
            target_project_id, target_dataset_id)

        if target_location is None:
            return

        backup_location = self.__get_backup_dataset_location(any_backup)
        if target_location != backup_location:
            raise ParameterValidationException(
                "Target dataset already exists and has a different "
                "location than the backup dataset")
Example no. 9
    def create(cls, date, location, project):
        """
        :return:
            Dataset id for specified project and location
                in 'year_week_location_project' format.
            If date, location or project are not specified
                throws ParameterValidationException.
        """
        if date is None:
            raise ParameterValidationException(
                'No date specified, attribute is mandatory.')
        if location is None:
            raise ParameterValidationException(
                'No location specified, attribute is mandatory.')
        if project is None:
            raise ParameterValidationException(
                'No project id specified, attribute is mandatory.')

        year = str(date.year)
        week = format(date.isocalendar()[1], '02')

        return '_'.join((year, week, location, project)).replace('-', '_')
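A worked example with hypothetical values (the class name is not shown in the snippet, so DatasetIdCreator is assumed): 2017-12-01 falls in ISO week 48, and '-' in the project id becomes '_':

import datetime

dataset_id = DatasetIdCreator.create(
    datetime.date(2017, 12, 1), 'EU', 'example-project')
assert dataset_id == '2017_48_EU_example_project'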
Example no. 10
    def create(cls,
               project_id,
               dataset_id,
               table_id,
               timestamp,
               partition_id=None):
        """
        :return:
            Backup id for specified project, dataset, table, timestamp
                and partition(optional).
            If created id exceeds number of 1024 chars,
                it replace last 24 signs with '-' and 18-20 length hash
            If project, dataset, table or timestamp are not specified
                throws ParameterValidationException.
        """
        if project_id is None:
            raise ParameterValidationException(
                'No project specified, attribute is mandatory.')
        if dataset_id is None:
            raise ParameterValidationException(
                'No dataset specified, attribute is mandatory.')
        if table_id is None:
            raise ParameterValidationException(
                'No table specified, attribute is mandatory.')
        if timestamp is None:
            raise ParameterValidationException(
                'No timestamp specified, attribute is mandatory.')

        name = '_'.join(
            (timestamp.strftime("%Y%m%d_%H%M%S"), project_id.replace('-', '_'),
             dataset_id, table_id)) + ('' if partition_id is None else
                                       '_partition_' + str(partition_id))
        if len(name) > 1024:
            # checksum returns long int with a sign, 18-20 characters long
            checksum = str(hash(name)).replace('-', '_')
            return '_'.join((name[:1000], checksum))[:1024]
        else:
            return name
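A worked example with hypothetical values (BackupIdCreator is an assumed class name); note the '-' in the project id becomes '_' and the partition suffix comes last:

import datetime

backup_id = BackupIdCreator.create(
    'example-proj', 'dataset1', 'table1',
    datetime.datetime(2017, 12, 1, 12, 0, 0), partition_id='20171201')
assert backup_id == \
    '20171201_120000_example_proj_dataset1_table1_partition_20171201'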
Example no. 11
    def start(table_reference):
        big_query_table_metadata = BigQueryTableMetadata.get_table_by_reference(
            table_reference)

        if big_query_table_metadata.is_daily_partitioned() \
                and not big_query_table_metadata.is_partition():
            raise ParameterValidationException(
                "Partition id is required for partitioned table "
                "in on-demand mode")

        BackupProcess(
            table_reference=table_reference,
            big_query=BigQuery(),
            big_query_table_metadata=big_query_table_metadata,
            should_backup_predicate=OnDemandBackupPredicate()).start()
    def __get_backup(self, project_id, dataset_id, max_partition_days):
        logging.info(
            "Getting backups for project '%s' for dataset '%s'"
            " with max_partition_days '%s'", project_id, dataset_id,
            max_partition_days)
        table_entities_found = False
        tables = self.__get_tables(project_id, dataset_id, max_partition_days)
        for table in tables:
            table_entities_found = True
            table_backup = table.last_backup
            if table_backup is not None:
                return table_backup

        if not table_entities_found:
            # @refactor: this should be NotFoundException mapped to 404,
            # not ParameterValidationException mapped to 400
            raise ParameterValidationException(
                "No tables were found in Datastore for project {}, "
                "dataset {}".format(project_id, dataset_id))

        # @refactor: same as above
        raise ParameterValidationException(
            "No backups were found in Datastore for project {}, "
            "dataset {}".format(project_id, dataset_id))
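A sketch of the refactor suggested by the comments above, assuming NotFoundException (used in the predicate's test method earlier) is mapped to HTTP 404:

        if not table_entities_found:
            raise NotFoundException(
                "No tables were found in Datastore for project {}, "
                "dataset {}".format(project_id, dataset_id))

        raise NotFoundException(
            "No backups were found in Datastore for project {}, "
            "dataset {}".format(project_id, dataset_id))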
Example no. 13
def validate_restore_request_params(
        source_project_id=None, source_dataset_id=None,
        target_project_id=None, target_dataset_id=None,
        create_disposition=None, write_disposition=None):
    try:
        if source_project_id:
            validate_project_id(source_project_id)
        if source_dataset_id:
            validate_dataset_id(source_dataset_id)
        if target_project_id:
            validate_project_id(target_project_id)
        if target_dataset_id:
            validate_dataset_id(target_dataset_id)
        if write_disposition:
            validate_write_disposition(write_disposition)
        if create_disposition:
            validate_create_disposition(create_disposition)

    except (WrongDatasetNameException,
            WrongProjectNameException,
            WrongWriteDispositionException,
            WrongCreateDispositionException) as e:
        raise ParameterValidationException(e.message)
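A minimal usage sketch (values are hypothetical); parameters left as None are simply skipped, so callers validate only what they actually received:

validate_restore_request_params(
    target_project_id='target-project',
    target_dataset_id='restored_dataset',
    write_disposition='WRITE_TRUNCATE')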
class TestOnDemandTableBackupHandler(unittest.TestCase):
    def setUp(self):
        patch('googleapiclient.discovery.build').start()
        app = on_demand_table_backup_handler.app
        self.under_test = webtest.TestApp(app)
        self.testbed = testbed.Testbed()
        self.testbed.activate()
        self.testbed.init_memcache_stub()

    def tearDown(self):
        self.testbed.deactivate()
        patch.stopall()

    @patch.object(OnDemandTableBackup, 'start')
    def test_on_demand_request_for_partitioned_table_is_properly_parsed(
            self, on_demand_table_backup_start):
        # given
        table_reference = TableReference('example-proj-name',
                                         'example-dataset-name',
                                         'example-table-name', '20171201')
        url = '/tasks/backups/on_demand/table/{}/{}/{}/{}' \
            .format(table_reference.get_project_id(),
                    table_reference.get_dataset_id(),
                    table_reference.get_table_id(),
                    table_reference.get_partition_id())

        # when
        self.under_test.get(url)

        # then
        on_demand_table_backup_start.assert_called_with(table_reference)

    @patch.object(OnDemandTableBackup, 'start')
    def test_on_demand_request_for_non_partitioned_table_is_properly_parsed(
            self, on_demand_table_backup_start):
        # given
        table_reference = TableReference('example-proj-name',
                                         'example-dataset-name',
                                         'example-table-name')
        url = '/tasks/backups/on_demand/table/{}/{}/{}'.format(
            table_reference.get_project_id(), table_reference.get_dataset_id(),
            table_reference.get_table_id())

        # when
        self.under_test.get(url)

        # then
        on_demand_table_backup_start.assert_called_with(table_reference)

    @patch.object(OnDemandTableBackup,
                  'start',
                  side_effect=ParameterValidationException("error msg"))
    def test_on_demand_request_for_partitioned_but_without_passing_partition_should_cause_400(
            self, on_demand_table_backup_start):
        # given
        table_reference = TableReference('example-proj-name',
                                         'example-dataset-name',
                                         'example-table-name')
        url = '/tasks/backups/on_demand/table/{}/{}/{}'.format(
            table_reference.get_project_id(), table_reference.get_dataset_id(),
            table_reference.get_table_id())

        # when
        response = self.under_test.get(url, expect_errors=True)

        # then
        self.assertEqual(400, response.status_int)