Exemple #1
0
def __increase_geojson_nesting(updater: DocumentUpdater,
                               from_type: str,
                               to_type: str,
                               depth: int = 1):
    """
    Wrap `coordinates` of GeoJSON fields with given type into nested
    arrays and mark the changed records with the new GeoJSON type.

    Only values which are dicts whose `type` equals to `from_type` and
    whose `coordinates` are non-empty get changed. Everything else is
    left untouched.
    :param updater: DocumentUpdater object
    :param from_type: GeoJSON type to change
    :param to_type: this GeoJSON type will be set in changed records
    :param depth: nested array depth to wrap in, must be positive
    :return:
    """
    assert depth > 0

    def by_doc(ctx: ByDocContext):
        geo = ctx.document.get(updater.field_name)
        if not isinstance(geo, dict):
            return
        if geo.get('type') != from_type or not geo.get('coordinates'):
            return

        coordinates = geo['coordinates']
        for _ in range(depth):
            coordinates = [coordinates]
        geo['coordinates'] = coordinates
        # Nesting has been changed, so the old type does not describe
        # the `coordinates` layout anymore
        geo['type'] = to_type

    updater.update_by_document(by_doc)
Exemple #2
0
def to_string(updater: DocumentUpdater):
    """
    Convert field values to strings. References (DBRef, dynamic ref,
    manual ref) are replaced with the string form of their id
    :param updater: DocumentUpdater object
    :return:
    """
    def by_doc(ctx: ByDocContext):
        record = ctx.document
        name = updater.field_name
        value = record.get(name)
        if value is None:  # Field is absent or null
            return

        if isinstance(value, bson.DBRef):
            record[name] = str(value.id)
            return

        if isinstance(value, dict):
            ref = value.get('_ref')
            if isinstance(ref, bson.DBRef):  # dynamic ref
                record[name] = str(ref.id)
                return
            oid = value.get('_id')
            if isinstance(oid, bson.ObjectId):  # manual ref
                record[name] = str(oid)
                return

        # Any other value is converted as is
        try:
            record[name] = str(value)
        except (TypeError, ValueError) as e:
            # Non-strict policy leaves unconvertible value untouched
            if updater.migration_policy.name == 'strict':
                raise MigrationError(
                    f'Cannot convert value {name}: '
                    f'{record[name]} to string') from e

    # TODO: precheck if field actually contains value other than string
    updater.update_by_document(by_doc)
Exemple #3
0
def __decrease_geojson_nesting(updater: DocumentUpdater,
                               from_type: str,
                               to_type: str,
                               depth: int = 1):
    """
    Extract the first element from nested arrays in `coordinates` field
    on GeoJSON fields with given type and mark the changed records
    with the new GeoJSON type.

    Only values which are dicts whose `type` equals to `from_type` and
    whose `coordinates` are non-empty get changed. If `coordinates`
    are nested less deeply than `depth` then they become None.
    :param updater: DocumentUpdater object
    :param from_type: GeoJSON type to change
    :param to_type: this GeoJSON type will be set in changed records
    :param depth: nested array depth to extract from, must be positive
    :return:
    """
    assert depth > 0

    def by_doc(ctx: ByDocContext):
        geo = ctx.document.get(updater.field_name)
        if not isinstance(geo, dict):
            return
        if geo.get('type') != from_type or not geo.get('coordinates'):
            return

        coordinates = geo['coordinates']
        for _ in range(depth):
            if coordinates and isinstance(coordinates, (list, tuple)):
                coordinates = coordinates[0]
            else:
                # Nothing to extract from on this nesting level
                coordinates = None
        geo['coordinates'] = coordinates
        # Nesting has been changed, so the old type does not describe
        # the `coordinates` layout anymore
        geo['type'] = to_type

    updater.update_by_document(by_doc)
Exemple #4
0
def to_dbref(updater: DocumentUpdater):
    """
    Convert references (ObjectId, ObjectId string, manual ref,
    dynamic ref) to DBRef
    :param updater: DocumentUpdater object
    :return:
    """
    def by_doc(ctx: ByDocContext):
        record = ctx.document
        name = updater.field_name
        value = record.get(name)
        # Skip absent/null fields and values which are already DBRef
        if value is None or isinstance(value, bson.DBRef):
            return

        if isinstance(value, str):  # Possibly ObjectId as string
            try:
                value = bson.ObjectId(value)
            except bson.errors.BSONError:
                pass  # Not an ObjectId, handled as "other data type" below

        target = None if ctx.collection is None else ctx.collection.name

        if isinstance(value, bson.ObjectId):
            record[name] = bson.DBRef(target, value)
            return
        if isinstance(value, dict):
            if isinstance(value.get('_id'), bson.ObjectId):  # manual ref
                record[name] = bson.DBRef(target, value['_id'])
                return
            if isinstance(value.get('_ref'), bson.DBRef):  # dynamic ref
                record[name] = value['_ref']
                return

        # Other data type
        if updater.migration_policy.name == 'strict':
            raise InconsistencyError(
                f"Field {name} has wrong value {value!r} "
                f"(should be DBRef, ObjectId, manual ref, dynamic ref, "
                f"ObjectId string) in record {record}")

    # TODO: precheck if field actually contains value other than DBRef
    updater.update_by_document(by_doc)
Exemple #5
0
def item_to_list(updater: DocumentUpdater, remove_cls_key=False):
    """
    Wrap every non-array value into a list with a single element.
    Null value becomes an empty list
    :param updater: DocumentUpdater object
    :param remove_cls_key: if True then '_cls' key will be removed
     from a dict value before wrapping
    :return:
    """
    def by_doc(ctx: ByDocContext):
        record = ctx.document
        name = updater.field_name
        if name not in record:
            return

        value = record[name]
        if value is None:
            record[name] = []  # null -> []
            return

        if remove_cls_key and isinstance(value, dict):
            value.pop('_cls', None)
        if not isinstance(value, (list, tuple)):
            record[name] = [value]

    updater.update_by_document(by_doc)
Exemple #6
0
def geojson_to_legacy_pairs(updater: DocumentUpdater, from_type: str):
    """
    Convert GeoJSON objects of given type to legacy coordinate pairs.

    On 'strict' migration policy the field values are checked before
    the conversion. Then objects of `from_type` are converted to
    'Point' and every matched GeoJSON object is replaced with its
    `coordinates` value.
    :param updater: DocumentUpdater object
    :param from_type: GeoJSON type to convert from
    :return:
    """
    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        geo = doc.get(updater.field_name)
        # GeoJSON type is kept in the `type` value, not in a key name.
        # `from_type` is accepted too in case `convert_geojson` has left
        # the original type on objects it reduced -- TODO confirm
        if isinstance(geo, dict) and geo.get('type') in ('Point', from_type):
            doc[updater.field_name] = geo.get('coordinates')

    if updater.migration_policy.name == 'strict':
        __check_geojson_objects(updater, ["Point", from_type])
        __check_legacy_point_coordinates(updater)
        __check_value_types(updater, ['object', 'array'])

    convert_geojson(updater, from_type, 'Point')

    updater.update_by_document(by_doc)
Exemple #7
0
def legacy_pairs_to_geojson(updater: DocumentUpdater, to_type: str):
    """
    Convert legacy coordinate pairs to GeoJSON objects of given type.

    Arrays are wrapped into GeoJSON 'Point' objects first, then
    'Point' objects are converted to `to_type`. On 'strict' migration
    policy the field values are checked before the conversion
    :param updater: DocumentUpdater object
    :param to_type: GeoJSON type to convert to
    :return:
    """
    def by_doc(ctx: ByDocContext):
        record = ctx.document
        pair = record.get(updater.field_name)
        if not isinstance(pair, (list, tuple)):
            return
        record[updater.field_name] = dict(type='Point', coordinates=pair)

    strict = updater.migration_policy.name == 'strict'
    if strict:
        __check_geojson_objects(updater, ['Point', to_type])
        __check_legacy_point_coordinates(updater)
        __check_value_types(updater, ['object', 'array'])

    updater.update_by_document(by_doc)
    convert_geojson(updater, 'Point', to_type)
    def change_dynamic(self, updater: DocumentUpdater, diff: Diff):
        """If document becomes non-dynamic then drop those fields of
        every record which are absent in the document schema
        """
        self._check_diff(diff, False, bool)
        if diff.new:
            return  # Nothing to do

        # Only the keys known to this schema are kept in records
        self_schema = self._run_ctx['left_schema'][self.document_type]  # type: Schema.Document

        def by_doc(ctx: ByDocContext):
            record = ctx.document
            if not (record.keys() - self_schema.keys()):
                return  # No extra keys in this record

            kept = {
                key: value
                for key, value in record.items()
                if key in self_schema.keys()
            }
            # Record is modified in place rather than replaced
            record.clear()
            record.update(kept)

        updater.update_by_document(by_doc)
Exemple #9
0
def __mongo_convert(updater: DocumentUpdater, target_type: str):
    """
    Convert field to a given type in a given collection. `target_type`
    contains MongoDB type name, such as 'string', 'decimal', etc.

    Null values and values which already have the target python type
    are left untouched. If conversion fails then MigrationError is
    raised on 'strict' migration policy, otherwise the value is kept
    as is.

    https://docs.mongodb.com/manual/reference/operator/aggregation/convert/
    :param updater: DocumentUpdater object
    :param target_type: MongoDB type name
    :return:
    """
    import datetime  # Used only for the 'date' type check below

    # MongoDB type name -> (python type for "already converted" check,
    # converter callable). They are kept separately since a converter
    # is not always a class which `isinstance` could accept
    # https://docs.mongodb.com/manual/reference/operator/aggregation/convert/
    type_map = {
        'double': (float, float),
        'string': (str, str),
        'objectId': (bson.ObjectId, bson.ObjectId),
        'bool': (bool, bool),
        'date': (datetime.datetime, lambda x: dateutil_parse(str(x))),
        'int': (int, int),
        'long': (bson.Int64, bson.Int64),
        'decimal': (float, float),
        'binary': (bson.Binary, bson.Binary),
        'object': (dict, dict),
    }
    assert target_type in type_map
    python_type, convert = type_map[target_type]

    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        field_name = updater.field_name
        if field_name not in doc:
            return

        value = doc[field_name]
        if value is None or isinstance(value, python_type):
            return

        try:
            doc[field_name] = convert(value)
        except (TypeError, ValueError) as e:
            # Non-strict policy leaves unconvertible value untouched
            if updater.migration_policy.name == 'strict':
                raise MigrationError(
                    f'Cannot convert value '
                    f'{field_name}: {value} to type {python_type}'
                ) from e

    updater.update_by_document(by_doc)
Exemple #10
0
def to_uuid_bin(updater: DocumentUpdater):
    """
    Convert strings with UUID to binData with UUID.

    Null values and values which are already UUID are left untouched.
    Any other value causes InconsistencyError on 'strict' migration
    policy and is skipped otherwise.
    :param updater: DocumentUpdater object
    :return:
    """
    # Only hexadecimal digits are accepted. A wider character class
    # would let through strings which `uuid.UUID` rejects with
    # ValueError, bypassing the migration policy handling below
    uuid_pattern = re.compile(
        r'\A[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z',
        re.IGNORECASE)

    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if updater.field_name not in doc or doc[updater.field_name] is None:
            return

        f = doc[updater.field_name]
        if isinstance(f, uuid.UUID):  # Already UUID
            return
        elif isinstance(f, str) and uuid_pattern.match(f):
            doc[updater.field_name] = uuid.UUID(f)
        elif updater.migration_policy.name == 'strict':
            raise InconsistencyError(
                f"Field {updater.field_name} has wrong value {f!r} "
                f"(should be UUID string or UUID Binary data) in record {doc}")

    updater.update_by_document(by_doc)
Exemple #11
0
def extract_from_list(updater: DocumentUpdater,
                      item_type,
                      remove_cls_key=False):
    """
    Replace every list value with its first element (an empty list
    becomes null). The element type is checked against `item_type`;
    on 'strict' migration policy a mismatch raises InconsistencyError
    and a non-list value raises MigrationError
    :param updater: DocumentUpdater object
    :param item_type: python type(s) to check the element
    :param remove_cls_key: if True then '_cls' keys will be removed
     from dict items if any
    :return:
    """
    def by_doc(ctx: ByDocContext):
        record = ctx.document
        name = updater.field_name
        value = record.get(name)
        if value is None:  # Field is absent or null
            return

        if not isinstance(value, (list, tuple)):
            if updater.migration_policy.name == 'strict':
                raise MigrationError(
                    f'Could not extract item from non-list value '
                    f'{name}: {record[name]}')
            return

        if not value:  # Empty list -> null
            record[name] = None
            return

        item = value[0]
        if remove_cls_key and isinstance(item, dict):
            item.pop('_cls', None)
        if not isinstance(item, item_type) \
                and updater.migration_policy.name == 'strict':
            raise InconsistencyError(
                f"Field {name} has wrong value {item!r} "
                f"(should be {item_type}) in record {record}")
        record[name] = item

    updater.update_by_document(by_doc)
Exemple #12
0
def to_dynamic_ref(updater: DocumentUpdater):
    """
    Convert references (ObjectId, ObjectId string, manual ref) to
    a form acceptable for dynamic reference fields.

    We cannot get dynamic ref from other types of refs because
    of lack of '_cls' value. Mongoengine fields which use this
    converter can keep DBRef. So DBRef is produced instead
    :param updater: DocumentUpdater object
    :return:
    """
    def by_doc(ctx: ByDocContext):
        record = ctx.document
        name = updater.field_name
        value = record.get(name)
        if value is None:  # Field is absent or null
            return

        target = None if ctx.collection is None else ctx.collection.name

        if isinstance(value, str):  # Possibly ObjectId as string
            try:
                value = bson.ObjectId(value)
            except bson.errors.BSONError:
                pass  # Not an ObjectId, handled as "other data type" below

        if isinstance(value, bson.DBRef):
            return
        if isinstance(value, dict):
            if isinstance(value.get('_ref'), bson.DBRef):
                return  # Already dynamic ref
            if isinstance(value.get('_id'), bson.ObjectId):  # manual ref
                record[name] = bson.DBRef(target, value['_id'])
                return
        elif isinstance(value, bson.ObjectId):
            record[name] = bson.DBRef(target, value)
            return

        # Other data type
        if updater.migration_policy.name == 'strict':
            raise InconsistencyError(
                f"Field {name} has wrong value {value!r} "
                f"(should be DBRef, ObjectId, manual ref, dynamic ref) "
                f"in record {record}")

    # TODO: precheck if field actually contains value other than dynamic ref or DBRef
    updater.update_by_document(by_doc)