Example #1
0
    def by_doc(ctx: ByDocContext):
        """
        Convert a reference stored in the updater's field to DBRef,
        modifying the document in place

        Missing fields, None values and values which are already DBRef
        are left untouched. A string is first tried as an ObjectId.
        Manual refs (dict with ObjectId '_id') and bare ObjectIds become
        a DBRef pointing to the context collection name, dynamic refs
        (dict with DBRef '_ref') are replaced with their DBRef.
        :param ctx: by_doc context with the document and the collection
        :raises InconsistencyError: on any other value if the migration
         policy is 'strict'
        """
        record = ctx.document
        field = updater.field_name
        if field not in record or record[field] is None:
            return

        value = record[field]
        if isinstance(value, bson.DBRef):
            # Nothing to convert
            return
        if isinstance(value, str):
            # Probably an ObjectId string; if it is not, the value stays
            # a string and falls to the policy check at the bottom
            try:
                value = bson.ObjectId(value)
            except bson.errors.BSONError:
                pass

        target = None if ctx.collection is None else ctx.collection.name

        if isinstance(value, bson.ObjectId):
            record[field] = bson.DBRef(target, value)
            return
        if isinstance(value, dict):
            if isinstance(value.get('_id'), bson.ObjectId):
                # Manual ref
                record[field] = bson.DBRef(target, value['_id'])
                return
            if isinstance(value.get('_ref'), bson.DBRef):
                # Dynamic ref
                record[field] = value['_ref']
                return

        # Other data type
        if updater.migration_policy.name == 'strict':
            raise InconsistencyError(
                f"Field {field} has wrong value {value!r} "
                f"(should be DBRef, ObjectId, manual ref, dynamic ref, "
                f"ObjectId string) in record {record}")
Example #2
0
    def by_doc(ctx: ByDocContext):
        """
        Prepare a reference stored in the updater's field for a dynamic
        reference field, modifying the document in place

        Missing fields, None values, dynamic refs (dict with DBRef
        '_ref') and DBRef values are left untouched. A string is first
        tried as an ObjectId. Manual refs (dict with ObjectId '_id') and
        bare ObjectIds become a DBRef pointing to the context collection
        name.
        :param ctx: by_doc context with the document and the collection
        :raises InconsistencyError: on any other value if the migration
         policy is 'strict'
        """
        doc = ctx.document
        if updater.field_name not in doc or doc[updater.field_name] is None:
            return

        f = doc[updater.field_name]
        is_dict = isinstance(f, dict)
        # pymongo collections do not implement truth value testing
        # (bool() raises NotImplementedError), so compare with None
        collection_name = (ctx.collection.name
                           if ctx.collection is not None else None)

        if isinstance(f, str):  # ObjectId as string
            try:
                f = bson.ObjectId(f)
            except bson.errors.BSONError:
                # Not an ObjectId string; the value stays a string and
                # falls to the policy check below
                pass

        # We cannot get dynamic ref from other types of refs because
        # of lack of '_cls' value. Mongoengine fields which use this
        # converter can keep DBRef. So return DBRef instead
        if is_dict and isinstance(f.get('_ref'),
                                  bson.DBRef):  # Already dynamic ref
            return
        elif isinstance(f, bson.DBRef):
            return
        elif is_dict and isinstance(f.get('_id'), bson.ObjectId):  # manual ref
            doc[updater.field_name] = bson.DBRef(collection_name, f['_id'])
        elif isinstance(f, bson.ObjectId):
            doc[updater.field_name] = bson.DBRef(collection_name, f)
        elif updater.migration_policy.name == 'strict':  # Other data type
            raise InconsistencyError(
                f"Field {updater.field_name} has wrong value {f!r} "
                f"(should be DBRef, ObjectId, manual ref, dynamic ref) "
                f"in record {doc}")
Example #3
0
 def by_doc(ctx: ByDocContext):
     """
     Check that the updater's field holds a legacy geo point

     A missing field, None, or a list/tuple of exactly two items is
     accepted.
     :param ctx: by_doc context with the document to check
     :raises InconsistencyError: on any other value
     """
     record = ctx.document
     if updater.field_name not in record:
         return

     value = record[updater.field_name]
     if value is None:
         return
     if isinstance(value, (list, tuple)) and len(value) == 2:
         return

     raise InconsistencyError(
         f"Field {updater.field_name} has wrong value {value!r} "
         f"(should be legacy geo point) in record {record}")
Example #4
0
 def by_doc(ctx: ByDocContext):
     """
     Check that the updater's field holds a GeoJSON object

     A missing field, None, or a dict whose 'type' item is one of
     `geojson_types` is accepted.
     :param ctx: by_doc context with the document to check
     :raises InconsistencyError: on any other value
     """
     record = ctx.document
     if updater.field_name not in record:
         return

     value = record[updater.field_name]
     if value is None:
         return
     if isinstance(value, dict) and value.get('type') in geojson_types:
         return

     raise InconsistencyError(
         f"Field {updater.field_name} has wrong value {value!r} "
         f"(should be GeoJSON) in record {record}")
Example #5
0
    def by_doc(ctx: ByDocContext):
        """
        Convert a UUID string stored in the updater's field to
        uuid.UUID, modifying the document in place

        Missing fields, None values and values which are already
        uuid.UUID are left untouched.
        :param ctx: by_doc context with the document to convert
        :raises InconsistencyError: if the value is neither UUID nor a
         string matching `uuid_pattern` and the migration policy is
         'strict'
        """
        record = ctx.document
        field = updater.field_name
        if field not in record or record[field] is None:
            return

        value = record[field]
        if isinstance(value, uuid.UUID):
            # Nothing to convert
            return

        if isinstance(value, str) and uuid_pattern.match(value):
            record[field] = uuid.UUID(value)
            return

        if updater.migration_policy.name == 'strict':
            raise InconsistencyError(
                f"Field {field} has wrong value {value!r} "
                f"(should be UUID string or UUID Binary data) in record {record}")
Example #6
0
def check_empty_result(collection: Collection, db_field: str,
                       find_filter: dict) -> None:
    """
    Look for records in the collection which match the given filter
    expression and raise an error if at least one is found
    :param collection: pymongo collection object to search in
    :param db_field: collection field name, used in the error message
    :param find_filter: collection.find() method filter argument
    :raises InconsistencyError: if any records found
    """
    # At most three records are fetched, they only serve as examples
    offenders = list(collection.find(find_filter, limit=3))
    if not offenders:
        return

    samples = []
    for record in offenders:
        record_id = record.get("_id", "unknown")
        field_value = record.get(db_field, "unknown")
        samples.append(
            f'{{_id: {record_id},...{db_field}: {field_value}}}')

    raise InconsistencyError(
        f"Field {collection.name}.{db_field} in some records "
        f"has wrong values. First several examples: "
        f"{','.join(samples)}")
Example #7
0
    def by_doc(ctx: ByDocContext):
        """
        Check that the updater's field value has one of the allowed
        types

        A missing field or None value is accepted. `allowed_types`
        holds MongoDB type names which are mapped to python types
        below.
        :param ctx: by_doc context with the document to check
        :raises InconsistencyError: if the value is not an instance of
         any allowed type
        """
        # https://docs.mongodb.com/manual/reference/operator/aggregation/convert/
        type_map = {
            'double': float,
            'string': str,
            'objectId': bson.ObjectId,
            'bool': bool,
            'date': datetime,
            'int': int,
            'long': int,
            'decimal': float
        }
        # Non-strict subset: allowing every supported type at once is
        # legal too, only unknown type names must be rejected
        assert set(allowed_types) <= type_map.keys()

        doc = ctx.document
        if updater.field_name in doc:
            f = doc[updater.field_name]
            valid_types = tuple(type_map[t] for t in allowed_types)
            valid = f is None or isinstance(f, valid_types)
            if not valid:
                raise InconsistencyError(
                    f"Field {updater.field_name} has wrong type of value "
                    f"{f!r} (should be any of {valid_types}) in record {doc}")
Example #8
0
    def by_doc(ctx: ByDocContext):
        """
        Replace a list stored in the updater's field with its first
        item, modifying the document in place

        Missing fields and None values are left untouched. An empty
        list is replaced with None. If `remove_cls_key` is set and the
        first item is a dict then its '_cls' key is removed.
        :param ctx: by_doc context with the document to convert
        :raises InconsistencyError: if the first item is not an
         instance of `item_type` and the migration policy is 'strict'
        :raises MigrationError: if the value is not a list or tuple
         and the migration policy is 'strict'
        """
        record = ctx.document
        field = updater.field_name
        if field not in record or record[field] is None:
            return

        value = record[field]
        if not isinstance(value, (list, tuple)):
            # Value is known to be not None here
            if updater.migration_policy.name == 'strict':
                raise MigrationError(
                    f'Could not extract item from non-list value '
                    f'{field}: {record[field]}')
            return

        if not value:
            # Nothing to extract from an empty list
            record[field] = None
            return

        item = value[0]
        if remove_cls_key and isinstance(item, dict) and '_cls' in item:
            del item['_cls']
        if not isinstance(item, item_type):
            if updater.migration_policy.name == 'strict':
                raise InconsistencyError(
                    f"Field {field} has wrong value {item!r} "
                    f"(should be {item_type}) in record {record}")
        record[field] = item
Example #9
0
    def _update_by_document(self, callback: Callable, collection: Collection,
                            filter_path: List[str],
                            update_path: List[str]) -> None:
        """
        Call a callback for every document found by the given filter
        path and write the documents changed by the callback back with
        bulk writes
        :param callback: by_doc callback; it is called with a
         ByDocContext and should change the context document in-place
        :param collection: pymongo.Collection object to read from
        :param filter_path: filter dotpath parts to substitute to find()
        :param update_path: update dotpath parts (with $[]) which point
         to the documents to pick and call the callback for each of
         them (nested array of embedded documents for instance). If
         it is empty or None then the found document itself is picked
        :raises InconsistencyError: if `self.document_cls` is set, the
         migration policy is 'strict' and a picked value is neither
         None nor a dict
        :return:
        """
        # Work on a copy in order to keep the caller's list untouched
        field_filter_path = copy(filter_path)
        if self.field_name:
            field_filter_path += [self.field_name]
        filter_dotpath = '.'.join(field_filter_path)

        if not update_path:
            json_path = '$'  # update_path points to any document
        else:
            # update_path is mongo update path
            # Turn mongo's `$[]` parts into jsonpath `[*]` and glue them
            # to the preceding path part
            json_path = '.'.join(f.replace('$[]', '[*]') for f in update_path)
            json_path = json_path.replace('.[*]', '[*]')
        parser = jsonpath_rw.parse(json_path)

        # Restrict find() to records having the field (unless missed
        # fields were requested too) and to the document class if set
        find_fltr = {}
        if not self._include_missed_fields and filter_dotpath:
            find_fltr = {filter_dotpath: {'$exists': True}}
        if self.document_cls:
            find_fltr['_cls'] = self.document_cls

        # On dry run only log what would be done and touch nothing
        if flags.dry_run:
            msg = '* db.%s.find(%s) -> [Loop](%s) -> db.%s.bulk_write(...)'
            log.info(msg, collection.name, find_fltr, filter_dotpath,
                     collection.name)
            return

        # Writes go through the same-named collection taken from
        # `flags.database2`, reads go through the `collection` argument
        bulk_db = flags.database2
        bulk_collection = bulk_db[collection.name]

        buf = []
        for doc in collection.find(find_fltr):
            # Snapshot to detect later if the callback changed anything
            prev_doc = deepcopy(doc)

            # Recursively apply the callback to every embedded doc
            for embedded_doc in parser.find(doc):
                embedded_doc = embedded_doc.value
                # Value checks are performed only if document class is set
                if self.document_cls:
                    if embedded_doc is None:
                        continue
                    if not isinstance(embedded_doc, dict):
                        # Field contains smth another than embedded doc
                        if self.migration_policy.name == 'strict':
                            raise InconsistencyError(
                                f"Field {filter_dotpath} has wrong value {embedded_doc!r} "
                                f"(should be embedded document) in record {doc}"
                            )
                        else:
                            continue
                    # A document without '_cls' key is treated as
                    # belonging to `self.document_cls`
                    if embedded_doc.get(
                            '_cls', self.document_cls) != self.document_cls:
                        # Skip since document doesn't belong to
                        # document class (document inheritance,
                        # DynamicField)
                        # See `DocumentMetaclass` implementation
                        continue
                ctx = ByDocContext(collection=collection,
                                   document=embedded_doc,
                                   filter_dotpath=filter_dotpath)
                # Callback should change a dict in-place
                callback(ctx)

            # Write a document only if it was changed by callback
            if prev_doc != doc:
                buf.append(ReplaceOne({'_id': doc['_id']}, doc, upsert=False))

            # Flush buffer
            if len(buf) >= flags.BULK_BUFFER_LENGTH:
                bulk_collection.bulk_write(buf, ordered=False)
                buf.clear()
        # Write the rest of the buffer which did not reach the limit
        if buf:
            bulk_collection.bulk_write(buf, ordered=False)
            buf.clear()