def __increase_geojson_nesting(updater: DocumentUpdater,
                               from_type: str,
                               to_type: str,
                               depth: int = 1):
    """
    Wrap the `coordinates` field into a nested array on GeoJSON fields
    of the given type
    :param updater: DocumentUpdater object
    :param from_type: GeoJSON type to change
    :param to_type: this GeoJSON type will be set in changed records
    :param depth: nested array depth to wrap in
    :return:
    """
    assert depth > 0

    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if isinstance(doc.get(updater.field_name), dict):
            match = doc[updater.field_name].get('type') == from_type \
                    and doc[updater.field_name].get('coordinates')
            if match:
                doc[updater.field_name]['coordinates'] = functools.reduce(
                    lambda x, y: [x],
                    range(depth),
                    doc[updater.field_name].get('coordinates', [0.0, 0.0])
                )

    updater.update_by_document(by_doc)

def to_string(updater: DocumentUpdater):
    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if updater.field_name not in doc or doc[updater.field_name] is None:
            return

        f = doc[updater.field_name]
        is_dict = isinstance(f, dict)
        if is_dict and isinstance(f.get('_ref'), bson.DBRef):  # dynamic ref
            doc[updater.field_name] = str(f['_ref'].id)
        elif is_dict and isinstance(f.get('_id'), bson.ObjectId):  # manual ref
            doc[updater.field_name] = str(f['_id'])
        elif isinstance(f, bson.DBRef):
            doc[updater.field_name] = str(f.id)
        else:
            try:
                doc[updater.field_name] = str(f)
            except (TypeError, ValueError) as e:
                if updater.migration_policy.name == 'strict':
                    raise MigrationError(
                        f'Cannot convert value {updater.field_name}: '
                        f'{doc[updater.field_name]} to string') from e

    # TODO: precheck if field actually contains value other than string
    updater.update_by_document(by_doc)

def __decrease_geojson_nesting(updater: DocumentUpdater,
                               from_type: str,
                               to_type: str,
                               depth: int = 1):
    """
    Extract the first element from nested arrays in the `coordinates` field
    on GeoJSON fields of the given type
    :param updater: DocumentUpdater object
    :param from_type: GeoJSON type to change
    :param to_type: this GeoJSON type will be set in changed records
    :param depth: nested array depth to extract from
    :return:
    """
    assert depth > 0

    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if isinstance(doc.get(updater.field_name), dict):
            match = doc[updater.field_name].get('type') == from_type \
                    and doc[updater.field_name].get('coordinates')
            if match:
                doc[updater.field_name]['coordinates'] = functools.reduce(
                    lambda x, y: x[0] if x and isinstance(x, (list, tuple)) else None,
                    range(depth),
                    doc[updater.field_name].get('coordinates', [0.0, 0.0])
                )

    updater.update_by_document(by_doc)

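# Illustrative sketch only (not called by the converters above): shows how the
# functools.reduce() calls in __increase_geojson_nesting and
# __decrease_geojson_nesting change the nesting depth of `coordinates`.
# The function name is hypothetical; `functools` is assumed to be imported at
# module level, as the converters above already require.
def _nesting_depth_sketch():
    coordinates = [1.0, 2.0]
    # Increase: each reduce step wraps the accumulator in one more list
    wrapped = functools.reduce(lambda x, y: [x], range(2), coordinates)
    assert wrapped == [[[1.0, 2.0]]]
    # Decrease: each reduce step takes the first element back out
    unwrapped = functools.reduce(
        lambda x, y: x[0] if x and isinstance(x, (list, tuple)) else None,
        range(2),
        wrapped)
    assert unwrapped == [1.0, 2.0]
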
def to_dbref(updater: DocumentUpdater):
    """Convert references (ObjectId, manual ref, dynamic ref) to DBRef"""
    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if updater.field_name not in doc or doc[updater.field_name] is None:
            return

        f = doc[updater.field_name]
        if isinstance(f, bson.DBRef):  # Already DBRef
            return
        elif isinstance(f, str):  # ObjectId as string
            try:
                f = bson.ObjectId(f)
            except bson.errors.BSONError:
                pass

        collection_name = ctx.collection.name if ctx.collection is not None else None
        is_dict = isinstance(f, dict)
        if is_dict and isinstance(f.get('_id'), bson.ObjectId):  # manual ref
            doc[updater.field_name] = bson.DBRef(collection_name, f['_id'])
        elif is_dict and isinstance(f.get('_ref'), bson.DBRef):  # dynamic ref
            doc[updater.field_name] = f['_ref']
        elif isinstance(f, bson.ObjectId):
            doc[updater.field_name] = bson.DBRef(collection_name, f)
        elif updater.migration_policy.name == 'strict':  # Other data type
            raise InconsistencyError(
                f"Field {updater.field_name} has wrong value {f!r} "
                f"(should be DBRef, ObjectId, manual ref, dynamic ref, "
                f"ObjectId string) in record {doc}")

    # TODO: precheck if field actually contains value other than DBRef
    updater.update_by_document(by_doc)

def item_to_list(updater: DocumentUpdater, remove_cls_key=False):
    """Make a list with single element from every non-array value"""
    def by_doc(ctx: ByDocContext):
        if updater.field_name in ctx.document:
            f = ctx.document[updater.field_name]
            if f is not None:
                if remove_cls_key and isinstance(f, dict) and '_cls' in f:
                    del f['_cls']
                if not isinstance(f, (list, tuple)):
                    ctx.document[updater.field_name] = [f]
            else:
                ctx.document[updater.field_name] = []  # null -> []

    updater.update_by_document(by_doc)

def geojson_to_legacy_pairs(updater: DocumentUpdater, from_type: str):
    """Convert GeoJSON objects of given type to legacy coordinate pairs"""
    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if isinstance(doc.get(updater.field_name), dict):
            if doc[updater.field_name].get('type') == 'Point':
                doc[updater.field_name] = doc[updater.field_name].get(
                    'coordinates')

    if updater.migration_policy.name == 'strict':
        __check_geojson_objects(updater, ['Point', from_type])
        __check_legacy_point_coordinates(updater)
        __check_value_types(updater, ['object', 'array'])

    convert_geojson(updater, from_type, 'Point')

    updater.update_by_document(by_doc)

def legacy_pairs_to_geojson(updater: DocumentUpdater, to_type: str):
    """Convert legacy coordinate pairs to GeoJSON objects of given type"""
    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if isinstance(doc.get(updater.field_name), (list, tuple)):
            doc[updater.field_name] = {
                'type': 'Point',
                'coordinates': doc[updater.field_name]
            }

    if updater.migration_policy.name == 'strict':
        __check_geojson_objects(updater, ['Point', to_type])
        __check_legacy_point_coordinates(updater)
        __check_value_types(updater, ['object', 'array'])

    updater.update_by_document(by_doc)
    convert_geojson(updater, 'Point', to_type)

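# Hedged illustration (not part of the conversion pipeline) of the two document
# shapes handled by geojson_to_legacy_pairs and legacy_pairs_to_geojson: a
# legacy value is a bare coordinate pair, while the GeoJSON value is an object
# with 'type' and 'coordinates' keys. The function name is hypothetical.
def _point_shapes_sketch():
    legacy_pair = [37.618423, 55.751244]
    geojson_point = {'type': 'Point', 'coordinates': legacy_pair}
    # geojson_to_legacy_pairs keeps only the coordinates array
    assert geojson_point['coordinates'] == legacy_pair
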
def change_dynamic(self, updater: DocumentUpdater, diff: Diff):
    """If document becomes non-dynamic then remove fields which are not
    defined in mongoengine EmbeddedDocument
    """
    def by_doc(ctx: ByDocContext):
        extra_keys = ctx.document.keys() - self_schema.keys()
        if extra_keys:
            newdoc = {k: v for k, v in ctx.document.items()
                      if k in self_schema.keys()}
            ctx.document.clear()
            ctx.document.update(newdoc)

    self._check_diff(diff, False, bool)
    if diff.new:
        return  # Nothing to do

    # Remove fields which are not in schema
    self_schema = self._run_ctx['left_schema'][self.document_type]  # type: Schema.Document
    updater.update_by_document(by_doc)

def __mongo_convert(updater: DocumentUpdater, target_type: str):
    """
    Convert field to a given type in a given collection. `target_type`
    contains MongoDB type name, such as 'string', 'decimal', etc.

    https://docs.mongodb.com/manual/reference/operator/aggregation/convert/
    :param updater: DocumentUpdater object
    :param target_type: MongoDB type name
    :return:
    """
    def by_doc(ctx: ByDocContext):
        # https://docs.mongodb.com/manual/reference/operator/aggregation/convert/
        type_map = {
            'double': float,
            'string': str,
            'objectId': bson.ObjectId,
            'bool': bool,
            'date': lambda x: dateutil_parse(str(x)),
            'int': int,
            'long': bson.Int64,
            'decimal': float,
            'binary': bson.Binary,
            'object': dict
        }
        assert target_type in type_map

        doc = ctx.document
        field_name = updater.field_name
        if field_name in doc:
            t = type_map[target_type]
            if not isinstance(doc[field_name], t) and doc[field_name] is not None:
                try:
                    doc[field_name] = type_map[target_type](doc[field_name])
                except (TypeError, ValueError) as e:
                    if updater.migration_policy.name == 'strict':
                        raise MigrationError(
                            f'Cannot convert value '
                            f'{field_name}: {doc[field_name]} to type {t}'
                        ) from e

    updater.update_by_document(by_doc)

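# Rough sketch (assumptions: dateutil_parse is dateutil.parser.parse and bson
# comes from pymongo, both imported at module level as the code above uses
# them) of the plain-Python conversions __mongo_convert applies instead of the
# $convert aggregation stage. The function name is illustrative only.
def _mongo_convert_sketch():
    assert int('42') == 42                              # target_type == 'int'
    assert bson.Int64('42') == 42                       # target_type == 'long'
    assert dateutil_parse('2020-01-02').year == 2020    # target_type == 'date'
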
def to_uuid_bin(updater: DocumentUpdater):
    """Convert strings with UUID to binData with UUID"""
    uuid_pattern = re.compile(
        r'\A[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z',
        re.IGNORECASE)

    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if updater.field_name not in doc or doc[updater.field_name] is None:
            return

        f = doc[updater.field_name]
        if isinstance(f, uuid.UUID):
            return
        elif isinstance(f, str) and uuid_pattern.match(f):
            doc[updater.field_name] = uuid.UUID(f)
        elif updater.migration_policy.name == 'strict':
            raise InconsistencyError(
                f"Field {updater.field_name} has wrong value {f!r} "
                f"(should be UUID string or UUID Binary data) in record {doc}")

    updater.update_by_document(by_doc)

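# Tiny standalone illustration (hypothetical name) of the string-to-UUID
# conversion to_uuid_bin performs once a value passes the pattern check.
def _uuid_conversion_sketch():
    s = '12345678-1234-1234-1234-123456789abc'
    assert uuid.UUID(s).hex == s.replace('-', '')
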
def extract_from_list(updater: DocumentUpdater, item_type, remove_cls_key=False):
    """
    Replace every list encountered with its first element, checking the
    item type. If the type differs from `item_type` then an error is raised
    :param updater:
    :param item_type: python type(s) to check the element
    :param remove_cls_key: if True then '_cls' keys will be removed from
     dict items if any
    :return:
    """
    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if updater.field_name not in doc or doc[updater.field_name] is None:
            return

        f = doc[updater.field_name]
        if isinstance(f, (list, tuple)):
            if f:
                f = f[0]
                if remove_cls_key and isinstance(f, dict) and '_cls' in f:
                    del f['_cls']
                if not isinstance(f, item_type) \
                        and updater.migration_policy.name == 'strict':
                    raise InconsistencyError(
                        f"Field {updater.field_name} has wrong value {f!r} "
                        f"(should be {item_type}) in record {doc}")
            else:
                f = None
            doc[updater.field_name] = f
        elif f is not None and updater.migration_policy.name == 'strict':
            raise MigrationError(
                f'Could not extract item from non-list value '
                f'{updater.field_name}: {doc[updater.field_name]}')

    updater.update_by_document(by_doc)

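# Sketch of the round trip implemented by item_to_list and extract_from_list
# above: a scalar is wrapped into a single-element list on one side, and the
# first element is taken back out on the other. Name is illustrative only.
def _list_round_trip_sketch():
    value = {'x': 1}
    as_list = [value]            # what item_to_list produces for a non-list value
    assert as_list[0] == value   # what extract_from_list takes back out
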
def to_dynamic_ref(updater: DocumentUpdater):
    """Convert references (ObjectId, DBRef, manual ref) to dynamic ref"""
    def by_doc(ctx: ByDocContext):
        doc = ctx.document
        if updater.field_name not in doc or doc[updater.field_name] is None:
            return

        f = doc[updater.field_name]
        is_dict = isinstance(f, dict)
        collection_name = ctx.collection.name if ctx.collection is not None else None
        if isinstance(f, str):  # ObjectId as string
            try:
                f = bson.ObjectId(f)
            except bson.errors.BSONError:
                pass

        # We cannot get dynamic ref from other types of refs because
        # of lack of '_cls' value. Mongoengine fields which use this
        # converter can keep DBRef. So return DBRef instead
        if is_dict and isinstance(f.get('_ref'), bson.DBRef):  # Already dynamic ref
            return
        elif isinstance(f, bson.DBRef):
            return
        elif is_dict and isinstance(f.get('_id'), bson.ObjectId):  # manual ref
            doc[updater.field_name] = bson.DBRef(collection_name, f['_id'])
        elif isinstance(f, bson.ObjectId):
            doc[updater.field_name] = bson.DBRef(collection_name, f)
        elif updater.migration_policy.name == 'strict':  # Other data type
            raise InconsistencyError(
                f"Field {updater.field_name} has wrong value {f!r} "
                f"(should be DBRef, ObjectId, manual ref, dynamic ref) "
                f"in record {doc}")

    # TODO: precheck if field actually contains value other than dynamic ref or DBRef
    updater.update_by_document(by_doc)

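# Illustration only (assumes pymongo's bson module, imported above): the
# reference representations that to_dbref and to_dynamic_ref distinguish
# between. The function name, collection name and class name are hypothetical.
def _reference_shapes_sketch():
    oid = bson.ObjectId()
    plain_ref = oid                                         # bare ObjectId
    manual_ref = {'_id': oid}                               # manual ref
    dbref = bson.DBRef('some_collection', oid)              # DBRef
    dynamic_ref = {'_ref': dbref, '_cls': 'SomeDocument'}   # dynamic ref
    assert manual_ref['_id'] == plain_ref == dynamic_ref['_ref'].id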