def remove_cls_key(updater: DocumentUpdater):
    """Unset the '_cls' key in every document which has it"""
    def by_path(ctx: ByPathContext):
        # Match only documents which actually carry '_cls' under the
        # current path, narrowed by the extra filter of the context
        query = {
            ctx.filter_dotpath + '._cls': {'$exists': True},
            **ctx.extra_filter
        }
        unset_cls = {'$unset': {ctx.update_dotpath + '._cls': ''}}
        ctx.collection.update_many(
            query,
            unset_cls,
            array_filters=ctx.build_array_filters()
        )

    updater.update_by_path(by_path)
def drop_field(updater: DocumentUpdater):
    """Drop the field from every document which contains it"""
    def by_path(ctx: ByPathContext):
        # Match only documents where the field exists, narrowed by the
        # extra filter of the context
        query = {
            ctx.filter_dotpath: {'$exists': True},
            **ctx.extra_filter
        }
        unset_field = {'$unset': {ctx.update_dotpath: ''}}
        ctx.collection.update_many(
            query,
            unset_field,
            array_filters=ctx.build_array_filters()
        )

    updater.update_by_path(by_path)
def change_inherit(self, updater: DocumentUpdater, diff: Diff):
    """Drop the '_cls' key when an EmbeddedDocument stops being
    inheritable; do nothing when it becomes inheritable
    """
    self._check_diff(diff, False, bool)
    if diff.new:
        # Inheritance was switched on: nothing to remove
        return

    def by_path(ctx: ByPathContext):
        # Touch only documents which carry '_cls' under the current path
        query = {
            ctx.filter_dotpath + '._cls': {'$exists': True},
            **ctx.extra_filter
        }
        unset_cls = {'$unset': {ctx.update_dotpath + '._cls': ''}}
        ctx.collection.update_many(
            query,
            unset_cls,
            array_filters=ctx.build_array_filters()
        )

    updater.update_by_path(by_path)
def to_url_string(updater: DocumentUpdater, check_only=False):
    """Cast fields to string and then verify if they contain URLs

    The verification runs only when the migration policy of the updater
    is named 'strict'.

    :param updater: updater used both for the string cast and for the
     per-path URL check
    :param check_only: NOTE(review): accepted but never read by this
     function; kept so existing callers keep working
    """
    def by_path(ctx: ByPathContext):
        # Select values which are not null and do not look like a URL
        fltr = {
            ctx.filter_dotpath: {
                '$not': url_regex,
                '$ne': None
            },
            **ctx.extra_filter
        }
        # presumably reports documents matched by the filter -- the
        # helper is defined elsewhere, confirm its contract there
        check_empty_result(ctx.collection, ctx.filter_dotpath, fltr)

    to_string(updater)

    # The '+' quantifier after the character class is essential: without
    # it the pattern accepts exactly one character after '://', so every
    # real URL would be treated as invalid
    url_regex = re.compile(
        r"\A[A-Z]{3,}://[A-Z0-9\-._~:/?#\[\]@!$&'()*+,;%=]+\Z",
        re.IGNORECASE
    )
    if updater.migration_policy.name == 'strict':
        updater.update_by_path(by_path)
def to_email_string(updater: DocumentUpdater):
    """Call `to_string` for the fields and, under the 'strict' migration
    policy, check non-null values against the email pattern

    NOTE: the pattern is still a placeholder which accepts any
    single-line string (see TODO below)
    """
    def by_path(ctx: ByPathContext):
        # Non-null values which do not match the email pattern
        mismatch = {'$not': email_regex, '$ne': None}
        query = {ctx.filter_dotpath: mismatch, **ctx.extra_filter}
        check_empty_result(ctx.collection, ctx.filter_dotpath, query)

    to_string(updater)

    email_regex = re.compile(
        r"\A.*\Z",  # TODO: insert email validation regex here
        re.IGNORECASE
    )
    if updater.migration_policy.name != 'strict':
        return

    updater.update_by_path(by_path)
def to_complex_datetime(updater: DocumentUpdater):
    """Call `to_string` for the fields and, under the 'strict' migration
    policy, check that non-null values look like a complex datetime

    The expected shape is a 4-digit group, five 2-digit groups and a
    6-digit group, joined by arbitrary non-empty separators.

    :param updater: updater used both for the string cast and for the
     per-path format check
    """
    # Local import keeps this function self-contained
    import re

    def by_path(ctx: ByPathContext):
        # Select values which are not null and do not match the format
        fltr = {
            ctx.filter_dotpath: {
                '$not': regex,
                '$ne': None
            },
            **ctx.extra_filter
        }
        # presumably reports documents matched by the filter -- the
        # helper is defined elsewhere, confirm its contract there
        check_empty_result(ctx.collection, ctx.filter_dotpath, fltr)

    to_string(updater)

    # We should not know which separator is used, so use '.+'
    # Separator change is handled by appropriate field method
    #
    # The pattern must be a compiled regex: MongoDB's '$not' accepts
    # only a regex or an operator document and rejects a plain string
    regex = re.compile(
        r'\A'
        + '.+'.join([r"\d{4}"] + [r"\d{2}"] * 5 + [r"\d{6}"])
        + r'\Z'
    )
    if updater.migration_policy.name == 'strict':
        updater.update_by_path(by_path)
def to_email_string(updater: DocumentUpdater):
    """Call `to_string` for the fields and, under the 'strict' migration
    policy, check non-null values against the email pattern
    """
    def by_path(ctx: ByPathContext):
        email_regex = r"\A[^\W][A-Z0-9._%+-]+@[\p{L}0-9.-]+\.\p{L}+\Z"
        # Case-insensitive match performed on the database side
        not_an_email = {'$regex': email_regex, '$options': 'i'}
        query = {
            ctx.filter_dotpath: {'$not': not_an_email, '$ne': None},
            **ctx.extra_filter
        }
        check_empty_result(ctx.collection, ctx.filter_dotpath, query)

    to_string(updater)

    if updater.migration_policy.name != 'strict':
        return

    updater.update_by_path(by_path)