def build_object(cls, document_type: str, left_schema: Schema, right_schema: Schema):
    """Build a rename action when ``document_type`` appears to have been renamed.

    A document is treated as renamed when it is present in ``left_schema``,
    absent from ``right_schema``, and exactly one document that exists only
    in ``right_schema`` either has an equal schema or is similar enough.
    Similarity is the percentage of equal items among all field entries and
    all ``parameters`` entries of both documents; it must reach
    ``cls.similarity_threshold``. Anything else is left to be handled as a
    drop/create pair.

    :param document_type: document name to look for in the left schema
    :param left_schema: schema before the change
    :param right_schema: schema after the change
    :return: ``cls(document_type=..., new_name=...)`` when exactly one
     candidate was found, otherwise ``None``
    """
    # A rename is only possible for a document that vanished from the right side
    if document_type not in left_schema or document_type in right_schema:
        return

    embedded_prefix = flags.EMBEDDED_DOCUMENT_NAME_PREFIX
    old_is_embedded = document_type.startswith(embedded_prefix)
    old_schema = left_schema[document_type]

    new_names = []
    for new_type, new_schema in right_schema.items():
        # Documents known to the left schema were obviously not renamed
        if new_type in left_schema:
            continue

        # An embedded document never turns into a usual one (or vice versa),
        # however similar their schemas may look
        new_is_embedded = new_type.startswith(embedded_prefix)
        if old_is_embedded != new_is_embedded:
            continue

        # Equal schemas: a pure rename, no need to look any further
        if old_schema == new_schema:
            new_names = [new_type]
            break

        # Similarity = equal items / all compared items, taken over both
        # the field definitions and the document parameters
        compared_pairs = (
            (old_schema, new_schema),
            (old_schema.parameters, new_schema.parameters),
        )
        total = 0
        equal = 0
        for old_part, new_part in compared_pairs:
            keys = old_part.keys() | new_part.keys()
            total += len(keys)
            # FIXME: values can be callables (a `default`, for instance);
            #        those never compare equal even if nothing has changed
            equal += sum(old_part.get(k) == new_part.get(k) for k in keys)

        if total > 0 and (equal / total * 100) >= cls.similarity_threshold:
            new_names.append(new_type)

    # Zero candidates or an ambiguous match: do not guess
    if len(new_names) == 1:
        return cls(document_type=document_type, new_name=new_names[0])
def build_object(cls, document_type: str, left_schema: Schema, right_schema: Schema):
    """Build a rename action when ``document_type`` appears to have been renamed.

    A document is treated as renamed when it is present in ``left_schema``,
    absent from ``right_schema``, and exactly one document that exists only
    in ``right_schema`` either has an equal schema or is similar enough.
    Similarity is the percentage of equal values among the parameter keys
    shared by same-named fields of both documents; it must reach
    ``cls.similarity_threshold``. Anything else is left to be handled as a
    drop/create pair.

    :param document_type: document name to look for in the left schema
    :param left_schema: schema before the change
    :param right_schema: schema after the change
    :return: ``cls(document_type=..., new_name=...)`` when exactly one
     candidate was found, otherwise ``None``
    """
    # A rename is only possible for a document that vanished from the right side
    if document_type not in left_schema or document_type in right_schema:
        return None

    left_document_schema = left_schema[document_type]
    candidates = []
    for right_document_type, right_document_schema in right_schema.items():
        # Skip documents which were not renamed
        if right_document_type in left_schema:
            continue

        # Exact match, the document was just renamed
        if left_document_schema == right_document_schema:
            candidates = [right_document_type]
            break

        # The counters must start from zero for every candidate. Otherwise
        # the score of a previously inspected document leaks into this one
        # and the result depends on the iteration order of the schema.
        matches = 0
        compares = 0

        # Walk over every field known to either side. A field missing on
        # one side is looked up as `{}`, so it shares no keys and adds
        # nothing to the counters.
        # NOTE(review): added/removed fields therefore do not lower the
        # similarity -- confirm that this is intended
        all_fields = left_document_schema.keys() | right_document_schema.keys()
        for field_name in all_fields:
            left_field_schema = left_document_schema.get(field_name, {})
            right_field_schema = right_document_schema.get(field_name, {})
            common_keys = left_field_schema.keys() & right_field_schema.keys()
            compares += len(common_keys)
            matches += sum(
                left_field_schema[k] == right_field_schema[k]
                for k in common_keys
            )

        if compares > 0 and (matches / compares * 100) >= cls.similarity_threshold:
            candidates.append(right_document_type)

    # Zero candidates or an ambiguous match: do not guess
    if len(candidates) == 1:
        return cls(document_type=document_type, new_name=candidates[0])
    return None
def _verify_schema(self, schema: Schema): # Check if all derived documents have the same collection as # their parents. # E.g. user could comment/remove AlterDocument(collection=...) # for any derived document, but leave for base one) collections = {} # {top_level_document: collection} for document_type, doc_schema in schema.items(): if 'collection' not in doc_schema.parameters: continue col = doc_schema.parameters['collection'] top_lvl_doc = document_type.split( runtime_flags.DOCUMENT_NAME_SEPARATOR)[0] if top_lvl_doc in collections and collections[top_lvl_doc] != col: log.warning( f'The collection in derived document {document_type} ({col}) ' f'is differ than its base document {top_lvl_doc} ' f'({collections[top_lvl_doc]}). Please fix collection name and rerun ' f'an affected migration') collections.setdefault(top_lvl_doc, col)