Esempio n. 1
0
    def build_object(cls, document_type: str, left_schema: Schema,
                     right_schema: Schema):
        """Detect that `document_type` was renamed between two schemas.

        The document must be present in `left_schema` and absent from
        `right_schema`. Every right document that is new (absent from
        the left schema) and of the same kind (embedded or not) is
        compared with the left document. An identical schema is taken
        as the rename target right away. Otherwise the share of equal
        fields and parameters, as a percentage, must reach
        `cls.similarity_threshold` for it to count as a candidate.

        :param document_type: document name looked up in both schemas
        :param left_schema: schema before the change
        :param right_schema: schema after the change
        :return: `cls(document_type=..., new_name=...)` when exactly one
         candidate was found, otherwise None
        """
        # Only a document which disappeared from the right schema
        # can be a subject of rename
        if document_type not in left_schema or document_type in right_schema:
            return None

        embedded_prefix = flags.EMBEDDED_DOCUMENT_NAME_PREFIX
        old_is_embedded = document_type.startswith(embedded_prefix)
        old_schema = left_schema[document_type]

        new_names = []
        for new_name, new_schema in right_schema.items():
            # A document known to the left schema was apparently
            # not renamed
            if new_name in left_schema:
                continue

            # Embedded and usual documents could have same/similar
            # schema, but one can not be renamed to another
            if new_name.startswith(embedded_prefix) != old_is_embedded:
                continue

            # Schemas are identical -- just a rename, stop searching
            if old_schema == new_schema:
                new_names = [new_name]
                break

            # Similarity is the count of equal fields and parameters
            # divided by the count of all compared fields/parameters
            # FIXME: values can be functions (default for instance)
            #        they will not be equal then even if they haven't changed
            total = 0
            equal = 0
            pairs = ((old_schema, new_schema),
                     (old_schema.parameters, new_schema.parameters))
            for old_part, new_part in pairs:
                keys = old_part.keys() | new_part.keys()
                total += len(keys)
                for key in keys:
                    equal += old_part.get(key) == new_part.get(key)

            if total > 0:
                similarity = equal / total * 100
                if similarity >= cls.similarity_threshold:
                    new_names.append(new_name)

        # Ambiguous (several candidates) or missing match is not a rename
        if len(new_names) == 1:
            return cls(document_type=document_type, new_name=new_names[0])
        return None
Esempio n. 2
0
    def build_object(cls, document_type: str, left_schema: Schema,
                     right_schema: Schema):
        """Detect that `document_type` was renamed between two schemas.

        Check if the document exists under a different name in the
        right schema. The document can also have small schema changes
        at the same time, so a similarity percentage is calculated and
        if it reaches `cls.similarity_threshold` the change is
        considered a rename/alter. Otherwise it is drop/create.

        :param document_type: document name looked up in both schemas
        :param left_schema: schema before the change
        :param right_schema: schema after the change
        :return: `cls(document_type=..., new_name=...)` when exactly one
         candidate was found, otherwise None
        """
        match = document_type in left_schema and document_type not in right_schema
        if not match:
            return None

        left_document_schema = left_schema[document_type]
        candidates = []
        for right_document_type, right_document_schema in right_schema.items():
            # Skip collections which were not renamed
            if right_document_type in left_schema:
                continue

            # Exact match, collection was just renamed
            if left_document_schema == right_document_schema:
                candidates = [(right_document_type, right_document_schema)]
                break

            # Counters must start from zero for every right document.
            # Otherwise comparisons made for previous documents leak
            # into the similarity percentage of the current one
            matches = 0
            compares = 0

            # Try to find collection by its schema similarity.
            # Every field found in either schema is visited, but only
            # keys present in both field schemas are counted as compares
            all_fields = left_document_schema.keys(
            ) | right_document_schema.keys()
            for field_name in all_fields:
                left_field_schema = left_document_schema.get(field_name, {})
                right_field_schema = right_document_schema.get(field_name, {})
                common_keys = left_field_schema.keys(
                ) & right_field_schema.keys()
                compares += len(common_keys)
                matches += sum(left_field_schema[k] == right_field_schema[k]
                               for k in common_keys)

            if compares > 0 and (matches / compares *
                                 100) >= cls.similarity_threshold:
                candidates.append((right_document_type, right_document_schema))

        # Ambiguous (several candidates) or missing match is not a rename
        if len(candidates) == 1:
            return cls(document_type=document_type, new_name=candidates[0][0])
        return None
Esempio n. 3
0
    def _verify_schema(self, schema: Schema):
        """Warn when documents of one hierarchy use different collections.

        Documents are grouped by the first part of their name, split by
        `runtime_flags.DOCUMENT_NAME_SEPARATOR`. The first 'collection'
        parameter met for a group is remembered. Every later document
        of that group with another collection produces a warning in
        the log. Documents without a 'collection' parameter are
        ignored. Nothing is returned.

        E.g. user could comment/remove AlterDocument(collection=...)
        for any derived document, but leave it for the base one.

        :param schema: schema to check
        """
        not_seen = object()
        known_collections = {}  # {top_level_document: collection}
        for document_type, doc_schema in schema.items():
            if 'collection' in doc_schema.parameters:
                col = doc_schema.parameters['collection']
                name_parts = document_type.split(
                    runtime_flags.DOCUMENT_NAME_SEPARATOR)
                top_lvl_doc = name_parts[0]

                expected = known_collections.get(top_lvl_doc, not_seen)
                if expected is not_seen:
                    # First document of the hierarchy defines the
                    # collection the rest are compared with
                    known_collections[top_lvl_doc] = col
                elif expected != col:
                    log.warning(
                        f'The collection in derived document {document_type} ({col}) '
                        f'is differ than its base document {top_lvl_doc} '
                        f'({expected}). Please fix collection name and rerun '
                        f'an affected migration')