예제 #1
0
파일: intersect.py 프로젝트: g11r/lgr-core
def intersect_description(first, second):
    """
    Build the description resulting from the intersection of two descriptions.

    When one of the inputs is None, the other one is returned unchanged.
    When both share the same MIME type, the value is picked through
    ``let_user_choose``; otherwise both values are concatenated into a
    single 'text/enriched' description recording each original MIME type.

    :param first: First description (or None).
    :param second: Second description (or None).
    :return: The remaining input if one is None, else a new Description.
    """
    # A missing operand means there is nothing to combine
    if first is None:
        return second
    if second is None:
        return first

    if first.description_type == second.description_type:
        # Identical MIME types: content can be merged directly
        return Description(let_user_choose(first.value, second.value),
                           first.description_type)

    # Diverging MIME types: keep both contents side by side as enriched text
    template = (
        "\n"
        "Original MIME-type for first description: '{0}'.\n"
        "{1}\n"
        "\n"
        "----\n"
        "\n"
        "Original MIME-type for second description: '{2}'.\n"
        "{3}\n"
    )
    combined = template.format(first.description_type, first.value,
                               second.description_type, second.value)
    return Description(combined, 'text/enriched')
예제 #2
0
파일: union.py 프로젝트: j-bernard/lgr-core
def union_version(first, second):
    """
    Build the version resulting from the union of two version objects.

    If either input is None, the other one is returned as is. Otherwise the
    value and the comment are each selected through ``let_user_choose``.

    :param first: First version object (or None).
    :param second: Second version object (or None).
    :return: The remaining input if one is None, else a new Version.
    """
    # Nothing to merge when one side is missing
    if first is None or second is None:
        return second if first is None else first

    return Version(let_user_choose(first.value, second.value),
                   let_user_choose(first.comment, second.comment))
예제 #3
0
파일: union.py 프로젝트: j-bernard/lgr-core
def union_char(lgr, first, second):
    """
    Add to ``lgr`` the union of two chars sharing the same code point.

    The code point of ``first`` is added with a comment selected through
    ``let_user_choose`` and the union of both tag collections. Every
    distinct variant found on either char is then attached to it.

    :param lgr: The LGR being produced; it receives the char and variants.
    :param first: The first char of the union.
    :param second: The other char of the union.
    """
    chosen_comment = let_user_choose(first.comment, second.comment)
    all_tags = set(first.tags) | set(second.tags)
    lgr.add_cp(first.cp, comment=chosen_comment, tag=all_tags)

    # Variants present on both chars collapse to one entry through the set
    all_variants = set(first.get_variants()) | set(second.get_variants())
    for variant in all_variants:
        lgr.add_variant(first.cp,
                        variant.cp,
                        variant_type=variant.type,
                        when=variant.when,
                        not_when=variant.not_when,
                        comment=variant.comment)
예제 #4
0
파일: intersect.py 프로젝트: g11r/lgr-core
def intersect_char(lgr, first, second):
    """
    Add to ``lgr`` the intersection of two chars sharing the same code point.

    The code point of ``first`` is added with a comment selected through
    ``let_user_choose`` and only the tags common to both chars. Variants are
    compared on their (cp, when, not_when) triple; those found on both chars
    are added with these three pieces of information only.

    :param lgr: The LGR being produced; it receives the char and variants.
    :param first: The first char of the intersection.
    :param second: The other char of the intersection.
    """
    def signatures(char):
        # Reduce each variant to the triple used for the comparison
        return {(v.cp, v.when, v.not_when) for v in char.get_variants()}

    chosen_comment = let_user_choose(first.comment, second.comment)
    shared_tags = set(first.tags) & set(second.tags)
    lgr.add_cp(first.cp, comment=chosen_comment, tag=shared_tags)

    from_first = signatures(first)
    from_second = signatures(second)
    for variant_cp, when, not_when in from_first & from_second:
        lgr.add_variant(first.cp, variant_cp, when=when, not_when=not_when)
예제 #5
0
파일: merge_set.py 프로젝트: g11r/lgr-core
def _script_prefixed(script, rule_name):
    """Return ``rule_name`` prefixed with ``script`` and a dash, or None if there is no rule."""
    if rule_name:
        return script + '-' + rule_name
    return None


def _remap_references(ref_mapping, script, references):
    """Translate reference ids using the mapping of ``script``; unmapped ids (or scripts) are kept as is."""
    return [ref_mapping.get(script, {}).get(ref, ref) for ref in references]


def _add_new_char(script, merged_lgr, ref_mapping, char, new_tags):
    """Add a code point not yet in the merged LGR, along with its variants (all set to 'blocked')."""
    merged_lgr.add_cp(char.cp,
                      comment=char.comment,
                      ref=_remap_references(ref_mapping, script,
                                            char.references),
                      tag=list(new_tags),
                      when=_script_prefixed(script, char.when),
                      not_when=_script_prefixed(script, char.not_when))
    for v in char.get_variants():
        merged_lgr.add_variant(char.cp,
                               v.cp,
                               variant_type='blocked',
                               comment=v.comment,
                               ref=_remap_references(ref_mapping, script,
                                                     v.references),
                               when=_script_prefixed(script, v.when),
                               not_when=_script_prefixed(script, v.not_when))


def _merge_existing_char(lgr, script, merged_lgr, ref_mapping,
                         previous_scripts, char, existing_char, new_tags):
    """
    Merge ``char`` into ``existing_char``, which is already in the merged LGR.

    :return: A ``(new_v1, new_v2)`` pair of variant sets that must be made
             variants of each other to keep transitivity, or None.
    :raise CharInvalidContextRule: If both chars carry a when (or not-when)
                                   rule and the rule names differ.
    """
    # same cp already in LGR
    existing_char.comment = let_user_choose(existing_char.comment,
                                            char.comment)
    existing_char.tags = list(
        set.union(set(existing_char.tags), set(new_tags)))
    existing_char.references = set.union(set(existing_char.references),
                                         set(char.references))

    # If 2 scripts have different variants on a character, the variants from
    # script 1 must become variants of those from script 2 to keep
    # transitivity (e.g. b is variant of a in script 1, c is variant of a in
    # script 2: b must be set as variant of c and conversely). This is done by
    # the caller once all code points are processed, as the code point of a
    # new variant may not be in the merged LGR yet.
    char_variants = set(char.get_variants())
    existing_variants = set(existing_char.get_variants())
    new_v1 = char_variants - existing_variants
    new_v2 = existing_variants - char_variants
    # remove cp itself to avoid error with reflexive variants; breaking right
    # after the removal keeps the mutation-while-iterating safe
    for candidates in (new_v1, new_v2):
        for v in candidates:
            if v.cp == existing_char.cp:
                candidates.remove(v)
                break

    # add new variants to current code point
    # new_v1 is not reused here as a reflexive variant may have been removed
    for v in char_variants - existing_variants:
        merged_lgr.add_variant(existing_char.cp,
                               v.cp,
                               variant_type='blocked',
                               when=_script_prefixed(script, v.when),
                               not_when=_script_prefixed(script, v.not_when),
                               comment=v.comment,
                               ref=_remap_references(ref_mapping, script,
                                                     v.references))
        # comments and references of existing variants are left untouched

    # if when or not-when:
    #  - if only one of the two chars has a rule, keep that rule (a rule
    #    coming from char is prefixed with the current script)
    #  - if both have a rule with the same name (content is not checked),
    #    prefix the existing rule with this script and merge the rules
    #  - if both have a rule with different names, raise an exception
    existing_when = existing_char.when
    existing_not_when = existing_char.not_when
    # retrieve the rule names by stripping prefixes of previous scripts;
    # names are escaped so that they are matched literally
    for other_script in reversed(previous_scripts):
        prefix = r'^{}-'.format(re.escape(other_script))
        if existing_char.when:
            existing_when = re.sub(prefix, '', existing_when)
        if existing_char.not_when:
            existing_not_when = re.sub(prefix, '', existing_not_when)

    if char.when:
        if not existing_when:
            existing_char.when = script + '-' + char.when
        elif existing_when == char.when:
            existing_char.when = script + '-' + existing_char.when
            # add a merged rule
            matching_script = re.sub(
                r'-{}$'.format(re.escape(existing_when)), '',
                existing_char.when)
            merge_rules(lgr,
                        matching_script,
                        merged_lgr,
                        ref_mapping,
                        specific=existing_when)
        else:
            raise CharInvalidContextRule(char.cp, char.when)

    if char.not_when:
        if not existing_not_when:
            existing_char.not_when = script + '-' + char.not_when
        elif existing_not_when == char.not_when:
            existing_char.not_when = script + '-' + existing_char.not_when
            # add a merged rule
            matching_script = re.sub(
                r'-{}$'.format(re.escape(existing_not_when)), '',
                existing_char.not_when)
            merge_rules(lgr,
                        matching_script,
                        merged_lgr,
                        ref_mapping,
                        specific=existing_not_when)
        else:
            raise CharInvalidContextRule(char.cp, char.not_when)

    if new_v1 and new_v2:
        return new_v1, new_v2
    return None


def merge_chars(lgr, script, merged_lgr, ref_mapping, previous_scripts):
    """
    Merge chars from LGR set.

    Each char of ``lgr`` is either added to ``merged_lgr`` (tags and context
    rule names get prefixed with the script, variants are added as 'blocked')
    or, when an equal char is already in ``merged_lgr``, merged into it.
    Variants that differ between scripts are finally cross-linked to keep
    transitivity.

    Reference ids are translated with ``ref_mapping[script]`` when available;
    a script without any mapping keeps its reference ids unchanged, for code
    points and variants alike.

    :param lgr: A LGR from the set
    :param script: The LGR script
    :param merged_lgr: The merged LGR
    :param ref_mapping: The reference mapping from base LGR to new LGR
    :param previous_scripts: The scripts that has already been processed
    :raise CharInvalidContextRule: If a char already in the merged LGR has a
                                   when/not-when rule with another name.
    """
    new_variants = []
    # snapshot of the chars merged so far, used for lookup by equality
    merged_chars = {char: char for char in merged_lgr.repertoire}
    for char in lgr.repertoire:
        if len(char.cp) == 1 and lgr.unicode_database is not None:
            script_extensions = lgr.unicode_database.get_script_extensions(
                char.cp[0])
        else:
            script_extensions = []
        # tags containing ':' are kept as is, others get the script prefix
        new_tags = {x if ':' in x else script + '-' + x
                    for x in char.tags} | set(script_extensions)

        existing_char = merged_chars.get(char)
        if existing_char:
            transitive_pair = _merge_existing_char(lgr, script, merged_lgr,
                                                   ref_mapping,
                                                   previous_scripts, char,
                                                   existing_char, new_tags)
            if transitive_pair is not None:
                new_variants.append(transitive_pair)
            continue

        # add new cp in LGR
        _add_new_char(script, merged_lgr, ref_mapping, char, new_tags)

    # handle transitivity for variants that differ between scripts
    for var1_list, var2_list in new_variants:
        for v1 in var1_list:
            for v2 in var2_list:
                merged_lgr.add_variant(
                    v1.cp,
                    v2.cp,
                    variant_type='blocked',
                    comment='New variant for merge to keep transitivity')
                merged_lgr.add_variant(
                    v2.cp,
                    v1.cp,
                    variant_type='blocked',
                    comment='New variant for merge to keep transitivity')