def intersect_description(first, second):
    """
    Intersect two description objects.

    When both descriptions share the same MIME type, the content is picked
    with ``let_user_choose``. Otherwise both contents are kept, one after the
    other, in a single ``text/enriched`` description that records the
    original MIME type of each part.

    :param first: First object to intersect with.
    :param second: Other object to intersect with.
    :return: New object, or the other argument unchanged when one of them
             is None.
    """
    # Nothing to combine when one side is missing: hand back the other one.
    if first is None:
        return second
    if second is None:
        return first

    if first.description_type == second.description_type:
        # Same MIME types, can merge content
        value = let_user_choose(first.value, second.value)
        description_type = first.description_type
    else:
        # MIME types are different, fall back to 'text/enriched' and keep
        # both contents.
        description_type = 'text/enriched'
        # The template is assembled from explicit lines so that its layout
        # (one item per line, surrounded by a leading and trailing newline)
        # does not depend on how this source file is formatted.
        template = '\n'.join([
            '',
            "Original MIME-type for first description: '{0}'.",
            '{1}',
            '----',
            "Original MIME-type for second description: '{2}'.",
            '{3}',
            '',
        ])
        value = template.format(first.description_type,
                                first.value,
                                second.description_type,
                                second.value)

    return Description(value, description_type)
def union_version(first, second):
    """
    Union two version objects.

    :param first: First version object to union.
    :param second: Other version object to union.
    :return: New object, or the other argument unchanged when one of them
             is None.
    """
    # A missing side means there is nothing to combine.
    if first is None or second is None:
        return second if first is None else first

    # Value is resolved before comment, each through let_user_choose.
    return Version(
        let_user_choose(first.value, second.value),
        let_user_choose(first.comment, second.comment),
    )
def union_char(lgr, first, second):
    """
    Union two chars with same CP.

    The code point of ``first`` is added to ``lgr`` with the comment picked
    by ``let_user_choose`` and the tags of both chars; every variant found on
    either char is then added to that code point.

    :param lgr: The produced LGR.
    :param first: The first object to union.
    :param second: The other object to union.
    """
    merged_comment = let_user_choose(first.comment, second.comment)
    merged_tags = set(first.tags) | set(second.tags)
    lgr.add_cp(first.cp, comment=merged_comment, tag=merged_tags)

    all_variants = set(first.get_variants()) | set(second.get_variants())
    for variant in all_variants:
        lgr.add_variant(
            first.cp,
            variant.cp,
            variant_type=variant.type,
            when=variant.when,
            not_when=variant.not_when,
            comment=variant.comment,
        )
def intersect_char(lgr, first, second):
    """
    Intersect two chars with same CP.

    The code point of ``first`` is added to ``lgr`` with the comment picked
    by ``let_user_choose`` and the tags common to both chars. Variants are
    compared on (code point, when, not-when) only, and those present on both
    chars are added with just these three pieces of information.

    :param lgr: The produced LGR.
    :param first: The first object to intersect with.
    :param second: The other object to intersect with.
    """
    chosen_comment = let_user_choose(first.comment, second.comment)
    shared_tags = set(first.tags) & set(second.tags)
    lgr.add_cp(first.cp, comment=chosen_comment, tag=shared_tags)

    def variant_keys(char):
        # Key each variant by the fields used for the comparison.
        return {(var.cp, var.when, var.not_when)
                for var in char.get_variants()}

    keys_first = variant_keys(first)
    keys_second = variant_keys(second)
    for var_cp, when, not_when in keys_first & keys_second:
        lgr.add_variant(first.cp, var_cp, when=when, not_when=not_when)
def _prefix_with_script(script, rule_name):
    """Return ``rule_name`` prefixed with ``script`` and a dash, or None when there is no rule."""
    return script + '-' + rule_name if rule_name else None


def _map_references(ref_mapping, script, references):
    """Translate reference ids through the mapping of ``script``; ids (or scripts) without a mapping are kept."""
    return [ref_mapping.get(script, {}).get(x, x) for x in references]


def _strip_script_prefixes(rule_name, scripts):
    """Remove the leading ``<script>-`` prefixes of already processed scripts from a rule name."""
    if not rule_name:
        return rule_name
    for other_script in reversed(scripts):
        # Script names are escaped so that they are matched literally.
        rule_name = re.sub(r'^{}-'.format(re.escape(other_script)), '', rule_name)
    return rule_name


def _merge_context_rule(attr, existing_rule, lgr, script, merged_lgr,
                        ref_mapping, existing_char, char):
    """Reconcile the ``attr`` ('when' or 'not_when') rule of ``char`` with the one already on ``existing_char``."""
    char_rule = getattr(char, attr)
    if not char_rule:
        return

    if not existing_rule:
        # No concurrent rule on the merged code point: keep the rule of the
        # current script, prefixed with that script.
        setattr(existing_char, attr, script + '-' + char_rule)
    elif existing_rule == char_rule:
        # Same rule name (content is not checked): add this script's prefix
        # to the existing rule name.
        setattr(existing_char, attr, script + '-' + getattr(existing_char, attr))
        # add a merged rule
        matching_script = re.sub(r'-{}$'.format(re.escape(existing_rule)), '',
                                 getattr(existing_char, attr))
        merge_rules(lgr, matching_script, merged_lgr, ref_mapping,
                    specific=existing_rule)
    else:
        raise CharInvalidContextRule(char.cp, char_rule)


def merge_chars(lgr, script, merged_lgr, ref_mapping, previous_scripts):
    """
    Merge chars from LGR set.

    Code points that are not yet in the merged LGR are added with their
    variants; tags and context rule names are prefixed with the script, and
    every variant is added with type 'blocked'. Code points already present
    get their comment, tags, references, variants and context rules
    combined with the existing entry.

    :param lgr: A LGR from the set
    :param script: The LGR script
    :param merged_lgr: The merged LGR
    :param ref_mapping: The reference mapping from base LGR to new LGR
    :param previous_scripts: The scripts that has already been processed
    :raises CharInvalidContextRule: If a code point already in the merged LGR
                                    has a when/not-when rule with a different
                                    name than the one in ``lgr``.
    """
    new_variants = []
    merged_chars = {char: char for char in merged_lgr.repertoire}

    for char in lgr.repertoire:
        if len(char.cp) == 1 and lgr.unicode_database is not None:
            script_extensions = lgr.unicode_database.get_script_extensions(
                char.cp[0])
        else:
            script_extensions = []
        # Tags containing ':' are kept as is, the others get the script prefix.
        new_tags = ({x if ':' in x else script + '-' + x for x in char.tags}
                    | set(script_extensions))

        existing_char = merged_chars.get(char)
        if not existing_char:
            # add new cp in LGR
            merged_lgr.add_cp(char.cp,
                              comment=char.comment,
                              ref=_map_references(ref_mapping, script,
                                                  char.references),
                              tag=list(new_tags),
                              when=_prefix_with_script(script, char.when),
                              not_when=_prefix_with_script(script,
                                                           char.not_when))
            for v in char.get_variants():
                merged_lgr.add_variant(char.cp, v.cp,
                                       variant_type='blocked',
                                       comment=v.comment,
                                       ref=_map_references(ref_mapping, script,
                                                           v.references),
                                       when=_prefix_with_script(script, v.when),
                                       not_when=_prefix_with_script(
                                           script, v.not_when))
            continue

        # same cp already in LGR
        existing_char.comment = let_user_choose(existing_char.comment,
                                                char.comment)
        existing_char.tags = list(
            set.union(set(existing_char.tags), set(new_tags)))
        existing_char.references = set.union(set(existing_char.references),
                                             set(char.references))

        # if 2 scripts have different variants on a character, we need to add
        # the variants for script 1 as variant on script 2 to keep
        # transitivity (e.g. b is variant of a in script 1, c is variant of a
        # in script 2, we need to set b as c variant and conversely). We do
        # that after processing all code points as the code point for the new
        # variant may not be in the merged LGR in the current iteration.
        char_variants = set(char.get_variants())
        existing_variants = set(existing_char.get_variants())
        added_variants = char_variants - existing_variants
        # remove cp itself to avoid error with reflexive variants
        new_v1 = {v for v in added_variants if v.cp != existing_char.cp}
        new_v2 = {v for v in existing_variants - char_variants
                  if v.cp != existing_char.cp}
        if new_v1 and new_v2:
            new_variants.append((new_v1, new_v2))

        # add new variants to current code point
        # (reflexive variants are kept here, hence added_variants, not new_v1)
        for v in added_variants:
            merged_lgr.add_variant(existing_char.cp, v.cp,
                                   variant_type='blocked',
                                   when=_prefix_with_script(script, v.when),
                                   not_when=_prefix_with_script(script,
                                                                v.not_when),
                                   comment=v.comment,
                                   ref=_map_references(ref_mapping, script,
                                                       v.references))
            # existing variants comment or references are not updated as it
            # is not really important

        # if when or not-when:
        #  - if existing cp has no concurrent rule or conversely, keep the
        #    rule as is (i.e. if existing cp has no rule but cp has one, keep
        #    the cp rule prefixed with the current script)
        #  - if existing cp has the same when/not-when rules (same name,
        #    content is not checked), update cp WLE with the prefix from this
        #    script
        #  - if existing cp has a different rule (not the same name), raise
        #    an exception
        # retrieve WLE names
        existing_when = _strip_script_prefixes(existing_char.when,
                                               previous_scripts)
        existing_not_when = _strip_script_prefixes(existing_char.not_when,
                                                   previous_scripts)
        _merge_context_rule('when', existing_when, lgr, script, merged_lgr,
                            ref_mapping, existing_char, char)
        _merge_context_rule('not_when', existing_not_when, lgr, script,
                            merged_lgr, ref_mapping, existing_char, char)

    # handle transitivity for variants that differ between scripts
    for var1_list, var2_list in new_variants:
        for v1 in var1_list:
            for v2 in var2_list:
                merged_lgr.add_variant(
                    v1.cp, v2.cp,
                    variant_type='blocked',
                    comment='New variant for merge to keep transitivity')
                merged_lgr.add_variant(
                    v2.cp, v1.cp,
                    variant_type='blocked',
                    comment='New variant for merge to keep transitivity')