def main():
    """
    Command-line entry point: parse a RFC3743 file and dump it as LGR XML.

    The XML goes to the file given with `-o/--output` (opened in binary
    mode) when one is provided, otherwise it is printed on stdout.
    """
    from lgr.parser.rfc3743_parser import RFC3743Parser
    from lgr.parser.xml_serializer import serialize_lgr_xml

    cli = argparse.ArgumentParser(
        description='Parse and dump a RFC3743 file')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-o', '--output', metavar='OUTPUT',
                     help='Optional output file')
    cli.add_argument('file', metavar='FILE')
    options = cli.parse_args()

    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    lgr = RFC3743Parser(options.file).parse_document()

    if options.output is None:
        # No output file: dump on stdout, without XML declaration.
        print(serialize_lgr_xml(lgr, pretty_print=True,
                                encoding='unicode',
                                xml_declaration=False))
        return

    # Serialize first, so the output file is only opened once the
    # document has been generated.
    serialized = serialize_lgr_xml(lgr, pretty_print=True)
    with io.open(options.output, mode='wb') as out_file:
        out_file.write(serialized)
def main():
    """
    Command-line entry point: parse a LGR XML file and dump it back as XML.

    When a RelaxNG schema is given with `-r/--rng`, the document is
    validated first; a validation error is printed and nothing is dumped.
    In verbose mode, every element of the repertoire is printed before
    the XML. The XML goes to the `-o/--output` file (binary mode) when
    given, otherwise to stdout.
    """
    from lgr.parser.xml_parser import XMLParser
    from lgr.parser.xml_serializer import serialize_lgr_xml

    cli = argparse.ArgumentParser(
        description='Parse and dump a LGR XML file')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-o', '--output', metavar='OUTPUT',
                     help='Optional output file')
    cli.add_argument('xml', metavar='XML')
    options = cli.parse_args()

    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    xml_parser = XMLParser(options.xml)

    if options.rng is not None:
        rng_error = xml_parser.validate_document(options.rng)
        if rng_error is not None:
            # Invalid document: report the error and stop here.
            print(rng_error)
            return

    lgr = xml_parser.parse_document()

    if options.verbose:
        for repertoire_item in lgr.repertoire:
            print(repertoire_item)

    if options.output is None:
        print(serialize_lgr_xml(lgr, pretty_print=True,
                                encoding='unicode',
                                xml_declaration=False))
        return

    serialized = serialize_lgr_xml(lgr, pretty_print=True)
    with io.open(options.output, mode='wb') as out_file:
        out_file.write(serialized)
def main():
    """
    Command-line entry point: parse a "one codepoint per line" file and
    dump it as LGR XML.

    The XML goes to the file given with `-o/--output` (opened in binary
    mode) when one is provided, otherwise it is printed on stdout.
    """
    from lgr.parser.line_parser import parse_document
    from lgr.parser.xml_serializer import serialize_lgr_xml

    parser = argparse.ArgumentParser(
        description='Parse and dump a "one codepoint per line" file')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-o', '--output', metavar='OUTPUT',
                        help='Optional output file')
    parser.add_argument('file', metavar='FILE')

    args = parser.parse_args()

    logging.basicConfig(stream=sys.stderr,
                        level=logging.DEBUG if args.verbose else logging.INFO)

    lgr = parse_document(args.file)

    if args.output is not None:
        # The default serialization is what gets written to a file opened
        # in binary mode.
        xml = serialize_lgr_xml(lgr, pretty_print=True)
        with io.open(args.output, mode='wb') as output:
            output.write(xml)
    else:
        # Printing the default (binary-file) serialization would show a
        # bytes repr on Python 3; request a text serialization instead,
        # as the other dump tools do when writing to stdout.
        print(serialize_lgr_xml(lgr, pretty_print=True,
                                encoding='unicode',
                                xml_declaration=False))
def check_xml_validity(lgr, options):
    """
    Serialize the LGR to XML, and validate the XML against the RNG.

    `options` argument can contain:
        * rng_filepath: Filepath of the RNG schema used to validate.
          When the key is missing, the check is skipped and reported
          as successful.

    :param LGR lgr: The LGR to check.
    :param options: Dictionary of options to the validation function.
    :return: True if the XML is valid (or the check was skipped),
             False otherwise.
    """
    # Local import to prevent import cycles.
    from lgr.parser.xml_serializer import serialize_lgr_xml
    from lgr.parser.xml_parser import XMLParser

    logger.info("Testing XML validity")

    if 'rng_filepath' not in options:
        logger.warning("rng_filepath not in 'options' arguments, skipping")
        return True

    serialized = StringIO(serialize_lgr_xml(lgr))
    rng_error = XMLParser(serialized).validate_document(
        options['rng_filepath'])
    is_valid = rng_error is None

    if is_valid:
        logger.info('RNG validation OK')
    else:
        logger.warning('RNG validation failed: XML error is')
        logger.warning(rng_error)

    logger.info("Testing XML validity done")
    return is_valid
def main():
    """
    Command-line entry point: populate the variants of a LGR and print
    the resulting XML on stdout.

    `-l/--libs` is split on '#' into three parts that are handed to the
    Unicode data version manager. RNG validation errors (with `-r/--rng`)
    are logged but do not stop processing; a document that cannot be
    parsed does.
    """
    cli = argparse.ArgumentParser(description='LGR Populate Variants CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries', required=True)
    cli.add_argument('-x', '--lgr-xml', metavar='LGR',
                     help='The LGR to populate', required=True)
    options = cli.parse_args()

    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    libpath, i18n_libpath, libver = options.libs.split('#')
    unidb = UnicodeDataVersionManager().register(None, libpath,
                                                 i18n_libpath, libver)

    xml_parser = XMLParser(options.lgr_xml)
    xml_parser.unicode_database = unidb

    if options.rng is not None:
        rng_error = xml_parser.validate_document(options.rng)
        if rng_error is not None:
            # Only reported: parsing is still attempted below.
            logger.error('Errors for RNG validation of LGR file %s: %s',
                         options.lgr_xml, rng_error)

    lgr = xml_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file %s", options.lgr_xml)
        logger.error("Please check compliance with RNG.")
        return

    lgr.populate_variants()
    populated_xml = serialize_lgr_xml(lgr, pretty_print=True,
                                      encoding='unicode',
                                      xml_declaration=False)
    print(populated_xml)
def make_idna_repertoire(version):
    """
    Make a repertoire from IDNA tables.

    Parse the IDNA table registry, keep the code points whose property is
    PVALID, CONTEXTO or CONTEXTJ, convert them to an LGR and print its XML
    on stdout.

    Input:
        * version: The unicode version to use (dot-separated numbers).
    """
    from lgr.core import LGR
    from lgr.parser.xml_serializer import serialize_lgr_xml

    lgr = LGR('idna2008-%s' % version)

    idna_url = IDNATABLES_URL.format(version=version)
    logger.debug("Fetching and parsing '%s'", idna_url)
    registry = etree.parse(idna_url)

    # Doubled braces keep a literal '{}' pair around the namespace
    namespace = "{{{0}}}".format(IDNATABLES_NS)

    # Versions up to 6.0.0 use a registry id containing the version
    version_numbers = [int(part) for part in version.split('.')]
    if version_numbers <= [6, 0, 0]:
        registry_id = "idna-tables-{}-properties".format(version)
    else:
        registry_id = "idna-tables-properties"

    record_xpath = '{0}registry[@id="{1}"]/{0}record'.format(namespace,
                                                             registry_id)

    allowed_properties = ('PVALID', 'CONTEXTO', 'CONTEXTJ')
    for record in registry.findall(record_xpath):
        codepoint = record.find(CODEPOINT_TAG).text
        prop = record.find(PROPERTY_TAG).text

        if prop in allowed_properties:
            if codepoint.find('-') > 0:
                # Codepoint is a range
                bounds = [int(bound, 16) for bound in codepoint.split('-')]
                first_cp, last_cp = bounds
                lgr.add_range(first_cp, last_cp)
            else:
                # Single codepoint
                lgr.add_cp(int(codepoint, 16))

    print(serialize_lgr_xml(lgr, pretty_print=True,
                            encoding='unicode',
                            xml_declaration=False))
def merge_lgr_set(lgr_set, name):
    """
    Merge LGRs from a set.

    The given list is sorted in place, and ranges of each LGR are expanded
    before its content is merged.

    :param lgr_set: The list of LGRs in the set
    :param name: Merged LGR name
    :return: New LGR (merge of LGR set), as parsed back from its XML
             serialization
    """
    logger.debug("Merge %s", name)

    # Order LGRs by script ('und-' is replaced by 'zzz' in the sort key)
    def script_sort_key(lgr):
        return get_script(lgr).replace('und-', 'zzz')

    lgr_set.sort(key=script_sort_key)

    # Collect the distinct unicode versions, in order of appearance
    unicode_versions = OrderedDict.fromkeys(
        lgr.metadata.unicode_version for lgr in lgr_set)
    if len(unicode_versions) > 1:
        logger.warning("Different unicode version in set: %s",
                       unicode_versions.keys())

    ref_mapping = {}
    merged_metadata = copy.deepcopy(merge_metadata(lgr_set))
    merged_lgr = LGR(name=name, metadata=merged_metadata)

    previous_scripts = []
    for lgr in lgr_set:
        script = get_script(lgr)
        lgr.expand_ranges()

        merge_references(lgr, script, merged_lgr, ref_mapping)
        merge_chars(lgr, script, merged_lgr, ref_mapping, previous_scripts)
        for merge_step in (merge_actions, merge_rules, merge_classes):
            merge_step(lgr, script, merged_lgr, ref_mapping)

        previous_scripts.append(script)

    # XXX As the created merged_lgr is not a valid Python LGR object,
    # we have to serialize it/parse it to get a valid object.
    serialized = BytesIO(serialize_lgr_xml(merged_lgr))
    return XMLParser(source=serialized, filename=name).parse_document()
def main():
    """
    Command-line entry point: merge several LGRs into a set and print the
    merged LGR as XML on stdout.

    At least two `-s/--lgr-set` values are needed. When `-l/--libs` is
    given, it is split on '#' into three parts that are handed to the
    Unicode data version manager.
    """
    parser = argparse.ArgumentParser(description='LGR merge set CLI')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-l', '--libs', metavar='LIBS',
                        help='ICU libraries')
    parser.add_argument('-r', '--rng', metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-n', '--name', metavar='NAME',
                        help="Merged LGR name")
    parser.add_argument('-s', '--lgr-set', metavar='LGR-SET',
                        action='append',
                        help='LGR in the set (can be used multiple times)',
                        required=True)

    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    # Log on stderr: stdout carries the merged XML and must stay clean
    # so that it can be redirected to a file.
    logging.basicConfig(stream=sys.stderr, level=log_level)

    unidb = None
    if args.libs:
        libpath, i18n_libpath, libver = args.libs.split('#')
        manager = UnicodeDataVersionManager()
        unidb = manager.register(None, libpath, i18n_libpath, libver)

    if len(args.lgr_set) == 1:
        logger.error("Please provide more than one LGR to make a set")
        return

    logger.warning('Please wait, this can take some time...\n')

    merged_lgr, _ = merge_lgrs(args.lgr_set, name=args.name,
                               rng=args.rng, unidb=unidb)
    if not merged_lgr:
        return

    # Ask for a text serialization so that print() outputs the document
    # itself rather than the repr of a bytes object on Python 3.
    print(serialize_lgr_xml(merged_lgr, pretty_print=True,
                            encoding='unicode',
                            xml_declaration=False))
def session_merge_set(request, lgr_info_set, lgr_set_name):
    """
    Merge some LGRs to build a set, and open the result in the session.

    :param request: Django request object
    :param lgr_info_set: The list of LGRInfo objects in the set
    :param lgr_set_name: The name of the LGR set
    :return: The LGR set merge id (slug of the merged LGR name)
    """
    lgrs = [lgr_info.lgr for lgr_info in lgr_info_set]
    merged_lgr = merge_lgr_set(lgrs, lgr_set_name)
    merged_id = slugify(merged_lgr.name)

    session_open_lgr(request,
                     merged_id,
                     serialize_lgr_xml(merged_lgr),
                     validating_repertoire_name=None,
                     validate=True,
                     lgr_set=lgr_info_set)
    return merged_id
def update_xml(self, pretty_print=False):
    """
    Re-generate `self.xml` from the current content of `self.lgr`.

    To be called when something was changed in `lgr`.

    :param pretty_print: Forwarded as-is to the XML serializer.
    """
    refreshed_xml = serialize_lgr_xml(self.lgr, pretty_print=pretty_print)
    self.xml = refreshed_xml
def main():
    """
    Command-line entry point: compare two LGRs, or two LGR sets.

    `-1/--first` and `-2/--second` each designate a single LGR, or a LGR
    set when used multiple times; a single LGR cannot be compared with a
    set. LGR sets only support the DIFF action. For single LGRs, INTERSECT
    and UNION print the resulting LGR as XML, DIFF prints the comparison.
    """
    cli = argparse.ArgumentParser(description='LGR Compare CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-1', '--first', metavar='LGR1', action='append',
                     help='First LGR or LGR set if used multiple times',
                     required=True)
    cli.add_argument('-2', '--second', metavar='LGR2', action='append',
                     help='Second LGR or LGR set if used multiple times',
                     required=True)
    cli.add_argument('action', metavar="ACTION",
                     help='Compare action (INTERSECT, UNION, DIFF)',
                     choices=['INTERSECT', 'UNION', 'DIFF'])
    cli.add_argument('-g', '--generate', action='store_true',
                     help='Generate a full dump (with identical elements as well)')
    cli.add_argument('-n1', '--name-first', metavar='NAME1',
                     help="Merged LGR 1 name")
    cli.add_argument('-n2', '--name-second', metavar='NAME2',
                     help="Merged LGR 2 name")
    options = cli.parse_args()

    if options.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    # Both options are required, so each list holds at least one item:
    # a side is a set as soon as it holds more than one.
    first_is_set = len(options.first) > 1
    second_is_set = len(options.second) > 1
    if first_is_set != second_is_set:
        logger.error("Cannot compare LGR with LGR sets")
        return

    logger.info('Please wait, this can take some time...\n')

    if first_is_set:
        if options.action in ('INTERSECT', 'UNION'):
            logger.error('Cannot perform intersection or union with LGR sets')
            return

        merged_lgr_1, lgr_set_1 = merge_lgrs(options.first,
                                             name=options.name_first,
                                             rng=options.rng)
        if not merged_lgr_1:
            return

        merged_lgr_2, lgr_set_2 = merge_lgrs(options.second,
                                             name=options.name_second,
                                             rng=options.rng)
        if not merged_lgr_2:
            return

        print(diff_lgr_sets(merged_lgr_1, merged_lgr_2,
                            lgr_set_1, lgr_set_2))
        return

    # Single LGR on each side
    first_parser = XMLParser(options.first[0])
    second_parser = XMLParser(options.second[0])

    if options.rng is not None:
        # Validation errors are only reported: parsing is still attempted.
        rng_error = first_parser.validate_document(options.rng)
        if rng_error is not None:
            logger.error('Errors for RNG validation of first LGR: \n%s',
                         rng_error)

        rng_error = second_parser.validate_document(options.rng)
        if rng_error is not None:
            logger.error('Errors for RNG validation of second LGR: %s',
                         rng_error)

    lgr1 = first_parser.parse_document()
    if lgr1 is None:
        logger.error("Error while parsing first LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    lgr2 = second_parser.parse_document()
    if lgr2 is None:
        logger.error("Error while parsing second LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    set_operations = {'INTERSECT': intersect_lgrs, 'UNION': union_lgrs}
    if options.action in set_operations:
        result_lgr = set_operations[options.action](lgr1, lgr2)
        print(serialize_lgr_xml(result_lgr, pretty_print=True,
                                encoding='unicode',
                                xml_declaration=False))
    elif options.action == 'DIFF':
        print(diff_lgrs(lgr1, lgr2, show_same=options.generate))