def check_xml_validity(lgr, options):
    """
    Serialize the LGR to XML and validate the result against an RNG schema.

    `options` argument can contain:
        * rng_filepath: Filepath of the RNG schema used to validate.
          When this key is absent the check is skipped and reported as
          successful.

    :param LGR lgr: The LGR to check.
    :param options: Dictionary of options to the validation function.
    :return: True if the XML validates (or the check was skipped),
             False otherwise.
    """
    # Imported here rather than at module level to prevent import cycles.
    from lgr.parser.xml_serializer import serialize_lgr_xml
    from lgr.parser.xml_parser import XMLParser

    logger.info("Testing XML validity")
    if 'rng_filepath' not in options:
        logger.warning("rng_filepath not in 'options' arguments, skipping")
        return True

    serialized = StringIO(serialize_lgr_xml(lgr))
    rng_error = XMLParser(serialized).validate_document(options['rng_filepath'])
    is_valid = rng_error is None

    if is_valid:
        logger.info('RNG validation OK')
    else:
        logger.warning('RNG validation failed: XML error is')
        logger.warning(rng_error)
    logger.info("Testing XML validity done")

    return is_valid
def get_by_name(repertoire_name):
    """
    Return the parsed repertoire with the given name.

    The document is parsed from ``<name>.xml`` in
    ``settings.REPERTOIRE_STORAGE_LOCATION`` on first request and kept in
    the module-level ``REPERTOIRES`` cache for later calls.

    :param repertoire_name: Name of the repertoire (file name without '.xml').
    :return: The parsed document, with its ranges expanded.
    """
    if repertoire_name in REPERTOIRES:
        return REPERTOIRES[repertoire_name]

    logger.debug("%s parsing file", repertoire_name)
    xml_filename = '{}.xml'.format(repertoire_name)
    xml_path = os.path.join(settings.REPERTOIRE_STORAGE_LOCATION, xml_filename)
    document = XMLParser(xml_path, repertoire_name).parse_document()
    # Ranges are expanded so that every code point can be iterated over.
    document.expand_ranges()
    REPERTOIRES[repertoire_name] = document

    return REPERTOIRES[repertoire_name]
def main():
    """
    Command-line entry point: populate the variants of an LGR.

    Parses the LGR given with ``--lgr-xml`` (optionally validating it against
    the RNG schema first), calls ``populate_variants()`` on it and prints the
    resulting XML on standard output. Logging goes to standard error.
    """
    cli = argparse.ArgumentParser(description='LGR Populate Variants CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries', required=True)
    cli.add_argument('-x', '--lgr-xml', metavar='LGR',
                     help='The LGR to populate', required=True)
    args = cli.parse_args()

    logging.basicConfig(stream=sys.stderr,
                        level=logging.DEBUG if args.verbose else logging.INFO)

    # The --libs value packs three fields separated by '#'.
    libpath, i18n_libpath, libver = args.libs.split('#')
    unidb = UnicodeDataVersionManager().register(None, libpath,
                                                 i18n_libpath, libver)

    lgr_parser = XMLParser(args.lgr_xml)
    lgr_parser.unicode_database = unidb

    if args.rng is not None:
        rng_errors = lgr_parser.validate_document(args.rng)
        if rng_errors is not None:
            # Reported only: parsing is still attempted below.
            logger.error('Errors for RNG validation of LGR file %s: %s',
                         args.lgr_xml, rng_errors)

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file %s", args.lgr_xml)
        logger.error("Please check compliance with RNG.")
        return

    lgr.populate_variants()
    populated_xml = serialize_lgr_xml(lgr, pretty_print=True,
                                      encoding='unicode',
                                      xml_declaration=False)
    print(populated_xml)
def main():
    """
    Command-line entry point: parse an LGR XML file and dump it back as XML.

    When ``--rng`` is given, the file is validated first; a validation error
    is printed and the command stops. In verbose mode every element of the
    repertoire is printed. The serialized LGR is written to ``--output`` when
    given, otherwise printed on standard output.
    """
    from lgr.parser.xml_parser import XMLParser
    from lgr.parser.xml_serializer import serialize_lgr_xml

    cli = argparse.ArgumentParser(
        description='Parse and dump a LGR XML file')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-o', '--output', metavar='OUTPUT',
                     help='Optional output file')
    cli.add_argument('xml', metavar='XML')
    args = cli.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    lgr_parser = XMLParser(args.xml)

    if args.rng is not None:
        rng_errors = lgr_parser.validate_document(args.rng)
        if rng_errors is not None:
            print(rng_errors)
            return

    lgr = lgr_parser.parse_document()

    if args.verbose:
        for char in lgr.repertoire:
            print(char)

    if args.output is None:
        # No output file: dump the document as text on stdout.
        print(serialize_lgr_xml(lgr, pretty_print=True,
                                encoding='unicode',
                                xml_declaration=False))
        return

    serialized = serialize_lgr_xml(lgr, pretty_print=True)
    with io.open(args.output, mode='wb') as output:
        output.write(serialized)
def merge_lgr_set(lgr_set, name):
    """
    Merge LGRs from a set

    Note that `lgr_set` is sorted in place and that each LGR of the set has
    its ranges expanded.

    :param lgr_set: The list of LGRs in the set
    :param name: Merged LGR name
    :return: New LGR (merge of LGR set)
    """
    logger.debug("Merge %s", name)

    def _script_order(lgr):
        # 'und-' prefixes are replaced by 'zzz' for ordering purposes
        return get_script(lgr).replace('und-', 'zzz')

    # order LGRs
    lgr_set.sort(key=_script_order)

    # Collect the distinct unicode versions, keeping first-seen order
    unicode_version = OrderedDict.fromkeys(
        lgr.metadata.unicode_version for lgr in lgr_set)
    if len(unicode_version) > 1:
        logger.warning("Different unicode version in set: %s",
                       unicode_version.keys())

    merged_lgr = LGR(name=name,
                     metadata=copy.deepcopy(merge_metadata(lgr_set)))
    ref_mapping = {}
    previous_scripts = []

    for lgr in lgr_set:
        script = get_script(lgr)
        lgr.expand_ranges()

        merge_references(lgr, script, merged_lgr, ref_mapping)
        merge_chars(lgr, script, merged_lgr, ref_mapping, previous_scripts)
        # These three share the same signature and are applied in order
        for merge_part in (merge_actions, merge_rules, merge_classes):
            merge_part(lgr, script, merged_lgr, ref_mapping)

        previous_scripts.append(script)

    # XXX As the created merged_lgr is not a valid Python LGR object,
    # we have to serialize it/parse it to get a valid object.
    serialized = BytesIO(serialize_lgr_xml(merged_lgr))
    return XMLParser(source=serialized, filename=name).parse_document()
def main():
    """
    Command-line entry point: check labels read from standard input.

    Parses the LGR XML file (optionally validating it against the RNG schema
    and attaching a Unicode database built from ``--libs``), then calls
    ``check_label`` for every line read from standard input.
    """
    cli = argparse.ArgumentParser(description='LGR CLI')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries')
    cli.add_argument('-v', '--variants', action='store_true',
                     help='Generate variants')
    cli.add_argument('xml', metavar='XML')
    args = cli.parse_args()

    logging.basicConfig(stream=sys.stdout, level=logging.ERROR)

    lgr_parser = XMLParser(args.xml)

    if args.libs is None:
        unidb = None
    else:
        # The --libs value packs three fields separated by '#'.
        libpath, i18n_libpath, libver = args.libs.split('#')
        unidb = UnicodeDataVersionManager().register(None, libpath,
                                                     i18n_libpath, libver)

    if args.rng is not None:
        rng_errors = lgr_parser.validate_document(args.rng)
        if rng_errors is not None:
            # Reported only: parsing is still attempted below.
            logger.error('Errors for RNG validation: %s', rng_errors)

    if unidb is not None:
        lgr_parser.unicode_database = unidb

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    utf8_stdin = codecs.getreader('utf8')(sys.stdin)
    labels = utf8_stdin.read().splitlines()
    for label in labels:
        check_label(lgr, label, args.variants)
def merge_lgrs(input_lgrs, name=None, rng=None, unidb=None):
    """
    Merge LGRs to create a LGR set

    :param input_lgrs: The LGRs belonging to the set
    :param name: The merged LGR name (defaults to 'merged-lgr-set')
    :param rng: The RNG file to validate input LGRs
    :param unidb: The unicode database
    :return: The merged LGR and the LGRs in the set.
             ``(None, [])`` when one of the input LGRs cannot be parsed.
    """
    lgr_set = []
    for lgr_file in input_lgrs:
        lgr_parser = XMLParser(lgr_file)
        if unidb:
            lgr_parser.unicode_database = unidb

        if rng:
            validation_result = lgr_parser.validate_document(rng)
            if validation_result is not None:
                # Validation errors are reported, parsing is still attempted
                logger.error('Errors for RNG validation of LGR %s: %s',
                             lgr_file, validation_result)

        lgr = lgr_parser.parse_document()
        if lgr is None:
            logger.error("Error while parsing LGR file %s.", lgr_file)
            logger.error("Please check compliance with RNG.")
            # Always return a pair: the result is documented as two values
            # and is meant to be unpacked, which a bare `return` (None)
            # would turn into a TypeError.
            return None, []

        lgr_set.append(lgr)

    if not name:
        name = 'merged-lgr-set'

    merged_lgr = merge_lgr_set(lgr_set, name)
    if unidb:
        merged_lgr.unicode_database = unidb

    return merged_lgr, lgr_set
def _parse_lgr(cls, name, xml, validate):
    """
    Parse an LGR from its XML text.

    Parsing the document with a Unicode database takes more time, since
    Unicode-related checks (IDNA validity, script checking) are performed.
    Doing so for each parsing of the LGR (i.e. for each request) is not
    really useful, hence two modes:

    - `validate` is true (first import of the LGR): the document is
      validated against the RNG schema and parsed with the Unicode database
      set, enabling all checks.
    - `validate` is false (the LGR is already in the user's session): the
      document is parsed without a Unicode database; the database is
      attached to the LGR AFTER parsing so that later user input (add
      codepoint, validation of LGR) can still be validated.

    :param name: Name of the LGR, given to the parser.
    :param xml: The XML content, assumed to be unicode data.
    :param validate: Whether to validate against the schema and Unicode.
    :return: The parsed LGR, with its Unicode database set.
    :raises LGRValidationException: If schema validation reports an error.
    """
    # Create parser - Assume xml is unicode data
    xml_bytes = six.BytesIO(xml.encode('utf-8'))
    parser = XMLParser(xml_bytes, name)

    if validate:
        schema_error = parser.validate_document(settings.LGR_RNG_FILE)
        if schema_error is not None:
            raise LGRValidationException(schema_error)

        # The Unicode database matching the document's version is set
        # before parsing, so parsing runs with it available.
        version = parser.unicode_version()
        parser.unicode_database = unidb.manager.get_db_by_version(version)
        return parser.parse_document()

    # Parse first, then attach the Unicode database to the result.
    lgr = parser.parse_document()
    version = lgr.metadata.unicode_version
    lgr.unicode_database = unidb.manager.get_db_by_version(version)
    return lgr
def main():
    """
    Command-line entry point: annotate a file of labels with an LGR.

    With a single ``--lgr-xml`` the labels are annotated with ``annotate``.
    With several, the LGRs are merged into a set, the LGR whose first
    language equals ``--lgr-script`` is selected, and the labels are
    annotated with ``lgr_set_annotate``. Results are written to ``--output``.
    """
    cli = argparse.ArgumentParser(description='LGR Annotate CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries', required=True)
    cli.add_argument('-o', '--output', metavar='OUTPUT_FILE',
                     help='File path to output the annotated labels',
                     required=True)
    cli.add_argument('-x', '--lgr-xml', metavar='LGR_XML', action='append',
                     required=True,
                     help='The LGR or LGR set if used multiple times')
    cli.add_argument('-s', '--lgr-script', metavar='LGR_SCRIPT',
                     help='If LGR is a set, the script used to validate input labels')
    cli.add_argument('-f', '--set-labels', metavar='SET_LABELS',
                     help='If LGR is a set, the file containing the label of the LGR set')
    cli.add_argument('labels', metavar='LABELS',
                     help='File path to the reference labels to annotate')
    args = cli.parse_args()

    logging.basicConfig(stream=sys.stdout,
                        level=logging.DEBUG if args.verbose else logging.INFO)

    # The --libs value packs three fields separated by '#'.
    libpath, i18n_libpath, libver = args.libs.split('#')
    unidb = UnicodeDataVersionManager().register(None, libpath,
                                                 i18n_libpath, libver)

    is_lgr_set = len(args.lgr_xml) > 1

    if is_lgr_set:
        if not args.lgr_script:
            logger.error('For LGR set, lgr script is required')
            return

        merged_lgr, lgr_set = merge_lgrs(args.lgr_xml,
                                         rng=args.rng,
                                         unidb=unidb)
        if not merged_lgr:
            logger.error('Error while creating the merged LGR')
            return

        # The set labels are optional: default to an empty stream.
        if args.set_labels:
            with io.open(args.set_labels, 'r', encoding='utf-8') as set_labels_input:
                set_labels = io.StringIO(set_labels_input.read())
        else:
            set_labels = io.StringIO()

        script_lgr = None
        for candidate in lgr_set:
            try:
                if candidate.metadata.languages[0] == args.lgr_script:
                    if script_lgr:
                        logger.warning('Script %s is provided in more than one LGR of the set, '
                                       'will only evaluate with %s', args.lgr_script, candidate.name)
                    script_lgr = candidate
            except (AttributeError, IndexError):
                # LGRs without usable language metadata are skipped.
                pass

        if not script_lgr:
            logger.error('Cannot find script %s in any of the LGR provided as input', args.lgr_script)
            return
    else:
        lgr_parser = XMLParser(args.lgr_xml[0])
        lgr_parser.unicode_database = unidb

        if args.rng is not None:
            rng_errors = lgr_parser.validate_document(args.rng)
            if rng_errors is not None:
                logger.error('Errors for RNG validation: %s', rng_errors)

        lgr = lgr_parser.parse_document()
        if lgr is None:
            logger.error("Error while parsing LGR file.")
            logger.error("Please check compliance with RNG.")
            return

    # Compute index label
    with io.open(args.labels, 'r', encoding='utf-8') as labels_input, \
            io.open(args.output, 'w', encoding='utf-8') as labels_output:
        if is_lgr_set:
            annotated = lgr_set_annotate(merged_lgr, script_lgr,
                                         set_labels, labels_input)
        else:
            annotated = annotate(lgr, labels_input)
        for out in annotated:
            labels_output.write(out)
def main():
    """
    Command-line entry point: validate an LGR and optionally check labels.

    The LGR is parsed (optionally validated against the RNG schema, with an
    optional validating repertoire and Unicode database). Unless test mode is
    enabled, ``lgr.validate`` is run and its summary logged. With ``--check``
    the label given as space-separated hexadecimal code points, or each line
    of standard input, is passed to ``check_label``.
    """
    from lgr.parser.xml_parser import XMLParser

    cli = argparse.ArgumentParser(description='LGR CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-q', '--quiet', action='store_true',
                     help='Be quiet (no log)')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-m', '--msr', metavar='MSR',
                     help='Validating repertoire')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries')
    cli.add_argument('-u', '--unicode', metavar='Unicode',
                     default='6.3.0', help='Unicode version')
    cli.add_argument('-t', '--test', action='store_true',
                     help='Enable automatic test mode')
    cli.add_argument('-c', '--check', action='store_true',
                     help='Enable label checking')
    cli.add_argument('-i', '--invalid', action='store_true',
                     help='Do not filter out "invalid" labels')
    cli.add_argument('xml', metavar='XML')
    cli.add_argument('label', metavar='LABEL', nargs='?')
    args = cli.parse_args()

    # Quiet wins over everything; verbose wins over test mode, which
    # otherwise "disables" logging by only letting errors through.
    if args.quiet:
        log_level = logging.CRITICAL
    elif args.verbose:
        log_level = logging.DEBUG
    elif args.test:
        log_level = logging.ERROR
    else:
        log_level = logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level,
                        format="%(levelname)s:%(name)s [%(filename)s:%(lineno)s] %(message)s")

    lgr_parser = XMLParser(args.xml)

    if args.libs is None:
        unidb = None
    else:
        # The --libs value packs three fields separated by '#'.
        libpath, i18n_libpath, libver = args.libs.split('#')
        unidb = UnicodeDataVersionManager().register(None, libpath,
                                                     i18n_libpath, libver)

    if args.rng is not None:
        rng_errors = lgr_parser.validate_document(args.rng)
        if rng_errors is not None:
            logger.error('Errors for RNG validation: %s', rng_errors)

    msr = None
    if args.msr is not None:
        msr = XMLParser(args.msr).parse_document()

    if unidb is not None:
        lgr_parser.unicode_database = unidb

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    options = {
        'validating_repertoire': msr,
        'unicode_version': args.unicode,
    }
    if unidb is not None:
        options['unidb'] = unidb

    if not args.test:
        summary = lgr.validate(options)
        logger.info('Result of validation: %s', summary)

    if not args.check:
        return

    if args.label:
        # The label is given as whitespace-separated hexadecimal code points.
        code_points = (int(cphex, 16) for cphex in args.label.split())
        label_u = ''.join(wide_unichr(cp) for cp in code_points)
        check_label(lgr, label_u, args.invalid, args.test)
    else:
        for label in get_stdin().read().splitlines():
            logger.info("Label '%s'", label)
            check_label(lgr, label, args.invalid, args.test)
def main():
    """
    Command-line entry point: diff two LGRs, or list collisions for one.

    When a second LGR is given, ``diff`` is run on both LGRs against the set
    of reference labels; otherwise ``collision`` is run on the first LGR
    only. Every produced chunk is passed to ``write_output``.
    """
    cli = argparse.ArgumentParser(description='LGR diff and collision CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries', required=True)
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-1', '--first', metavar='LGR1',
                     help='First LGR', required=True)
    cli.add_argument('-2', '--second', metavar='LGR2',
                     help='Second LGR', required=False)
    cli.add_argument('-s', '--set', metavar='SET_FILE',
                     help='Filepath to the set of reference labels',
                     required=True)
    cli.add_argument('-g', '--generate', action='store_true',
                     help='Generate a full dump')
    cli.add_argument('-q', '--quiet', action='store_true',
                     help='Do not print rules as it may be very very '
                          'verbose (None will be printed instead)')
    args = cli.parse_args()

    logging.basicConfig(stream=sys.stdout,
                        level=logging.DEBUG if args.verbose else logging.INFO)

    # The --libs value packs three fields separated by '#'.
    libpath, i18n_libpath, libver = args.libs.split('#')
    unidb = UnicodeDataVersionManager().register(None, libpath,
                                                 i18n_libpath, libver)

    has_second = args.second is not None

    lgr1_parser = XMLParser(args.first)
    lgr1_parser.unicode_database = unidb

    if has_second:
        lgr2_parser = XMLParser(args.second)
        lgr2_parser.unicode_database = unidb
    else:
        write_output("No second LGR, will only output collisions")

    if args.rng is not None:
        first_errors = lgr1_parser.validate_document(args.rng)
        if first_errors is not None:
            logger.error('Errors for RNG validation of first LGR: %s',
                         first_errors)
        if has_second:
            second_errors = lgr2_parser.validate_document(args.rng)
            if second_errors is not None:
                logger.error('Errors for RNG validation of second LGR: %s',
                             second_errors)

    lgr1 = lgr1_parser.parse_document()
    if lgr1 is None:
        logger.error("Error while parsing first LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    if has_second:
        lgr2 = lgr2_parser.parse_document()
        if lgr2 is None:
            logger.error("Error while parsing second LGR file.")
            logger.error("Please check compliance with RNG.")
            return

    write_output('Please wait, this can take some time...\n')

    with io.open(args.set, 'r', encoding='utf-8') as label_input:
        if has_second:
            results = diff(lgr1, lgr2, label_input, True,
                           args.generate, args.quiet)
        else:
            results = collision(lgr1, label_input,
                                args.generate, args.quiet)
        for out in results:
            write_output(out)
def main():
    """
    Command-line entry point: detect collisions between labels and a set.

    The index label of every reference label of ``--set`` is computed with
    the LGR; reference labels that are not in the LGR are ignored. Each label
    read from standard input is then checked: if its index label matches the
    one of a reference label, a collision is reported (and, with
    ``--generate``, ``find_variants_to_block`` is called). Input labels that
    are not in the LGR are reported and skipped.
    """
    parser = argparse.ArgumentParser(description='LGR Collision')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-g', '--generate', action='store_true',
                        help='Generate variants')
    parser.add_argument('-l', '--libs', metavar='LIBS',
                        help='ICU libraries', required=True)
    parser.add_argument('-s', '--set', metavar='SET FILE',
                        help='Filepath to the set of reference labels',
                        required=True)
    parser.add_argument('xml', metavar='XML')
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(stream=sys.stdout, level=log_level)

    lgr_parser = XMLParser(args.xml)

    # The --libs value packs three fields separated by '#'.
    libpath, i18n_libpath, libver = args.libs.split('#')
    manager = UnicodeDataVersionManager()
    unidb = manager.register(None, libpath, i18n_libpath, libver)

    lgr_parser.unicode_database = unidb

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    # Maps an index label to the (last) reference label producing it
    ref_label_indexes = {}

    # Compute index label for set or reference labels
    with io.open(args.set, 'r', encoding='utf-8') as ref_set:
        for ref_label in ref_set:
            label_cp = tuple(ord(c) for c in ref_label.strip())
            try:
                label_index = compute_label_index(lgr, label_cp)
            except NotInLGR:
                continue
            ref_label_indexes[label_index] = label_cp

    # Deal with input
    for label in get_stdin().read().splitlines():
        write_output("Check label '%s'" % label)
        label_cp = tuple(ord(c) for c in label)
        label_disp = format_cp(label_cp)
        try:
            label_index = compute_label_index(lgr, label_cp)
        except NotInLGR:
            # Same failure as tolerated for reference labels above: one
            # label outside the LGR must not abort the remaining input.
            write_output("Label '%s' [%s] is not in the LGR"
                         % (label, label_disp))
            continue

        if label_index in ref_label_indexes:
            ref_label_cp = ref_label_indexes[label_index]
            ref_label_disp = format_cp(ref_label_cp)
            ref_label_u = cp_to_ulabel(ref_label_cp)

            write_output("Collision for label '%s' [%s] with '%s' [%s]"
                         % (label, label_disp, ref_label_u, ref_label_disp))
            if args.generate:
                find_variants_to_block(lgr, ref_label_cp, label_cp)
        else:
            write_output("No collision for label %s [%s]"
                         % (label, label_disp))
def parse_lgr(filename):
    """
    Parse the LGR stored under ``RESOURCE_DIR``.

    :param filename: Name of the XML file, relative to ``RESOURCE_DIR``.
    :return: The result of ``XMLParser.parse_document()`` for that file.
    """
    lgr_path = os.path.join(RESOURCE_DIR, filename)
    return XMLParser(lgr_path).parse_document()
def main():
    """
    Command-line entry point: compare two LGRs or two LGR sets.

    For single LGRs the action is one of INTERSECT, UNION (the resulting LGR
    is printed as XML) or DIFF (the textual diff is printed). For LGR sets
    (options used multiple times) only DIFF is supported. Comparing a single
    LGR with a set is rejected.
    """
    cli = argparse.ArgumentParser(description='LGR Compare CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-1', '--first', metavar='LGR1', action='append',
                     help='First LGR or LGR set if used multiple times',
                     required=True)
    cli.add_argument('-2', '--second', metavar='LGR2', action='append',
                     help='Second LGR or LGR set if used multiple times',
                     required=True)
    cli.add_argument('action', metavar="ACTION",
                     help='Compare action (INTERSECT, UNION, DIFF)',
                     choices=['INTERSECT', 'UNION', 'DIFF'])
    cli.add_argument('-g', '--generate', action='store_true',
                     help='Generate a full dump (with identical elements as well)')
    cli.add_argument('-n1', '--name-first', metavar='NAME1',
                     help="Merged LGR 1 name")
    cli.add_argument('-n2', '--name-second', metavar='NAME2',
                     help="Merged LGR 2 name")
    args = cli.parse_args()

    logging.basicConfig(stream=sys.stderr,
                        level=logging.DEBUG if args.verbose else logging.INFO)

    n_first = len(args.first)
    n_second = len(args.second)
    if (n_first == 1 and n_second > 1) or (n_second == 1 and n_first > 1):
        logger.error("Cannot compare LGR with LGR sets")
        return

    logger.info('Please wait, this can take some time...\n')

    if n_first > 1:
        # LGR sets: only the diff is available
        if args.action in ['INTERSECT', 'UNION']:
            logger.error('Cannot perform intersection or union with LGR sets')
            return

        merged_lgr_1, lgr_set_1 = merge_lgrs(args.first,
                                             name=args.name_first,
                                             rng=args.rng)
        if not merged_lgr_1:
            return

        merged_lgr_2, lgr_set_2 = merge_lgrs(args.second,
                                             name=args.name_second,
                                             rng=args.rng)
        if not merged_lgr_2:
            return

        print(diff_lgr_sets(merged_lgr_1, merged_lgr_2, lgr_set_1, lgr_set_2))
        return

    # Single LGRs
    lgr1_parser = XMLParser(args.first[0])
    lgr2_parser = XMLParser(args.second[0])

    if args.rng is not None:
        first_errors = lgr1_parser.validate_document(args.rng)
        if first_errors is not None:
            logger.error('Errors for RNG validation of first LGR: %s',
                         first_errors)
        second_errors = lgr2_parser.validate_document(args.rng)
        if second_errors is not None:
            logger.error('Errors for RNG validation of second LGR: %s',
                         second_errors)

    lgr1 = lgr1_parser.parse_document()
    if lgr1 is None:
        logger.error("Error while parsing first LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    lgr2 = lgr2_parser.parse_document()
    if lgr2 is None:
        logger.error("Error while parsing second LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    if args.action == 'DIFF':
        print(diff_lgrs(lgr1, lgr2, show_same=args.generate))
    elif args.action in ['INTERSECT', 'UNION']:
        combine = intersect_lgrs if args.action == 'INTERSECT' else union_lgrs
        lgr = combine(lgr1, lgr2)
        print(serialize_lgr_xml(lgr, pretty_print=True,
                                encoding='unicode',
                                xml_declaration=False))
def main():
    """
    Command-line entry point: check the RFC 7940 compliance of an LGR.

    The LGR is parsed with ``force_mode=False``; if parsing fails, ``FAIL``
    is written on standard output. Otherwise the document is optionally
    validated against the RNG schema, ``lgr.validate`` is run (unless test
    mode is enabled), and the result of ``lgr.get_rfc7940_validation`` for
    the policy below is written on standard output. Logging goes to standard
    error.
    """
    from lgr.parser.xml_parser import XMLParser

    parser = argparse.ArgumentParser(description='check rfc7940 compliance')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help='Be quiet (no details, no log)')
    parser.add_argument('-r', '--rng', metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-l', '--libs', metavar='LIBS',
                        help='ICU libraries')
    parser.add_argument('-u', '--unicode', metavar='Unicode',
                        default='6.3.0', help='Unicode version')
    parser.add_argument('-t', '--test', action='store_true',
                        help='Enable automatic test mode')
    parser.add_argument('xml', metavar='XML')

    args = parser.parse_args()

    # "Disable" logging in test mode except if we ask to be verbose
    log_level = logging.DEBUG if args.verbose else logging.INFO
    if args.test and not args.verbose:
        log_level = logging.ERROR
    if args.quiet:
        log_level = logging.CRITICAL
    logging.basicConfig(stream=sys.stderr, level=log_level,
                        format="%(levelname)s:%(name)s [%(filename)s:%(lineno)s] %(message)s")

    lgr_parser = XMLParser(args.xml, force_mode=False)

    unidb = None
    if args.libs is not None:
        # The --libs value packs three fields separated by '#'.
        libpath, i18n_libpath, libver = args.libs.split('#')
        manager = munidata.UnicodeDataVersionManager()
        unidb = manager.register(None, libpath, i18n_libpath, libver)

    if unidb is not None:
        lgr_parser.unicode_database = unidb

    try:
        lgr = lgr_parser.parse_document()
    except Exception:
        # Any parsing error means the document is reported as failing.
        # Only `Exception` is caught so that KeyboardInterrupt/SystemExit
        # still propagate; the cause is kept available in verbose mode.
        logger.debug("Exception raised while parsing LGR file",
                     exc_info=True)
        lgr = None

    if lgr is None:
        logger.error("Error while parsing LGR file.")
        logger.error("Please check compliance with RNG.")
        sys.stdout.write("FAIL\n")
        return

    options = {
        'unicode_version': args.unicode,
        'rfc7940': True
    }
    if unidb is not None:
        options['unidb'] = unidb

    if args.rng is not None:
        options['rng_filepath'] = args.rng
        validation_result = lgr_parser.validate_document(args.rng)
        if validation_result is not None:
            logger.error('Errors for RNG validation: %s', validation_result)

    if not args.test:
        summary = lgr.validate(options)
        logger.info('Result of validation: %s', summary)

    # Level associated with each check when building the final result
    policy = dict(
        validity_end_expiry="ERROR",
        validity_start_end="ERROR",
        validity_started="ERROR",
        metadata_description_type="ERROR",
        metadata_scope_type="ERROR",
        metadata_language="ERROR",
        metadata_version_integer="WARNING",
        data_variant_type="ERROR",
        codepoint_valid="ERROR",
        char_ascending_order="WARNING",
        char_strict_ascending_order="IGNORE",
        ref_attribute_ascending="WARNING",
        standard_dispositions="ERROR",
        basic_symmetry="WARNING",
        strict_symmetry="WARNING",
        basic_transitivity="WARNING",
        parse_xml="ERROR",
        schema="ERROR",
    )

    full_report = not args.quiet
    final_result = lgr.get_rfc7940_validation(policy, verbose=full_report)
    sys.stdout.write(final_result)
    sys.stdout.write("\n")
def load_lgr(name):
    """
    Parse an LGR from the ``inputs/harmonization`` directory.

    The directory is resolved relative to the parent of the directory
    containing this file.

    :param name: Name of the XML file inside ``inputs/harmonization``.
    :return: The result of ``XMLParser.parse_document()`` for that file.
    """
    here = os.path.dirname(__file__)
    lgr_path = os.path.join(here, '..', 'inputs', 'harmonization', name)
    return XMLParser(lgr_path).parse_document()
def main():
    """
    Command-line entry point: validate labels read from standard input.

    With a single ``--lgr-xml``, each input label is checked against that
    LGR. With several, the LGRs are merged into a set, the LGR whose first
    language equals ``--lgr-script`` is selected, the labels of
    ``--set-labels`` are filtered (invalid labels and labels not eligible
    for the script LGR are reported and dropped), and each input label is
    checked against the script LGR, the merged LGR and the filtered set
    labels.
    """
    parser = argparse.ArgumentParser(description='LGR Validate CLI')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-r', '--rng', metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-l', '--libs', metavar='LIBS',
                        help='ICU libraries', required=True)
    parser.add_argument('-g', '--variants', action='store_true',
                        help='Generate variants')
    parser.add_argument('-x', '--lgr-xml', metavar='LGR_XML',
                        action='append', required=True,
                        help='The LGR or LGR set if used multiple times')
    parser.add_argument(
        '-s', '--lgr-script', metavar='LGR_SCRIPT',
        help='If LGR is a set, the script used to validate input labels')
    parser.add_argument(
        '-f', '--set-labels', metavar='SET_LABELS',
        help='If LGR is a set, the file containing the label of the LGR set')

    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=log_level)

    # The --libs value packs three fields separated by '#'.
    libpath, i18n_libpath, libver = args.libs.split('#')
    manager = UnicodeDataVersionManager()
    unidb = manager.register(None, libpath, i18n_libpath, libver)

    if len(args.lgr_xml) > 1:
        if not args.lgr_script:
            logger.error('For LGR set, lgr script is required')
            return

        if not args.set_labels:
            logger.error('For LGR set, LGR set labels file is required')
            return

        # The RNG schema is forwarded so that --rng also applies to LGR sets
        merged_lgr, lgr_set = merge_lgrs(args.lgr_xml,
                                         rng=args.rng,
                                         unidb=unidb)
        if not merged_lgr:
            logger.error('Error while creating the merged LGR')
            return

        with io.open(args.set_labels, 'r', encoding='utf-8') as set_labels_input:
            set_labels = StringIO(set_labels_input.read())

        script_lgr = None
        for lgr_s in lgr_set:
            try:
                if lgr_s.metadata.languages[0] == args.lgr_script:
                    if script_lgr:
                        logger.warning(
                            'Script %s is provided in more than one LGR of the set, '
                            'will only evaluate with %s', args.lgr_script, lgr_s.name)
                    script_lgr = lgr_s
            except (AttributeError, IndexError):
                # LGRs without usable language metadata are skipped.
                pass

        if not script_lgr:
            logger.error(
                'Cannot find script %s in any of the LGR provided as input',
                args.lgr_script)
            return
    else:
        lgr_parser = XMLParser(args.lgr_xml[0])
        lgr_parser.unicode_database = unidb

        if args.rng is not None:
            validation_result = lgr_parser.validate_document(args.rng)
            if validation_result is not None:
                logger.error('Errors for RNG validation: %s',
                             validation_result)

        lgr = lgr_parser.parse_document()
        if lgr is None:
            logger.error("Error while parsing LGR file.")
            logger.error("Please check compliance with RNG.")
            return

    filtered_set_labels = []
    if len(args.lgr_xml) > 1:
        write_output(
            "# The following labels from the set labels are invalid\n")
        for label, valid, error in read_labels(set_labels,
                                               script_lgr.unicode_database):
            if not valid:
                write_output("{}: {}\n".format(label, error))
            else:
                label_cp = tuple(ord(c) for c in label)
                if not script_lgr._test_preliminary_eligibility(label_cp)[0]:
                    # Both values must be given as one tuple to the
                    # % operator.
                    write_output("%s: Not in LGR %s\n" % (label, script_lgr))
                else:
                    filtered_set_labels.append(label)
        write_output("# End of filtered set labels\n\n")

    for label in get_stdin().read().splitlines():
        if len(args.lgr_xml) > 1:
            check_label(script_lgr, label, args.variants,
                        merged_lgr=merged_lgr,
                        set_labels=filtered_set_labels)
        else:
            check_label(lgr, label, args.variants)