Ejemplo n.º 1
0
def check_xml_validity(lgr, options):
    """
    Serialize the LGR to XML and validate that XML against an RNG schema.

    `options` argument can contain:
        * rng_filepath: Filepath of the RNG schema used to validate.
          When the key is absent, the check is skipped and reported
          as successful.

    :param LGR lgr: The LGR to check.
    :param options: Dictionary of options to the validation function.
    :return: True if the check was skipped or the validation reported
             no error, False otherwise.
    """
    # Imported at call time (not module level) to prevent import cycles.
    from lgr.parser.xml_serializer import serialize_lgr_xml
    from lgr.parser.xml_parser import XMLParser

    logger.info("Testing XML validity")

    rng_missing = 'rng_filepath' not in options
    if rng_missing:
        logger.warning("rng_filepath not in 'options' arguments, skipping")
        return True

    serialized = serialize_lgr_xml(lgr)
    xml_parser = XMLParser(StringIO(serialized))

    # validate_document() hands back None on success, the error otherwise.
    rng_error = xml_parser.validate_document(options['rng_filepath'])
    is_valid = rng_error is None
    if is_valid:
        logger.info('RNG validation OK')
    else:
        logger.warning('RNG validation failed: XML error is')
        logger.warning(rng_error)

    logger.info("Testing XML validity done")

    return is_valid
Ejemplo n.º 2
0
def get_by_name(repertoire_name):
    """
    Return the parsed repertoire registered under `repertoire_name`.

    Parsed repertoires are kept in the module-level REPERTOIRES mapping.
    On a miss, the file `<repertoire_name>.xml` located in
    settings.REPERTOIRE_STORAGE_LOCATION is parsed, its ranges are
    expanded, and the document is stored in the mapping before being
    returned.

    :param repertoire_name: Name of the repertoire (file name without
                            the `.xml` extension).
    :return: The parsed repertoire document.
    """
    if repertoire_name in REPERTOIRES:
        return REPERTOIRES[repertoire_name]

    logger.debug("%s parsing file", repertoire_name)
    xml_path = os.path.join(settings.REPERTOIRE_STORAGE_LOCATION,
                            '{}.xml'.format(repertoire_name))
    repertoire = XMLParser(xml_path, repertoire_name).parse_document()
    # Ranges are expanded so that every code point can be walked through.
    repertoire.expand_ranges()
    REPERTOIRES[repertoire_name] = repertoire

    return REPERTOIRES[repertoire_name]
Ejemplo n.º 3
0
def main():
    """
    Command-line entry point: populate the variants of an LGR.

    Parses the LGR given with --lgr-xml (optionally validating it against
    the --rng schema first), calls populate_variants() on it and prints
    the resulting XML on standard output. Logging goes to stderr.
    """
    cli = argparse.ArgumentParser(description='LGR Populate Variants CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries', required=True)
    cli.add_argument('-x', '--lgr-xml', metavar='LGR',
                     help='The LGR to populate', required=True)
    args = cli.parse_args()

    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(stream=sys.stderr, level=level)

    # --libs carries three '#'-separated fields.
    libpath, i18n_libpath, libver = args.libs.split('#')
    unidb = UnicodeDataVersionManager().register(None, libpath,
                                                 i18n_libpath, libver)

    lgr_parser = XMLParser(args.lgr_xml)
    lgr_parser.unicode_database = unidb

    if args.rng is not None:
        # A schema error is only reported; parsing is attempted anyway.
        rng_error = lgr_parser.validate_document(args.rng)
        if rng_error is not None:
            logger.error('Errors for RNG validation of LGR file %s: %s',
                         args.lgr_xml, rng_error)

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file %s", args.lgr_xml)
        logger.error("Please check compliance with RNG.")
        return

    lgr.populate_variants()
    populated_xml = serialize_lgr_xml(lgr,
                                      pretty_print=True,
                                      encoding='unicode',
                                      xml_declaration=False)
    print(populated_xml)
Ejemplo n.º 4
0
def main():
    """
    Command-line entry point: parse an LGR XML file and dump it back.

    The document is optionally validated against the --rng schema; a
    validation error is printed and stops the run. In verbose mode every
    entry of the repertoire is printed. The re-serialized XML is written
    to --output when given, otherwise printed on standard output.
    """
    from lgr.parser.xml_parser import XMLParser
    from lgr.parser.xml_serializer import serialize_lgr_xml

    cli = argparse.ArgumentParser(
        description='Parse and dump a LGR XML file')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-o', '--output', metavar='OUTPUT',
                     help='Optional output file')
    cli.add_argument('xml', metavar='XML')
    args = cli.parse_args()

    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=level)

    lgr_parser = XMLParser(args.xml)

    if args.rng is not None:
        rng_error = lgr_parser.validate_document(args.rng)
        if rng_error is not None:
            print(rng_error)
            return

    lgr = lgr_parser.parse_document()

    if args.verbose:
        for entry in lgr.repertoire:
            print(entry)

    if args.output is None:
        # No output file: dump the document as text on stdout.
        print(serialize_lgr_xml(lgr,
                                pretty_print=True,
                                encoding='unicode',
                                xml_declaration=False))
        return

    # The serializer output is written through a binary file handle.
    serialized = serialize_lgr_xml(lgr, pretty_print=True)
    with io.open(args.output, mode='wb') as out_file:
        out_file.write(serialized)
Ejemplo n.º 5
0
def merge_lgr_set(lgr_set, name):
    """
    Merge the LGRs of a set into a single LGR.

    Note that `lgr_set` is sorted in place and that expand_ranges() is
    called on each of its LGRs.

    :param lgr_set: The list of LGRs in the set
    :param name: Merged LGR name
    :return: New LGR (merge of LGR set)
    """
    logger.debug("Merge %s", name)

    def _script_order(one_lgr):
        # Replacing 'und-' by 'zzz' changes where such scripts sort.
        return get_script(one_lgr).replace('und-', 'zzz')

    lgr_set.sort(key=_script_order)

    # Collect the distinct Unicode versions, in order of first appearance,
    # and warn when the set mixes several of them.
    versions = OrderedDict.fromkeys(member.metadata.unicode_version
                                    for member in lgr_set)
    if len(versions) > 1:
        logger.warning("Different unicode version in set: %s",
                       versions.keys())

    ref_mapping = {}
    merged_metadata = copy.deepcopy(merge_metadata(lgr_set))
    merged_lgr = LGR(name=name, metadata=merged_metadata)
    seen_scripts = []
    for member in lgr_set:
        script = get_script(member)
        member.expand_ranges()

        merge_references(member, script, merged_lgr, ref_mapping)
        merge_chars(member, script, merged_lgr, ref_mapping, seen_scripts)
        merge_actions(member, script, merged_lgr, ref_mapping)
        merge_rules(member, script, merged_lgr, ref_mapping)
        merge_classes(member, script, merged_lgr, ref_mapping)
        seen_scripts.append(script)

    # XXX The merged_lgr built above is not a valid Python LGR object:
    # it is serialized and parsed again to obtain a valid one.
    serialized = serialize_lgr_xml(merged_lgr)
    reparser = XMLParser(source=BytesIO(serialized), filename=name)

    return reparser.parse_document()
Ejemplo n.º 6
0
def main():
    """
    Command-line entry point: check labels read from stdin against an LGR.

    The LGR XML file is optionally validated against the --rng schema and
    parsed (with a Unicode database when --libs is given); each line read
    from standard input (decoded as UTF-8) is then passed to check_label().
    """
    cli = argparse.ArgumentParser(description='LGR CLI')
    cli.add_argument('-r', '--rng', metavar='RNG', help='RelaxNG XML schema')
    cli.add_argument('-l', '--libs', metavar='LIBS', help='ICU libraries')
    cli.add_argument('-v', '--variants', action='store_true',
                     help='Generate variants')
    cli.add_argument('xml', metavar='XML')
    args = cli.parse_args()

    logging.basicConfig(stream=sys.stdout, level=logging.ERROR)

    lgr_parser = XMLParser(args.xml)

    unidb = None
    if args.libs is not None:
        # --libs carries three '#'-separated fields.
        libpath, i18n_libpath, libver = args.libs.split('#')
        unidb = UnicodeDataVersionManager().register(None, libpath,
                                                     i18n_libpath, libver)

    if args.rng is not None:
        # A schema error is only reported; parsing is attempted anyway.
        rng_error = lgr_parser.validate_document(args.rng)
        if rng_error is not None:
            logger.error('Errors for RNG validation: %s', rng_error)

    if unidb is not None:
        lgr_parser.unicode_database = unidb

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    utf8_stdin = codecs.getreader('utf8')(sys.stdin)
    labels = utf8_stdin.read().splitlines()
    for label in labels:
        check_label(lgr, label, args.variants)
Ejemplo n.º 7
0
def merge_lgrs(input_lgrs, name=None, rng=None, unidb=None):
    """
    Merge LGRs to create a LGR set

    Each input file is parsed (after an optional RNG validation whose
    errors are logged but do not stop the processing), then the parsed
    LGRs are merged with merge_lgr_set().

    :param input_lgrs: The LGRs belonging to the set
    :param name: The merged LGR name ('merged-lgr-set' when not given)
    :param rng: The RNG file to validate input LGRs
    :param unidb: The unicode database
    :return: The merged LGR and the LGRs in the set, as a 2-tuple.
             `(None, None)` if one of the input files cannot be parsed.
    """
    lgr_set = []
    for lgr_file in input_lgrs:
        lgr_parser = XMLParser(lgr_file)
        if unidb:
            lgr_parser.unicode_database = unidb

        if rng:
            validation_result = lgr_parser.validate_document(rng)
            if validation_result is not None:
                logger.error('Errors for RNG validation of LGR %s: %s',
                             lgr_file, validation_result)

        lgr = lgr_parser.parse_document()
        if lgr is None:
            logger.error("Error while parsing LGR file %s.", lgr_file)
            logger.error("Please check compliance with RNG.")
            # Always hand back a pair: callers unpack the result into two
            # names, which would raise TypeError on a bare None.
            return None, None

        lgr_set.append(lgr)

    if not name:
        name = 'merged-lgr-set'

    merged_lgr = merge_lgr_set(lgr_set, name)
    if unidb:
        merged_lgr.unicode_database = unidb

    return merged_lgr, lgr_set
Ejemplo n.º 8
0
    def _parse_lgr(cls, name, xml, validate):
        """
        Build an LGR object from its XML text.

        :param name: Name given to the XML parser along with the data.
        :param xml: The XML document; assumed to be unicode data, it is
                    encoded to UTF-8 before being parsed.
        :param validate: When true, the document is validated against
                         settings.LGR_RNG_FILE and parsed with the Unicode
                         database set. Otherwise the database is only
                         attached to the LGR once parsing is done.
        :return: The parsed LGR.
        :raises LGRValidationException: If `validate` is set and the RNG
                                        validation reports an error.
        """
        lgr_parser = XMLParser(six.BytesIO(xml.encode('utf-8')), name)

        # Why two modes: parsing with a Unicode database is slower because
        # of the Unicode-related checks (IDNA validity, script checking),
        # and repeating them at each parsing (i.e. each request) is not
        # really useful.
        # - On first import of the LGR (`validate` is true) everything is
        #   checked: schema, then parsing with the database set.
        # - Otherwise (LGR already in the user's session) the database is
        #   left out of the parsing and attached afterwards, so that
        #   user's input can still be validated later on.
        if validate:
            rng_error = lgr_parser.validate_document(settings.LGR_RNG_FILE)
            if rng_error is not None:
                raise LGRValidationException(rng_error)

            # The database is selected from the version read by the parser.
            version = lgr_parser.unicode_version()
            lgr_parser.unicode_database = unidb.manager.get_db_by_version(
                version)

        lgr = lgr_parser.parse_document()

        if not validate:
            # The database is selected from the parsed LGR's metadata.
            version = lgr.metadata.unicode_version
            lgr.unicode_database = unidb.manager.get_db_by_version(version)
        return lgr
Ejemplo n.º 9
0
def main():
    """
    Command-line entry point: annotate reference labels with an LGR.

    With a single --lgr-xml the file is parsed and the labels are
    processed with annotate(). With several --lgr-xml the files are merged
    as an LGR set, the LGR whose first metadata language equals
    --lgr-script is selected, and the labels are processed with
    lgr_set_annotate(). Results are written to the --output file.
    """
    parser = argparse.ArgumentParser(description='LGR Annotate CLI')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-r', '--rng', metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-l', '--libs', metavar='LIBS',
                        help='ICU libraries', required=True)
    parser.add_argument('-o', '--output', metavar='OUTPUT_FILE',
                        help='File path to output the annotated labels',
                        required=True)
    parser.add_argument('-x', '--lgr-xml', metavar='LGR_XML', action='append', required=True,
                        help='The LGR or LGR set if used multiple times')
    parser.add_argument('-s', '--lgr-script', metavar='LGR_SCRIPT',
                        help='If LGR is a set, the script used to validate input labels')
    parser.add_argument('-f', '--set-labels', metavar='SET_LABELS',
                        help='If LGR is a set, the file containing the label of the LGR set')
    parser.add_argument('labels', metavar='LABELS', help='File path to the reference labels to annotate')
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(stream=sys.stdout, level=log_level)

    # --libs carries three '#'-separated fields.
    libpath, i18n_libpath, libver = args.libs.split('#')
    manager = UnicodeDataVersionManager()
    unidb = manager.register(None, libpath, i18n_libpath, libver)

    is_set = len(args.lgr_xml) > 1
    if is_set:
        if not args.lgr_script:
            logger.error('For LGR set, lgr script is required')
            return

        merge_result = merge_lgrs(args.lgr_xml,
                                  rng=args.rng,
                                  unidb=unidb)
        # Check before unpacking: a failed merge may yield None rather
        # than a (merged LGR, LGR set) pair, and unpacking None would
        # raise TypeError instead of reporting the error.
        if not merge_result or not merge_result[0]:
            logger.error('Error while creating the merged LGR')
            return
        merged_lgr, lgr_set = merge_result

        # Labels of the set: empty unless a --set-labels file is given.
        set_labels = io.StringIO()
        if args.set_labels:
            with io.open(args.set_labels, 'r', encoding='utf-8') as set_labels_input:
                set_labels = io.StringIO(set_labels_input.read())

        script_lgr = None
        for lgr_s in lgr_set:
            try:
                if lgr_s.metadata.languages[0] == args.lgr_script:
                    if script_lgr:
                        logger.warning('Script %s is provided in more than one LGR of the set, '
                                       'will only evaluate with %s', args.lgr_script, lgr_s.name)
                    script_lgr = lgr_s
            except (AttributeError, IndexError):
                # LGR without usable language metadata: cannot match.
                pass

        if not script_lgr:
            logger.error('Cannot find script %s in any of the LGR provided as input', args.lgr_script)
            return
    else:
        lgr_parser = XMLParser(args.lgr_xml[0])
        lgr_parser.unicode_database = unidb

        if args.rng is not None:
            # A schema error is only reported; parsing is attempted anyway.
            validation_result = lgr_parser.validate_document(args.rng)
            if validation_result is not None:
                logger.error('Errors for RNG validation: %s', validation_result)

        lgr = lgr_parser.parse_document()
        if lgr is None:
            logger.error("Error while parsing LGR file.")
            logger.error("Please check compliance with RNG.")
            return

    # Compute index label
    with io.open(args.labels, 'r', encoding='utf-8') as labels_input:
        with io.open(args.output, 'w', encoding='utf-8') as labels_output:
            if is_set:
                for out in lgr_set_annotate(merged_lgr, script_lgr, set_labels, labels_input):
                    labels_output.write(out)
            else:
                for out in annotate(lgr, labels_input):
                    labels_output.write(out)
Ejemplo n.º 10
0
def main():
    """
    Command-line entry point: validate an LGR and optionally check labels.

    The LGR XML file is parsed (after an optional RNG validation), then
    validated unless --test is set. With --check, the label given on the
    command line (as space-separated hexadecimal code points) or, failing
    that, each line of standard input is passed to check_label().
    """
    from lgr.parser.xml_parser import XMLParser

    cli = argparse.ArgumentParser(description='LGR CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-q', '--quiet', action='store_true',
                     help='Be quiet (no log)')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-m', '--msr', metavar='MSR',
                     help='Validating repertoire')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries')
    cli.add_argument('-u', '--unicode', metavar='Unicode',
                     default='6.3.0', help='Unicode version')
    cli.add_argument('-t', '--test', action='store_true',
                     help='Enable automatic test mode')
    cli.add_argument('-c', '--check', action='store_true',
                     help='Enable label checking')
    cli.add_argument('-i', '--invalid', action='store_true',
                     help='Do not filter out "invalid" labels')
    cli.add_argument('xml', metavar='XML')
    cli.add_argument('label', metavar='LABEL', nargs='?')
    args = cli.parse_args()

    # Quiet wins over everything; test mode "disables" logging unless
    # verbosity was explicitly requested.
    if args.quiet:
        level = logging.CRITICAL
    elif args.verbose:
        level = logging.DEBUG
    elif args.test:
        level = logging.ERROR
    else:
        level = logging.INFO
    logging.basicConfig(stream=sys.stderr, level=level,
                        format="%(levelname)s:%(name)s [%(filename)s:%(lineno)s] %(message)s")

    lgr_parser = XMLParser(args.xml)

    unidb = None
    if args.libs is not None:
        # --libs carries three '#'-separated fields.
        libpath, i18n_libpath, libver = args.libs.split('#')
        unidb = UnicodeDataVersionManager().register(None, libpath,
                                                     i18n_libpath, libver)

    if args.rng is not None:
        # A schema error is only reported; parsing is attempted anyway.
        rng_error = lgr_parser.validate_document(args.rng)
        if rng_error is not None:
            logger.error('Errors for RNG validation: %s', rng_error)

    msr = None
    if args.msr is not None:
        msr = XMLParser(args.msr).parse_document()

    if unidb is not None:
        lgr_parser.unicode_database = unidb

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    options = {
        'validating_repertoire': msr,
        'unicode_version': args.unicode,
    }
    if unidb is not None:
        options['unidb'] = unidb

    if not args.test:
        summary = lgr.validate(options)
        logger.info('Result of validation: %s', summary)

    if not args.check:
        return

    if args.label:
        # The command-line label is a list of hexadecimal code points.
        code_points = [int(cphex, 16) for cphex in args.label.split()]
        label_u = ''.join(wide_unichr(cp) for cp in code_points)
        check_label(lgr, label_u, args.invalid, args.test)
        return

    for label in get_stdin().read().splitlines():
        logger.info("Label '%s'", label)
        check_label(lgr, label, args.invalid, args.test)
0
def main():
    """
    Command-line entry point: diff two LGRs or list collisions for one LGR.

    When --second is given, the labels of the --set file are processed
    with diff() against both LGRs; otherwise collision() is run on the
    first LGR only. Every result is emitted through write_output().
    """
    cli = argparse.ArgumentParser(description='LGR diff and collision CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-l', '--libs', metavar='LIBS',
                     help='ICU libraries', required=True)
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-1', '--first', metavar='LGR1',
                     help='First LGR', required=True)
    cli.add_argument('-2', '--second', metavar='LGR2',
                     help='Second LGR', required=False)
    cli.add_argument('-s', '--set', metavar='SET_FILE',
                     help='Filepath to the set of reference labels',
                     required=True)
    cli.add_argument('-g', '--generate', action='store_true',
                     help='Generate a full dump')
    cli.add_argument('-q', '--quiet', action='store_true',
                     help='Do not print rules as it may be very very '
                     'verbose (None will be printed instead)')
    args = cli.parse_args()

    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(stream=sys.stdout, level=level)

    # --libs carries three '#'-separated fields.
    libpath, i18n_libpath, libver = args.libs.split('#')
    unidb = UnicodeDataVersionManager().register(None, libpath,
                                                 i18n_libpath, libver)

    has_second = args.second is not None

    lgr1_parser = XMLParser(args.first)
    lgr1_parser.unicode_database = unidb
    if has_second:
        lgr2_parser = XMLParser(args.second)
        lgr2_parser.unicode_database = unidb
    else:
        write_output("No second LGR, will only output collisions")

    if args.rng is not None:
        # Schema errors are only reported; parsing is attempted anyway.
        rng_error = lgr1_parser.validate_document(args.rng)
        if rng_error is not None:
            logger.error('Errors for RNG validation of first LGR: %s',
                         rng_error)
        if has_second:
            rng_error = lgr2_parser.validate_document(args.rng)
            if rng_error is not None:
                logger.error('Errors for RNG validation of second LGR: %s',
                             rng_error)

    lgr1 = lgr1_parser.parse_document()
    if lgr1 is None:
        logger.error("Error while parsing first LGR file.")
        logger.error("Please check compliance with RNG.")
        return
    if has_second:
        lgr2 = lgr2_parser.parse_document()
        if lgr2 is None:
            logger.error("Error while parsing second LGR file.")
            logger.error("Please check compliance with RNG.")
            return

    write_output('Please wait, this can take some time...\n')

    with io.open(args.set, 'r', encoding='utf-8') as label_input:
        if has_second:
            results = diff(lgr1, lgr2, label_input, True, args.generate,
                           args.quiet)
        else:
            results = collision(lgr1, label_input, args.generate,
                                args.quiet)
        # Results are consumed while the label file is still open.
        for out in results:
            write_output(out)
Ejemplo n.º 12
0
def main():
    """
    Command-line entry point: detect collisions between labels and a set.

    A label index is computed for every label of the --set file (labels
    that are not in the LGR are skipped). Each label read from standard
    input is then indexed the same way and reported as colliding when its
    index matches the index of a reference label. With --generate,
    find_variants_to_block() is called for each collision.
    """
    parser = argparse.ArgumentParser(description='LGR Collision')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='be verbose')
    parser.add_argument('-g',
                        '--generate',
                        action='store_true',
                        help='Generate variants')
    parser.add_argument('-l',
                        '--libs',
                        metavar='LIBS',
                        help='ICU libraries',
                        required=True)
    parser.add_argument('-s',
                        '--set',
                        metavar='SET FILE',
                        help='Filepath to the set of reference labels',
                        required=True)
    parser.add_argument('xml', metavar='XML')

    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(stream=sys.stdout, level=log_level)

    lgr_parser = XMLParser(args.xml)

    # --libs carries three '#'-separated fields.
    libpath, i18n_libpath, libver = args.libs.split('#')
    manager = UnicodeDataVersionManager()
    unidb = manager.register(None, libpath, i18n_libpath, libver)

    lgr_parser.unicode_database = unidb

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    # Maps a label index to the code points of the reference label that
    # produced it.
    ref_label_indexes = {}

    # Compute index label for set or reference labels
    with io.open(args.set, 'r', encoding='utf-8') as ref_set:
        for ref_label in ref_set:
            label_cp = tuple(ord(c) for c in ref_label.strip())
            try:
                label_index = compute_label_index(lgr, label_cp)
            except NotInLGR:
                continue
            ref_label_indexes[label_index] = label_cp

    # Deal with input
    for label in get_stdin().read().splitlines():
        write_output("Check label '%s'" % label)
        label_cp = tuple(ord(c) for c in label)
        label_disp = format_cp(label_cp)
        try:
            label_index = compute_label_index(lgr, label_cp)
        except NotInLGR:
            # Same tolerance as for reference labels: one label outside
            # the LGR must not abort the processing of the other ones.
            write_output("Label '%s' [%s] is not in the LGR" %
                         (label, label_disp))
            continue

        if label_index in ref_label_indexes:
            ref_label_cp = ref_label_indexes[label_index]
            ref_label_disp = format_cp(ref_label_cp)
            ref_label_u = cp_to_ulabel(ref_label_cp)

            write_output("Collision for label '%s' [%s] with '%s' [%s]" %
                         (label, label_disp, ref_label_u, ref_label_disp))
            if args.generate:
                find_variants_to_block(lgr, ref_label_cp, label_cp)
        else:
            write_output("No collision for label %s [%s]" %
                         (label, label_disp))
0
def parse_lgr(filename):
    """
    Parse the LGR file `filename` located in RESOURCE_DIR.

    :param filename: File name, relative to RESOURCE_DIR.
    :return: The result of XMLParser.parse_document() for that file.
    """
    lgr_path = os.path.join(RESOURCE_DIR, filename)
    lgr_parser = XMLParser(lgr_path)
    return lgr_parser.parse_document()
Ejemplo n.º 14
0
def main():
    """
    Command-line entry point: compare two LGRs or two LGR sets.

    With a single --first and a single --second, the two LGRs are parsed
    and the requested ACTION is applied: INTERSECT and UNION print the
    resulting LGR as XML, DIFF prints the textual difference. When both
    options are repeated, each group is merged as an LGR set and only DIFF
    is supported. Mixing one LGR with an LGR set is rejected.
    """
    parser = argparse.ArgumentParser(description='LGR Compare CLI')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-r', '--rng', metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-1', '--first', metavar='LGR1', action='append',
                        help='First LGR or LGR set if used multiple times',
                        required=True)
    parser.add_argument('-2', '--second', metavar='LGR2', action='append',
                        help='Second LGR or LGR set if used multiple times',
                        required=True)
    parser.add_argument('action', metavar="ACTION",
                        help='Compare action (INTERSECT, UNION, DIFF)',
                        choices=['INTERSECT', 'UNION', 'DIFF'])
    parser.add_argument('-g', '--generate', action='store_true',
                        help='Generate a full dump (with identical elements as well)')
    parser.add_argument('-n1', '--name-first', metavar='NAME1', help="Merged LGR 1 name")
    parser.add_argument('-n2', '--name-second', metavar='NAME2', help="Merged LGR 2 name")

    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    if (len(args.first) == 1 and len(args.second) > 1) or (len(args.second) == 1 and len(args.first) > 1):
        logger.error("Cannot compare LGR with LGR sets")
        return

    logger.info('Please wait, this can take some time...\n')

    if len(args.first) > 1:
        if args.action in ['INTERSECT', 'UNION']:
            logger.error('Cannot perform intersection or union with LGR sets')
            return

        # Check each merge result before unpacking it: a failed merge may
        # yield None rather than a (merged LGR, LGR set) pair, and
        # unpacking None would raise TypeError.
        first_merge = merge_lgrs(args.first, name=args.name_first, rng=args.rng)
        if not first_merge or not first_merge[0]:
            return
        merged_lgr_1, lgr_set_1 = first_merge

        second_merge = merge_lgrs(args.second, name=args.name_second, rng=args.rng)
        if not second_merge or not second_merge[0]:
            return
        merged_lgr_2, lgr_set_2 = second_merge

        print(diff_lgr_sets(merged_lgr_1, merged_lgr_2, lgr_set_1, lgr_set_2))
    else:
        lgr1_parser = XMLParser(args.first[0])
        lgr2_parser = XMLParser(args.second[0])

        if args.rng is not None:
            # Schema errors are only reported; parsing is attempted anyway.
            validation_result = lgr1_parser.validate_document(args.rng)
            if validation_result is not None:
                logger.error('Errors for RNG validation of first LGR: %s',
                             validation_result)
            validation_result = lgr2_parser.validate_document(args.rng)
            if validation_result is not None:
                logger.error('Errors for RNG validation of second LGR: %s',
                             validation_result)

        lgr1 = lgr1_parser.parse_document()
        if lgr1 is None:
            logger.error("Error while parsing first LGR file.")
            logger.error("Please check compliance with RNG.")
            return
        lgr2 = lgr2_parser.parse_document()
        if lgr2 is None:
            logger.error("Error while parsing second LGR file.")
            logger.error("Please check compliance with RNG.")
            return

        if args.action in ['INTERSECT', 'UNION']:
            if args.action == 'INTERSECT':
                lgr = intersect_lgrs(lgr1, lgr2)
            elif args.action == 'UNION':
                lgr = union_lgrs(lgr1, lgr2)

            print(serialize_lgr_xml(lgr, pretty_print=True, encoding='unicode', xml_declaration=False))
        elif args.action == 'DIFF':
            print(diff_lgrs(lgr1, lgr2, show_same=args.generate))
0
def main():
    """
    Command-line entry point: check the RFC 7940 compliance of an LGR.

    The LGR XML file is parsed; when parsing fails (returns nothing or
    raises), "FAIL" is written on standard output. Otherwise the LGR is
    optionally validated against the --rng schema, validated unless --test
    is set, and the result of get_rfc7940_validation() is written on
    standard output.
    """
    from lgr.parser.xml_parser import XMLParser

    parser = argparse.ArgumentParser(description='check rfc7940 compliance')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='be verbose')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help='Be quiet (no details, no log)')
    parser.add_argument('-r', '--rng', metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-l', '--libs', metavar='LIBS',
                        help='ICU libraries')
    parser.add_argument('-u', '--unicode', metavar='Unicode',
                        default='6.3.0', help='Unicode version')
    parser.add_argument('-t', '--test', action='store_true',
                        help='Enable automatic test mode')
    parser.add_argument('xml', metavar='XML')

    args = parser.parse_args()

    # "Disable" logging in test mode except if we ask to be verbose
    log_level = logging.DEBUG if args.verbose else logging.INFO
    if args.test and not args.verbose:
        log_level = logging.ERROR
    if args.quiet:
        log_level = logging.CRITICAL
    logging.basicConfig(stream=sys.stderr, level=log_level,
                        format="%(levelname)s:%(name)s [%(filename)s:%(lineno)s] %(message)s")

    lgr_parser = XMLParser(args.xml, force_mode=False)

    unidb = None
    if args.libs is not None:
        # --libs carries three '#'-separated fields.
        libpath, i18n_libpath, libver = args.libs.split('#')
        manager = munidata.UnicodeDataVersionManager()
        unidb = manager.register(None, libpath, i18n_libpath, libver)

    if unidb is not None:
        lgr_parser.unicode_database = unidb

    try:
        lgr = lgr_parser.parse_document()
    except Exception:
        # Any parsing error counts as a failed check. KeyboardInterrupt
        # and SystemExit are deliberately not caught so that the tool can
        # still be interrupted. The traceback is kept for verbose runs.
        logger.debug("Exception raised while parsing LGR file.",
                     exc_info=True)
        lgr = None

    if lgr is None:
        logger.error("Error while parsing LGR file.")
        logger.error("Please check compliance with RNG.")
        sys.stdout.write("FAIL\n")
        return

    options = {
        'unicode_version': args.unicode,
        'rfc7940': True
    }
    if unidb is not None:
        options['unidb'] = unidb

    if args.rng is not None:
        options['rng_filepath'] = args.rng
        # A schema error is only reported; the checks still run.
        validation_result = lgr_parser.validate_document(args.rng)
        if validation_result is not None:
            logger.error('Errors for RNG validation: %s', validation_result)

    if not args.test:
        summary = lgr.validate(options)
        logger.info('Result of validation: %s', summary)

    # Level associated with each named check, as passed to
    # get_rfc7940_validation().
    policy = dict(
        validity_end_expiry="ERROR",
        validity_start_end="ERROR",
        validity_started="ERROR",
        metadata_description_type="ERROR",
        metadata_scope_type="ERROR",
        metadata_language="ERROR",
        metadata_version_integer="WARNING",
        data_variant_type="ERROR",
        codepoint_valid="ERROR",
        char_ascending_order="WARNING",
        char_strict_ascending_order="IGNORE",
        ref_attribute_ascending="WARNING",
        standard_dispositions="ERROR",
        basic_symmetry="WARNING",
        strict_symmetry="WARNING",
        basic_transitivity="WARNING",
        parse_xml="ERROR",
        schema="ERROR",
    )

    full_report = not args.quiet

    final_result = lgr.get_rfc7940_validation(policy, verbose=full_report)

    sys.stdout.write(final_result)
    sys.stdout.write("\n")
0
def load_lgr(name):
    """
    Parse the LGR file `name` from the `inputs/harmonization` directory.

    The directory is resolved relative to the parent of the directory
    containing this module.

    :param name: File name inside `inputs/harmonization`.
    :return: The result of XMLParser.parse_document() for that file.
    """
    module_dir = os.path.dirname(__file__)
    lgr_path = os.path.join(module_dir, '..', 'inputs', 'harmonization', name)
    return XMLParser(lgr_path).parse_document()
Ejemplo n.º 17
0
def main():
    """
    Command-line entry point of the LGR Validate tool.

    Labels are read from standard input, one per line, and each one is
    passed to `check_label`.

    Two modes are supported, selected by how many times ``-x/--lgr-xml``
    is given:

        * Single LGR: the file is optionally validated against the RNG
          schema (``-r``), parsed, and used to check each label.
        * LGR set (``-x`` used more than once): the LGRs are merged with
          `merge_lgrs`, the LGR whose first metadata language equals
          ``-s/--lgr-script`` is selected, and the set labels file
          (``-f/--set-labels``) is filtered before checking the labels.

    The function returns None in all cases; errors are reported through
    the module logger before returning early.
    """
    parser = argparse.ArgumentParser(description='LGR Validate CLI')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='be verbose')
    parser.add_argument('-r',
                        '--rng',
                        metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-l',
                        '--libs',
                        metavar='LIBS',
                        help='ICU libraries',
                        required=True)
    parser.add_argument('-g',
                        '--variants',
                        action='store_true',
                        help='Generate variants')
    parser.add_argument('-x',
                        '--lgr-xml',
                        metavar='LGR_XML',
                        action='append',
                        required=True,
                        help='The LGR or LGR set if used multiple times')
    parser.add_argument(
        '-s',
        '--lgr-script',
        metavar='LGR_SCRIPT',
        help='If LGR is a set, the script used to validate input labels')
    parser.add_argument(
        '-f',
        '--set-labels',
        metavar='SET_LABELS',
        help='If LGR is a set, the file containing the label of the LGR set')
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=log_level)

    # --libs carries three '#'-separated values in a single argument.
    libpath, i18n_libpath, libver = args.libs.split('#')
    manager = UnicodeDataVersionManager()
    unidb = manager.register(None, libpath, i18n_libpath, libver)

    # More than one -x option means the input is an LGR set.
    is_lgr_set = len(args.lgr_xml) > 1

    if is_lgr_set:
        if not args.lgr_script:
            logger.error('For LGR set, lgr script is required')
            return

        if not args.set_labels:
            logger.error('For LGR set, LGR set labels file is required')
            return

        merged_lgr, lgr_set = merge_lgrs(args.lgr_xml, unidb=unidb)
        if not merged_lgr:
            logger.error('Error while creating the merged LGR')
            return

        # Read the whole file up front so it can be closed immediately.
        with io.open(args.set_labels, 'r',
                     encoding='utf-8') as set_labels_input:
            set_labels = StringIO(set_labels_input.read())

        script_lgr = None
        for lgr_s in lgr_set:
            try:
                if lgr_s.metadata.languages[0] == args.lgr_script:
                    if script_lgr:
                        logger.warning(
                            'Script %s is provided in more than one LGR of the set, '
                            'will only evaluate with %s', args.lgr_script,
                            lgr_s.name)
                    script_lgr = lgr_s
            except (AttributeError, IndexError):
                # LGR without usable language metadata: it cannot match.
                pass

        if not script_lgr:
            logger.error(
                'Cannot find script %s in any of the LGR provided as input',
                args.lgr_script)
            return
    else:
        lgr_parser = XMLParser(args.lgr_xml[0])
        lgr_parser.unicode_database = unidb

        if args.rng is not None:
            validation_result = lgr_parser.validate_document(args.rng)
            if validation_result is not None:
                # Schema errors are reported but parsing is still attempted.
                logger.error('Errors for RNG validation: %s',
                             validation_result)

        lgr = lgr_parser.parse_document()
        if lgr is None:
            logger.error("Error while parsing LGR file.")
            logger.error("Please check compliance with RNG.")
            return

    filtered_set_labels = []
    if is_lgr_set:
        write_output(
            "# The following labels from the set labels are invalid\n")
        for label, valid, error in read_labels(set_labels,
                                               script_lgr.unicode_database):
            if not valid:
                write_output("{}: {}\n".format(label, error))
                continue

            label_cp = tuple(ord(c) for c in label)
            if not script_lgr._test_preliminary_eligibility(label_cp)[0]:
                # Both values must be given to '%' as a tuple: formatting
                # with `label` alone raises TypeError because the message
                # has two placeholders.
                write_output("%s: Not in LGR %s\n" % (label, script_lgr))
            else:
                filtered_set_labels.append(label)
        write_output("# End of filtered set labels\n\n")

    for label in get_stdin().read().splitlines():
        if is_lgr_set:
            check_label(script_lgr,
                        label,
                        args.variants,
                        merged_lgr=merged_lgr,
                        set_labels=filtered_set_labels)
        else:
            check_label(lgr, label, args.variants)