Esempio n. 1
0
def lgr_set_annotate(lgr, script_lgr, set_labels_input, labels_input):
    """
    Generate the annotation lines for labels checked against an LGR set.

    The output starts with a report of the set labels that are rejected,
    followed by one entry per input label giving its disposition and, when
    applicable, a note about a clash with the retained set labels.

    :param lgr: The LGR set object.
    :param script_lgr: The LGR object for the script used to check label validity.
    :param set_labels_input: The labels in the lgr set.
    :param labels_input: The file containing the labels
    """
    # Step 1: only keep the set labels that pass the preliminary eligibility
    # test of the LGR set; report every other one.
    yield "# The following labels from the set labels are invalid\n"
    kept_set_labels = []
    for set_label, is_valid, reason in read_labels(set_labels_input, lgr.unicode_database):
        if not is_valid:
            yield "%s: %s\n" % (set_label, reason)
            continue
        code_points = tuple(ord(char) for char in set_label)
        if lgr._test_preliminary_eligibility(code_points)[0]:
            kept_set_labels.append(set_label)
        else:
            yield "%s: invalid\n" % set_label
    yield "# End of filtered set labels\n\n"

    # Step 2: annotate each input label.
    for label, is_valid, reason in read_labels(labels_input, script_lgr.unicode_database):
        if not is_valid:
            yield "%s: %s\n" % (label, reason)
            continue

        code_points = tuple(ord(char) for char in label)
        # The proposed label is first evaluated with the Element LGR of the
        # script that was selected for it.
        result = script_lgr.test_label_eligible(code_points, collect_log=False)
        (eligible, _, label_invalid_parts, disp, action_idx, _) = result

        collision = ''
        if eligible:
            # The eligible label is then checked against the common LGR
            # together with the retained set labels to detect collisions.
            if label in kept_set_labels:
                collision = 'Label is in the LGR set labels'
            if len(get_collisions(lgr, kept_set_labels + [label], quiet=False)) > 0:
                collision = 'Label collides with the LGR set labels'

        # TODO do we need to change disp to invalid when there is a collision?
        out = '{} - {}'.format(disp, collision) if collision else disp

        for line in _out_valid_label(lgr, label, eligible, label_invalid_parts, out, action_idx):
            yield line
Esempio n. 2
0
def annotate(lgr, labels_input):
    """
    Yield one "label: disposition" line per label read from the input.

    :param lgr: The LGR info object.
    :param labels_input: The file containing the labels
    """
    unicode_database = lgr.unicode_database
    for label in read_labels(labels_input, unicode_database):
        code_points = tuple(ord(char) for char in label)
        # The disposition is the fourth item of the eligibility result.
        result = lgr.test_label_eligible(code_points, collect_log=False)
        yield "%s: %s\n" % (label, result[3])
Esempio n. 3
0
def diff(lgr_1, lgr_2, labels_input,
         show_collision=True, show_dump=False, quiet=False):
    """
    Generate the comparison of a list of labels between two LGRs.

    :param lgr_1: The first LGR info object.
    :param lgr_2: The second LGR info object.
    :param labels_input: The file containing the labels
    :param show_collision: Output collisions
    :param show_dump: Generate a full dump
    :param quiet: Do not print rules
    """
    labels = set(read_labels(labels_input, lgr_1.unicode_database))

    # Indexes of the labels and their variants for each LGR; labels without
    # collision are kept (keep=True) because they are needed for comparison.
    label1_indexes = _generate_indexes(lgr_1, labels, keep=True, quiet=quiet)
    label2_indexes = _generate_indexes(lgr_2, labels, keep=True, quiet=quiet)

    # Map each label to its pair of indexes (one per LGR).
    labels_dic = {}
    yield "# LGR comparison #\n"
    for label in labels:
        code_points = tuple(ord(char) for char in label)
        first_index = lgr_1.generate_index_label(code_points)
        second_index = lgr_2.generate_index_label(code_points)
        labels_dic[label] = (first_index, second_index)

    for line in _compare(labels_dic, label1_indexes, label2_indexes):
        yield line

    if not show_collision:
        return

    # Collision report (and optional dump) for each LGR in turn.
    reports = (
        ("\n\n# Collisions for LGR1 #\n", "\n# Summary for LGR1 #\n", label1_indexes),
        ("\n\n# Collisions for LGR2 #\n", "\n# Summary for LGR2 #\n\n", label2_indexes),
    )
    for collisions_title, summary_title, indexes in reports:
        yield collisions_title
        for line in _write_complete_output(indexes):
            yield line
        if show_dump:
            yield summary_title
            for line in _full_dump(indexes):
                yield line
Esempio n. 4
0
def get_collisions(lgr, labels_input, quiet=True):
    """
    Compute the collision indexes of a list of labels for a given LGR.

    Labels reported as invalid by ``read_labels`` are ignored.

    :param lgr: The LGR object
    :param labels_input: The file containing the labels
    :param quiet: Do not get rules
    :return: The indexes for collisions
    """
    from lgr.tools.utils import read_labels
    valid_labels = set(
        label
        for label, is_valid, _ in read_labels(labels_input, lgr.unicode_database)
        if is_valid
    )
    # Only the first element of the pair returned by _generate_indexes is used.
    collision_indexes, _ = _generate_indexes(lgr, valid_labels, keep=False, quiet=quiet)
    return collision_indexes
Esempio n. 5
0
def annotate(lgr, labels_input):
    """
    Generate the annotation lines of a list of labels.

    Invalid labels are reported with their error; valid ones are evaluated
    against the LGR and rendered through ``_out_valid_label``.

    :param lgr: The LGR info object.
    :param labels_input: The file containing the labels
    """
    for label, is_valid, reason in read_labels(labels_input, lgr.unicode_database):
        if not is_valid:
            yield "%s: %s\n" % (label, reason)
            continue

        code_points = tuple(ord(char) for char in label)
        result = lgr.test_label_eligible(code_points, collect_log=False)
        (eligible, _, invalid_parts, disposition, action_index, _) = result
        for line in _out_valid_label(lgr, label, eligible, invalid_parts, disposition, action_index):
            yield line
Esempio n. 6
0
def cross_script_variants(lgr, labels_input):
    """
    Generate the report of the cross-script variants of a list of labels.

    An exception is raised (after logging the reason) when the LGR has no
    metadata or no unicode database attached.

    :param lgr: The LGR to use for variant generation.
    :param labels_input: The file containing the labels
    """
    if lgr.metadata is None:
        logger.error("Cannot generate cross-scripts variants for LGR without metadata")
        raise Exception
    if lgr.unicode_database is None:
        logger.error("Cannot generate cross-scripts variants for LGR without unicode database attached")
        raise Exception

    any_variant_found = False
    for label, is_valid, reason in read_labels(labels_input, lgr.unicode_database):
        if not is_valid:
            yield "Input label {}: {}\n".format(label, reason)
            continue

        label_cp = tuple(ord(char) for char in label)
        eligible, _, _, _, _, _ = lgr.test_label_eligible(label_cp)
        if not eligible:
            # Labels that are not eligible are skipped without any output.
            continue

        variants = _generate_variants(lgr, label_cp)
        for position, (variant, disp, script_mapping) in enumerate(variants):
            if position == 0:
                # The input label is only displayed when it has x-variants.
                yield "Input label {} ({}) has cross-script variants:\n".format(
                    format_cp(label_cp), label)
                any_variant_found = True
            yield "\t- Cross-variant {} ({}), disposition {}:\n".format(
                format_cp(variant), cp_to_ulabel(variant), disp)
            mapping_lines = [
                "{} ({}): {}\n".format(format_cp(cp), cp_to_ulabel(cp), script)
                for cp, script in script_mapping.items()
            ]
            yield '\t\t+ ' + '\t\t+ '.join(mapping_lines)

    if not any_variant_found:
        yield 'No cross-script variants for input!'
Esempio n. 7
0
def collision(lgr, labels_input, show_dump=False, quiet=False):
    """
    Generate the collision report of a list of labels for a given LGR.

    :param lgr: The LGR info object.
    :param labels_input: The file containing the labels
    :param show_dump: Generate a full dump
    :param quiet: Do not print rules
    """
    unique_labels = set(read_labels(labels_input, lgr.unicode_database))

    # Labels without collision are only kept when a full dump is requested.
    label_indexes = _generate_indexes(lgr, unique_labels, keep=show_dump, quiet=quiet)

    yield "\n# Collisions #\n\n"
    for line in _write_complete_output(label_indexes):
        yield line

    if not show_dump:
        return

    yield "\n# Summary #\n\n"
    for line in _full_dump(label_indexes):
        yield line
Esempio n. 8
0
def collision(lgr, labels_input, show_dump=False, quiet=False):
    """
    Generate the collision report of a list of labels for a given LGR.

    Invalid labels are reported first, then the labels that are not in the
    LGR (if any), then the collisions and, on request, a full dump.

    :param lgr: The LGR object.
    :param labels_input: The file containing the labels
    :param show_dump: Generate a full dump
    :param quiet: Do not print rules
    """
    from lgr.tools.utils import read_labels
    valid_labels = set()
    for label, is_valid, reason in read_labels(labels_input, lgr.unicode_database):
        if not is_valid:
            yield "Label {}: {}\n".format(label, reason)
            continue
        valid_labels.add(label)

    # Labels without collision are only kept when a full dump is requested.
    label_indexes, not_in_lgr = _generate_indexes(
        lgr, valid_labels, keep=show_dump, quiet=quiet)

    if not_in_lgr:
        yield "\n# Labels not in LGR #\n\n"
        for code_points in not_in_lgr:
            yield "Label {}\n".format(cp_to_ulabel(code_points))

    yield "\n# Collisions #\n\n"
    for line in _write_complete_output(label_indexes):
        yield line

    if not show_dump:
        return

    yield "\n# Summary #\n\n"
    for line in _full_dump(label_indexes):
        yield line
Esempio n. 9
0
def main():
    """
    Command-line entry point of the LGR validation tool.

    Parses the command-line arguments, registers the unicode database from
    the ``--libs`` value (``libpath#i18n_libpath#libver``), loads either a
    single LGR or a merged LGR set (when ``--lgr-xml`` is given more than
    once), then checks every label read from standard input.

    For an LGR set, ``--lgr-script`` and ``--set-labels`` are mandatory; the
    set labels are filtered against the script LGR before the input labels
    are checked. Errors are logged and the function returns early.
    """
    parser = argparse.ArgumentParser(description='LGR Validate CLI')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='be verbose')
    parser.add_argument('-r',
                        '--rng',
                        metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-l',
                        '--libs',
                        metavar='LIBS',
                        help='ICU libraries',
                        required=True)
    parser.add_argument('-g',
                        '--variants',
                        action='store_true',
                        help='Generate variants')
    parser.add_argument('-x',
                        '--lgr-xml',
                        metavar='LGR_XML',
                        action='append',
                        required=True,
                        help='The LGR or LGR set if used multiple times')
    parser.add_argument(
        '-s',
        '--lgr-script',
        metavar='LGR_SCRIPT',
        help='If LGR is a set, the script used to validate input labels')
    parser.add_argument(
        '-f',
        '--set-labels',
        metavar='SET_LABELS',
        help='If LGR is a set, the file containing the label of the LGR set')
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(stream=sys.stdout, level=log_level)

    # The --libs value carries three '#'-separated fields.
    libpath, i18n_libpath, libver = args.libs.split('#')
    manager = UnicodeDataVersionManager()
    unidb = manager.register(None, libpath, i18n_libpath, libver)

    if len(args.lgr_xml) > 1:
        # Several LGR files: handle them as an LGR set.
        if not args.lgr_script:
            logger.error('For LGR set, lgr script is required')
            return

        if not args.set_labels:
            logger.error('For LGR set, LGR set labels file is required')
            return

        merged_lgr, lgr_set = merge_lgrs(args.lgr_xml, unidb=unidb)
        if not merged_lgr:
            logger.error('Error while creating the merged LGR')
            return

        # Load the whole set labels file in memory so that the file can be
        # closed right away.
        with io.open(args.set_labels, 'r',
                     encoding='utf-8') as set_labels_input:
            set_labels = StringIO(set_labels_input.read())

        # Select the LGR of the set whose first language matches the
        # requested script; the last match wins.
        script_lgr = None
        for lgr_s in lgr_set:
            try:
                if lgr_s.metadata.languages[0] == args.lgr_script:
                    if script_lgr:
                        logger.warning(
                            'Script %s is provided in more than one LGR of the set, '
                            'will only evaluate with %s', args.lgr_script,
                            lgr_s.name)
                    script_lgr = lgr_s
            except (AttributeError, IndexError):
                # LGR without metadata or without language: cannot match.
                pass

        if not script_lgr:
            logger.error(
                'Cannot find script %s in any of the LGR provided as input',
                args.lgr_script)
            return
    else:
        lgr_parser = XMLParser(args.lgr_xml[0])
        lgr_parser.unicode_database = unidb

        if args.rng is not None:
            # RNG validation errors are logged; parsing is still attempted.
            validation_result = lgr_parser.validate_document(args.rng)
            if validation_result is not None:
                logger.error('Errors for RNG validation: %s',
                             validation_result)

        lgr = lgr_parser.parse_document()
        if lgr is None:
            logger.error("Error while parsing LGR file.")
            logger.error("Please check compliance with RNG.")
            return

    filtered_set_labels = []
    if len(args.lgr_xml) > 1:
        write_output(
            "# The following labels from the set labels are invalid\n")
        for label, valid, error in read_labels(set_labels,
                                               script_lgr.unicode_database):
            if not valid:
                write_output("{}: {}\n".format(label, error))
            else:
                label_cp = tuple([ord(c) for c in label])
                if not script_lgr._test_preliminary_eligibility(label_cp)[0]:
                    # Both values must be passed as a tuple to the format
                    # string, otherwise the formatting raises a TypeError.
                    write_output("%s: Not in LGR %s\n" % (label, script_lgr))
                else:
                    filtered_set_labels.append(label)
        write_output("# End of filtered set labels\n\n")

    # Check every line of standard input as a label.
    for label in get_stdin().read().splitlines():
        if len(args.lgr_xml) > 1:
            check_label(script_lgr,
                        label,
                        args.variants,
                        merged_lgr=merged_lgr,
                        set_labels=filtered_set_labels)
        else:
            check_label(lgr, label, args.variants)
Esempio n. 10
0
def diff(lgr_1, lgr_2, labels_input,
         show_collision=True, show_dump=False, quiet=False):
    """
    Generate the comparison of a list of labels between two LGRs.

    Invalid labels and labels that are not in one of the LGRs are reported
    before the comparison itself.

    :param lgr_1: The first LGR info object.
    :param lgr_2: The second LGR info object.
    :param labels_input: The file containing the labels
    :param show_collision: Output collisions
    :param show_dump: Generate a full dump
    :param quiet: Do not print rules
    """
    from lgr.tools.utils import read_labels
    labels = set()
    for label, is_valid, reason in read_labels(labels_input, lgr_1.unicode_database):
        if not is_valid:
            yield "Label {}: {}\n".format(label, reason)
            continue
        labels.add(label)

    # Indexes of the labels and their variants for each LGR; labels without
    # collision are kept (keep=True) because they are needed for comparison.
    label1_indexes, not_in_lgr_1 = _generate_indexes(
        lgr_1, labels, keep=True, quiet=quiet)
    label2_indexes, not_in_lgr_2 = _generate_indexes(
        lgr_2, labels, keep=True, quiet=quiet)

    if not_in_lgr_1 or not_in_lgr_2:
        # Both sections are written as soon as one of them is not empty.
        for number, missing in ((1, not_in_lgr_1), (2, not_in_lgr_2)):
            yield "# Labels not in LGR {} #\n\n".format(number)
            for code_points in missing:
                yield "Label {}\n".format(cp_to_ulabel(code_points))
            yield '\n'

    # Map each label to its pair of indexes (one per LGR).
    labels_dic = {}
    yield "\n# LGR comparison #\n"
    for label in labels:
        code_points = tuple(ord(char) for char in label)
        indexes = []
        for lgr in (lgr_1, lgr_2):
            try:
                indexes.append(lgr.generate_index_label(code_points))
            except NotInLGR:
                yield "Label {} not in LGR {}\n".format(label, lgr)
                break
        else:
            # Reached only when the label has an index in both LGRs.
            labels_dic[label] = tuple(indexes)

    for line in _compare(labels_dic, label1_indexes, label2_indexes):
        yield line

    if not show_collision:
        return

    # Collision report (and optional dump) for each LGR in turn.
    reports = (
        ("\n\n# Collisions for LGR1 #\n", "\n# Summary for LGR1 #\n", label1_indexes),
        ("\n\n# Collisions for LGR2 #\n", "\n# Summary for LGR2 #\n\n", label2_indexes),
    )
    for collisions_title, summary_title, indexes in reports:
        yield collisions_title
        for line in _write_complete_output(indexes):
            yield line
        if show_dump:
            yield summary_title
            for line in _full_dump(indexes):
                yield line