コード例 #1
0
def main():
    """Command-line entry point: parse an RFC3743 file and dump it as LGR XML.

    The XML goes to the file given with ``-o/--output`` when present,
    otherwise it is printed on standard output. Logging goes to stderr, at
    DEBUG level with ``-v/--verbose`` and INFO level otherwise.
    """
    from lgr.parser.rfc3743_parser import RFC3743Parser
    from lgr.parser.xml_serializer import serialize_lgr_xml

    cli = argparse.ArgumentParser(description='Parse and dump a RFC3743 file')
    cli.add_argument('-v', '--verbose', action='store_true', help='be verbose')
    cli.add_argument('-o', '--output', metavar='OUTPUT',
                     help='Optional output file')
    cli.add_argument('file', metavar='FILE')
    args = cli.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    lgr = RFC3743Parser(args.file).parse_document()

    if args.output is None:
        # Console output: request text and leave out the XML declaration.
        text = serialize_lgr_xml(lgr,
                                 pretty_print=True,
                                 encoding='unicode',
                                 xml_declaration=False)
        print(text)
        return

    # File output: serialize first, then write the result in binary mode.
    serialized = serialize_lgr_xml(lgr, pretty_print=True)
    with io.open(args.output, mode='wb') as output:
        output.write(serialized)
コード例 #2
0
def main():
    """Command-line entry point: parse an LGR XML file and dump it again.

    When ``-r/--rng`` is given the document is first validated against that
    RelaxNG schema; a validation error is printed and the command stops.
    With ``-v/--verbose`` every entry of the repertoire is printed before
    the XML. The XML goes to the ``-o/--output`` file when given, otherwise
    to standard output.
    """
    from lgr.parser.xml_parser import XMLParser
    from lgr.parser.xml_serializer import serialize_lgr_xml

    cli = argparse.ArgumentParser(description='Parse and dump a LGR XML file')
    cli.add_argument('-v', '--verbose', action='store_true', help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG', help='RelaxNG XML schema')
    cli.add_argument('-o', '--output', metavar='OUTPUT',
                     help='Optional output file')
    cli.add_argument('xml', metavar='XML')
    args = cli.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(stream=sys.stderr, level=log_level)

    lgr_parser = XMLParser(args.xml)

    if args.rng is not None:
        # validate_document() returns None on success, an error otherwise.
        rng_error = lgr_parser.validate_document(args.rng)
        if rng_error is not None:
            print(rng_error)
            return

    lgr = lgr_parser.parse_document()

    if args.verbose:
        for entry in lgr.repertoire:
            print(entry)

    if args.output is None:
        # Console output: request text and leave out the XML declaration.
        text = serialize_lgr_xml(lgr,
                                 pretty_print=True,
                                 encoding='unicode',
                                 xml_declaration=False)
        print(text)
        return

    # File output: serialize first, then write the result in binary mode.
    serialized = serialize_lgr_xml(lgr, pretty_print=True)
    with io.open(args.output, mode='wb') as output:
        output.write(serialized)
コード例 #3
0
def main():
    """Command-line entry point: parse a "one codepoint per line" file.

    The parsed LGR is serialized to XML and written to the file given with
    ``-o/--output`` when present, otherwise printed on standard output.
    Logging goes to stderr, at DEBUG level with ``-v/--verbose`` and INFO
    level otherwise.
    """
    from lgr.parser.line_parser import parse_document
    from lgr.parser.xml_serializer import serialize_lgr_xml

    parser = argparse.ArgumentParser(
        description='Parse and dump a "one codepoint per line" file')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='be verbose')
    parser.add_argument('-o',
                        '--output',
                        metavar='OUTPUT',
                        help='Optional output file')
    parser.add_argument('file', metavar='FILE')

    args = parser.parse_args()

    logging.basicConfig(stream=sys.stderr,
                        level=logging.DEBUG if args.verbose else logging.INFO)

    lgr = parse_document(args.file)

    if args.output is not None:
        # The default serialization is written in binary mode
        # (presumably bytes -- confirm against serialize_lgr_xml).
        xml = serialize_lgr_xml(lgr, pretty_print=True)
        with io.open(args.output, mode='wb') as output:
            output.write(xml)
    else:
        # Printing the default serialization would show a bytes repr on
        # Python 3 if it is bytes; request text and leave out the XML
        # declaration so stdout receives the document itself.
        print(
            serialize_lgr_xml(lgr,
                              pretty_print=True,
                              encoding='unicode',
                              xml_declaration=False))
コード例 #4
0
ファイル: xml_validity.py プロジェクト: j-bernard/lgr-core
def check_xml_validity(lgr, options):
    """
    Serialize the LGR to XML and validate that XML against an RNG schema.

    `options` argument can contain:
        * rng_filepath: Filepath of the RNG schema used to validate.
          When the key is absent the check is skipped and reported as
          successful.

    :param LGR lgr: The LGR to check.
    :param options: Dictionary of options to the validation function.
    :return: True if the validation passed or was skipped, False otherwise.
    """
    # Local import to prevent import cycles.
    from lgr.parser.xml_serializer import serialize_lgr_xml
    from lgr.parser.xml_parser import XMLParser

    logger.info("Testing XML validity")

    if 'rng_filepath' not in options:
        logger.warning("rng_filepath not in 'options' arguments, skipping")
        return True

    # Validate the freshly serialized document through an in-memory stream.
    serialized_stream = StringIO(serialize_lgr_xml(lgr))
    rng_error = XMLParser(serialized_stream).validate_document(
        options['rng_filepath'])

    # validate_document() returns None when the document is valid.
    is_valid = rng_error is None
    if is_valid:
        logger.info('RNG validation OK')
    else:
        logger.warning('RNG validation failed: XML error is')
        logger.warning(rng_error)

    logger.info("Testing XML validity done")

    return is_valid
コード例 #5
0
def main():
    """Command-line entry point: populate the variants of an LGR.

    Reads the LGR given with ``-x/--lgr-xml``, using the ICU libraries
    described by ``-l/--libs`` (three fields separated by ``#``: library
    path, i18n library path and version). With ``-r/--rng`` the document is
    validated against the schema first; validation errors are logged but do
    not stop processing. The populated LGR is printed as XML on stdout.
    """
    cli = argparse.ArgumentParser(description='LGR Populate Variants CLI')
    cli.add_argument('-v', '--verbose', action='store_true', help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG', help='RelaxNG XML schema')
    cli.add_argument('-l', '--libs', metavar='LIBS', required=True,
                     help='ICU libraries')
    cli.add_argument('-x', '--lgr-xml', metavar='LGR', required=True,
                     help='The LGR to populate')
    args = cli.parse_args()

    logging.basicConfig(
        stream=sys.stderr,
        level=logging.DEBUG if args.verbose else logging.INFO)

    libpath, i18n_libpath, libver = args.libs.split('#')
    unidb = UnicodeDataVersionManager().register(None, libpath, i18n_libpath,
                                                 libver)

    lgr_parser = XMLParser(args.lgr_xml)
    lgr_parser.unicode_database = unidb

    if args.rng is not None:
        # A failed validation is only reported; parsing is still attempted.
        rng_error = lgr_parser.validate_document(args.rng)
        if rng_error is not None:
            logger.error('Errors for RNG validation of LGR file %s: %s',
                         args.lgr_xml, rng_error)

    lgr = lgr_parser.parse_document()
    if lgr is None:
        logger.error("Error while parsing LGR file %s", args.lgr_xml)
        logger.error("Please check compliance with RNG.")
        return

    lgr.populate_variants()
    populated_xml = serialize_lgr_xml(lgr,
                                      pretty_print=True,
                                      encoding='unicode',
                                      xml_declaration=False)
    print(populated_xml)
コード例 #6
0
ファイル: make_idna_repertoire.py プロジェクト: g11r/lgr-core
def make_idna_repertoire(version):
    """
    Build a repertoire from the IDNA tables and print it as LGR XML.

    The IDNA table registry for the requested version is fetched and parsed,
    every record whose property is PVALID, CONTEXTO or CONTEXTJ is added to
    a new LGR (as a single code point or as a range), and the LGR is printed
    on stdout.

    Input:
        * version: The unicode version to use (dotted string).
    """
    from lgr.core import LGR
    from lgr.parser.xml_serializer import serialize_lgr_xml

    lgr = LGR('idna2008-%s' % version)

    idna_url = IDNATABLES_URL.format(version=version)
    logger.debug("Fetching and parsing '%s'", idna_url)
    registry = etree.parse(idna_url)

    # Doubled braces come out of str.format() as literal '{' and '}'.
    namespace = "{{{0}}}".format(IDNATABLES_NS)

    # Versions up to and including 6.0.0 use a registry id that embeds the
    # version number.
    version_parts = [int(part) for part in version.split('.')]
    if version_parts <= [6, 0, 0]:
        registry_id = "idna-tables-{}-properties".format(version)
    else:
        registry_id = "idna-tables-properties"
    record_xpath = '{0}registry[@id="{1}"]/{0}record'.format(
        namespace, registry_id)

    kept_properties = ('PVALID', 'CONTEXTO', 'CONTEXTJ')
    for record in registry.findall(record_xpath):
        codepoint = record.find(CODEPOINT_TAG).text
        prop = record.find(PROPERTY_TAG).text

        if prop in kept_properties:
            if codepoint.find('-') > 0:
                # "FIRST-LAST": a range of hexadecimal code points.
                bounds = codepoint.split('-')
                first_cp, last_cp = [int(bound, 16) for bound in bounds]
                lgr.add_range(first_cp, last_cp)
            else:
                # A single hexadecimal code point.
                lgr.add_cp(int(codepoint, 16))

    print(
        serialize_lgr_xml(lgr,
                          pretty_print=True,
                          encoding='unicode',
                          xml_declaration=False))
コード例 #7
0
ファイル: merge_set.py プロジェクト: g11r/lgr-core
def merge_lgr_set(lgr_set, name):
    """
    Merge the LGRs of a set into a single LGR.

    The input list is sorted in place, and each LGR has its ranges expanded,
    before its references, characters, actions, rules and classes are merged.

    :param lgr_set: The list of LGRs in the set
    :param name: Merged LGR name
    :return: New LGR (merge of LGR set)
    """
    logger.debug("Merge %s", name)

    def _ordering_key(member):
        # 'und-' is rewritten to 'zzz' -- presumably so that those scripts
        # sort after the others; confirm against get_script().
        return get_script(member).replace('und-', 'zzz')

    # Sorting is done in place: the caller's list is reordered.
    lgr_set.sort(key=_ordering_key)

    # Collect the distinct unicode versions, in order of first appearance.
    unicode_version = OrderedDict.fromkeys(
        member.metadata.unicode_version for member in lgr_set)
    if len(unicode_version) > 1:
        logger.warning("Different unicode version in set: %s",
                       unicode_version.keys())

    ref_mapping = {}
    merged_lgr = LGR(name=name,
                     metadata=copy.deepcopy(merge_metadata(lgr_set)))
    previous_scripts = []
    for member in lgr_set:
        script = get_script(member)
        member.expand_ranges()

        merge_references(member, script, merged_lgr, ref_mapping)
        merge_chars(member, script, merged_lgr, ref_mapping, previous_scripts)
        merge_actions(member, script, merged_lgr, ref_mapping)
        merge_rules(member, script, merged_lgr, ref_mapping)
        merge_classes(member, script, merged_lgr, ref_mapping)
        previous_scripts.append(script)

    # XXX As the created merged_lgr is not a valid Python LGR object,
    # we have to serialize it/parse it to get a valid object.
    serialized_stream = BytesIO(serialize_lgr_xml(merged_lgr))
    return XMLParser(source=serialized_stream,
                     filename=name).parse_document()
コード例 #8
0
def main():
    """Command-line entry point: merge several LGRs into one LGR set.

    Each ``-s/--lgr-set`` option adds one LGR to the set; more than one is
    needed. ``-l/--libs`` optionally describes the ICU libraries (three
    fields separated by ``#``: library path, i18n library path and version),
    ``-r/--rng`` gives a RelaxNG schema and ``-n/--name`` the merged LGR
    name; they are passed to ``merge_lgrs``. The merged LGR is printed as
    XML on stdout.
    """
    parser = argparse.ArgumentParser(description='LGR diff and collision CLI')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='be verbose')
    parser.add_argument('-l', '--libs', metavar='LIBS', help='ICU libraries')
    parser.add_argument('-r',
                        '--rng',
                        metavar='RNG',
                        help='RelaxNG XML schema')
    parser.add_argument('-n', '--name', metavar='NAME', help="Merged LGR name")
    parser.add_argument('-s',
                        '--lgr-set',
                        metavar='LGR-SET',
                        action='append',
                        help='LGR in the set (can be used multiple times)',
                        required=True)

    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    # NOTE(review): log records share stdout with the XML printed below;
    # confirm whether stderr was intended here.
    logging.basicConfig(stream=sys.stdout, level=log_level)

    unidb = None
    if args.libs:
        libpath, i18n_libpath, libver = args.libs.split('#')
        manager = UnicodeDataVersionManager()
        unidb = manager.register(None, libpath, i18n_libpath, libver)

    if len(args.lgr_set) == 1:
        logger.error("Please provide more than one LGR to make a set")
        return

    logger.warning('Please wait, this can take some time...\n')

    merged_lgr, _ = merge_lgrs(args.lgr_set,
                               name=args.name,
                               rng=args.rng,
                               unidb=unidb)
    if not merged_lgr:
        return

    # Printing the default serialization would show a bytes repr on
    # Python 3 if it is bytes; request text and leave out the XML
    # declaration so stdout receives the document itself.
    print(
        serialize_lgr_xml(merged_lgr,
                          pretty_print=True,
                          encoding='unicode',
                          xml_declaration=False))
コード例 #9
0
ファイル: api.py プロジェクト: ptudor/lgr-django
def session_merge_set(request, lgr_info_set, lgr_set_name):
    """
    Build an LGR set by merging several LGRs and open it in the session.

    The merged LGR is serialized to XML and passed to ``session_open_lgr``
    under an id derived from its name with ``slugify``.

    :param request: Django request object
    :param lgr_info_set: The list of LGRInfo objects in the set
    :param lgr_set_name: The name of the LGR set
    :return: The LGR set merge id
    """
    member_lgrs = [info.lgr for info in lgr_info_set]
    merged_lgr = merge_lgr_set(member_lgrs, lgr_set_name)
    merged_id = slugify(merged_lgr.name)

    session_open_lgr(request,
                     merged_id,
                     serialize_lgr_xml(merged_lgr),
                     validating_repertoire_name=None,
                     validate=True,
                     lgr_set=lgr_info_set)
    return merged_id
コード例 #10
0
ファイル: api.py プロジェクト: ptudor/lgr-django
 def update_xml(self, pretty_print=False):
     """Re-generate ``self.xml`` from the current ``self.lgr``.

     Call this after something was changed in ``lgr``.

     :param pretty_print: Passed through to ``serialize_lgr_xml``.
     """
     refreshed_xml = serialize_lgr_xml(self.lgr, pretty_print=pretty_print)
     self.xml = refreshed_xml
コード例 #11
0
ファイル: lgr_compare.py プロジェクト: g11r/lgr-core
def main():
    """Command-line entry point: compare two LGRs or two LGR sets.

    ``-1/--first`` and ``-2/--second`` each name one LGR, or an LGR set when
    repeated; comparing a single LGR with a set is rejected. The positional
    ACTION is INTERSECT, UNION or DIFF. Sets only support DIFF. For single
    LGRs, INTERSECT and UNION print the resulting LGR as XML, while DIFF
    prints the difference report (including identical elements with
    ``-g/--generate``).
    """
    cli = argparse.ArgumentParser(description='LGR Compare CLI')
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='be verbose')
    cli.add_argument('-r', '--rng', metavar='RNG',
                     help='RelaxNG XML schema')
    cli.add_argument('-1', '--first', metavar='LGR1', action='append',
                     required=True,
                     help='First LGR or LGR set if used multiple times')
    cli.add_argument('-2', '--second', metavar='LGR2', action='append',
                     required=True,
                     help='Second LGR or LGR set if used multiple times')
    cli.add_argument('action', metavar="ACTION",
                     choices=['INTERSECT', 'UNION', 'DIFF'],
                     help='Compare action (INTERSECT, UNION, DIFF)')
    cli.add_argument('-g', '--generate', action='store_true',
                     help='Generate a full dump (with identical elements as well)')
    cli.add_argument('-n1', '--name-first', metavar='NAME1',
                     help="Merged LGR 1 name")
    cli.add_argument('-n2', '--name-second', metavar='NAME2',
                     help="Merged LGR 2 name")
    args = cli.parse_args()

    logging.basicConfig(
        stream=sys.stderr,
        level=logging.DEBUG if args.verbose else logging.INFO)

    first_count = len(args.first)
    second_count = len(args.second)
    if ((first_count == 1 and second_count > 1)
            or (second_count == 1 and first_count > 1)):
        logger.error("Cannot compare LGR with LGR sets")
        return

    logger.info('Please wait, this can take some time...\n')

    # Maps the actions that produce a new LGR to the function computing it.
    combiners = {'INTERSECT': intersect_lgrs, 'UNION': union_lgrs}

    if first_count > 1:
        # Both sides are LGR sets: only DIFF is available.
        if args.action in combiners:
            logger.error('Cannot perform intersection or union with LGR sets')
            return

        merged_lgr_1, lgr_set_1 = merge_lgrs(args.first,
                                             name=args.name_first,
                                             rng=args.rng)
        if not merged_lgr_1:
            return

        merged_lgr_2, lgr_set_2 = merge_lgrs(args.second,
                                             name=args.name_second,
                                             rng=args.rng)
        if not merged_lgr_2:
            return

        print(diff_lgr_sets(merged_lgr_1, merged_lgr_2, lgr_set_1, lgr_set_2))
        return

    # Both sides are single LGRs.
    lgr1_parser = XMLParser(args.first[0])
    lgr2_parser = XMLParser(args.second[0])

    if args.rng is not None:
        # Validation errors are only reported; parsing is still attempted.
        checks = (
            (lgr1_parser, 'Errors for RNG validation of first LGR: %s'),
            (lgr2_parser, 'Errors for RNG validation of second LGR: %s'),
        )
        for lgr_parser, message in checks:
            validation_result = lgr_parser.validate_document(args.rng)
            if validation_result is not None:
                logger.error(message, validation_result)

    lgr1 = lgr1_parser.parse_document()
    if lgr1 is None:
        logger.error("Error while parsing first LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    lgr2 = lgr2_parser.parse_document()
    if lgr2 is None:
        logger.error("Error while parsing second LGR file.")
        logger.error("Please check compliance with RNG.")
        return

    combine = combiners.get(args.action)
    if combine is not None:
        lgr = combine(lgr1, lgr2)
        print(serialize_lgr_xml(lgr,
                                pretty_print=True,
                                encoding='unicode',
                                xml_declaration=False))
    elif args.action == 'DIFF':
        print(diff_lgrs(lgr1, lgr2, show_same=args.generate))