def make_index(pages):
    """Build an alphabetical index of the names declared by man pages.

    Each page is parsed with ``xml_parse`` and validated with ``check_id``.
    Every ``<refname>`` found under ``<refnamediv>`` produces one entry
    ``(name, section, purpose, primary_name)``, where ``primary_name`` is
    the text of the page's first ``<refname>`` and ``purpose`` is the
    ``<refpurpose>`` text with runs of whitespace collapsed to one space.

    Returns a ``defaultdict(list)`` keyed by the upper-cased first character
    of each name.
    """
    by_letter = collections.defaultdict(list)
    for page in pages:
        doc = xml_parse(page)
        check_id(page, doc)
        section = doc.find('./refmeta/manvolnum').text
        primary = doc.find('./refnamediv/refname').text
        # split()/join normalises line breaks and indentation in the XML
        words = doc.find('./refnamediv/refpurpose').text.split()
        purpose = ' '.join(words)
        for entry in doc.findall('./refnamediv/refname'):
            name = entry.text
            by_letter[name[0].upper()].append(
                (name, section, purpose, primary))
    return by_letter
Exemple #2
0
def add_rules(rules, name):
    """Register the man page aliases declared by one XML source file.

    ``name`` is parsed with ``xml_parse``. Files whose root element is not
    ``<refentry>`` are ignored. Otherwise every ``<refname>`` of the page is
    turned into an alias with ``man(refname_text, manvolnum)`` and stored in
    ``rules[conditional]`` (``conditional`` is the root element's attribute
    of that name, or ``''`` when absent), mapped to the alias built from the
    first ``<refname>``.

    Raises:
        ValueError: if ``<refentrytitle>`` differs from the first
            ``<refname>``, or if an alias is already registered in any
            group of ``rules``.
    """
    xml = xml_parse(name)
    root = xml.getroot()
    if root.tag != 'refentry':
        return
    conditional = root.get('conditional') or ''
    rulegroup = rules[conditional]
    refmeta = xml.find('./refmeta')
    title = refmeta.find('./refentrytitle').text
    number = refmeta.find('./manvolnum').text
    refnames = xml.findall('./refnamediv/refname')
    target = man(refnames[0].text, number)
    if title != refnames[0].text:
        raise ValueError('refmeta and refnamediv disagree: ' + name)
    for refname in refnames:
        alias = man(refname.text, number)
        # Groups are keyed by alias, so the duplicate check must look up the
        # alias; the Element object itself is never a key and would make the
        # check pass unconditionally.
        if any(alias in group for group in rules.values()):
            raise ValueError(
                'duplicate page name {!r} in {}'.format(alias, name))
        rulegroup[alias] = target
Exemple #3
0
def add_rules(rules, name):
    """Register the man page aliases declared by one XML source file.

    ``name`` is parsed with ``xml_parse``. Files whose root element is not
    ``<refentry>`` are ignored. Otherwise every ``<refname>`` of the page is
    turned into an alias with ``man(refname_text, manvolnum)`` and stored in
    ``rules[conditional]`` (``conditional`` is the root element's attribute
    of that name, or ``''`` when absent), mapped to the alias built from the
    first ``<refname>``.

    Raises:
        ValueError: if ``<refentrytitle>`` differs from the first
            ``<refname>``, or if an alias is already registered in any
            group of ``rules``.
    """
    xml = xml_parse(name)
    root = xml.getroot()
    if root.tag != 'refentry':
        return
    conditional = root.get('conditional') or ''
    rulegroup = rules[conditional]
    refmeta = xml.find('./refmeta')
    title = refmeta.find('./refentrytitle').text
    number = refmeta.find('./manvolnum').text
    refnames = xml.findall('./refnamediv/refname')
    target = man(refnames[0].text, number)
    if title != refnames[0].text:
        raise ValueError('refmeta and refnamediv disagree: ' + name)
    for refname in refnames:
        alias = man(refname.text, number)
        # Groups are keyed by alias, so the duplicate check must look up the
        # alias; the Element object itself is never a key and would make the
        # check pass unconditionally.
        if any(alias in group for group in rules.values()):
            raise ValueError(
                'duplicate page name {!r} in {}'.format(alias, name))
        rulegroup[alias] = target
Exemple #4
0
def _extract_directives(page, names):
    """Collect the directives documented on one man page.

    ``page`` is handed to ``xml_parse``; ``names`` lists the directive groups
    to create. Each group is a ``defaultdict(set)`` mapping a directive's
    text to the ``(pagename, section)`` pairs recorded for it.

    The groups 'options', 'filenames', 'constants' and 'specifiers' are
    always looked up, and each <variablelist> additionally looks up its
    ``class`` attribute (or 'miscellaneous' when it has none). The group
    container is a plain dict, so a group missing from ``names`` raises
    ``KeyError``.

    Returns ``(directive_groups, formatting)``, where ``formatting`` maps
    each directive text to the ``xml_print`` output of the element chosen to
    display it.

    NOTE(review): ``getparent()`` and ``tree.Element`` below suggest that
    ``xml_parse`` returns an lxml tree and ``tree`` is ``lxml.etree`` --
    confirm against the module imports.
    """
    directive_groups = {name: collections.defaultdict(set) for name in names}

    t = xml_parse(page)
    section = t.find('./refmeta/manvolnum').text
    pagename = t.find('./refmeta/refentrytitle').text
    # directive text -> element used to display it (serialized before return)
    formatting = {}

    storopt = directive_groups['options']
    for variablelist in t.iterfind('.//variablelist'):
        klass = variablelist.attrib.get('class')
        # A list may override where its variable names are found via 'xpath'
        searchpath = variablelist.attrib.get('xpath',
                                             './varlistentry/term/varname')
        storvar = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        for xpath, stor in ((searchpath, storvar),
                            ('./varlistentry/term/option',
                             storvar if klass else storopt)):
            for name in variablelist.iterfind(xpath):
                # Cut each line after its first '=' or space (the character
                # itself is kept), then drop trailing whitespace.
                text = re.sub(r'([= ]).*', r'\1', name.text).rstrip()
                if text.startswith('-'):
                    # for options, merge options with and without mandatory arg
                    text = text.partition('=')[0]
                stor[text].add((pagename, section))
                if text not in formatting:
                    # use element as formatted display
                    # name.text is still the unmodified source text here
                    if name.text[-1] in "= '":
                        # clear() resets text, tail, attributes and children
                        name.clear()
                    else:
                        name.tail = ''
                    name.text = text
                    formatting[text] = name
        extra = variablelist.attrib.get('extra-ref')
        if extra:
            # NOTE(review): 'stor' is the variable left bound by the loop
            # above, i.e. the <option> storage (storvar if klass else
            # storopt) -- confirm this is the intended group.
            stor[extra].add((pagename, section))
            if extra not in formatting:
                elt = tree.Element("varname")
                elt.text = extra
                formatting[extra] = elt

    storfile = directive_groups['filenames']
    # <filename>/<command> inside the synopsis are always considered; any
    # other <filename> only when its text starts with '/'.
    for xpath, absolute_only in (('.//refsynopsisdiv//filename', False),
                                 ('.//refsynopsisdiv//command',
                                  False), ('.//filename', True)):
        for name in t.iterfind(xpath):
            if absolute_only and not (name.text and name.text.startswith('/')):
                continue
            if name.attrib.get('index') == 'false':
                continue
            name.tail = ''
            if name.text:
                # drop a single trailing '*'
                if name.text.endswith('*'):
                    name.text = name.text[:-1]
                # names starting with '.' are not recorded at all
                if not name.text.startswith('.'):
                    # keep only the part before the first space
                    text = name.text.partition(' ')[0]
                    if text != name.text:
                        name.clear()
                        name.text = text
                    # the trailing '/' is removed from the key only; the
                    # element text keeps it
                    if text.endswith('/'):
                        text = text[:-1]
                    storfile[text].add((pagename, section))
                    if text not in formatting:
                        # use element as formatted display
                        formatting[text] = name
            else:
                # no leading text: key on all nested text joined by spaces;
                # this branch always overwrites an existing formatting entry
                text = ' '.join(name.itertext())
                storfile[text].add((pagename, section))
                formatting[text] = name

    # 'storfile' is reused below for the remaining groups
    storfile = directive_groups['constants']
    for name in t.iterfind('.//constant'):
        if name.attrib.get('index') == 'false':
            continue
        name.tail = ''
        if name.text.startswith('('):  # a cast, strip it
            # keeps whatever follows the first space
            name.text = name.text.partition(' ')[2]
        storfile[name.text].add((pagename, section))
        formatting[name.text] = name

    storfile = directive_groups['specifiers']
    for name in t.iterfind(".//table[@class='specifiers']//entry/literal"):
        # only literals starting with '%' whose parent has no text before them
        if name.text[0] != '%' or name.getparent().text is not None:
            continue
        if name.attrib.get('index') == 'false':
            continue
        storfile[name.text].add((pagename, section))
        formatting[name.text] = name
    # explicitly tagged literals are recorded without the checks above
    for name in t.iterfind(".//literal[@class='specifiers']"):
        storfile[name.text].add((pagename, section))
        formatting[name.text] = name

    # Serialize to allow pickling
    formatting = {name: xml_print(value) for name, value in formatting.items()}

    return directive_groups, formatting
Exemple #5
0
def _extract_directives(directive_groups, formatting, page):
    """Record the directives documented on ``page`` into the given mappings.

    ``page`` is parsed with ``xml_parse``. For every directive found, the
    pair ``(pagename, section)`` is appended to
    ``directive_groups[group][text]``, and ``formatting[text]`` is set to
    the XML element used to display that directive. Both mappings are
    modified in place and nothing is returned.

    Groups used: 'options', 'filenames', 'constants', plus the ``class`` of
    each <variablelist> or 'miscellaneous' when it has none. Elements of the
    parsed tree are modified (text, tail, children) while they are collected.
    """
    doc = xml_parse(page)
    section = doc.find('./refmeta/manvolnum').text
    pagename = doc.find('./refmeta/refentrytitle').text
    where = (pagename, section)

    # --- variables and options listed in <variablelist> elements ---
    option_store = directive_groups['options']
    for varlist in doc.iterfind('.//variablelist'):
        klass = varlist.attrib.get('class')
        var_store = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        searches = (
            ('./varlistentry/term/varname', var_store),
            ('./varlistentry/term/option',
             var_store if klass else option_store),
        )
        for xpath, store in searches:
            for elt in varlist.iterfind(xpath):
                raw = elt.text
                # cut after the first '=' or space, then trim whitespace
                key = re.sub(r'([= ]).*', r'\1', raw).rstrip()
                store[key].append(where)
                if key in formatting:
                    continue
                # first sighting: this element becomes the display form
                if raw[-1] in '= ':
                    elt.clear()
                else:
                    elt.tail = ''
                elt.text = key
                formatting[key] = elt

    # --- file names and commands ---
    file_store = directive_groups['filenames']
    file_searches = (
        ('.//refsynopsisdiv//filename', False),
        ('.//refsynopsisdiv//command', False),
        ('.//filename', True),
    )
    for xpath, absolute_only in file_searches:
        for elt in doc.iterfind(xpath):
            if absolute_only and (not elt.text
                                  or not elt.text.startswith('/')):
                continue
            if elt.attrib.get('noindex'):
                continue
            elt.tail = ''
            if not elt.text:
                # no leading text: key on the nested text instead
                key = ' '.join(elt.itertext())
                file_store[key].append(where)
                formatting[key] = elt
                continue
            if elt.text.endswith('*'):
                elt.text = elt.text[:-1]
            if elt.text.startswith('.'):
                continue
            shown = elt.text.partition(' ')[0]
            if shown != elt.text:
                elt.clear()
                elt.text = shown
            # the trailing slash is dropped from the key only
            key = shown[:-1] if shown.endswith('/') else shown
            file_store[key].append(where)
            if key not in formatting:
                # first sighting: this element becomes the display form
                formatting[key] = elt

    # --- constants ---
    constant_store = directive_groups['constants']
    for elt in doc.iterfind('.//constant'):
        if elt.attrib.get('noindex'):
            continue
        elt.tail = ''
        if elt.text.startswith('('):
            # a leading cast: keep what follows the first space
            elt.text = elt.text.partition(' ')[2]
        constant_store[elt.text].append(where)
        formatting[elt.text] = elt
def _extract_directives(directive_groups, formatting, page):
    """Record the directives documented on ``page`` into the given mappings.

    ``page`` is parsed with ``xml_parse``. For every directive found, the
    pair ``(pagename, section)`` is appended to
    ``directive_groups[group][text]``, and ``formatting[text]`` is set to
    the XML element used to display that directive. Both mappings are
    modified in place and nothing is returned.

    Groups used: 'options', 'filenames', 'constants', plus the ``class`` of
    each <variablelist> or 'miscellaneous' when it has none. Elements of the
    parsed tree are modified (text, tail, children) while they are collected.
    """
    doc = xml_parse(page)
    section = doc.find('./refmeta/manvolnum').text
    pagename = doc.find('./refmeta/refentrytitle').text
    where = (pagename, section)

    # --- variables and options listed in <variablelist> elements ---
    option_store = directive_groups['options']
    for varlist in doc.iterfind('.//variablelist'):
        klass = varlist.attrib.get('class')
        var_store = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        searches = (
            ('./varlistentry/term/varname', var_store),
            ('./varlistentry/term/option',
             var_store if klass else option_store),
        )
        for xpath, store in searches:
            for elt in varlist.iterfind(xpath):
                raw = elt.text
                # cut after the first '=' or space, then trim whitespace
                key = re.sub(r'([= ]).*', r'\1', raw).rstrip()
                store[key].append(where)
                if key in formatting:
                    continue
                # first sighting: this element becomes the display form
                if raw[-1] in '= ':
                    elt.clear()
                else:
                    elt.tail = ''
                elt.text = key
                formatting[key] = elt

    # --- file names and commands ---
    file_store = directive_groups['filenames']
    file_searches = (
        ('.//refsynopsisdiv//filename', False),
        ('.//refsynopsisdiv//command', False),
        ('.//filename', True),
    )
    for xpath, absolute_only in file_searches:
        for elt in doc.iterfind(xpath):
            if absolute_only and (not elt.text
                                  or not elt.text.startswith('/')):
                continue
            if elt.attrib.get('noindex'):
                continue
            elt.tail = ''
            if not elt.text:
                # no leading text: key on the nested text instead
                key = ' '.join(elt.itertext())
                file_store[key].append(where)
                formatting[key] = elt
                continue
            if elt.text.endswith('*'):
                elt.text = elt.text[:-1]
            if elt.text.startswith('.'):
                continue
            shown = elt.text.partition(' ')[0]
            if shown != elt.text:
                elt.clear()
                elt.text = shown
            # the trailing slash is dropped from the key only
            key = shown[:-1] if shown.endswith('/') else shown
            file_store[key].append(where)
            if key not in formatting:
                # first sighting: this element becomes the display form
                formatting[key] = elt

    # --- constants ---
    constant_store = directive_groups['constants']
    for elt in doc.iterfind('.//constant'):
        if elt.attrib.get('noindex'):
            continue
        elt.tail = ''
        if elt.text.startswith('('):
            # a leading cast: keep what follows the first space
            elt.text = elt.text.partition(' ')[2]
        constant_store[elt.text].append(where)
        formatting[elt.text] = elt