Ejemplos de fromFile en Python, ejemplos de anolislib.generator.fromFile en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: runtests.py Proyecto: gsnedders/anolis

        def testFunc(self, file_name=file_name):
            """Run anolis over one ``*.src.html`` fixture and check its output.

            The fixture ``<base>.src.html`` is parsed with ``generator.fromFile``,
            serialised with ``generator.toFile`` and compared with the contents
            of ``<base>.html``.  If ``<base>.options`` exists it is loaded as
            JSON and passed as keyword arguments to both generator calls.

            ``file_name`` is bound as a default argument so the value current
            at definition time is captured by this function.

            Any IOError raised while reading the fixture, generating the
            output or reading the expected result is reported through
            ``self.fail``.
            """
            suffix = ".src.html"
            assert file_name.endswith(suffix)
            base_path = file_name[:-len(suffix)]

            # Per-fixture options are optional: a missing/unreadable file
            # simply means "use the defaults".
            kwargs = {}
            try:
                with open(base_path + ".options", "r") as options_file:
                    kwargs = json.load(options_file)
            except IOError:
                pass

            # Extra processes from the options file are appended to the
            # defaults; listing a default process again is a fixture error.
            default_processes = ["filter", "sub", "toc", "xref", "annotate"]
            new_processes = kwargs.get("processes", [])
            assert not set(default_processes) & set(new_processes)
            kwargs["processes"] = default_processes + new_processes

            # Sort attributes alphabetically by default.
            kwargs["alphabetical_attributes"] = True

            try:
                output = StringIO.StringIO()

                # Get the input.  The context manager closes the file even
                # when fromFile raises.
                with open(file_name, "rb") as source_file:
                    tree = generator.fromFile(source_file, **kwargs)

                # Get the output
                generator.toFile(tree, output, **kwargs)

                # Get the expected result
                with open(base_path + ".html", "rb") as expected_file:
                    expected = expected_file.read()

                # Run the test
                self.assertEqual(output.getvalue(), expected)
            except IOError as err:
                self.fail(err)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: runtests.py Proyecto: benschwarz/html5forAuthors

        def testFunc(self, file_name=file_name):
            """Run anolis over one fixture and compare with the expected HTML.

            ``file_name`` is parsed with ``generator.fromFile`` and serialised
            with ``generator.toFile``; the result must equal the contents of
            the sibling file obtained by replacing the trailing ``.src.html``
            with ``.html``.

            ``file_name`` is bound as a default argument so the value current
            at definition time is captured by this function.

            Any IOError raised along the way is reported through ``self.fail``.
            """
            # NOTE(review): assumes file_name ends with ".src.html"; the
            # original sliced off a hard-coded 9 characters without checking.
            suffix = ".src.html"
            try:
                # Get the input; closed even if fromFile raises.
                with open(file_name, "rb") as source_file:
                    tree = generator.fromFile(source_file)

                output = StringIO.StringIO()
                try:
                    # Get the output
                    generator.toFile(tree, output)

                    # Get the expected result and run the test.  Both files
                    # are released even when the assertion fails.
                    expected_name = file_name[:-len(suffix)] + ".html"
                    with open(expected_name, "rb") as expected:
                        self.assertEqual(output.getvalue(), expected.read())
                finally:
                    output.close()
            except IOError as err:
                self.fail(err)

Ejemplo n.º 3

0

Mostrar archivo

  'space_before_trailing_solidus': False,
  'strip_whitespace': None,
  'use_best_quote_char': False,
  'use_trailing_solidus': False,
  'w3c_compat_class_toc': False,
  'w3c_compat_crazy_substitutions': False,
  'w3c_compat_substitutions': False,
  'w3c_compat': True,
  'w3c_compat_xref_a_placement': False,
  'w3c_compat_xref_elements': False,
  'w3c_compat_xref_normalization': False,
}

# Run the filtered source through anolis.  The single-argument print is
# parenthesised so the statement reads the same on Python 2 and 3.
print('indexing')
filtered.seek(0)
tree = generator.fromFile(filtered, **opts)
filtered.close()

# Make sure the per-spec output directory exists.  Only a failure that still
# leaves the directory present (i.e. it was already there) is ignored;
# anything else (permissions, a file in the way, ...) is re-raised instead
# of being silently swallowed.
try:
  os.makedirs('output/%s' % spec)
except OSError:
  if not os.path.isdir('output/%s' % spec):
    raise

if spec == 'html':
  # Remove previously generated pages before writing the new single page.
  from glob import glob
  for name in glob('output/html/*.html'):
    os.remove(name)

  output = open('output/html/single-page.html', 'wb')
else:
  output = open('output/%s/Overview.html' % spec, 'wb')

Ejemplo n.º 4

0

Mostrar archivo

def main(spec, spec_dir, branch="master"):
    """Generate the published output for one spec.

    Steps, in order:

    1. Look up ``spec`` in ``config.load_config()`` and change into the
       configured ``path``.
    2. Pipe the file named ``source`` through ``parser_microsyntax``, ``bs``
       and ``boilerplate``.
    3. Run the result through anolis (``generator.fromFile``), with optional
       spec-specific tree rearrangements before and after.
    4. Write ``Overview.html`` into ``spec_dir``; for the ``html`` spec write
       ``single-page.html`` instead, then split it and emit
       ``entities.json``.
    5. Copy static dependencies into ``spec_dir`` and, for ``html``, patch
       the stylesheet link in ``404.html``.

    Args:
        spec: Key of the spec in the loaded configuration.
        spec_dir: Output directory.  When falsy, it defaults to
            ``os.path.join(conf["output"], spec)``.
        branch: Passed through to ``boilerplate.main``.

    Raises:
        SystemExit: with status 1 when no output directory can be determined
            or when ``boilerplate.main`` raises IOError.
        OSError: when ``spec_dir`` cannot be created.
    """
    conf = None
    try:
        conf = config.load_config()[spec]
    except KeyError:
        # NOTE(review): presumably invoked_incorrectly() reports usage and
        # exits; if it ever returns, conf is still None and the lookups
        # below fail -- confirm against its definition.
        invoked_incorrectly()

    if 'select' in conf:
        select = conf['select']
    else:
        select = spec

    try:
        if not spec_dir:
            spec_dir = os.path.join(conf["output"], spec)
    except KeyError:
        sys.stderr.write("error: Must specify output directory for %s! \
Check default-config.json.\n" % spec)
        # sys.exit does not depend on the site-provided exit() helper, and a
        # non-zero status lets callers detect the failure.
        sys.exit(1)

    # Remember where this script lives before leaving for the spec checkout.
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    os.chdir(conf["path"])

    print("parsing")
    after_microsyntax = StringIO()
    # NOTE(review): assumes parser_microsyntax.main has fully consumed
    # `source` by the time it returns, so the file can be closed here.
    with open('source') as source:
        parser_microsyntax.main(source, after_microsyntax)
    after_microsyntax.seek(0)
    succint = StringIO()
    bs.main(after_microsyntax, succint)

    succint.seek(0)
    filtered = StringIO()
    try:
        boilerplate.main(succint, filtered, select, branch)
    except IOError:
        sys.stderr.write("error: Problem loading boilerplate for %s. \
Are you on the correct branch?\n" % spec)
        sys.exit(1)
    succint.close()

    # See http://hg.gsnedders.com/anolis/file/tip/anolis
    opts = {
      'allow_duplicate_dfns': True,
      'disable': None,
      'escape_lt_in_attrs': False,
      'escape_rcdata': False,
      'force_html4_id': False,
      'indent_char': u' ',
      'inject_meta_charset': False,
      'max_depth': 6,
      'min_depth': 2,
      'minimize_boolean_attributes': False,
      'newline_char': u'\n',
      'omit_optional_tags': False,
      'output_encoding': 'utf-8',
      'parser': 'html5lib',
      'processes': set(['toc', 'xref', 'sub']),
      'profile': False,
      'quote_attr_values': True,
      'serializer': 'html5lib',
      'space_before_trailing_solidus': False,
      'strip_whitespace': None,
      'use_best_quote_char': False,
      'use_trailing_solidus': False,
      'w3c_compat_class_toc': False,
      'w3c_compat_crazy_substitutions': False,
      'w3c_compat_substitutions': False,
      'w3c_compat': True,
      'w3c_compat_xref_a_placement': False,
      'w3c_compat_xref_elements': False,
      'w3c_compat_xref_normalization': False,
    }
    # Per-spec configuration overrides the defaults above.
    if "anolis" in conf:
        opts.update(conf["anolis"])

    if spec == "srcset":
        import html5lib

        print('munging (before anolis)')

        filtered.seek(0)
        pre_anolis_buffer = StringIO()

        # Parse
        parser = html5lib.html5parser.HTMLParser(tree = html5lib.treebuilders.getTreeBuilder('lxml'))
        tree = parser.parse(filtered, encoding='utf-8')

        # Move introduction above conformance requirements
        introduction = tree.findall("//*[@id='introduction']")[0]
        intro_ps = introduction.xpath("following-sibling::*")
        target = tree.findall("//*[@id='conformance-requirements']")[0]
        target.addprevious(introduction)
        target = introduction
        # addnext inserts directly after the target, so the three siblings
        # are added in reverse to end up in their original order.
        target.addnext(intro_ps[2])
        target.addnext(intro_ps[1])
        target.addnext(intro_ps[0])

        # Serialize
        tokens = html5lib.treewalkers.getTreeWalker('lxml')(tree)
        serializer = html5lib.serializer.HTMLSerializer(quote_attr_values=True, inject_meta_charset=False)
        for text in serializer.serialize(tokens, encoding='utf-8'):
            pre_anolis_buffer.write(text)

        filtered = pre_anolis_buffer

    print('indexing')
    filtered.seek(0)
    tree = generator.fromFile(filtered, **opts)
    filtered.close()

    # fixup nested dd's and dt's produced by lxml: a nested element with
    # children or non-blank text is moved after its parent, an empty one is
    # dropped.  lxml reports .text as None when an element has no leading
    # text, so it is checked before calling strip().
    for tag in ('dd', 'dt'):
        for nested in tree.findall('//%s/%s' % (tag, tag)):
            if list(nested) or (nested.text and nested.text.strip()):
                nested.getparent().addnext(nested)
            else:
                nested.getparent().remove(nested)

    if spec == "microdata":
        print('munging')
        import lxml
        # get the h3 for the misplaced section (it has no container)
        section = tree.xpath("//h3[@id = 'htmlpropertiescollection']")[0]
        # then get all of its following siblings that have the h2 for the next section as
        # a following sibling themselves. Yeah, XPath doesn't suck.
        section_content = section.xpath("following-sibling::*[following-sibling::h2[@id='introduction']]")
        target = tree.xpath("//h2[@id = 'converting-html-to-other-formats']")[0].getparent()
        target.addprevious(section)
        for el in section_content:
            target.addprevious(el)
        section.xpath("span")[0].text = "6.1 "
        # move the toc as well
        link = tree.xpath("//ol[@class='toc']//a[@href='#htmlpropertiescollection']")[0]
        link.xpath("span")[0].text = "6.1 "
        tree.xpath("//ol[@class='toc']/li[a[@href='#microdata-dom-api']]")[0].append(link.getparent().getparent())

    if spec == "srcset":
        print('munging (after anolis)')
        # In the WHATWG spec, srcset="" is simply an aspect of
        # HTMLImageElement and not a separate feature. In order to keep
        # the HTML WG's srcset="" spec organized, we have to move some
        # things around in the final document.

        # Move "The srcset IDL attribute must reflect..."
        reflect_the_content_attribute = tree.findall("//div[@class='impl']")[0]
        target = tree.find("//div[@class='note']")
        target.addprevious(reflect_the_content_attribute)

        # Move "The IDL attribute complete must return true..."
        note_about_complete = tree.findall("//p[@class='note']")[5]
        p_otherwise = note_about_complete.xpath("preceding-sibling::p[position()=1]")[0]
        ul_conditions = p_otherwise.xpath("preceding-sibling::ul[position()=1]")[0]
        p_start = ul_conditions.xpath("preceding-sibling::p[position()=1]")[0]
        # Added in reverse so they follow the target in document order.
        target.addnext(note_about_complete)
        target.addnext(p_otherwise)
        target.addnext(ul_conditions)
        target.addnext(p_start)

    # Create the output directory.  An already-existing directory is fine;
    # any other failure is re-raised rather than silently ignored.
    try:
        os.makedirs(spec_dir)
    except OSError:
        if not os.path.isdir(spec_dir):
            raise

    if spec == 'html':
        print('cleaning')
        from glob import glob
        for name in glob("%s/*.html" % spec_dir):
            os.remove(name)

        # Buffer in memory: the serialised page is post-processed below
        # before it is written to disk.
        output = StringIO()
    else:
        output = open("%s/Overview.html" % spec_dir, 'wb')

    generator.toFile(tree, output, **opts)

    if spec != 'html':
        output.close()
    else:
        value = output.getvalue()
        if "<!--INTERFACES-->\n" in value:
            print('interfaces')
            from interface_index import interface_index
            output.seek(0)
            index = StringIO()
            interface_index(output, index)
            # Only the first placeholder is replaced by the generated index.
            value = value.replace("<!--INTERFACES-->\n", index.getvalue(), 1)
            index.close()
        output = open("%s/single-page.html" % spec_dir, 'wb')
        output.write(value)
        output.close()
        # Drop the reference to the (potentially large) page text.
        value = ''

        print('splitting')
        import spec_splitter
        spec_splitter.w3c = True
        spec_splitter.no_split_exceptions = conf.get("no_split_exceptions", False)
        spec_splitter.minimal_split_exceptions = conf.get("minimal_split_exceptions", False)
        spec_splitter.main("%s/single-page.html" % spec_dir, spec_dir)

        print('entities')
        from entity_processor_json import entity_processor_json
        # Nested context managers close both files even if the processor
        # raises; the handle is no longer called `json`, which shadowed the
        # conventional module name.
        with open(os.path.join(cur_dir, "boilerplate/entities.inc")) as entities:
            with open("%s/entities.json" % spec_dir, 'w') as entities_json:
                entity_processor_json(entities, entities_json)

    # copying dependencies
    def copy_dependencies (targets):
        # Accept either a single target or a list of targets.
        if not isinstance(targets, list):
            targets = [targets]
        for target in targets:
            # NOTE(review): the command is built by string interpolation and
            # run through an interactive csh.  The "404/*" target relies on
            # shell glob expansion, so this is left as-is; paths containing
            # spaces or shell metacharacters would break or be interpreted.
            os.system("/bin/csh -i -c '/bin/cp -R %s %s'" % (os.path.join(conf["path"], target), spec_dir))

    print("copying")
    if spec == "html":
        copy_dependencies(["images", "fonts", "404/*", "switcher", "js"])
    elif spec == "2dcontext":
        copy_dependencies(["images", "fonts"])
    else:
        copy_dependencies("fonts")

    # fix the styling of the 404: point it at the same W3C stylesheet the
    # generated spec links to.
    if spec == "html":
        link = tree.xpath("//link[starts-with(@href, 'http://www.w3.org/StyleSheets/TR/')]")[0].get("href")
        path = os.path.join(spec_dir, "404.html")
        with open(path) as data:
            html404 = data.read()
        html404 = html404.replace("http://www.w3.org/StyleSheets/TR/W3C-ED", link)
        with open(path, "w") as data:
            data.write(html404)