Beispiel #1
0
def parse(source,
          prefixes=None,
          model=None,
          encoding=None,
          use_xhtml_ns=False):
    '''
    Parse an input source with HTML text into an Amara Bindery tree

    source - wrapped with inputsource(source, None); the resulting stream is
             what the html5lib parser reads
    prefixes - when truthy, handed to set_namespaces together with the
               parsed document
    model - when truthy, model.clone is used as the entity factory instead
            of the module-level default, entity
    encoding - forwarded unchanged to the html5lib parser's parse() call
    use_xhtml_ns - boolean, whether or not to use
                   http://www.w3.org/1999/xhtml; only forwarded to the tree
                   builder when PRE_0_90 is false

    Returns the document produced by the html5lib parser.

    Warning: a string argument must be a byte string, not a Unicode object.
    If it is not obviously XML or HTML (for example it could be mistaken
    for a file name) consider wrapping it with amara.lib.inputsource.text
    '''
    from amara.lib.util import set_namespaces

    # Usage sketch: from amara.bindery import html; doc = html.parse(<url>)

    def pick_entity_factory():
        # Looked up only when a tree factory below is actually invoked
        if model:
            return model.clone
        return entity

    if PRE_0_90:
        # NOTE(review): PRE_0_90 presumably flags an html5lib release older
        # than 0.90 whose tree factory takes no arguments - confirm
        def build_tree():
            factory = pick_entity_factory()
            return treebuilder(factory)
    else:
        def build_tree(namespaceHTMLElements, use_xhtml_ns=use_xhtml_ns):
            # namespaceHTMLElements is accepted but not used by this factory
            factory = pick_entity_factory()
            return treebuilder(factory, use_xhtml_ns)

    html_parser = html5lib.HTMLParser(tree=build_tree)
    stream = inputsource(source, None).stream
    document = html_parser.parse(stream, encoding=encoding)
    if prefixes:
        set_namespaces(document, prefixes)
    return document
Beispiel #2
0
def parse(source,
          prefixes=None,
          model=None,
          encoding=None,
          use_xhtml_ns=False):
    '''
    Run html5lib's HTMLParser over source and return the resulting document

    source - passed through inputsource(source, None); its stream attribute
             is fed to the parser
    prefixes - if truthy, applied to the document with set_namespaces
    model - if truthy, model.clone becomes the entity factory given to
            treebuilder; otherwise the module-level entity is used
    encoding - passed as the encoding keyword of the parser's parse()
    use_xhtml_ns - boolean, whether or not to use
                   http://www.w3.org/1999/xhtml (ignored when PRE_0_90 is
                   true, because that branch does not forward it)
    '''
    from amara.lib.util import set_namespaces

    # Usage sketch: from amara.bindery import html; doc = html.parse(<url>)

    if PRE_0_90:

        def make_builder():
            # Zero-argument tree factory used when PRE_0_90 is set
            if model:
                chosen = model.clone
            else:
                chosen = entity
            return treebuilder(chosen)
    else:

        def make_builder(namespaceHTMLElements,
                         use_xhtml_ns=use_xhtml_ns):
            # The first argument is accepted but deliberately left unused;
            # use_xhtml_ns is captured from the enclosing call as a default
            if model:
                chosen = model.clone
            else:
                chosen = entity
            return treebuilder(chosen, use_xhtml_ns)

    html_parser = html5lib.HTMLParser(tree=make_builder)
    byte_stream = inputsource(source, None).stream
    result = html_parser.parse(byte_stream, encoding=encoding)
    if prefixes:
        set_namespaces(result, prefixes)
    return result
Beispiel #3
0
def parse(obj, uri=None, entity_factory=None, standalone=False, validate=False, prefixes=None, model=None):
    '''
    Parse obj with tree.parse and return the resulting document

    obj, uri - passed positionally to tree.parse
    entity_factory - factory handed to tree.parse; replaced by model.clone
                     when model is truthy, and by nodes.entity_base when
                     nothing truthy remains
    standalone, validate - forwarded unchanged to tree.parse
    prefixes - if truthy, applied to the document with set_namespaces
    model - if truthy, its clone attribute overrides entity_factory
    '''
    # A truthy model always wins over an explicitly supplied entity_factory
    factory = model.clone if model else entity_factory
    if not factory:
        factory = nodes.entity_base
    document = tree.parse(obj, uri, entity_factory=factory, standalone=standalone, validate=validate)
    if prefixes:
        set_namespaces(document, prefixes)
    return document
Beispiel #4
0
def parse(obj,
          uri=None,
          entity_factory=None,
          standalone=False,
          validate=False,
          prefixes=None,
          model=None):
    '''
    Build a document from obj via tree.parse, optionally setting namespaces

    obj, uri - given positionally to tree.parse
    entity_factory - entity factory for tree.parse; overridden by
                     model.clone when model is truthy, defaulting to
                     nodes.entity_base when the outcome is falsy
    standalone, validate - passed straight through to tree.parse
    prefixes - when truthy, set_namespaces(doc, prefixes) is called on the
               parsed document
    model - when truthy, supplies the entity factory through model.clone

    Returns the document object produced by tree.parse.
    '''
    chosen = entity_factory
    if model:
        # The model takes precedence over any explicit entity_factory
        chosen = model.clone
    chosen = chosen or nodes.entity_base

    options = dict(entity_factory=chosen,
                   standalone=standalone,
                   validate=validate)
    result = tree.parse(obj, uri, **options)
    if prefixes:
        set_namespaces(result, prefixes)
    return result
Beispiel #5
0
def parse(source, prefixes=None, model=None, encoding=None, use_xhtml_ns=False):
    '''
    Parse source with html5lib.HTMLParser, building the tree via treebuilder

    source - wrapped by inputsource(source, None); the parser reads its
             stream attribute
    prefixes - when truthy, passed with the document to set_namespaces
    model - when truthy, model.clone is the entity factory; otherwise the
            module-level entity is
    encoding - forwarded as the encoding keyword of the parser's parse()
    use_xhtml_ns - boolean, whether or not to use
                   http://www.w3.org/1999/xhtml; reaches treebuilder only
                   when PRE_0_90 is false

    Returns the parsed document.
    '''
    from amara.lib.util import set_namespaces

    # Usage sketch: from amara.bindery import html; doc = html.parse(<url>)

    def resolve_factory():
        # Shared by both tree-factory variants; evaluated on each invocation
        return model.clone if model else entity

    if not PRE_0_90:
        def tree_factory(namespaceHTMLElements, use_xhtml_ns=use_xhtml_ns):
            # namespaceHTMLElements is part of the expected call signature
            # but is not consulted here
            factory = resolve_factory()
            return treebuilder(factory, use_xhtml_ns)
    else:
        def tree_factory():
            factory = resolve_factory()
            return treebuilder(factory)

    html_parser = html5lib.HTMLParser(tree=tree_factory)
    input_stream = inputsource(source, None).stream
    parsed = html_parser.parse(input_stream, encoding=encoding)
    if prefixes:
        set_namespaces(parsed, prefixes)
    return parsed
Beispiel #6
0
def parse(source, prefixes=None, model=None, encoding=None, use_xhtml_ns=False):
    '''
    Parse an input source with HTML text into an Amara Bindery tree

    source - turned into a stream through inputsource(source, None)
    prefixes - if truthy, set on the parsed document via set_namespaces
    model - if truthy, model.clone replaces the default entity factory
            (the module-level entity)
    encoding - handed to the html5lib parser's parse() as encoding
    use_xhtml_ns - boolean, whether or not to use
                   http://www.w3.org/1999/xhtml; only used by the tree
                   factory selected when PRE_0_90 is false

    Warning: when passing a string, it has to be a byte string rather than
    a Unicode object.  Wrapping it with amara.lib.inputsource.text is
    advisable if it is not obviously XML or HTML (for example it could be
    confused with a file name)
    '''
    from amara.lib.util import set_namespaces

    # Usage sketch: from amara.bindery import html; doc = html.parse(<url>)

    def legacy_tree():
        # Variant taking no arguments, selected when PRE_0_90 is true
        if model:
            factory = model.clone
        else:
            factory = entity
        return treebuilder(factory)

    def namespaced_tree(namespaceHTMLElements, use_xhtml_ns=use_xhtml_ns):
        # Variant selected otherwise; namespaceHTMLElements is not used
        if model:
            factory = model.clone
        else:
            factory = entity
        return treebuilder(factory, use_xhtml_ns)

    tree_factory = legacy_tree if PRE_0_90 else namespaced_tree

    html_parser = html5lib.HTMLParser(tree=tree_factory)
    stream = inputsource(source, None).stream
    document = html_parser.parse(stream, encoding=encoding)
    if prefixes:
        set_namespaces(document, prefixes)
    return document