Code example #1
File: cljdocs.py Project: deflexor/ClojureDoc-Search
def parse_doc(url):
    """Parse the documentation section of a ClojureDocs page."""
    v = content_request(url)
    soup = BeautifulSoup(v, "html.parser")
    # Guard each find() separately: chaining them raises AttributeError
    # when the docstring div is missing from the page.
    docstring = soup.find("div", "docstring")
    stuff = docstring.find("pre") if docstring else None
    lines = []
    if not stuff:
        lines.append(" \nNo documentation available!\n".split("\n"))
        return lines
    lines.append(("Documentation: \n" + stuff.text).split("\n"))
    return lines
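A minimal usage sketch, assuming content_request (defined elsewhere in cljdocs.py) fetches the page HTML as a string; the URL below is illustrative only:

# Hypothetical call; each returned entry is a list of output lines.
for block in parse_doc("https://clojuredocs.org/clojure.core/map"):
    print("\n".join(block))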
Code example #2
File: cljdocs.py Project: deflexor/ClojureDoc-Search (identical in Foxboron/ClojureDoc-Search)
def parse_source(url):
    """Parse the source-code section of a ClojureDocs page."""
    v = content_request(url)
    soup = BeautifulSoup(v, "html.parser")
    stuff = soup.find("div", "source_content")
    # Guard the nested find() as well: the <pre> may be absent even
    # when the containing div exists.
    if stuff:
        stuff = stuff.find("pre", "brush: clojure")
    lines = []
    if not stuff:
        lines.append(" \nNo source code available!\n".split("\n"))
        return lines
    lines.append(("Source:        \n" + stuff.text).split("\n"))
    return lines
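A self-contained sketch of the lookup these functions rely on: Beautiful Soup treats a string second argument to find() as a CSS-class filter, and a string containing whitespace matches only the exact full value of the class attribute.

from bs4 import BeautifulSoup

html = ('<div class="source_content">'
        '<pre class="brush: clojure">(defn f [x] x)</pre></div>')
soup = BeautifulSoup(html, "html.parser")
# The second positional argument is shorthand for class_=...
pre = soup.find("div", "source_content").find("pre", "brush: clojure")
print(pre.text)  # => (defn f [x] x)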
Code example #3
import os
import sys
import traceback

from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry


def diagnose(data):
    """Diagnostic suite for isolating common problems."""
    print("Diagnostic running on Beautiful Soup %s" % __version__)
    print("Python version %s" % sys.version)

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    # Iterate over a copy: removing from the list being iterated
    # would silently skip the next entry.
    for name in list(basic_parsers):
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help." %
                name)

    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        from lxml import etree
        print("Found lxml version %s" % ".".join(map(str, etree.LXML_VERSION)))

    if 'html5lib' in basic_parsers:
        import html5lib
        print("Found html5lib version %s" % html5lib.__version__)

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print('"%s" looks like a filename. Reading data from the file.' % data)
        with open(data) as f:
            data = f.read()
    elif data.startswith("http:") or data.startswith("https:"):
        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
        print("You need to use some other library to get the document behind"
              " the URL, and feed that document to Beautiful Soup.")
        return
    print()

    for parser in basic_parsers:
        print("Trying to parse your markup with %s" % parser)
        success = False
        try:
            soup = BeautifulSoup(data, parser)
            success = True
        except Exception:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("Here's what %s did with the markup:" % parser)
            print(soup.prettify())

        print("-" * 80)
Code example #4
File: cljdocs.py Project: Foxboron/ClojureDoc-Search
def parse_doc(url):
    """Parse the documentation section of a ClojureDocs page."""
    v = content_request(url)
    soup = BeautifulSoup(v, "html.parser")
    doc = soup.find("div", "doc")
    stuff = doc.find("div", "content") if doc else None
    lines = []
    # Check for a missing section before touching it: the original
    # iterated over stuff.findAll("br") first, which raises
    # AttributeError when stuff is None.
    if not stuff:
        lines.append(" \nNo documentation available!\n".split("\n"))
        return lines
    for e in stuff.find_all("br"):
        e.replace_with("\n")
    lines.append(("Documentation: \n" + stuff.text).split("\n"))
    return lines
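The <br> handling is the interesting part: replace_with() swaps a tag for a plain string in the parse tree, so the line breaks survive the later .text extraction. A self-contained illustration:

from bs4 import BeautifulSoup

soup = BeautifulSoup("<div>line one<br>line two</div>", "html.parser")
for br in soup.find_all("br"):
    br.replace_with("\n")
print(soup.text)   # prints "line one" and "line two" on separate lines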
Code example #5
File: cljdocs.py Project: deflexor/ClojureDoc-Search (identical in Foxboron/ClojureDoc-Search)
def parse_example(url):
    """Parse the examples section of a ClojureDocs page."""
    v = content_request(url)
    soup = BeautifulSoup(v, "html.parser")
    stuff = soup.find_all("div", "hidden plain_content")
    lines = []
    if not stuff:
        lines.append(" \nNo examples available!\n".split("\n"))
        return lines
    # enumerate() replaces the hand-rolled counter from the original.
    for num, i in enumerate(stuff, start=1):
        ret = "Example #%d:    \n%s" % (num, i.text.rstrip("\n"))
        lines.append(ret.split("\n"))
    return lines
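Again a minimal, hypothetical call, assuming content_request returns the page HTML; the URL is illustrative:

# Prints each example block in turn.
for example in parse_example("https://clojuredocs.org/clojure.core/reduce"):
    print("\n".join(example))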
Code example #6
File: cljdocs.py Project: Foxboron/ClojureDoc-Search
def seealso_search(url):
    """Search the 'See Also...' section of a ClojureDocs page."""
    v = content_request(url)
    soup = BeautifulSoup(v, "html.parser")
    stuff = soup.body.find_all("li", "see_also_item")
    items = []
    sites = []
    for i in stuff:
        # new_parse() is defined elsewhere in cljdocs.py; it extracts
        # the item name and its URL from one <li>.
        item, web = new_parse(i)
        items.append(item)
        sites.append(web)
    return (items, sites)
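A hedged usage sketch: since each new_parse() call yields one (name, URL) pair, the two returned lists run in parallel and can be walked together. The URL here is illustrative only:

# Hypothetical call; iterate the parallel lists together.
items, sites = seealso_search("https://clojuredocs.org/clojure.core/map")
for item, site in zip(items, sites):
    print("%s -> %s" % (item, site))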
Code example #7
File: cljdocs.py Project: Foxboron/ClojureDoc-Search
def bs4_parse(var):
    """Parses out the individual search items."""
    var = request(var)
    soup = BeautifulSoup(var, "html.parser")
    stuff = soup.body.find_all("div", "search_result")
    items = []
    sites = []
    for i in stuff:
        # parse_list() is defined elsewhere in cljdocs.py; it extracts
        # the result title and its URL from one result div.
        item, web = parse_list(i)
        items.append(item)
        sites.append(web)
    return (items, sites)
Code example #8
import time
import traceback

from bs4 import BeautifulSoup, __version__
from bs4.diagnose import rdoc


def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception:
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b - a))