Example #1
0
def search():
	"""Run the query from the JSON request body through both scrapers
	and return the combined results, each keyed by a fresh UUID.

	Returns:
		A JSON response with "results" (uuid -> result mapping),
		"amount" (number of results) and "keys" (list of the uuids).
	"""
	query = request.json["query"]  # get search term
	result1 = bs.main(query)
	result2 = bs2.main(query)
	# Either scraper may return a falsy value; treat that as "no results".
	results = (result1 or []) + (result2 or [])
	# BUG FIX: the original looped over range(len(results) - 1), which
	# silently dropped the last result. Key every result by a fresh UUID.
	search_results = {str(uuid.uuid4()): item for item in results}
	amount = len(search_results)
	return jsonify({"results": search_results, "amount": amount, "keys": list(search_results.keys())})
Example #2
0
import conf_loader
from bs import main

# by default we connect to localhost:9200
# NOTE(review): Elasticsearch is not imported in this excerpt — presumably
# imported earlier in the file; verify.
es = Elasticsearch()


def save_es(index, doc_type, profiles):
    """Store profiles in the local Elasticsearch instance, then read them
    back and print each stored document's name.

    Args:
        index (str): Elasticsearch index name.
        doc_type (str): Elasticsearch document type.
        profiles (iterable): JSON documents to store, as a list of dicts.
    """
    # Use the position in the input as the document id (re-indexing the
    # same position replaces the document rather than duplicating it).
    # Renamed from `id`, which shadowed the builtin.
    for doc_id, profile in enumerate(profiles):
        es.index(index=index,
                 doc_type=doc_type,
                 id=doc_id,
                 body=profile)

    # Read everything back to confirm the writes.
    res = es.search(index=index, body={"query": {"match_all": {}}})
    for profile in res["hits"]["hits"]:
        print(profile["_source"]["name"])


if __name__ == "__main__":
    # Scrape profiles via bs.main() and index them with the index/doc_type
    # configured in conf_loader.
    profiles = main()
    save_es(index=conf_loader.index, doc_type=conf_loader.doc_type, profiles=profiles)

Example #3
0
from anolislib import generator, utils

# Python 2 build script: pick which spec to build from the command line.
# NOTE(review): sys, os, StringIO, bs and boilerplate are not imported in
# this excerpt — presumably imported earlier in the file; verify.
if len(sys.argv)>1 and sys.argv[1] == 'html':
  select = 'w3c-html' 
  spec = 'html'
elif len(sys.argv)>1 and sys.argv[1] == '2dcontext':
  spec = select = '2dcontext'
else:
  sys.stderr.write("Usage: python %s [html|2dcontext]\n" % sys.argv[0])
  exit()

# Work from two directories above this script, then run the raw 'source'
# file through the bs filter into an in-memory buffer.
print 'parsing'
os.chdir(os.path.abspath(os.path.join(__file__, '../..')))
source = open('source')
succint = StringIO()
bs.main(source, succint)

# Apply the boilerplate for the selected spec variant.
succint.seek(0)
filtered = StringIO()
boilerplate.main(succint, filtered, select)
succint.close()

# Anolis generator options.
# See http://hg.gsnedders.com/anolis/file/tip/anolis
opts = {
  'allow_duplicate_dfns': True,
  'disable': None,
  'escape_lt_in_attrs': False,
  'escape_rcdata': False,
  'force_html4_id': False,
  'indent_char': u' ',
  'inject_meta_charset': False,
  # NOTE(review): this dict literal is truncated in this excerpt.
Example #4
0
def main(spec, spec_dir, branch="master"):
    """Build the W3C edition of one spec with Anolis (Python 2 script).

    Args:
        spec: config key naming the spec to build ("html", "2dcontext",
            "microdata", "srcset", ...).
        spec_dir: output directory; when falsy it is derived from
            conf["output"] and the spec name.
        branch: boilerplate branch passed to boilerplate.main().

    Side effects: chdirs into conf["path"], writes the generated HTML into
    spec_dir (for "html": single-page.html plus split pages and
    entities.json), and copies static assets into spec_dir.
    """
    conf = None
    try:
        conf = config.load_config()[spec]
    except KeyError:
        invoked_incorrectly()

    # "select" chooses the boilerplate variant; defaults to the spec name.
    if 'select' in conf:
        select = conf['select']
    else:
        select = spec

    try:
        if not spec_dir:
            spec_dir = os.path.join(conf["output"], spec)
    except KeyError:
        sys.stderr.write("error: Must specify output directory for %s! \
Check default-config.json.\n" % spec)
        exit()

    # Remember where this script lives, then work from the spec's source tree.
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    os.chdir(conf["path"])

    # Pipeline: source -> micro-syntax parser -> bs filter, all in memory.
    print "parsing"
    source = open('source')
    after_microsyntax = StringIO()
    parser_microsyntax.main(source, after_microsyntax)
    after_microsyntax.seek(0)
    succint = StringIO()
    bs.main(after_microsyntax, succint)

    # Wrap the filtered source in the spec's boilerplate.
    succint.seek(0)
    filtered = StringIO()
    try:
        boilerplate.main(succint, filtered, select, branch)
    except IOError:
        sys.stderr.write("error: Problem loading boilerplate for %s. \
Are you on the correct branch?\n" % spec)
        exit()
    succint.close()

    # Anolis generator options.
    # See http://hg.gsnedders.com/anolis/file/tip/anolis
    opts = {
      'allow_duplicate_dfns': True,
      'disable': None,
      'escape_lt_in_attrs': False,
      'escape_rcdata': False,
      'force_html4_id': False,
      'indent_char': u' ',
      'inject_meta_charset': False,
      'max_depth': 6,
      'min_depth': 2,
      'minimize_boolean_attributes': False,
      'newline_char': u'\n',
      'omit_optional_tags': False,
      'output_encoding': 'utf-8',
      'parser': 'html5lib',
      'processes': set(['toc', 'xref', 'sub']),
      'profile': False,
      'quote_attr_values': True,
      'serializer': 'html5lib',
      'space_before_trailing_solidus': False,
      'strip_whitespace': None,
      'use_best_quote_char': False,
      'use_trailing_solidus': False,
      'w3c_compat_class_toc': False,
      'w3c_compat_crazy_substitutions': False,
      'w3c_compat_substitutions': False,
      'w3c_compat': True,
      'w3c_compat_xref_a_placement': False,
      'w3c_compat_xref_elements': False,
      'w3c_compat_xref_normalization': False,
    }
    # Per-spec overrides from the config win over the defaults above.
    if "anolis" in conf:
        opts.update(conf["anolis"])

    # srcset needs its introduction reordered before Anolis runs.
    if spec == "srcset":
        import html5lib

        print 'munging (before anolis)'

        filtered.seek(0)
        pre_anolis_buffer = StringIO()

        # Parse
        parser = html5lib.html5parser.HTMLParser(tree = html5lib.treebuilders.getTreeBuilder('lxml'))
        tree = parser.parse(filtered, encoding='utf-8')

        # Move introduction above conformance requirements.
        # The three addnext calls are applied in reverse so the paragraphs
        # end up in their original order after the moved introduction.
        introduction = tree.findall("//*[@id='introduction']")[0]
        intro_ps = introduction.xpath("following-sibling::*")
        target = tree.findall("//*[@id='conformance-requirements']")[0]
        target.addprevious(introduction)
        target = introduction
        target.addnext(intro_ps[2])
        target.addnext(intro_ps[1])
        target.addnext(intro_ps[0])

        # Serialize
        tokens = html5lib.treewalkers.getTreeWalker('lxml')(tree)
        serializer = html5lib.serializer.HTMLSerializer(quote_attr_values=True, inject_meta_charset=False)
        for text in serializer.serialize(tokens, encoding='utf-8'):
            pre_anolis_buffer.write(text)

        filtered = pre_anolis_buffer

    # Run Anolis (toc/xref/sub processing) over the document.
    print 'indexing'
    filtered.seek(0)
    tree = generator.fromFile(filtered, **opts)
    filtered.close()

    # fixup nested dd's and dt's produced by lxml:
    # hoist non-empty nested elements next to their parent, drop empty ones.
    for dd in tree.findall('//dd/dd'):
        if list(dd) or dd.text.strip():
            dd.getparent().addnext(dd)
        else:
            dd.getparent().remove(dd)
    for dt in tree.findall('//dt/dt'):
        if list(dt) or dt.text.strip():
            dt.getparent().addnext(dt)
        else:
            dt.getparent().remove(dt)

    # microdata: relocate the HTMLPropertiesCollection section (and its
    # table-of-contents entry) to its proper place.
    if spec == "microdata":
        print 'munging'
        import lxml
        # get the h3 for the misplaced section (it has no container)
        section = tree.xpath("//h3[@id = 'htmlpropertiescollection']")[0]
        # then get all of its following siblings that have the h2 for the next section as
        # a following sibling themselves. Yeah, XPath doesn't suck.
        section_content = section.xpath("following-sibling::*[following-sibling::h2[@id='introduction']]")
        target = tree.xpath("//h2[@id = 'converting-html-to-other-formats']")[0].getparent()
        target.addprevious(section)
        for el in section_content: target.addprevious(el)
        section.xpath("span")[0].text = "6.1 "
        # move the toc as well
        link = tree.xpath("//ol[@class='toc']//a[@href='#htmlpropertiescollection']")[0]
        link.xpath("span")[0].text = "6.1 "
        tree.xpath("//ol[@class='toc']/li[a[@href='#microdata-dom-api']]")[0].append(link.getparent().getparent())

    if spec == "srcset":
        print 'munging (after anolis)'
        # In the WHATWG spec, srcset="" is simply an aspect of
        # HTMLImageElement and not a separate feature. In order to keep
        # the HTML WG's srcset="" spec organized, we have to move some
        # things around in the final document.

        # Move "The srcset IDL attribute must reflect..."
        reflect_the_content_attribute = tree.findall("//div[@class='impl']")[0]
        target = tree.find("//div[@class='note']")
        target.addprevious(reflect_the_content_attribute)

        # Move "The IDL attribute complete must return true..."
        # NOTE(review): the [5] index is positional and fragile — it breaks
        # if the number of p.note elements before this one changes.
        note_about_complete = tree.findall("//p[@class='note']")[5]
        p_otherwise = note_about_complete.xpath("preceding-sibling::p[position()=1]")[0]
        ul_conditions = p_otherwise.xpath("preceding-sibling::ul[position()=1]")[0]
        p_start = ul_conditions.xpath("preceding-sibling::p[position()=1]")[0]
        target.addnext(note_about_complete)
        target.addnext(p_otherwise)
        target.addnext(ul_conditions)
        target.addnext(p_start)

    # Best-effort create of the output directory.
    # NOTE(review): bare except also hides failures other than
    # "directory already exists" (e.g. permissions).
    try:
        os.makedirs(spec_dir)
    except:
        pass

    # html is post-processed in memory; everything else writes straight out.
    if spec == 'html':
        print 'cleaning'
        from glob import glob
        for name in glob("%s/*.html" % spec_dir):
            os.remove(name)

        output = StringIO()
    else:
        output = open("%s/Overview.html" % spec_dir, 'wb')

    generator.toFile(tree, output, **opts)

    if spec != 'html':
        output.close()
    else:
        value = output.getvalue()
        # Substitute the generated interface index for the marker, if present.
        if "<!--INTERFACES-->\n" in value:
            print 'interfaces'
            from interface_index import interface_index
            output.seek(0)
            index = StringIO()
            interface_index(output, index)
            value = value.replace("<!--INTERFACES-->\n", index.getvalue(), 1)
            index.close()
        output = open("%s/single-page.html" % spec_dir, 'wb')
        output.write(value)
        output.close()
        value = ''

        # Split the single-page build into per-section pages.
        print 'splitting'
        import spec_splitter
        spec_splitter.w3c = True
        spec_splitter.no_split_exceptions = conf.get("no_split_exceptions", False)
        spec_splitter.minimal_split_exceptions = conf.get("minimal_split_exceptions", False)
        spec_splitter.main("%s/single-page.html" % spec_dir, spec_dir)

        # Emit the named-character-references table as JSON.
        print 'entities'
        entities = open(os.path.join(cur_dir, "boilerplate/entities.inc"))
        json = open("%s/entities.json" % spec_dir, 'w')
        from entity_processor_json import entity_processor_json
        entity_processor_json(entities, json)
        entities.close()
        json.close()

    # copying dependencies
    def copy_dependencies (targets):
        # Copy each target (dir or glob) from the spec source tree into
        # spec_dir. Unix-only: shells out to csh/cp.
        import types
        if not isinstance(targets, types.ListType): targets = [targets]
        for target in targets:
            os.system("/bin/csh -i -c '/bin/cp -R %s %s'" % (os.path.join(conf["path"], target), spec_dir))

    print "copying"
    if spec == "html":
        copy_dependencies(["images", "fonts", "404/*", "switcher", "js"])
    elif spec == "2dcontext":
        copy_dependencies(["images", "fonts"])
    else:
        copy_dependencies("fonts")

    # fix the styling of the 404: point it at the same W3C stylesheet
    # variant the generated spec links to.
    if spec == "html":
        link = tree.xpath("//link[starts-with(@href, 'http://www.w3.org/StyleSheets/TR/')]")[0].get("href")
        path = os.path.join(spec_dir, "404.html")
        with open(path) as data: html404 = data.read()
        html404 = html404.replace("http://www.w3.org/StyleSheets/TR/W3C-ED", link)
        with open(path, "w") as data: data.write(html404)
Example #5
0
def main(spec, spec_dir, branch="master"):
    """Build the W3C edition of one spec with Anolis (Python 2 script).

    A later revision of the builder: compared with the sibling version it
    also supports conf["bareOutput"], injects a boilerplate marker into the
    microdata source, rewrites data-x attributes to data-anolis-xref,
    prunes unused references, and can copy dependencies on Windows.

    Args:
        spec: config key naming the spec to build ("html", "2dcontext",
            "microdata", "srcset", ...).
        spec_dir: output directory; when falsy it is derived from the config.
        branch: boilerplate branch passed to boilerplate.main().

    Side effects: chdirs into conf["path"], writes the generated HTML into
    spec_dir (for "html": single-page.html plus split pages and
    entities.json), and copies static assets into spec_dir.
    """
    conf = None
    try:
        conf = config.load_config()[spec]
    except KeyError:
        invoked_incorrectly()

    # "select" chooses the boilerplate variant; defaults to the spec name.
    if "select" in conf:
        select = conf["select"]
    else:
        select = spec

    try:
        if not spec_dir:
            # bareOutput means "write directly into conf['output']" instead
            # of a per-spec subdirectory.
            if conf.get("bareOutput", False):
                spec_dir = conf["output"]
            else:
                spec_dir = os.path.join(conf["output"], spec)
    except KeyError:
        sys.stderr.write(
            "error: Must specify output directory for %s! \
Check default-config.json.\n"
            % spec
        )
        exit()

    # Remember where this script lives, then work from the spec's source tree.
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    os.chdir(conf["path"])

    # Pipeline: source -> micro-syntax parser -> bs filter, all in memory.
    print "parsing"
    source = open("source")
    after_microsyntax = StringIO()
    parser_microsyntax.main(source, after_microsyntax)
    after_microsyntax.seek(0)
    succint = StringIO()
    bs.main(after_microsyntax, succint)

    succint.seek(0)
    filtered = StringIO()
    # microdata: splice a boilerplate marker in front of the IANA section
    # so boilerplate.main() can expand an extra section there.
    if spec == "microdata":
        md_content = succint.read()
        md_content = re.sub(
            '<h2 id="iana">IANA considerations</h2>',
            '<!--BOILERPLATE microdata-extra-section--><h2 id="iana">IANA considerations</h2>',
            md_content,
        )
        succint = StringIO()
        succint.write(md_content)
        succint.seek(0)

    # Wrap the filtered source in the spec's boilerplate.
    try:
        boilerplate.main(succint, filtered, select, branch)
    except IOError:
        sys.stderr.write(
            "error: Problem loading boilerplate for %s. \
Are you on the correct branch?\n"
            % spec
        )
        exit()
    succint.close()

    # Anolis generator options.
    # See http://hg.gsnedders.com/anolis/file/tip/anolis
    opts = {
        "allow_duplicate_dfns": True,
        "disable": None,
        "escape_lt_in_attrs": False,
        "escape_rcdata": False,
        "force_html4_id": False,
        "indent_char": u" ",
        "inject_meta_charset": False,
        "max_depth": 6,
        "min_depth": 2,
        "minimize_boolean_attributes": False,
        "newline_char": u"\n",
        "omit_optional_tags": False,
        "output_encoding": "utf-8",
        "parser": "html5lib",
        "processes": set(["toc", "xref", "sub"]),
        "profile": False,
        "quote_attr_values": True,
        "serializer": "html5lib",
        "space_before_trailing_solidus": False,
        "strip_whitespace": None,
        "use_best_quote_char": False,
        "use_trailing_solidus": False,
        "w3c_compat_class_toc": False,
        "w3c_compat_crazy_substitutions": False,
        "w3c_compat_substitutions": False,
        "w3c_compat": True,
        "w3c_compat_xref_a_placement": False,
        "w3c_compat_xref_elements": False,
        "w3c_compat_xref_normalization": False,
    }
    # Per-spec overrides from the config win over the defaults above.
    if "anolis" in conf:
        opts.update(conf["anolis"])

    # srcset needs its introduction reordered before Anolis runs.
    if spec == "srcset":
        print "munging (before anolis)"

        filtered.seek(0)
        pre_anolis_buffer = StringIO()

        # Parse
        parser = html5lib.html5parser.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("lxml"))
        tree = parser.parse(filtered, encoding="utf-8")

        # Move introduction above conformance requirements.
        # The three addnext calls are applied in reverse so the paragraphs
        # end up in their original order after the moved introduction.
        introduction = tree.findall("//*[@id='introduction']")[0]
        intro_ps = introduction.xpath("following-sibling::*")
        target = tree.findall("//*[@id='conformance-requirements']")[0]
        target.addprevious(introduction)
        target = introduction
        target.addnext(intro_ps[2])
        target.addnext(intro_ps[1])
        target.addnext(intro_ps[0])

        # Serialize
        tokens = html5lib.treewalkers.getTreeWalker("lxml")(tree)
        serializer = html5lib.serializer.HTMLSerializer(quote_attr_values=True, inject_meta_charset=False)
        for text in serializer.serialize(tokens, encoding="utf-8"):
            pre_anolis_buffer.write(text)

        filtered = pre_anolis_buffer

    # replace data-x with data-anolis-xref
    print "fixing xrefs"
    filtered.seek(0)

    # Parse
    builder = treebuilders.getTreeBuilder("lxml", etree)
    # Older html5lib versions do not accept namespaceHTMLElements.
    try:
        parser = html5lib.HTMLParser(tree=builder, namespaceHTMLElements=False)
    except TypeError:
        parser = html5lib.HTMLParser(tree=builder)
    tree = parser.parse(filtered, encoding="utf-8")

    # Rewrite WHATWG data-x attributes into Anolis data-anolis-xref ones,
    # generating ids for dfn elements that lack them.
    data_x = tree.findall("//*[@data-x]")
    # NOTE(review): non_alphanumeric_spaces appears unused within this
    # function — possibly consumed by generateID elsewhere; verify.
    non_alphanumeric_spaces = re.compile(r"[^a-zA-Z0-9 \-\_\/\|]+")
    for refel in data_x:
        refel.attrib["data-anolis-xref"] = refel.get("data-x")
        if refel.tag == "dfn" and not refel.get("id", False) and refel.attrib["data-anolis-xref"]:
            refel.attrib["id"] = generateID(refel.attrib["data-anolis-xref"], refel)
        del refel.attrib["data-x"]
    # utils.ids = {}

    # Run Anolis (toc/xref/sub processing) over the already-parsed tree.
    print "indexing"
    # filtered.seek(0)
    # tree = generator.fromFile(filtered, **opts)
    generator.process(tree, **opts)
    filtered.close()

    # fixup nested dd's and dt's produced by lxml:
    # hoist non-empty nested elements next to their parent, drop empty ones.
    for dd in tree.findall("//dd/dd"):
        if list(dd) or dd.text.strip():
            dd.getparent().addnext(dd)
        else:
            dd.getparent().remove(dd)
    for dt in tree.findall("//dt/dt"):
        if list(dt) or dt.text.strip():
            dt.getparent().addnext(dt)
        else:
            dt.getparent().remove(dt)

    # remove unused references
    # For "refs*" entries: drop dt/dd pairs nobody links to; for the rest,
    # append each reference link's URL in parentheses after the link.
    # NOTE(review): the loop variable `next` shadows the builtin next().
    print "processing references"
    for dt in tree.findall("//dt[@id]"):
        refID = dt.get("id")
        if refID.startswith("refs") and len(tree.findall("//a[@href='#%s']" % refID)) == 0:
            next = dt.getnext()
            while next.tag != "dd":
                next = next.getnext()
            dt.getparent().remove(next)
            dt.getparent().remove(dt)
        elif refID.startswith("refs"):
            dd = dt.getnext()
            while dd.tag != "dd":
                dd = dd.getnext()
            links = dd.findall(".//a[@href]")
            for link in links:
                if link is not None:
                    wrap = link.getparent()
                    link.tail = " (URL: "
                    idx = wrap.index(link)
                    url = etree.Element("a", href=link.get("href"))
                    url.text = link.get("href")
                    wrap.insert(idx + 1, url)
                    url.tail = ")"

    # microdata: relocate the HTMLPropertiesCollection section (and its
    # table-of-contents entry) to its proper place.
    if spec == "microdata":
        print "munging (after anolis)"
        # get the h3 for the misplaced section (it has no container)
        section = tree.xpath("//h3[@id = 'htmlpropertiescollection']")[0]
        # then get all of its following siblings that have the h2 for the next section as
        # a following sibling themselves. Yeah, XPath doesn't suck.
        section_content = section.xpath("following-sibling::*[following-sibling::h2[@id='introduction']]")
        target = tree.xpath("//h2[@id = 'converting-html-to-other-formats']")[0].getparent()
        target.addprevious(section)
        for el in section_content:
            target.addprevious(el)
        section.xpath("span")[0].text = "6.1 "
        # move the toc as well
        link = tree.xpath("//ol[@class='toc']//a[@href='#htmlpropertiescollection']")[0]
        link.xpath("span")[0].text = "6.1 "
        tree.xpath("//ol[@class='toc']/li[a[@href='#microdata-dom-api']]")[0].append(link.getparent().getparent())

    if spec == "srcset":
        print "munging (after anolis)"
        # In the WHATWG spec, srcset="" is simply an aspect of
        # HTMLImageElement and not a separate feature. In order to keep
        # the HTML WG's srcset="" spec organized, we have to move some
        # things around in the final document.

        # Move "The srcset IDL attribute must reflect..."
        reflect_the_content_attribute = tree.findall("//div[@class='impl']")[0]
        target = tree.find("//div[@class='note']")
        target.addprevious(reflect_the_content_attribute)

        # Move "The IDL attribute complete must return true..."
        # NOTE(review): the [4] index is positional and fragile — it breaks
        # if the number of p.note elements before this one changes.
        note_about_complete = tree.findall("//p[@class='note']")[4]
        p_otherwise = note_about_complete.xpath("preceding-sibling::p[position()=1]")[0]
        ul_conditions = p_otherwise.xpath("preceding-sibling::ul[position()=1]")[0]
        p_start = ul_conditions.xpath("preceding-sibling::p[position()=1]")[0]
        target.addnext(note_about_complete)
        target.addnext(p_otherwise)
        target.addnext(ul_conditions)
        target.addnext(p_start)

    # Best-effort create of the output directory.
    # NOTE(review): bare except also hides failures other than
    # "directory already exists" (e.g. permissions).
    try:
        os.makedirs(spec_dir)
    except:
        pass

    # html is post-processed in memory; everything else writes straight out.
    if spec == "html":
        print "cleaning"
        from glob import glob

        for name in glob("%s/*.html" % spec_dir):
            os.remove(name)

        output = StringIO()
    else:
        output = open("%s/Overview.html" % spec_dir, "wb")

    generator.toFile(tree, output, **opts)

    if spec != "html":
        output.close()
    else:
        value = output.getvalue()
        # Substitute the generated interface index for the marker, if present.
        if "<!--INTERFACES-->\n" in value:
            print "interfaces"
            from interface_index import interface_index

            output.seek(0)
            index = StringIO()
            interface_index(output, index)
            value = value.replace("<!--INTERFACES-->\n", index.getvalue(), 1)
            index.close()
        output = open("%s/single-page.html" % spec_dir, "wb")
        output.write(value)
        output.close()
        value = ""

        # Split the single-page build into per-section pages.
        print "splitting"
        import spec_splitter

        spec_splitter.w3c = True
        spec_splitter.no_split_exceptions = conf.get("no_split_exceptions", False)
        spec_splitter.minimal_split_exceptions = conf.get("minimal_split_exceptions", False)
        spec_splitter.main("%s/single-page.html" % spec_dir, spec_dir)

        # Emit the named-character-references table as JSON.
        print "entities"
        entities = open(os.path.join(cur_dir, "boilerplate/entities.inc"))
        json = open("%s/entities.json" % spec_dir, "w")
        from entity_processor_json import entity_processor_json

        entity_processor_json(entities, json)
        entities.close()
        json.close()

    # copying dependencies
    def copy_dependencies(targets):
        # Copy each target (dir or glob) from the spec source tree into
        # spec_dir, using xcopy on Windows and csh/cp elsewhere.
        import types

        if not isinstance(targets, types.ListType):
            targets = [targets]
        if os.name == "nt":
            for target in targets:
                os.system("xcopy /s %s %s" % (os.path.join(conf["path"], target), spec_dir))
        else:
            for target in targets:
                os.system("/bin/csh -i -c '/bin/cp -R %s %s'" % (os.path.join(conf["path"], target), spec_dir))

    print "copying"
    if spec == "html":
        # xcopy cannot expand the 404/* glob, so copy the directory itself.
        if os.name == "nt":
            dirs = ["images", "fonts", "404", "switcher", "js"]
        else:
            dirs = ["images", "fonts", "404/*", "switcher", "js"]
        copy_dependencies(dirs)
    elif spec == "2dcontext":
        copy_dependencies(["images", "fonts"])
    else:
        copy_dependencies("fonts")

    # fix the styling of the 404: point it at the same W3C stylesheet
    # variant the generated spec links to.
    if spec == "html":
        link = tree.xpath("//link[starts-with(@href, 'http://www.w3.org/StyleSheets/TR/')]")[0].get("href")
        path = os.path.join(spec_dir, "404.html")
        with open(path) as data:
            html404 = data.read()
        html404 = html404.replace("http://www.w3.org/StyleSheets/TR/W3C-ED", link)
        with open(path, "w") as data:
            data.write(html404)