Esempio n. 1
0
    def load_answers(self, page):
        """Extract answer texts from ``page`` and store them in ``self.results``.

        Does nothing when ``self.valid_url(page.url)`` is falsy. Otherwise the
        page is searched for the accepted answer and for the remaining
        answers, each match is rendered to UTF-8 text, and ``self.results``
        is replaced by an ``Answers`` object built from them.
        """
        if not self.valid_url(page.url):
            return

        def as_text(node):
            # Text-only serialisation: markup is dropped, content is kept.
            return etree.tostring(node, method="text", encoding="UTF-8")

        # Both kinds of answer keep their body at the same relative location.
        body = '/table/tr/td[@class="answercell"]/div[@class="post-text"]'
        accepted_path = engine.Path(
            '//div[@class="answer accepted-answer"]' + body, link=False)
        other_path = engine.Path('//div[@class="answer"]' + body, link=False)

        found = engine.search(page, [accepted_path, other_path])

        # Clear the previous result before the new one is assembled.
        self.results = None

        accepted_nodes, other_nodes = found[0], found[1]

        # Only the first accepted match is used; None when there is none.
        accepted = as_text(accepted_nodes[0]) if len(accepted_nodes) > 0 else None
        answers = [as_text(node) for node in other_nodes]

        # NOTE(review): the first argument is a literal placeholder string;
        # its meaning is defined by Answers, which is not visible here.
        self.results = Answers("unimplemented", accepted, answers)
Esempio n. 2
0
    def load_results(self, page):
        """Fill ``self.titles`` and ``self.links`` from the result headings.

        Searches ``page`` for the anchors inside ``h3`` elements of class
        ``r``. Titles come from the anchor text and links from the ``href``
        attribute; both lists are rebuilt from scratch on every call and are
        truncated to the shorter of the two result sets.
        """
        anchor = '//h3[@class="r"]/a'
        title_path = engine.Path(anchor + '/text()', link=False)
        link_path = engine.Path(anchor + '/@href')
        found = engine.search(page, [title_path, link_path])

        self.titles = []
        self.links = []

        # Pair titles with links positionally; zip stops at the shorter list.
        pairs = list(zip(found[0], found[1]))
        self.titles.extend(title for title, _ in pairs)
        self.links.extend(href for _, href in pairs)
Esempio n. 3
0
File: bing.py Progetto: krornus/misc
    def __init__(self, query):
        """Set up an image search for ``query``.

        Stores the search endpoint and the query, starts with an empty image
        list and no loaded page, and builds the path used to locate image
        entries.
        """
        def murl_of(raw):
            # The attribute value is parsed with ``loads`` and its 'murl'
            # entry is returned (presumably JSON via json.loads -- confirm
            # against the file's imports).
            return loads(raw)['murl']

        self.base = "https://www.bing.com/images/search"
        self.query = query
        self.images = []

        self.page = None

        # Each matching container contributes the parsed ``m`` attribute of
        # its child anchor.
        image_attr = engine.Path('./a/@m', parse=murl_of)
        self.path = engine.Path(
            '//li/div/div[@class="imgpt"]',
            children=[image_attr],
        )
Esempio n. 4
0
    def load_wiki(self, page):
        """Render the article body of ``page`` as plain text in ``self.wiki``.

        Does nothing when ``self.valid_wiki(page.url)`` is falsy or when the
        search yields no result sets. Elements whose tag matches
        ``self.header`` are framed by 80-dash rules; ``p`` elements are
        appended as text; every other element is skipped.
        """
        if not self.valid_wiki(page.url):
            return

        content_path = engine.Path('//div[@class="mw-parser-output"]/*',
                                   link=False)
        found = engine.search(page, [content_path])

        if not len(found):
            return

        rule = "-" * 80
        pieces = []

        for node in found[0]:
            if self.header.match(node.tag):
                heading = etree.tostring(node, method="text", encoding="UTF-8")
                pieces.extend(["\n", rule, "\n", heading, rule, "\n"])
            # Deliberately a separate test, not an elif: both checks run for
            # every element.
            if node.tag.decode("ascii", "ignore") == "p":
                pieces.append(
                    etree.tostring(node, method="text", encoding="UTF-8"))

        self.wiki = "".join(pieces)
Esempio n. 5
0
    def load_results(self, page):
        """Store the text of the page's single ``section`` div.

        ``self.instruction`` is set to the UTF-8 text rendering of the match.
        It is left untouched unless the search returns exactly one result.
        """
        section_path = engine.Path('//div[@class="section"]', link=False)
        sections = engine.search(page, section_path)

        # Zero or several matches are both treated as "nothing usable".
        if len(sections) == 1:
            self.instruction = etree.tostring(
                sections[0], method="text", encoding="UTF-8")
Esempio n. 6
0
    def load_search(self, page):

        if not self.valid_search(page.url):
            return

        presult = '//div[@class="mw-search-result-heading"]/a'
        xresult = engine.Path(presult, link=False)

        results = engine.search(page, xresult)

        for result in results:
            print "UNIMPLEMENTED, ADD SUBTREE TO ENGINE"
            print result.attrib['href'], result.attrib['title']
Esempio n. 7
0
    def load_questions(self, page):
        """Append the questions found on a search page to ``self.results``.

        Does nothing when ``self.valid_search(page.url)`` is falsy. When the
        page has no result summaries a message is written and the method
        returns. Otherwise ``self.results`` is replaced by a fresh
        ``Questions`` object unless it already is one, and one ``Question``
        is appended for each summary that has both a link and a title.
        """
        if not self.valid_search(page.url):
            return

        xsummary = engine.Path(
            '//div[@class="summary"]/div[@class="result-link"]', link=False)

        # These two are relative paths, evaluated against each summary node.
        xtitle = engine.Path("./span/a/text()", link=False)
        xlink = engine.Path("./span/a/@href", link=True)

        results = engine.search(page, xsummary)

        if len(results) == 0:
            writeln("[-] no results found")
            return

        if not isinstance(self.results, Questions):
            self.results = Questions("ERR", [])

        # Read the URL once so every sub-page is built from the page that was
        # passed in, without rebinding the ``page`` parameter inside the loop.
        base_url = page.url

        for summary in results:
            subpage = engine.Page(base_url, tree=summary)

            link = engine.search(subpage, xlink)
            title = engine.search(subpage, xtitle)

            # Skip summaries that lack either piece.
            if len(link) == 0 or len(title) == 0:
                continue

            link = link[0].strip()
            # Drops the first three characters of the stripped title
            # (presumably a fixed prefix such as "Q: " -- confirm).
            title = title[0].strip()[3:]

            self.results.questions.append(Question(title, link))
Esempio n. 8
0
    def do_next(self, args):
        """Load the next results page and hand it to ``self.lpr``.

        ``args`` is not used. A message is written and nothing else happens
        when no page is loaded yet or when the current page has no
        ``pnnext`` anchor.
        """
        if not self.page:
            writeln("[-] no results loaded!")
            return

        next_hrefs = engine.search(
            self.page, engine.Path('//a[@id="pnnext"]/@href'))

        if not next_hrefs:
            writeln("[-] no more results!")
            return

        # Follow the first match and make it the current page.
        self.page = engine.Page(next_hrefs[0], load=True)
        self.lpr(self.page)