def load_answers(self, page):
    if not self.valid_url(page.url):
        return
    # Accepted answers carry both classes; plain answers only the first.
    paccepted = '//div[@class="answer accepted-answer"]'
    panswers = '//div[@class="answer"]'
    panswer = '/table/tr/td[@class="answercell"]/div[@class="post-text"]'
    xaccepted = engine.Path(paccepted + panswer, link=False)
    xanswers = engine.Path(panswers + panswer, link=False)
    results = engine.search(page, [xaccepted, xanswers])
    self.results = None
    raccepted = results[0]
    ranswers = results[1]
    answers = []
    accepted = None
    if len(raccepted) > 0:
        accepted = etree.tostring(raccepted[0], method="text", encoding="UTF-8")
    for answer in ranswers:
        answers.append(etree.tostring(answer, method="text", encoding="UTF-8"))
    self.results = Answers("unimplemented", accepted, answers)
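# A minimal, self-contained sketch of the extraction performed above, using
# plain lxml instead of the local `engine` module. The HTML fragment is made
# up for illustration; only the class names mirror the XPaths above.
#
#   from lxml import etree
#
#   html = ('<div class="answer accepted-answer"><table><tr>'
#           '<td class="answercell"><div class="post-text">'
#           '<p>Use itertools.</p></div></td></tr></table></div>')
#   root = etree.HTML(html)
#   cells = root.xpath('//div[@class="answer accepted-answer"]'
#                      '//div[@class="post-text"]')
#   if cells:
#       print etree.tostring(cells[0], method="text", encoding="UTF-8")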
def load_results(self, page):
    xtitle = engine.Path('//h3[@class="r"]/a/text()', link=False)
    xlink = engine.Path('//h3[@class="r"]/a/@href')
    results = engine.search(page, [xtitle, xlink])
    self.titles = []
    self.links = []
    for t, l in zip(results[0], results[1]):
        self.titles.append(t)
        self.links.append(l)
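# Note: '//h3[@class="r"]/a' is Google's organic-result heading markup at the
# time of writing; the two XPaths return parallel lists (link text and href),
# which is why they can simply be zipped. A standalone lxml sketch of the same
# pattern, with invented markup:
#
#   from lxml import etree
#
#   root = etree.HTML('<h3 class="r"><a href="/a">First</a></h3>'
#                     '<h3 class="r"><a href="/b">Second</a></h3>')
#   titles = root.xpath('//h3[@class="r"]/a/text()')
#   links = root.xpath('//h3[@class="r"]/a/@href')
#   for t, l in zip(titles, links):
#       print t, l          # First /a, then Second /b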
def __init__(self, query):
    self.base = "https://www.bing.com/images/search"
    self.query = query
    self.images = []
    self.page = None
    # Each result tile carries a JSON blob in the anchor's `m` attribute;
    # its `murl` field holds the original image URL (requires `from json
    # import loads`, or equivalent, at module level).
    self.path = engine.Path(
        '//li/div/div[@class="imgpt"]',
        children=[
            engine.Path('./a/@m', parse=lambda x: loads(x)['murl'])
        ],
    )
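# Illustrative example of the `parse` callback above, with a fabricated `m`
# attribute value (only the `murl` key is assumed, as in the lambda):
#
#   from json import loads
#
#   m = '{"murl": "http://example.com/cat.jpg", "turl": "http://example.com/t.jpg"}'
#   print loads(m)['murl']   # http://example.com/cat.jpg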
def load_wiki(self, page):
    if not self.valid_wiki(page.url):
        return
    pwiki = '//div[@class="mw-parser-output"]/*'
    xwiki = engine.Path(pwiki, link=False)
    results = engine.search(page, [xwiki])
    if len(results) == 0:
        return
    rwiki = results[0]
    wiki = ""
    for child in rwiki:
        # Headings (whatever self.header matches) are framed with rules of
        # dashes; paragraphs are appended as plain text; other nodes are
        # skipped.
        if self.header.match(child.tag):
            wiki += "\n"
            wiki += "-" * 80
            wiki += "\n"
            wiki += etree.tostring(child, method="text", encoding="UTF-8")
            wiki += "-" * 80
            wiki += "\n"
        if child.tag.decode("ascii", "ignore") == "p":
            wiki += etree.tostring(child, method="text", encoding="UTF-8")
    self.wiki = wiki
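# `etree.tostring(el, method="text", ...)` serializes only the text content of
# an element (tags dropped), which is what turns each heading/paragraph node
# into plain wiki text above. A self-contained example with made-up markup:
#
#   from lxml import etree
#
#   p = etree.HTML('<p>Hello <b>world</b>.</p>').xpath('//p')[0]
#   print etree.tostring(p, method="text", encoding="UTF-8")   # Hello world.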
def load_results(self, page):
    xsection = engine.Path('//div[@class="section"]', link=False)
    results = engine.search(page, xsection)
    if len(results) != 1:
        return
    self.instruction = etree.tostring(results[0], method="text", encoding="UTF-8")
def load_search(self, page):
    if not self.valid_search(page.url):
        return
    presult = '//div[@class="mw-search-result-heading"]/a'
    xresult = engine.Path(presult, link=False)
    results = engine.search(page, xresult)
    for result in results:
        print "UNIMPLEMENTED, ADD SUBTREE TO ENGINE"
        print result.attrib['href'], result.attrib['title']
def load_questions(self, page):
    if not self.valid_search(page.url):
        return
    psummary = '//div[@class="summary"]/div[@class="result-link"]'
    xsummary = engine.Path(psummary, link=False)
    plink = "./span/a/@href"
    ptitle = "./span/a/text()"
    ptext = "./span"  # currently unused
    xtitle = engine.Path(ptitle, link=False)
    xlink = engine.Path(plink, link=True)
    results = engine.search(page, xsummary)
    if len(results) == 0:
        writeln("[-] no results found")
        return
    if not isinstance(self.results, Questions):
        self.results = Questions("ERR", [])
    for summary in results:
        # Wrap each summary subtree in its own Page so the relative XPaths
        # above are evaluated against that subtree only.
        subpage = engine.Page(page.url, tree=summary)
        link = engine.search(subpage, xlink)
        title = engine.search(subpage, xtitle)
        if len(link) == 0 or len(title) == 0:
            continue
        link = link[0].strip()
        # Drop the "Q: "/"A: " marker that prefixes result titles.
        title = title[0].strip()[3:]
        self.results.questions.append(Question(title, link))
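# Hypothetical usage sketch. `engine.Page`, `Questions`, and `writeln` are the
# names used in this file; the instance name, class name, and the attribute
# names on Question are assumptions for illustration only.
#
#   so = StackOverflow()                                   # assumed class name
#   page = engine.Page("https://stackoverflow.com/search?q=python+zip",
#                      load=True)
#   so.load_questions(page)
#   if isinstance(so.results, Questions):
#       for q in so.results.questions:
#           writeln("%s -> %s" % (q.title, q.link))        # assumed attributes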
def do_next(self, args):
    if not self.page:
        writeln("[-] no results loaded!")
        return
    xpath = engine.Path('//a[@id="pnnext"]/@href')
    n = engine.search(self.page, xpath)
    if not n:
        writeln("[-] no more results!")
        return
    self.page = engine.Page(n[0], load=True)
    self.lpr(self.page)
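# '#pnnext' is the id Google gives its "Next" pagination anchor; the XPath
# pulls its href so the following results page can be loaded. Illustrative
# lxml equivalent with fabricated markup:
#
#   from lxml import etree
#
#   root = etree.HTML('<a id="pnnext" href="/search?q=x&amp;start=10">Next</a>')
#   print root.xpath('//a[@id="pnnext"]/@href')[0]   # /search?q=x&start=10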