def parse_search_result(self, element, block_xpath, sample):
    """Build a ``SEARCH_RESULT`` component from *element* using *sample*.

    ``block_xpath`` is run through ``self.extract_xpath``. For each of the
    fields ``page_url``, ``title``, ``snippet`` and ``view_url`` the
    matching ``sample.<field>.xpath`` is extracted the same way, its first
    ``len(block)`` items are dropped, and the remainder is recombined with
    ``self.combine_xpath(..., True)`` and evaluated on *element*. The
    query result and ``sample.<field>.attr`` are handed to
    ``self.get_attr`` and stored on the component.

    NOTE(review): dropping the prefix presumably turns the sample's xpath
    into one relative to the result block -- confirm against
    ``extract_xpath`` / ``combine_xpath``.

    Returns the populated ``Component``.
    """
    result = Component()
    result.type = "SEARCH_RESULT"
    result.alignment = "LEFT"

    block_steps = self.extract_xpath(block_xpath)

    # Same field order as the hand-unrolled version.
    for field in ("page_url", "title", "snippet", "view_url"):
        relative_steps = self.extract_xpath(
            getattr(sample, field).xpath
        )[len(block_steps):]
        nodes = element.xpath(self.combine_xpath(relative_steps, True))
        setattr(
            result,
            field,
            self.get_attr(nodes, getattr(sample, field).attr),
        )

    return result
def parse_image(self, element):
    """Build an ``IMAGE`` component from *element*.

    Each field is obtained via ``self.get_from_page(element, path, what)``:

    * ``page_url`` -- ``"./a"`` / ``"href"``
    * ``view_url`` -- ``"./cite"`` / ``"title"``
    * ``title``    -- ``"."`` / ``"string"``

    Returns the populated ``Component`` (type ``"IMAGE"``, alignment
    ``"LEFT"``).
    """
    component = Component()
    component.type = "IMAGE"
    component.alignment = "LEFT"

    # (attribute on the component, xpath, selector given to get_from_page)
    lookups = (
        ("page_url", "./a", "href"),
        ("view_url", "./cite", "title"),
        ("title", ".", "string"),
    )
    for field, path, what in lookups:
        setattr(component, field, self.get_from_page(element, path, what))

    return component
def parse_adv(self, element):
    """Build an ``ADV`` component from *element*.

    Each field is obtained via ``self.get_from_page(element, path, what)``:

    * ``page_url`` -- ``"./h3/a"`` / ``"href"``
    * ``title``    -- ``"./h3/a"`` / ``"string"``
    * ``snippet``  -- ``"./div[2]"`` / ``"string"``
    * ``view_url`` -- ``"./div/cite"`` / ``"string"``

    Returns the populated ``Component`` (type ``"ADV"``, alignment
    ``"LEFT"``).
    """
    component = Component()
    component.type = "ADV"
    component.alignment = "LEFT"

    # (attribute on the component, xpath, selector given to get_from_page)
    lookups = (
        ("page_url", "./h3/a", "href"),
        ("title", "./h3/a", "string"),
        ("snippet", "./div[2]", "string"),
        ("view_url", "./div/cite", "string"),
    )
    for field, path, what in lookups:
        setattr(component, field, self.get_from_page(element, path, what))

    return component
def parse_search_result(self, element):
    """Build a ``SEARCH_RESULT`` component from *element*.

    Each field is obtained via ``self.get_from_page(element, path, what)``:

    * ``page_url`` -- ``"./h3/a"`` / ``"href"``
    * ``title``    -- ``"./h3/a"`` / ``"string"``
    * ``snippet``  -- ``"./div/span"`` / ``"strings"``
    * ``view_url`` -- ``"./div/div/cite"`` / ``"string"``

    NOTE(review): the snippet uses the selector ``"strings"`` (plural)
    while the other text fields use ``"string"``; presumably a distinct
    mode of ``get_from_page`` -- confirm.

    Returns the populated ``Component`` (type ``"SEARCH_RESULT"``,
    alignment ``"LEFT"``).
    """
    component = Component()
    component.type = "SEARCH_RESULT"
    component.alignment = "LEFT"

    # (attribute on the component, xpath, selector given to get_from_page)
    lookups = (
        ("page_url", "./h3/a", "href"),
        ("title", "./h3/a", "string"),
        ("snippet", "./div/span", "strings"),
        ("view_url", "./div/div/cite", "string"),
    )
    for field, path, what in lookups:
        setattr(component, field, self.get_from_page(element, path, what))

    return component
def parse_adv(self, element):
    """Build an ``ADV`` component from *element*.

    Each field is obtained via ``self.get_from_page(element, path, what)``:

    * ``page_url`` -- ``"./h2/a"`` / ``"href"``
    * ``title``    -- ``"./h2/a"`` / ``"string"``
    * ``snippet``  -- first ``div`` under the
      ``organic__content-wrapper clearfix`` div / ``"string"``
    * ``view_url`` -- ``a`` in the first ``div`` under the
      ``organic__subtitle typo typo_type_greenurl`` div / ``"string"``

    Returns the populated ``Component`` (type ``"ADV"``, alignment
    ``"LEFT"``).
    """
    component = Component()
    component.type = "ADV"
    component.alignment = "LEFT"

    # The class predicates are exact-match on the whole class attribute,
    # so the literals must stay byte-identical.
    snippet_path = "./div[@class='organic__content-wrapper clearfix']/div[1]"
    view_url_path = (
        "./div[@class='organic__subtitle typo typo_type_greenurl']/div[1]/a"
    )

    # (attribute on the component, xpath, selector given to get_from_page)
    lookups = (
        ("page_url", "./h2/a", "href"),
        ("title", "./h2/a", "string"),
        ("snippet", snippet_path, "string"),
        ("view_url", view_url_path, "string"),
    )
    for field, path, what in lookups:
        setattr(component, field, self.get_from_page(element, path, what))

    return component