Example #1
0
    def parse_search_result(self, element, block_xpath, sample):
        """Build a ``SEARCH_RESULT`` component from *element*.

        For each of ``page_url``, ``title``, ``snippet`` and ``view_url``
        the matching entry on *sample* supplies an xpath and an attribute
        name.  The xpath is run through ``self.extract_xpath``, its first
        ``len(extracted block_xpath)`` items are sliced off (presumably
        leaving a path relative to the block -- confirm against
        ``extract_xpath``), the remainder is recombined with
        ``self.combine_xpath(..., True)`` and evaluated on *element*.

        Args:
            element: Node exposing ``.xpath()``; queried once per field.
            block_xpath: XPath of the enclosing block, passed through
                ``self.extract_xpath`` before its length is used.
            sample: Object whose ``page_url``, ``title``, ``snippet`` and
                ``view_url`` attributes each expose ``.xpath`` and
                ``.attr``.

        Returns:
            The populated ``Component``.
        """
        component = Component()
        component.type = "SEARCH_RESULT"
        component.alignment = "LEFT"

        block_parts = self.extract_xpath(block_xpath)

        # All four fields are resolved the same way; only the sample entry
        # (and the attribute it is stored under) differs.
        for field in ("page_url", "title", "snippet", "view_url"):
            spec = getattr(sample, field)
            relative = self.extract_xpath(spec.xpath)[len(block_parts):]
            nodes = element.xpath(self.combine_xpath(relative, True))
            setattr(component, field, self.get_attr(nodes, spec.attr))

        return component
 def parse_image(self, element):
     """Build an ``IMAGE`` component from *element*.

     Each field is filled by ``self.get_from_page(element, xpath, selector)``
     using the fixed xpath/selector pairs listed below.

     Args:
         element: Node handed unchanged to ``self.get_from_page``.

     Returns:
         The populated ``Component``.
     """
     component = Component()
     component.type = "IMAGE"
     component.alignment = "LEFT"

     # (component attribute, xpath relative to element, selector)
     lookups = (
         ("page_url", "./a", "href"),
         ("view_url", "./cite", "title"),
         ("title", ".", "string"),
     )
     for field, path, selector in lookups:
         setattr(component, field, self.get_from_page(element, path, selector))
     return component
 def parse_adv(self, element):
     """Build an ``ADV`` component from *element*.

     Each field is filled by ``self.get_from_page(element, xpath, selector)``
     using the fixed xpath/selector pairs listed below.

     Args:
         element: Node handed unchanged to ``self.get_from_page``.

     Returns:
         The populated ``Component``.
     """
     component = Component()
     component.type = "ADV"
     component.alignment = "LEFT"

     # (component attribute, xpath relative to element, selector)
     lookups = (
         ("page_url", "./h3/a", "href"),
         ("title", "./h3/a", "string"),
         ("snippet", "./div[2]", "string"),
         ("view_url", "./div/cite", "string"),
     )
     for field, path, selector in lookups:
         setattr(component, field, self.get_from_page(element, path, selector))
     return component
 def parse_search_result(self, element):
     """Build a ``SEARCH_RESULT`` component from *element*.

     Each field is filled by ``self.get_from_page(element, xpath, selector)``
     using the fixed xpath/selector pairs listed below.

     Args:
         element: Node handed unchanged to ``self.get_from_page``.

     Returns:
         The populated ``Component``.
     """
     component = Component()
     component.type = "SEARCH_RESULT"
     component.alignment = "LEFT"

     # (component attribute, xpath relative to element, selector)
     # NOTE(review): the snippet uses "strings" (plural) while the other
     # text fields use "string" -- kept as-is; confirm against get_from_page.
     lookups = (
         ("page_url", "./h3/a", "href"),
         ("title", "./h3/a", "string"),
         ("snippet", "./div/span", "strings"),
         ("view_url", "./div/div/cite", "string"),
     )
     for field, path, selector in lookups:
         setattr(component, field, self.get_from_page(element, path, selector))
     return component
Example #5
0
 def parse_adv(self, element):
     """Build an ``ADV`` component from *element*.

     Each field is filled by ``self.get_from_page(element, xpath, selector)``
     using the fixed xpath/selector pairs listed below.

     Args:
         element: Node handed unchanged to ``self.get_from_page``.

     Returns:
         The populated ``Component``.
     """
     component = Component()
     component.type = "ADV"
     component.alignment = "LEFT"

     # (component attribute, xpath relative to element, selector)
     lookups = (
         ("page_url", "./h2/a", "href"),
         ("title", "./h2/a", "string"),
         (
             "snippet",
             "./div[@class='organic__content-wrapper clearfix']/div[1]",
             "string",
         ),
         (
             "view_url",
             "./div[@class='organic__subtitle typo typo_type_greenurl']/div[1]/a",
             "string",
         ),
     )
     for field, path, selector in lookups:
         setattr(component, field, self.get_from_page(element, path, selector))
     return component