Ejemplo n.º 1
0
    def parse_wizard_image(self, element, block_xpath, sample):
        """Build a ``WIZARD`` component of type ``WIZARD_IMAGE`` from *element*.

        The XPaths recorded on *sample* are made relative to the block by
        dropping the leading ``len(extract_xpath(block_xpath))`` items, and
        are then evaluated against *element*.

        Args:
            element: Node exposing an ``xpath()`` method (presumably an lxml
                element -- confirm against the caller).
            block_xpath: XPath of the enclosing block; it is passed through
                ``self.extract_xpath`` before use.
            sample: Object providing ``media_links`` (a non-empty indexable
                collection whose items have ``xpath`` and ``attr``) plus
                ``page_url`` and ``title`` (each with ``xpath`` and ``attr``).

        Returns:
            The populated ``Component`` with ``media_links``, ``page_url``
            and ``title`` set.
        """
        component = Component()
        component.type = "WIZARD"
        component.wizard_type = "WIZARD_IMAGE"
        component.alignment = "LEFT"

        block_steps = self.extract_xpath(block_xpath)
        skip = len(block_steps)

        # Fold the XPaths of all sample images into their common prefix; the
        # first image seeds the fold and is also visited again by the loop.
        shared = self.extract_xpath(sample.media_links[0].xpath)
        for link in sample.media_links:
            link_steps = self.extract_xpath(link.xpath)
            shared = self.great_common_prefix(shared, link_steps)
        images_query = self.combine_xpath(shared[skip:], True)

        # Every matched node is read with the attribute of the first sample
        # image.
        component.media_links = [
            self.get_attr(node, sample.media_links[0].attr)
            for node in element.xpath(images_query)
        ]

        url_steps = self.extract_xpath(sample.page_url.xpath)[skip:]
        url_nodes = element.xpath(self.combine_xpath(url_steps, True))
        component.page_url = self.get_attr(url_nodes, sample.page_url.attr)

        title_steps = self.extract_xpath(sample.title.xpath)[skip:]
        title_nodes = element.xpath(self.combine_xpath(title_steps, True))
        component.title = self.get_attr(title_nodes, sample.title.attr)

        return component
Ejemplo n.º 2
0
 def parse_wizard_image(self, element):
     """Build a ``WIZARD`` component of type ``WIZARD_IMAGE`` from *element*.

     Image sources are read from the nodes matching ``./div/a/img``; the
     page URL and the title are both read from ``./h3/a`` (as ``href`` and
     ``string`` respectively) through ``self.get_from_page``.

     Args:
         element: Node exposing an ``xpath()`` method (presumably an lxml
             element -- confirm against the caller).

     Returns:
         The populated ``Component``.
     """
     component = Component()
     component.type = "WIZARD"
     component.wizard_type = "WIZARD_IMAGE"
     component.alignment = "LEFT"

     image_nodes = element.xpath("./div/a/img")
     component.media_links = [
         self.get_from_page(node, ".", "src") for node in image_nodes
     ]

     # Link target and caption come from the same heading anchor.
     component.page_url = self.get_from_page(element, "./h3/a", "href")
     component.title = self.get_from_page(element, "./h3/a", "string")
     return component