def parse_wizard_image(self, element, block_xpath, sample):
    """Build a ``WIZARD_IMAGE`` component from *element*, guided by *sample*.

    The XPaths stored on *sample* are rewritten relative to the block by
    dropping their first ``len(self.extract_xpath(block_xpath))`` entries
    and passing the remainder to ``self.combine_xpath(..., True)``.

    Args:
        element: Node exposing ``.xpath(expr)``; every query is run on it.
        block_xpath: XPath of the enclosing block, in whatever form
            ``self.extract_xpath`` accepts.
        sample: Object providing ``media_links`` (items with ``xpath`` and
            ``attr``), ``page_url`` and ``title`` (each with ``xpath`` and
            ``attr``).

    Returns:
        A ``Component`` with ``type``, ``wizard_type``, ``alignment``,
        ``media_links``, ``page_url`` and ``title`` populated.

    Raises:
        IndexError: If ``sample.media_links`` is an empty sequence, because
            its first entry seeds the image XPath.
    """
    component = Component()
    component.type = "WIZARD"
    component.wizard_type = "WIZARD_IMAGE"
    component.alignment = "LEFT"

    block_parts = self.extract_xpath(block_xpath)
    prefix_len = len(block_parts)

    def value_for(field):
        # Same recipe for every single-valued field: make the sample XPath
        # relative to the block, query the element, read the attribute.
        relative = self.extract_xpath(field.xpath)[prefix_len:]
        matches = element.xpath(self.combine_xpath(relative, True))
        return self.get_attr(matches, field.attr)

    # Fold all sample image XPaths into one shared prefix. The first link
    # is both the seed and the first item folded in, as in the original.
    shared = self.extract_xpath(sample.media_links[0].xpath)
    for link in sample.media_links:
        shared = self.great_common_prefix(
            shared, self.extract_xpath(link.xpath))
    image_query = self.combine_xpath(shared[prefix_len:], True)

    # Every matched image is read with the attribute of the first sample link.
    component.media_links = [
        self.get_attr(node, sample.media_links[0].attr)
        for node in element.xpath(image_query)
    ]

    component.page_url = value_for(sample.page_url)
    component.title = value_for(sample.title)
    return component
def parse_wizard_image(self, element):
    """Build a ``WIZARD_IMAGE`` component from *element* using fixed XPaths.

    Images are the nodes matching ``./div/a/img``; the page URL and the
    title are both read from ``./h3/a``.

    Args:
        element: Node exposing ``.xpath(expr)``; it is also passed to
            ``self.get_from_page`` as the lookup root.

    Returns:
        A ``Component`` with ``type``, ``wizard_type``, ``alignment``,
        ``media_links``, ``page_url`` and ``title`` populated.
    """
    heading_link = "./h3/a"

    component = Component()
    component.type = "WIZARD"
    component.wizard_type = "WIZARD_IMAGE"
    component.alignment = "LEFT"

    # One entry per matched <img>, read via its "src".
    component.media_links = [
        self.get_from_page(image, ".", "src")
        for image in element.xpath("./div/a/img")
    ]

    # The heading link provides both the target URL and the title.
    component.page_url = self.get_from_page(element, heading_link, "href")
    component.title = self.get_from_page(element, heading_link, "string")
    return component