def test_nested_replace(self):
    """Check that ``replace_xpath`` on nested loaders overwrites ``url``.

    A value added through the root loader is replaced first through the
    ``//footer`` nested loader and then through the ``a`` loader nested
    inside it; after each step the root loader's output for ``url`` must
    hold only the most recent replacement.
    """
    root = ItemLoader(selector=self.selector)
    footer = root.nested_xpath('//footer')
    anchor = footer.nested_xpath('a')

    link_output = ['http://www.scrapy.org']
    logo_output = ['/images/logo.png']

    # Seed the field from the root loader using an absolute XPath.
    root.add_xpath('url', '//footer/a/@href')
    self.assertEqual(root.get_output_value('url'), link_output)

    # Replace via the first nesting level (XPath relative to //footer).
    footer.replace_xpath('url', 'img/@src')
    self.assertEqual(root.get_output_value('url'), logo_output)

    # Replace via the second nesting level (XPath relative to //footer/a).
    anchor.replace_xpath('url', '@href')
    self.assertEqual(root.get_output_value('url'), link_output)
def test_nested_ordering(self):
    """Check that values keep call order across root and nested loaders.

    Values for ``url`` are added alternately through the ``//footer``
    nested loader, the root loader, and the ``a`` loader nested inside
    the footer one; the root loader's output must list them in the order
    the ``add_xpath`` calls were made.
    """
    root = ItemLoader(selector=self.selector)
    footer = root.nested_xpath('//footer')
    anchor = footer.nested_xpath('a')

    footer.add_xpath('url', 'img/@src')
    root.add_xpath('url', '//footer/a/@href')
    anchor.add_xpath('url', 'text()')
    root.add_xpath('url', '//footer/a/@href')

    expected_urls = [
        '/images/logo.png',
        'http://www.scrapy.org',
        'homepage',
        'http://www.scrapy.org',
    ]
    self.assertEqual(root.get_output_value('url'), expected_urls)
def test_nested_load_item(self):
    """Check that nested loaders share, and populate, the root's item.

    Values are added through the root loader, the ``//footer`` nested
    loader and the ``img`` loader nested inside it.  After
    ``load_item()`` on the root, the returned item must be the very same
    object exposed as ``.item`` by all three loaders, and it must carry
    the values collected at every nesting level.
    """
    loader = ItemLoader(selector=self.selector)
    nl1 = loader.nested_xpath('//footer')
    nl2 = nl1.nested_xpath('img')

    loader.add_xpath('name', '//header/div/text()')
    nl1.add_xpath('url', 'a/@href')
    nl2.add_xpath('image', '@src')

    item = loader.load_item()

    # assertIs (rather than a bare ``assert``) still runs under
    # ``python -O`` and reports both operands on failure, matching the
    # assertEqual calls used for the value checks below.
    self.assertIs(item, loader.item)
    self.assertIs(item, nl1.item)
    self.assertIs(item, nl2.item)

    self.assertEqual(item['name'], ['marta'])
    self.assertEqual(item['url'], ['http://www.scrapy.org'])
    self.assertEqual(item['image'], ['/images/logo.png'])
def __callback(self, response: HtmlResponse):
    """Build a ``LeroymerlinItem`` from ``response`` and yield it.

    The name, article number, price, image links and details fields are
    extracted from ``response`` with XPath; the link is ``response.url``,
    the category comes from ``self.__category``, and ``image_paths`` is
    given an empty list.
    """
    # Task 2: generic handler for product characteristics. The union
    # selects the text of every <dt> and <dd> under the "def-list"
    # definition list, so the data is gathered regardless of the
    # characteristics' type or number.
    details_xpath = (
        "//dl[@class='def-list']//dt/text() | //dl[@class='def-list']//dd/text()"
    )

    loader = ItemLoader(item=LeroymerlinItem(), selector=response)

    loader.add_xpath(Fields.name, "//h1/text()")
    loader.add_value(Fields.link, response.url)
    loader.add_xpath(
        Fields.article_number,
        "//span[@slot='article']/text()",
    )
    loader.add_xpath(
        Fields.price,
        "//uc-pdp-price-view/span/text()",
    )
    loader.add_xpath(
        Fields.image_links,
        "//uc-pdp-media-carousel//img/@src",
    )
    loader.add_value(Fields.image_paths, [])
    loader.add_value(Fields.category, self.__category)
    loader.add_xpath(Fields.details, details_xpath)

    yield loader.load_item()
def parse_application(self, response):
    """Populate a ``TraidingSsgeItem`` from ``response`` and yield it.

    ``statement_date`` is read from ``response.request.meta['appdate']``
    and ``current_app_url`` from ``response.url``; every other field is
    extracted by evaluating an XPath expression against ``response``.

    The loaded item is printed to stdout before being yielded.

    Raises:
        KeyError: if the request meta has no ``'appdate'`` entry
            (assuming ``meta`` is a plain dict — TODO confirm).
    """
    loader = ItemLoader(item=TraidingSsgeItem(), selector=response)

    # Public ``add_value`` instead of the loader's private ``_add_value``,
    # matching how ``current_app_url`` is added further down.
    loader.add_value('statement_date', response.request.meta['appdate'])

    loader.add_xpath(
        'new_or_used',
        "normalize-space(//div[@class='condition']/text())")
    loader.add_xpath(
        'location',
        "//div[@class='location-time']/div[2]/p/span/text()")
    loader.add_xpath(
        'last_updated',
        'normalize-space(//div[@class="location-time"]/div[2]/descendant::span[2]/text())'
    )
    loader.add_xpath(
        'product',
        "normalize-space(//h2[@class='main-title']/text())")
    # NOTE: the trailing space inside the class value below is part of the
    # XPath literal and is kept as-is.
    loader.add_xpath(
        'price',
        "normalize-space(//div[@class='market-item-price ']/text())")
    loader.add_xpath(
        'currency_symbol',
        "normalize-space(//div[@class='market-item-price ']/span/text())")
    loader.add_xpath(
        'applicant',
        "normalize-space(//div[@class='author_type']/text())")
    loader.add_value('current_app_url', response.url)
    loader.add_xpath(
        'all_apps_url',
        "//div[@class='author_type']/descendant::span/a/@href")
    loader.add_xpath(
        'agent_or_person',
        "normalize-space((//div[@class='author_type'])[1]/span/a/text())")
    loader.add_xpath(
        'number_of_apps',
        "normalize-space(//div[@class='author_type']/descendant::span[2]/text())"
    )
    loader.add_xpath(
        'product_description',
        "normalize-space(//span[@class='details_text']/text())")
    loader.add_xpath(
        'product_specification',
        "normalize-space(//div[@class='jobs_details']/span/text())")
    loader.add_xpath(
        'product_condition_description',
        "normalize-space(//div[@class='jobs_details'][2]/span[2]/text())")
    loader.add_xpath(
        'seen',
        "normalize-space(//div[@class='article_views']/span/text())")
    loader.add_xpath(
        'app_id',
        "normalize-space(//div[@class='market-item-id']/span/text())")
    loader.add_xpath(
        'phone',
        "normalize-space(//div[@class='numbers-wrap']/a/@href)")

    # Load before printing: the collected values are only written into the
    # item by load_item(), so printing loader.item beforehand would always
    # show an unpopulated item.
    item = loader.load_item()
    print(item)
    yield item