def parse_product(self, response):
    """Build a ``Product`` from a product page response and yield it.

    Fields are filled in this order:

    * ``url``: taken directly from ``response.url``.
    * ``name``: extracted with the ``//h1/text()`` XPath.
    * ``item_num``: extracted from a ``span`` matched on ``style-number``;
      the text after the last ``#`` is kept and stripped.
    * ``price``: extracted from a ``span`` matched on ``price-current``,
      falling back to ``regular-price`` when the first extraction has
      length zero; ``to_float`` is handed to ``add_field`` as converter.
    * ``initial_data``: set only when a ``<script>`` containing
      ``initialData`` is present; its text goes through
      ``transform_initial_data``.

    NOTE(review): ``xtract`` and ``add_field`` are defined elsewhere; this
    assumes ``add_field`` applies the optional callable to the extracted
    value before storing it -- confirm against its definition.
    """

    def _after_last_hash(raw):
        # Keep only what follows the final '#', without surrounding spaces.
        return raw.split('#')[-1].strip()

    item = Product()
    item['url'] = response.url

    # NAME
    add_field(item, 'name', xtract(response, "//h1/text()"))

    # ITEM NUM
    style_number_xpath = xcontains('span', 'style-number')
    add_field(
        item,
        'item_num',
        xtract(response, style_number_xpath),
        _after_last_hash,
    )

    # PRICE -- prefer the current price, fall back to the regular one.
    extracted_price = xtract(response, xcontains('span', 'price-current'))
    if len(extracted_price) == 0:
        extracted_price = xtract(response, xcontains('span', 'regular-price'))
    add_field(item, 'price', extracted_price, to_float)

    # Embedded page data: only the first matching script is used.
    script_xpath = "//script[contains(., 'initialData')]/text()"
    script_texts = response.xpath(script_xpath).extract()
    if len(script_texts) > 0:
        item['initial_data'] = transform_initial_data(script_texts[0])

    yield item
def test_xcontains(self):
    """Check the XPath strings that ``xcontains`` builds.

    Covers the two-argument call (which is expected to end in
    ``/text()``) and the three-argument call, where the third argument
    replaces that trailing step.
    """
    cases = (
        # (arguments, expected XPath)
        (('span', 'a-class'), "//span[contains(@class, 'a-class')]/text()"),
        (('a', 'ddd', 'href()'), "//a[contains(@class, 'ddd')]/href()"),
    )
    for arguments, expected in cases:
        built = xcontains(*arguments)
        assert built == expected