def find_entry(self, path, match):
    """Look up the first entry whose text equals ``match``.

    Evaluates ``self.xpath`` against ``path`` and scans the resulting
    elements in order.

    Args:
        path: Object exposing an ``xpath(expression)`` method whose result
            is an iterable of elements with a ``.text`` attribute
            (presumably an etree node -- confirm against callers).
        match: Value compared for equality with each element's ``.text``.

    Returns:
        The ``.text`` of the first matching element, or ``None`` when no
        element matches.
    """
    candidates = path.xpath(self.xpath)
    # next() over a generator instead of filter() + len()/[0]: on Python 3
    # filter() yields a lazy iterator that supports neither len() nor
    # indexing, so the previous form raised TypeError. This form behaves the
    # same on Python 2 and 3 and stops at the first hit.
    return next(
        (node.text for node in candidates if node.text == match),
        None,
    )
def get_all_entry(self, path):
    """Convert everything selected by ``self.xpath`` via ``self._get_one``.

    Args:
        path: Object exposing an ``xpath(expression)`` method (presumably an
            etree node -- confirm against callers).

    Returns:
        When the XPath result is exactly a ``list``: a new list holding
        ``self._get_one(entry)`` for each entry, in order. Otherwise the
        single value ``self._get_one(result)``. The converted values are
        presumably dictionaries; that depends on ``_get_one``, which is
        defined outside this block.
    """
    selected = path.xpath(self.xpath)
    # Exact type test (not isinstance) is kept deliberately: list subclasses
    # and scalar XPath results are both handed to _get_one as one value.
    if type(selected) is list:
        return [self._get_one(entry) for entry in selected]
    return self._get_one(selected)
def book_pare(self, response):
    """Extract book details from a product page into ``BookSiteMainItem``s.

    For every node matching the product-main container, a fresh item is
    filled from the page heading area and from the striped product table,
    then yielded.

    Args:
        response: Object exposing ``xpath(expression)``; the selectors it
            returns must support ``.xpath()``, ``.get()`` and integer
            indexing (presumably a Scrapy response -- confirm).

    Yields:
        BookSiteMainItem: one populated item per matched container node.
    """
    main_xpath = '//*[@id="content_inner"]/article/div[1]/div[2]'
    cells_xpath = (
        '//*[@id="content_inner"]/article'
        '/table[@class="table table-striped"]//tr/td'
    )
    # Table cells whose first whitespace-separated token is stored as-is,
    # listed as (item field, cell index).
    leading_fields = (
        ('upc', 0),
        ('producttype', 1),
        ('priceexcludetax', 2),
        ('priceincludetax', 3),
        ('tax', 4),
    )

    for path in response.xpath(main_xpath):
        # A new item per match: reusing one instance across iterations would
        # make every yielded item the same mutable object.
        item = BookSiteMainItem()

        # NOTE(review): every expression below starts with '//', so it looks
        # document-absolute rather than relative to `path` -- confirm that
        # this is intended.
        item['full_bookname'] = path.xpath(main_xpath + '/h1/text()').get()
        item['priceinpounds'] = path.xpath(main_xpath + '/p[1]/text()').get()
        item['rating'] = path.xpath(main_xpath + '/p[3]/@class').get()

        # Evaluate the table-cell query once instead of once per field.
        cells = path.xpath(cells_xpath)
        for field, index in leading_fields:
            # NOTE(review): .get() is called on the <td> node itself, not on
            # its text() -- presumably the value includes markup; verify.
            item[field] = cells[index].get().split()[0]

        # Cell 5 carries both values: the first two tokens are stored as
        # 'availability', the remaining tokens as 'numberavailable'.
        stock_tokens = cells[5].get().split()
        item['availability'] = stock_tokens[0:2]
        item['numberavailable'] = stock_tokens[2:]

        item['numberofreviews'] = cells[6].get().split()[0]
        yield item