def parse(self, response):
    """Yield one ``DataItem`` per article link found in ``response``.

    Every ``a.articleTitleListSmall`` anchor produces an item whose
    ``title`` is the anchor text, whose ``source`` is a fixed label and
    whose ``url`` is the anchor's ``href`` attribute.
    """
    for link in Selector(response).css('a.articleTitleListSmall'):
        item = DataItem()
        # Plain assignments on purpose: a trailing comma after the value
        # would store a one-element tuple instead of the string itself.
        item['title'] = link.css('a.articleTitleListSmall::text').get()
        item['source'] = 'IslamJesus.ws - Article - Abu Iyyad'
        item['url'] = link.css('a.articleTitleListSmall::attr(href)').get()
        yield item
def parse(self, response):
    """Yield one ``DataItem`` per article link found in ``response``.

    Every ``a.articleLinkOrange`` anchor produces an item whose ``title``
    is the anchor text, whose ``source`` is a fixed label and whose
    ``url`` is the anchor's ``href`` attribute.
    """
    for link in Selector(response).css('a.articleLinkOrange'):
        item = DataItem()
        # Plain assignments on purpose: a trailing comma after the value
        # would store a one-element tuple instead of the string itself.
        item['title'] = link.css('a.articleLinkOrange::text').get()
        item['source'] = 'HealthyMuslim.com - Article - Abu Iyyad'
        item['url'] = link.css('a.articleLinkOrange::attr(href)').get()
        yield item
def parse(self, response):
    """Build a single ``DataItem`` from the page, save it, then yield it.

    ``title`` receives every ``@title`` value matched by the product-name
    XPath and ``precio`` every text node matched by the price XPath; both
    are stored as the lists returned by ``extract()``. The populated item
    is handed to ``save`` before being yielded.
    """
    title_xpath = '//h3[@class="info-name"]/a[1]/@title'
    price_xpath = '//div[@class="product-price "]/span[1]/text()'

    product = DataItem()
    product['title'] = response.xpath(title_xpath).extract()
    product['precio'] = response.xpath(price_xpath).extract()

    save(product)
    yield product
def parse(self, response):
    """Yield one ``DataItem`` per article link found in ``response``.

    Every ``a.uk-link-reset`` anchor produces an item whose ``title`` is
    the anchor text, whose ``source`` is a fixed label and whose ``url``
    is the site prefix followed by the anchor's ``href`` attribute.
    ``url`` is ``None`` when the anchor carries no ``href``.
    """
    for link in Selector(response).css('a.uk-link-reset'):
        item = DataItem()
        # Plain assignments on purpose: a trailing comma after the value
        # would store a one-element tuple instead of the string itself.
        item['title'] = link.css('a.uk-link-reset::text').get()
        item['source'] = 'Salafi Publications - Article'

        href = link.css('a.uk-link-reset::attr(href)').get()
        if href is None:
            # Concatenating str + None would raise TypeError and stop the
            # whole generator; keep the item and leave the URL empty.
            item['url'] = None
        else:
            item['url'] = "https://www.salafipubs.com/" + href
        yield item
def parse(self, response):
    """Yield one ``DataItem`` per post heading found in ``response``.

    Every ``h3.mh-posts-list-title`` heading produces an item whose
    ``title`` and ``url`` come from the ``title`` and ``href`` attributes
    of the heading's direct child anchor, and whose ``source`` is a fixed
    label.
    """
    for heading in Selector(response).css('h3.mh-posts-list-title'):
        item = DataItem()
        # Plain assignments on purpose: a trailing comma after the value
        # would store a one-element tuple instead of the string itself.
        item['title'] = heading.css(
            'h3.mh-posts-list-title > a::attr(title)').get()
        item['source'] = 'Salafi Sounds - Audio'
        item['url'] = heading.css(
            'h3.mh-posts-list-title > a::attr(href)').get()
        yield item
def parse_datasets(self, response):
    """Schedule a ``parse_downloads`` request for every dataset link.

    Each matched ``href`` has every ``..`` sequence removed and is then
    appended to the archive base URL. The resulting URL is stored in a new
    ``DataItem`` under ``tmpurl``, and that item travels with the request
    in ``meta["dataitem"]``.
    """
    base_url = "http://archive.ics.uci.edu/ml"
    link_nodes = response.xpath("//td[1]/p[1]/span[2]/a[1]/@href")

    for node in link_nodes:
        href = node.get()
        target = base_url + href.replace("..", "")

        entry = DataItem()
        entry["tmpurl"] = target

        yield scrapy.Request(
            target,
            meta={"dataitem": entry},
            callback=self.parse_downloads,
        )
def parse(self, response):
    """Request the detail page of every ``li.public`` entry on the page.

    For each entry a ``DataItem`` is filled with ``name`` (first regex
    capture from the repository link text) and ``update_time`` (the
    ``datetime`` attribute of the entry's ``relative-time`` element). The
    item is attached to the request as ``meta['item']`` and the request is
    handled by ``parse_data``.
    """
    name_xpath = './/a[@itemprop="name codeRepository"]/text()'
    updated_xpath = './/relative-time/@datetime'
    link_xpath = './/h3/a/@href'

    for entry in response.css('li.public'):
        record = DataItem()
        record['name'] = entry.xpath(name_xpath).re_first(r'\n\s*(.*)')
        record['update_time'] = entry.xpath(updated_xpath).extract_first()

        href = entry.xpath(link_xpath).extract_first()
        detail_url = response.urljoin(href)

        yield scrapy.Request(
            detail_url,
            callback=self.parse_data,
            meta={'item': record},
        )
def parse(self, response):
    """Follow each article's "more" link, then the next listing page.

    For every ``article`` under ``div.content`` a ``DataItem`` is filled
    with the page-wide ``module`` heading plus the article's ``title``,
    ``note`` and ``more`` values (each stored as the list returned by
    ``extract()``). A request for the first "more" link is yielded with
    the item in ``meta['item']`` and ``parse2`` as callback. Articles
    without a "more" link are skipped. Finally, if the page has a
    ``next-page`` link, a request for it is yielded with this method as
    callback.
    """
    # The module heading is selected from the whole page, so evaluate it
    # once instead of once per article.
    module = response.xpath(
        '//div[@class="content"]/h1/strong/a/text()').extract()

    for article in response.xpath('//div[@class="content"]/article'):
        item = DataItem()
        # Each item gets its own list so later mutation of one item's
        # ``module`` cannot leak into the others.
        item['module'] = list(module)
        item['title'] = article.xpath('h2/a/text()').extract()
        item['note'] = article.xpath('p[@class="note"]/text()').extract()
        item['more'] = article.xpath('p[@class="more"]/a/@href').extract()

        if not item['more']:
            # Indexing [0] on an empty list would raise IndexError and
            # abort the remaining articles and the pagination below.
            continue

        # urljoin leaves absolute URLs untouched and resolves relative
        # ones, which Request would otherwise reject.
        yield scrapy.http.Request(
            response.urljoin(item['more'][0]),
            meta={'item': item},
            callback=self.parse2,
        )

    next_page = response.xpath(
        '//li[@class="next-page"]/a/@href').extract()
    if next_page:
        yield scrapy.http.Request(
            response.urljoin(next_page[0]), callback=self.parse)