예제 #1
0
    def parse(self, response):
        """Yield one ``DataItem`` per article link found on the page.

        Every ``a.articleTitleListSmall`` anchor in ``response`` becomes an
        item holding the link text (``title``), the link target (``url``)
        and a fixed ``source`` label, all stored as plain values.  ``title``
        and ``url`` are ``None`` when the anchor has no text node or no
        ``href`` attribute, because that is what ``get()`` returns for an
        empty match.
        """
        links = Selector(response).css('a.articleTitleListSmall')

        for link in links:
            item = DataItem()
            # Deliberately no trailing commas: ``field = value,`` would store
            # a 1-tuple rather than the string itself.
            item['title'] = link.css('a.articleTitleListSmall::text').get()
            item['source'] = 'IslamJesus.ws - Article - Abu Iyyad'
            item['url'] = link.css('a.articleTitleListSmall::attr(href)').get()
            yield item
예제 #2
0
    def parse(self, response):
        """Yield one ``DataItem`` per article link found on the page.

        Every ``a.articleLinkOrange`` anchor in ``response`` becomes an item
        holding the link text (``title``), the link target (``url``) and a
        fixed ``source`` label, all stored as plain values.  ``title`` and
        ``url`` are ``None`` when the anchor has no text node or no ``href``
        attribute, because that is what ``get()`` returns for an empty match.
        """
        links = Selector(response).css('a.articleLinkOrange')

        for link in links:
            item = DataItem()
            # Deliberately no trailing commas: ``field = value,`` would store
            # a 1-tuple rather than the string itself.
            item['title'] = link.css('a.articleLinkOrange::text').get()
            item['source'] = 'HealthyMuslim.com - Article - Abu Iyyad'
            item['url'] = link.css('a.articleLinkOrange::attr(href)').get()
            yield item
예제 #3
0
    def parse(self, response):
        """Collect product titles and prices from the page into one item.

        A single ``DataItem`` is filled with two lists of extracted strings
        (``title`` and ``precio``), passed to ``save`` and then yielded.
        """
        title_query = '//h3[@class="info-name"]/a[1]/@title'
        # The trailing space inside the class value is part of the query.
        price_query = '//div[@class="product-price "]/span[1]/text()'

        record = DataItem()
        record['title'] = response.xpath(title_query).extract()
        record['precio'] = response.xpath(price_query).extract()

        save(record)
        yield record
예제 #4
0
    def parse(self, response):
        """Yield one ``DataItem`` per article link found on the page.

        Every ``a.uk-link-reset`` anchor in ``response`` becomes an item
        holding the link text (``title``), a fixed ``source`` label and the
        link target prefixed with the site root (``url``), all stored as
        plain values.  ``title`` is ``None`` when the anchor has no text
        node; ``url`` is ``None`` when the anchor has no ``href`` attribute.
        """
        site_root = "https://www.salafipubs.com/"
        links = Selector(response).css('a.uk-link-reset')

        for link in links:
            href = link.css('a.uk-link-reset::attr(href)').get()

            item = DataItem()
            # Deliberately no trailing commas: ``field = value,`` would store
            # a 1-tuple rather than the string itself.
            item['title'] = link.css('a.uk-link-reset::text').get()
            item['source'] = 'Salafi Publications - Article'
            # ``get()`` yields None for an anchor without ``href``; guard so
            # the string concatenation cannot raise TypeError and abort the
            # remaining links on the page.
            item['url'] = (site_root + href) if href is not None else None
            yield item
예제 #5
0
    def parse(self, response):
        """Yield one ``DataItem`` per post heading found on the page.

        For every ``h3.mh-posts-list-title`` element in ``response`` the
        direct child anchor supplies the ``title`` (its ``title`` attribute)
        and the ``url`` (its ``href`` attribute); ``source`` is a fixed
        label.  All fields are stored as plain values.  ``title`` and ``url``
        are ``None`` when the attribute is missing, because that is what
        ``get()`` returns for an empty match.
        """
        headings = Selector(response).css('h3.mh-posts-list-title')

        for heading in headings:
            item = DataItem()
            # Deliberately no trailing commas: ``field = value,`` would store
            # a 1-tuple rather than the string itself.
            item['title'] = heading.css(
                'h3.mh-posts-list-title > a::attr(title)').get()
            item['source'] = 'Salafi Sounds - Audio'
            item['url'] = heading.css(
                'h3.mh-posts-list-title > a::attr(href)').get()
            yield item
예제 #6
0
 def parse_datasets(self, response):
     """Schedule a follow-up request for every dataset link on the page.

     Each matched ``href`` has its ``..`` segments removed and is appended
     to the archive root.  The resulting URL is stored in a new
     ``DataItem`` under ``tmpurl`` and the item is passed along in the
     request ``meta`` (key ``dataitem``) to ``parse_downloads``.
     """
     site_root = "http://archive.ics.uci.edu/ml"
     hrefs = response.xpath("//td[1]/p[1]/span[2]/a[1]/@href").getall()

     for href in hrefs:
         target = site_root + href.replace("..", "")

         dataset = DataItem()
         dataset["tmpurl"] = target

         yield scrapy.Request(
             target,
             callback=self.parse_downloads,
             meta={"dataitem": dataset},
         )
예제 #7
0
 def parse(self, response):
     """Schedule a detail request for every ``li.public`` entry.

     For each entry a ``DataItem`` is filled with ``name`` (first regex
     capture from the repository link text) and ``update_time`` (the
     ``datetime`` attribute of the ``relative-time`` element).  The item
     is attached to the request ``meta`` under ``item`` and the request
     is handled by ``parse_data``.
     """
     for entry in response.css('li.public'):
         name_nodes = entry.xpath(
             './/a[@itemprop="name codeRepository"]/text()')
         updated_nodes = entry.xpath('.//relative-time/@datetime')
         link_nodes = entry.xpath('.//h3/a/@href')

         item = DataItem()
         # The capture group takes the text following a newline and any
         # whitespace after it.
         item['name'] = name_nodes.re_first(r'\n\s*(.*)')
         item['update_time'] = updated_nodes.extract_first()

         detail_url = response.urljoin(link_nodes.extract_first())
         yield scrapy.Request(
             detail_url,
             callback=self.parse_data,
             meta={'item': item},
         )
예제 #8
0
    def parse(self, response):
        """Schedule detail requests for each article, then follow pagination.

        For every ``article`` under ``div.content`` a ``DataItem`` is filled
        with ``module``, ``title``, ``note`` and ``more`` (each a list of
        extracted strings).  The item is attached to a request for its first
        ``more`` link, handled by ``parse2``.  Articles without a ``more``
        link are skipped because there is no URL to request.

        After all articles have been processed, the first ``next-page`` link
        (if any) is requested once, with this method as its callback.
        """
        # Page-level value: evaluate once instead of once per article.
        module = response.xpath(
            '//div[@class="content"]/h1/strong/a/text()').extract()

        for article in response.xpath('//div[@class="content"]/article'):
            item = DataItem()
            # Give each item its own list so later mutation of one item
            # cannot leak into the others.
            item['module'] = list(module)
            item['title'] = article.xpath('h2/a/text()').extract()
            item['note'] = article.xpath('p[@class="note"]/text()').extract()
            item['more'] = article.xpath(
                'p[@class="more"]/a/@href').extract()

            if not item['more']:
                # Indexing an empty list would raise IndexError and abort
                # the remaining articles on this page.
                continue

            # urljoin leaves absolute URLs untouched and resolves relative
            # ones, which Request would otherwise reject.
            yield scrapy.http.Request(response.urljoin(item['more'][0]),
                                      meta={'item': item},
                                      callback=self.parse2)

        # Pagination is a property of the page, not of an article: request
        # it once, after the loop, even when the page lists no articles.
        next_links = response.xpath(
            '//li[@class="next-page"]/a/@href').extract()
        if next_links:
            yield scrapy.http.Request(response.urljoin(next_links[0]),
                                      callback=self.parse)