コード例 #1
0
ファイル: PD_spider.py プロジェクト: akurtovic/STL-Crimelog
    def parse(self, response):
        """Save one ``Story`` per linked entry in the "bull-list" list.

        Each ``<li>`` of ``ul.bull-list`` is queried on its own, so a headline
        is always paired with the link from the same entry.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the Post-Dispatch page (confirm against caller).
        """
        sel = Selector(response)

        # Relative XPaths per entry avoid re-running the page-wide queries on
        # every iteration and cannot mis-pair or overrun the result lists.
        for entry in sel.xpath('//ul[@class="bull-list"]/li'):
            headlines = entry.xpath('a/text()').extract()
            hrefs = entry.xpath('a//@href').extract()
            if not headlines or not hrefs:
                # Entry without a linked headline: nothing to store.
                continue

            item = Story()
            item['headline'] = headlines[0]
            # hrefs are prefixed with the site root to form the stored URL.
            item['url'] = "http://www.stltoday.com" + hrefs[0]
            item['source'] = "Post-Dispatch"
            item['added'] = datetime.now()
            item.save()
コード例 #2
0
 def parse(self, response):
     """Save one ``Story`` per linked headline in a "hero-story" block.

     Each ``div.hero-story`` is queried on its own, so a headline is always
     paired with the link from the same block.

     Args:
         response: Object handed to ``Selector``; presumably the Scrapy
             response for the KSDK page (confirm against caller).
     """
     sel = Selector(response)

     # Relative XPaths per block avoid re-running the page-wide queries on
     # every iteration and cannot mis-pair or overrun the result lists.
     for entry in sel.xpath('//div[@class="hero-story"]'):
         headlines = entry.xpath('h1/a/text()').extract()
         hrefs = entry.xpath('h1/a/@href').extract()
         if not headlines or not hrefs:
             # Block without a linked headline: nothing to store.
             continue

         item = Story()
         item['headline'] = headlines[0]
         # hrefs are prefixed with the site root to form the stored URL.
         item['url'] = "http://www.ksdk.com" + hrefs[0]
         item['source'] = "KSDK"
         item['added'] = datetime.now()
         item.save()
コード例 #3
0
ファイル: KMOX_spider.py プロジェクト: akurtovic/STL-Crimelog
    def parse(self, response):
        """Save one ``Story`` per linked headline in a "feature " block.

        Each matching ``div`` is queried on its own, so a headline is always
        paired with the link from the same block.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the KMOX page (confirm against caller).
        """
        sel = Selector(response)

        # NOTE: the class value really ends with a space ("feature "); the
        # XPath is an exact string match, so the space must be kept.
        for entry in sel.xpath('//div[@class="feature "]'):
            # Relative queries avoid re-running the page-wide XPaths on every
            # iteration and cannot mis-pair or overrun the result lists.
            headlines = entry.xpath('div/h4/a/text()').extract()
            hrefs = entry.xpath('div/h4/a/@href').extract()
            if not headlines or not hrefs:
                # Block without a linked headline: nothing to store.
                continue

            item = Story()
            item['headline'] = headlines[0]
            item['url'] = hrefs[0]
            item['source'] = "KMOX"
            item['added'] = datetime.now()
            item.save()
コード例 #4
0
    def parse(self, response):
        """Save one ``Story`` per linked "entryHeadline" heading.

        Each ``h2.entryHeadline`` is queried on its own, so a headline is
        always paired with the link from the same heading.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the Riverfront Times page (confirm against
                caller).
        """
        sel = Selector(response)

        # Relative XPaths per heading avoid re-running the page-wide queries
        # on every iteration and cannot mis-pair or overrun the result lists.
        for entry in sel.xpath('//h2[@class="entryHeadline"]'):
            headlines = entry.xpath('a/text()').extract()
            hrefs = entry.xpath('a/@href').extract()
            if not headlines or not hrefs:
                # Heading without a linked headline: nothing to store.
                continue

            item = Story()
            item['headline'] = headlines[0]
            item['url'] = hrefs[0]
            item['source'] = "Riverfront Times"
            item['added'] = datetime.now()
            item.save()
コード例 #5
0
    def parse(self, response):
        """Save one ``Story`` per "node" link in the "slides" list.

        Each ``a.node`` under ``ul.slides`` is queried on its own, so a title
        is always paired with the ``href`` of the same link.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the scraped page (confirm against caller).
        """
        sel = Selector(response)

        # Relative XPaths per link avoid re-running the page-wide queries on
        # every iteration and cannot mis-pair or overrun the result lists.
        for link in sel.xpath('//ul[@class="slides"]/li/a[@class="node"]'):
            titles = link.xpath('div/p[@class="title"]/text()').extract()
            hrefs = link.xpath('@href').extract()
            if not titles or not hrefs:
                # Link without a title or target: nothing to store.
                continue

            item = Story()
            item['headline'] = titles[0]
            item['url'] = hrefs[0]
            item['source'] = "KMOX"
            item['added'] = datetime.now()
            item.save()
コード例 #6
0
    def parse(self, response):
        """Save one ``Story`` per linked entry in the "bull-list" list.

        Every ``<li>`` under ``ul.bull-list`` is inspected individually so the
        headline text and the link always come from the same entry.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the Post-Dispatch page (confirm against caller).
        """
        sel = Selector(response)

        for entry in sel.xpath('//ul[@class="bull-list"]/li'):
            # Query relative to this entry instead of indexing page-wide
            # result lists, which were re-extracted on each iteration and
            # could differ in length from the entry list.
            headlines = entry.xpath('a/text()').extract()
            hrefs = entry.xpath('a//@href').extract()
            if not headlines or not hrefs:
                continue  # no linked headline in this entry

            item = Story()
            item['headline'] = headlines[0]
            # hrefs are prefixed with the site root to form the stored URL.
            item['url'] = "http://www.stltoday.com" + hrefs[0]
            item['source'] = "Post-Dispatch"
            item['added'] = datetime.now()
            item.save()
コード例 #7
0
    def parse(self, response):
        """Save one ``Story`` per linked headline in a "feature " block.

        Every matching ``div`` is inspected individually so the headline text
        and the link always come from the same block.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the KMOX page (confirm against caller).
        """
        sel = Selector(response)

        # NOTE: the trailing space in "feature " is part of the exact class
        # string the XPath matches and must be preserved.
        for entry in sel.xpath('//div[@class="feature "]'):
            # Query relative to this block instead of indexing page-wide
            # result lists, which were re-extracted on each iteration and
            # could differ in length from the block list.
            headlines = entry.xpath('div/h4/a/text()').extract()
            hrefs = entry.xpath('div/h4/a/@href').extract()
            if not headlines or not hrefs:
                continue  # no linked headline in this block

            item = Story()
            item['headline'] = headlines[0]
            item['url'] = hrefs[0]
            item['source'] = "KMOX"
            item['added'] = datetime.now()
            item.save()
コード例 #8
0
    def parse(self, response):
        """Save one ``Story`` per linked "entryHeadline" heading.

        Every ``h2.entryHeadline`` is inspected individually so the headline
        text and the link always come from the same heading.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the Riverfront Times page (confirm against
                caller).
        """
        sel = Selector(response)

        for entry in sel.xpath('//h2[@class="entryHeadline"]'):
            # Query relative to this heading instead of indexing page-wide
            # result lists, which were re-extracted on each iteration and
            # could differ in length from the heading list.
            headlines = entry.xpath('a/text()').extract()
            hrefs = entry.xpath('a/@href').extract()
            if not headlines or not hrefs:
                continue  # no linked headline in this heading

            item = Story()
            item['headline'] = headlines[0]
            item['url'] = hrefs[0]
            item['source'] = "Riverfront Times"
            item['added'] = datetime.now()
            item.save()
コード例 #9
0
    def parse(self, response):
        """Save one ``Story`` per linked headline in a "hero-story" block.

        Every ``div.hero-story`` is inspected individually so the headline
        text and the link always come from the same block.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the KSDK page (confirm against caller).
        """
        sel = Selector(response)

        for entry in sel.xpath('//div[@class="hero-story"]'):
            # Query relative to this block instead of indexing page-wide
            # result lists, which were re-extracted on each iteration and
            # could differ in length from the block list.
            headlines = entry.xpath('h1/a/text()').extract()
            hrefs = entry.xpath('h1/a/@href').extract()
            if not headlines or not hrefs:
                continue  # no linked headline in this block

            item = Story()
            item['headline'] = headlines[0]
            # hrefs are prefixed with the site root to form the stored URL.
            item['url'] = "http://www.ksdk.com" + hrefs[0]
            item['source'] = "KSDK"
            item['added'] = datetime.now()
            item.save()
コード例 #10
0
ファイル: KMOV_spider.py プロジェクト: akurtovic/STL-Crimelog
    def parse(self, response):
        """Save one ``Story`` per "entry-title" heading with an http link.

        Each ``h3.entry-title`` is queried on its own for its headline text
        and its ``a.storyLink`` target. Entries whose URL does not contain
        ``"http"`` are skipped.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the KMOV page (confirm against caller).
        """
        sel = Selector(response)

        # The headline and URL used to come from two page-wide queries with
        # different roots, indexed by position; any heading lacking a
        # storyLink shifted every later pairing. Querying per heading keeps
        # each headline with its own link.
        for entry in sel.xpath('//h3[@class="entry-title"]'):
            headlines = entry.xpath('a/text()').extract()
            hrefs = entry.xpath('a[@class="storyLink"]/@href').extract()
            if not headlines or not hrefs:
                # Heading without a story link: nothing to store.
                continue

            url = hrefs[0]
            if "http" not in url:
                # Only URLs containing "http" are stored.
                continue

            item = Story()
            item['headline'] = headlines[0]
            item['url'] = url
            item['source'] = "KMOV"
            item['added'] = datetime.now()
            item.save()
コード例 #11
0
    def parse(self, response):
        """Save one ``Story`` per "entry-title" heading with an http link.

        Every ``h3.entry-title`` is inspected individually for its headline
        text and its ``a.storyLink`` target; entries whose URL does not
        contain ``"http"`` are not saved.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the KMOV page (confirm against caller).
        """
        sel = Selector(response)

        for entry in sel.xpath('//h3[@class="entry-title"]'):
            # Per-heading queries replace two positional, page-wide lists
            # with different roots, which could drift out of step (or
            # overrun) whenever a heading lacked a storyLink anchor.
            headlines = entry.xpath('a/text()').extract()
            hrefs = entry.xpath('a[@class="storyLink"]/@href').extract()
            if not headlines or not hrefs:
                continue  # heading without a story link

            url = hrefs[0]
            if "http" not in url:
                continue  # only URLs containing "http" are stored

            item = Story()
            item['headline'] = headlines[0]
            item['url'] = url
            item['source'] = "KMOV"
            item['added'] = datetime.now()
            item.save()
コード例 #12
0
ファイル: BND_spider.py プロジェクト: akurtovic/STL-Crimelog
    def parse(self, response):
        """Save one ``Story`` per article block, skipping section labels.

        Each ``div.article_text`` is queried on its own for its ``h3`` link.
        Headlines that exactly match an entry in ``ignored``, or that contain
        ``'Blotter'``, are not saved.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the Belleville News-Democrat page (confirm
                against caller).
        """
        # Headlines that are exact matches for these labels are skipped.
        ignored = ('CRIME', 'TOP STORIES', 'EDUCATION', 'Neighborhood watch', 'CRIME BLOTTER')
        sel = Selector(response)

        # Relative XPaths per block avoid re-running the page-wide queries on
        # every iteration and cannot mis-pair or overrun the result lists.
        for entry in sel.xpath('//div[@class="article_text"]'):
            headlines = entry.xpath('h3/a/text()').extract()
            hrefs = entry.xpath('h3/a/@href').extract()
            if not headlines or not hrefs:
                # Block without a linked headline: nothing to store.
                continue

            headline = headlines[0]
            if headline in ignored or 'Blotter' in headline:
                continue

            item = Story()
            item['headline'] = headline
            # hrefs are prefixed with the site root to form the stored URL.
            item['url'] = "http://www.bnd.com/" + hrefs[0]
            item['source'] = "Belleville News-Democrat"
            item['added'] = datetime.now()
            item.save()
コード例 #13
0
    def parse(self, response):
        """Save one ``Story`` per "node" link in the "slides" list.

        Every ``a.node`` under ``ul.slides`` is inspected individually so the
        title text and the ``href`` always come from the same link.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the scraped page (confirm against caller).
        """
        sel = Selector(response)
        links = sel.xpath('//ul[@class="slides"]/li/a[@class="node"]')

        for link in links:
            # Query relative to this link instead of indexing page-wide
            # result lists, which were re-extracted on each iteration and
            # could differ in length from the link list.
            titles = link.xpath('div/p[@class="title"]/text()').extract()
            hrefs = link.xpath('@href').extract()
            if not titles or not hrefs:
                continue  # link without a title or target

            item = Story()
            item['headline'] = titles[0]
            item['url'] = hrefs[0]
            item['source'] = "KMOX"
            item['added'] = datetime.now()
            item.save()
コード例 #14
0
ファイル: BND_spider.py プロジェクト: jamesfebin/STL-Crimelog
    def parse(self, response):
        """Save one ``Story`` per article block, skipping section labels.

        Every ``div.article_text`` is inspected individually for its ``h3``
        link. A headline is not saved when it exactly matches an entry in
        ``ignored`` or contains ``'Blotter'``.

        Args:
            response: Object handed to ``Selector``; presumably the Scrapy
                response for the Belleville News-Democrat page (confirm
                against caller).
        """
        # Headlines that are exact matches for these labels are skipped.
        ignored = (
            'CRIME', 'TOP STORIES', 'EDUCATION', 'Neighborhood watch',
            'CRIME BLOTTER',
        )
        sel = Selector(response)

        for entry in sel.xpath('//div[@class="article_text"]'):
            # Query relative to this block instead of indexing page-wide
            # result lists, which were re-extracted on each iteration and
            # could differ in length from the block list.
            headlines = entry.xpath('h3/a/text()').extract()
            hrefs = entry.xpath('h3/a/@href').extract()
            if not headlines or not hrefs:
                continue  # no linked headline in this block

            headline = headlines[0]
            if headline in ignored or 'Blotter' in headline:
                continue

            item = Story()
            item['headline'] = headline
            # hrefs are prefixed with the site root to form the stored URL.
            item['url'] = "http://www.bnd.com/" + hrefs[0]
            item['source'] = "Belleville News-Democrat"
            item['added'] = datetime.now()
            item.save()