def parse(self, response):
    """Save one ``Story`` per headline link in the ``bull-list`` lists.

    Args:
        response: The response object that is wrapped in a ``Selector``.

    Each saved item gets the link text as ``headline``, the link ``href``
    prefixed with ``http://www.stltoday.com`` as ``url``, the fixed source
    ``"Post-Dispatch"`` and ``datetime.now()`` as ``added``.
    """
    sel = Selector(response)
    # Run every XPath query once; they do not depend on the loop index.
    stories = sel.xpath('//ul[@class="bull-list"]/li').extract()
    headlines = sel.xpath('//ul[@class="bull-list"]/li/a/text()').extract()
    hrefs = sel.xpath('//ul[@class="bull-list"]/li/a//@href').extract()
    # ``stories`` only bounds how many items are saved, as before. zip()
    # stops at the shortest list, so mismatched result counts no longer
    # raise IndexError part-way through the page.
    for _, headline, href in zip(stories, headlines, hrefs):
        item = Story()
        item['headline'] = headline
        item['url'] = "http://www.stltoday.com" + href
        item['source'] = "Post-Dispatch"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Save one ``Story`` per ``hero-story`` headline link.

    Args:
        response: The response object that is wrapped in a ``Selector``.

    Each saved item gets the link text as ``headline``, the link ``href``
    prefixed with ``http://www.ksdk.com`` as ``url``, the fixed source
    ``"KSDK"`` and ``datetime.now()`` as ``added``.
    """
    sel = Selector(response)
    # Run every XPath query once; they do not depend on the loop index.
    stories = sel.xpath('//div[@class="hero-story"]').extract()
    headlines = sel.xpath('//div[@class="hero-story"]/h1/a/text()').extract()
    hrefs = sel.xpath('//div[@class="hero-story"]/h1/a/@href').extract()
    # ``stories`` only bounds how many items are saved, as before. zip()
    # stops at the shortest list, so mismatched result counts no longer
    # raise IndexError part-way through the page.
    for _, headline, href in zip(stories, headlines, hrefs):
        item = Story()
        item['headline'] = headline
        item['url'] = "http://www.ksdk.com" + href
        item['source'] = "KSDK"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Save one ``Story`` per headline link inside the ``feature `` divs.

    Args:
        response: The response object that is wrapped in a ``Selector``.

    Each saved item gets the link text as ``headline``, the link ``href``
    (used as-is) as ``url``, the fixed source ``"KMOX"`` and
    ``datetime.now()`` as ``added``.
    """
    sel = Selector(response)
    # The class value really does end with a space in these queries.
    # Run every XPath query once; they do not depend on the loop index.
    stories = sel.xpath('//div[@class="feature "]').extract()
    headlines = sel.xpath('//div[@class="feature "]/div/h4/a/text()').extract()
    hrefs = sel.xpath('//div[@class="feature "]/div/h4/a/@href').extract()
    # ``stories`` only bounds how many items are saved, as before. zip()
    # stops at the shortest list, so mismatched result counts no longer
    # raise IndexError part-way through the page.
    for _, headline, href in zip(stories, headlines, hrefs):
        item = Story()
        item['headline'] = headline
        item['url'] = href
        item['source'] = "KMOX"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Save one ``Story`` per ``entryHeadline`` link.

    Args:
        response: The response object that is wrapped in a ``Selector``.

    Each saved item gets the link text as ``headline``, the link ``href``
    (used as-is) as ``url``, the fixed source ``"Riverfront Times"`` and
    ``datetime.now()`` as ``added``.
    """
    sel = Selector(response)
    # Run every XPath query once; they do not depend on the loop index.
    stories = sel.xpath('//h2[@class="entryHeadline"]').extract()
    headlines = sel.xpath('//h2[@class="entryHeadline"]/a/text()').extract()
    hrefs = sel.xpath('//h2[@class="entryHeadline"]/a/@href').extract()
    # ``stories`` only bounds how many items are saved, as before. zip()
    # stops at the shortest list, so mismatched result counts no longer
    # raise IndexError part-way through the page.
    for _, headline, href in zip(stories, headlines, hrefs):
        item = Story()
        item['headline'] = headline
        item['url'] = href
        item['source'] = "Riverfront Times"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Save one ``Story`` per ``node`` link in the ``slides`` lists.

    Args:
        response: The response object that is wrapped in a ``Selector``.

    Each saved item gets the text of the link's ``p.title`` as
    ``headline``, the link ``href`` (used as-is) as ``url``, the fixed
    source ``"KMOX"`` and ``datetime.now()`` as ``added``.
    """
    sel = Selector(response)
    # Run every XPath query once; they do not depend on the loop index.
    stories = sel.xpath('//ul[@class="slides"]/li/a[@class="node"]').extract()
    headlines = sel.xpath(
        '//ul[@class="slides"]/li/a[@class="node"]/div/p[@class="title"]/text()'
    ).extract()
    hrefs = sel.xpath('//ul[@class="slides"]/li/a[@class="node"]/@href').extract()
    # ``stories`` only bounds how many items are saved, as before. zip()
    # stops at the shortest list, so mismatched result counts no longer
    # raise IndexError part-way through the page.
    for _, headline, href in zip(stories, headlines, hrefs):
        item = Story()
        item['headline'] = headline
        item['url'] = href
        item['source'] = "KMOX"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Store a ``Story`` for each headline link found under ``bull-list``.

    Args:
        response: The response object passed to ``Selector``.

    The ``headline`` is the link text and the ``url`` is the link ``href``
    with ``http://www.stltoday.com`` prepended. ``source`` is always
    ``"Post-Dispatch"`` and ``added`` is ``datetime.now()``.
    """
    sel = Selector(response)
    base = '//ul[@class="bull-list"]/li'
    # Each query is evaluated a single time, outside the loop, rather than
    # once per story as the index-based loop used to do.
    stories = sel.xpath(base).extract()
    titles = sel.xpath(base + '/a/text()').extract()
    links = sel.xpath(base + '/a//@href').extract()
    # Iterating the three lists in lockstep keeps the old upper bound of
    # len(stories) and avoids IndexError when a query returns fewer rows.
    for _, title, link in zip(stories, titles, links):
        item = Story()
        item['headline'] = title
        item['url'] = "http://www.stltoday.com" + link
        item['source'] = "Post-Dispatch"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Store a ``Story`` for each headline link under the ``feature `` divs.

    Args:
        response: The response object passed to ``Selector``.

    The ``headline`` is the link text and the ``url`` is the link ``href``
    unchanged. ``source`` is always ``"KMOX"`` and ``added`` is
    ``datetime.now()``.
    """
    sel = Selector(response)
    # The trailing space inside the class value is part of the query.
    base = '//div[@class="feature "]'
    # Each query is evaluated a single time, outside the loop, rather than
    # once per story as the index-based loop used to do.
    stories = sel.xpath(base).extract()
    titles = sel.xpath(base + '/div/h4/a/text()').extract()
    links = sel.xpath(base + '/div/h4/a/@href').extract()
    # Iterating the three lists in lockstep keeps the old upper bound of
    # len(stories) and avoids IndexError when a query returns fewer rows.
    for _, title, link in zip(stories, titles, links):
        item = Story()
        item['headline'] = title
        item['url'] = link
        item['source'] = "KMOX"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Store a ``Story`` for each link inside an ``entryHeadline`` heading.

    Args:
        response: The response object passed to ``Selector``.

    The ``headline`` is the link text and the ``url`` is the link ``href``
    unchanged. ``source`` is always ``"Riverfront Times"`` and ``added``
    is ``datetime.now()``.
    """
    sel = Selector(response)
    base = '//h2[@class="entryHeadline"]'
    # Each query is evaluated a single time, outside the loop, rather than
    # once per story as the index-based loop used to do.
    stories = sel.xpath(base).extract()
    titles = sel.xpath(base + '/a/text()').extract()
    links = sel.xpath(base + '/a/@href').extract()
    # Iterating the three lists in lockstep keeps the old upper bound of
    # len(stories) and avoids IndexError when a query returns fewer rows.
    for _, title, link in zip(stories, titles, links):
        item = Story()
        item['headline'] = title
        item['url'] = link
        item['source'] = "Riverfront Times"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Store a ``Story`` for each headline link inside ``hero-story`` divs.

    Args:
        response: The response object passed to ``Selector``.

    The ``headline`` is the link text and the ``url`` is the link ``href``
    with ``http://www.ksdk.com`` prepended. ``source`` is always
    ``"KSDK"`` and ``added`` is ``datetime.now()``.
    """
    sel = Selector(response)
    base = '//div[@class="hero-story"]'
    # Each query is evaluated a single time, outside the loop, rather than
    # once per story as the index-based loop used to do.
    stories = sel.xpath(base).extract()
    titles = sel.xpath(base + '/h1/a/text()').extract()
    links = sel.xpath(base + '/h1/a/@href').extract()
    # Iterating the three lists in lockstep keeps the old upper bound of
    # len(stories) and avoids IndexError when a query returns fewer rows.
    for _, title, link in zip(stories, titles, links):
        item = Story()
        item['headline'] = title
        item['url'] = "http://www.ksdk.com" + link
        item['source'] = "KSDK"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Save a ``Story`` for each ``entry-title`` headline with an http URL.

    Args:
        response: The response object that is wrapped in a ``Selector``.

    Headlines come from links inside ``h3.entry-title``; URLs come from
    ``a.storyLink`` links inside any ``h3``. The two lists are paired by
    position. Entries whose URL does not contain ``"http"`` are skipped;
    the rest are saved with source ``"KMOV"`` and ``datetime.now()`` as
    ``added``.
    """
    sel = Selector(response)
    # Run every XPath query once; they do not depend on the loop index.
    stories = sel.xpath('//h3[@class="entry-title"]').extract()
    headlines = sel.xpath('//h3[@class="entry-title"]/a/text()').extract()
    hrefs = sel.xpath('//h3/a[@class="storyLink"]/@href').extract()
    # ``stories`` only bounds how many entries are visited, as before.
    # zip() stops at the shortest list, so mismatched result counts no
    # longer raise IndexError part-way through the page.
    for _, headline, href in zip(stories, headlines, hrefs):
        if "http" not in href:
            # Entries without "http" in the URL are never saved.
            continue
        item = Story()
        item['headline'] = headline
        item['url'] = href
        item['source'] = "KMOV"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Store a ``Story`` for every ``entry-title`` headline with an http URL.

    Args:
        response: The response object passed to ``Selector``.

    Link texts under ``h3.entry-title`` supply the headlines, while the
    ``href`` values of ``a.storyLink`` under any ``h3`` supply the URLs;
    both lists are matched up by position. A pair is saved only when its
    URL contains ``"http"``, with source ``"KMOV"`` and ``datetime.now()``
    as ``added``.
    """
    sel = Selector(response)
    # Each query is evaluated a single time, outside the loop, rather than
    # once per story as the index-based loop used to do.
    stories = sel.xpath('//h3[@class="entry-title"]').extract()
    titles = sel.xpath('//h3[@class="entry-title"]/a/text()').extract()
    links = sel.xpath('//h3/a[@class="storyLink"]/@href').extract()
    # Iterating the three lists in lockstep keeps the old upper bound of
    # len(stories) and avoids IndexError when a query returns fewer rows.
    for _, title, link in zip(stories, titles, links):
        if "http" in link:
            item = Story()
            item['headline'] = title
            item['url'] = link
            item['source'] = "KMOV"
            item['added'] = datetime.now()
            item.save()
def parse(self, response):
    """Save a ``Story`` per ``article_text`` headline, skipping section labels.

    Args:
        response: The response object that is wrapped in a ``Selector``.

    Headlines equal to one of the ``ignored`` labels, or containing the
    substring ``"Blotter"``, are not saved. Every other headline is saved
    with its link ``href`` prefixed with ``http://www.bnd.com/`` as
    ``url``, source ``"Belleville News-Democrat"`` and ``datetime.now()``
    as ``added``.
    """
    # Exact-match labels that must not be stored. The original defined
    # this list but repeated the same values in a hand-written ``or`` chain.
    ignored = ['CRIME', 'TOP STORIES', 'EDUCATION', 'Neighborhood watch',
               'CRIME BLOTTER']
    sel = Selector(response)
    # Run every XPath query once; they do not depend on the loop index.
    stories = sel.xpath('//div[@class="article_text"]').extract()
    headlines = sel.xpath('//div[@class="article_text"]/h3/a/text()').extract()
    hrefs = sel.xpath('//div[@class="article_text"]/h3/a/@href').extract()
    # ``stories`` only bounds how many entries are visited, as before.
    # zip() stops at the shortest list, so mismatched result counts no
    # longer raise IndexError part-way through the page.
    for _, headline, href in zip(stories, headlines, hrefs):
        if headline in ignored or 'Blotter' in headline:
            continue
        item = Story()
        item['headline'] = headline
        item['url'] = "http://www.bnd.com/" + href
        item['source'] = "Belleville News-Democrat"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Store a ``Story`` for each ``node`` link found in the ``slides`` lists.

    Args:
        response: The response object passed to ``Selector``.

    The ``headline`` is the text of the ``p.title`` inside the link and
    the ``url`` is the link ``href`` unchanged. ``source`` is always
    ``"KMOX"`` and ``added`` is ``datetime.now()``.
    """
    sel = Selector(response)
    base = '//ul[@class="slides"]/li/a[@class="node"]'
    # Each query is evaluated a single time, outside the loop, rather than
    # once per story as the index-based loop used to do.
    stories = sel.xpath(base).extract()
    titles = sel.xpath(base + '/div/p[@class="title"]/text()').extract()
    links = sel.xpath(base + '/@href').extract()
    # Iterating the three lists in lockstep keeps the old upper bound of
    # len(stories) and avoids IndexError when a query returns fewer rows.
    for _, title, link in zip(stories, titles, links):
        item = Story()
        item['headline'] = title
        item['url'] = link
        item['source'] = "KMOX"
        item['added'] = datetime.now()
        item.save()
def parse(self, response):
    """Store a ``Story`` for each ``article_text`` headline that is not a label.

    Args:
        response: The response object passed to ``Selector``.

    A headline is dropped when it exactly matches an entry of ``ignored``
    or contains ``"Blotter"``. Remaining headlines are saved together with
    the link ``href`` (``http://www.bnd.com/`` prepended), the source
    ``"Belleville News-Democrat"`` and ``datetime.now()`` as ``added``.
    """
    # Labels rejected on exact match; previously declared but never read,
    # with the same values duplicated in the filter condition.
    ignored = [
        'CRIME',
        'TOP STORIES',
        'EDUCATION',
        'Neighborhood watch',
        'CRIME BLOTTER',
    ]
    sel = Selector(response)
    base = '//div[@class="article_text"]'
    # Each query is evaluated a single time, outside the loop, rather than
    # once per story as the index-based loop used to do.
    stories = sel.xpath(base).extract()
    titles = sel.xpath(base + '/h3/a/text()').extract()
    links = sel.xpath(base + '/h3/a/@href').extract()
    # Iterating the three lists in lockstep keeps the old upper bound of
    # len(stories) and avoids IndexError when a query returns fewer rows.
    for _, title, link in zip(stories, titles, links):
        is_label = title in ignored or 'Blotter' in title
        if not is_label:
            item = Story()
            item['headline'] = title
            item['url'] = "http://www.bnd.com/" + link
            item['source'] = "Belleville News-Democrat"
            item['added'] = datetime.now()
            item.save()