def testProcessRss(self):
    # Smoke test without assertions: store a NewsSource, run saveStories over
    # the "readernews.xml" data obtained from self.testData, then print
    # len(WebStory.objects).
    news_source = NewsSource(name="name", feed="feed")
    news_source.save()

    saveStories(self.testData("readernews.xml"), news_source)

    story_count = len(WebStory.objects)
    print(story_count)
def createDB():
    """Create the tables for all models listed in this function.

    Prints a progress message and then calls ``create_table()`` on each
    model class, one after another.
    """
    print("Creating the initial db")

    # The order is exactly the original call order; presumably it matters
    # for inter-table references -- TODO confirm against the model definitions.
    models_in_creation_order = (
        Company,
        NewsSource,
        News,
        Price,
        OpinionAPI,
        OpinionAPIResponse,
    )
    for model in models_in_creation_order:
        model.create_table()
def testParseEntry(self):
    """should turn an entry into a WebStory by fetching

    Passes the first entry returned by getEntries to processEntry and checks
    that the resulting story has a ``fullText`` attribute and the expected
    link, date and title.
    """
    news_source = NewsSource()
    feed_entries = getEntries(self.testData())
    web_story = processEntry(feed_entries[0], news_source)

    self.assertTrue(hasattr(web_story, "fullText"))

    # Split only for readability; the pieces concatenate to the original URL.
    expected_link = (
        "http://telegraph.feedsportal.com/c/32726/f/564430/s/139f1592/l/"
        "0L0Stelegraph0O0Cculture0Ctvandradio0Cbbc0C840A610A70CBBC0Eaxes"
        "0EMy0EFamily0Esitcom0Eafter0Emore0Ethan0E10A0A0Eepisodes0Bhtml"
        "/story01.htm"
    )
    self.assertEqual(expected_link, web_story.link)

    expected_date = datetime.strptime("2011-03-25T10:57:35Z", D_FMT)
    self.assertEqual(expected_date, web_story.date)

    expected_title = 'BBC axes My Family sitcom after more than 100 episodes'
    self.assertEqual(expected_title, web_story.title)
def newSource(request):
    """Create a NewsSource from the posted form and redirect to its page.

    Reads ``name``, ``description`` and ``url`` from ``request.POST``, stores
    them on a new NewsSource as ``title``, ``description`` and ``url``
    respectively, saves it, and returns an HttpResponseRedirect to
    ``/source/<id>``.
    """
    source = NewsSource()

    # (model attribute, POST key) pairs; note that the "name" form field is
    # stored under the "title" attribute.
    field_map = (
        ("title", "name"),
        ("description", "description"),
        ("url", "url"),
    )
    for attribute, post_key in field_map:
        setattr(source, attribute, request.POST[post_key])

    source.save()

    redirect_target = "/source/" + str(source.id)
    return HttpResponseRedirect(redirect_target)
def main():
    """Start a story search for every company that has no news rows.

    Selects the companies whose id does not appear in ``news.company_id``,
    fetches the NewsSource named 'Reuters', and for each company builds the
    first search URL and passes it to readSearchPage with the value 1 as the
    last argument. Prints a progress line per company and 'DONE!' at the end.
    """
    # Companies that don't have any articles.
    companies_without_news_sql = (
        " Select * FROM company WHERE id not in ("
        " SELECT company_id from news ) "
    )
    companies_without_news = Company.raw(companies_without_news_sql)

    reuters_query = NewsSource.select().where(NewsSource.name == 'Reuters')
    reuters = reuters_query.get()

    for company in companies_without_news:
        print("Finding stories about " + company.name)
        # NOTE(review): only spaces are rewritten to '+'; other URL-reserved
        # characters in a company name (e.g. '&') are passed through as-is --
        # confirm whether the name should be URL-encoded.
        search_term = company.name.replace(' ', '+')
        first_url = baseURL + '/search?blob="' + search_term + '"'
        readSearchPage(company, reuters, first_url, 1)

    print('DONE!')
def addNewsSources():
    """Create the Reuters entry through ``NewsSource.create``.

    Prints a progress message first. The entry is created with name
    "Reuters" and url "http://www.reuters.com".
    """
    print("Adding News Sources")

    reuters_fields = {
        "name": "Reuters",
        "url": "http://www.reuters.com",
    }
    NewsSource.create(**reuters_fields)