def testStoriesSince(self):
    """Check that storiesSince() returns 15 stories, all newer than the id.

    The client is created as ``MediaCloud(None, None, True)``, i.e. without
    credentials.
    NOTE(review): the third argument presumably enables a canned/test mode
    that needs no live API access -- confirm against the MediaCloud class.
    """
    story_id = 88848861
    mc = MediaCloud(None, None, True)
    stories = mc.storiesSince(story_id)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(len(stories), 15)
    for story in stories:
        # assertGreater reports both operands when the check fails.
        self.assertGreater(int(story["stories_id"]), story_id)
def testStoriesSinceForReal(self):
    """Check storiesSince() using the credentials from the test config.

    Expects ``mc.DEFAULT_STORY_COUNT`` stories back, each with an id that is
    greater than ``story_id`` and at most ``mc.DEFAULT_STORY_COUNT`` above it.
    """
    mc = MediaCloud(self._config.get("api", "user"),
                    self._config.get("api", "pass"))
    story_id = 88848861
    stories = mc.storiesSince(story_id)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(len(stories), mc.DEFAULT_STORY_COUNT)
    for story in stories:
        # Convert once; the id is used by both checks below.
        found_id = int(story["stories_id"])
        self.assertGreater(found_id, story_id)
        self.assertLessEqual(found_id - story_id, mc.DEFAULT_STORY_COUNT)
# Read the client settings (API credentials, database location).
config = ConfigParser.ConfigParser()
config.read('mc-client.config')

# Log at DEBUG level to a file; the dashed line marks the start of a run.
logging.basicConfig(filename='mc-realtime.log', level=logging.DEBUG)
log = logging.getLogger('mc-realtime')
log.info("---------------------------------------------------------------------------")

# Open the local 'mediacloud' story database using the configured host/port.
db_host = config.get('db', 'host')
db_port = config.get('db', 'port')
db = StoryDatabase('mediacloud', db_host, db_port)

# Create the MediaCloud API client from the configured credentials.
api_user = config.get('api', 'user')
api_pass = config.get('api', 'pass')
mc = MediaCloud(api_user, api_pass)

# Ask the API for stories that come after the newest one already stored.
max_story_id = db.getMaxStoryId()
results = mc.storiesSince(max_story_id, STORIES_TO_FETCH)
log.info("Fetched " + str(len(results)) + " stories (after " + str(max_story_id) + ")")

# Register the pre-save callbacks, in this order: word count, language
# guess (is-English), then reading grade level.
for pre_save_hook in (
        mediacloud.examples.addWordCountToStory,
        mediacloud.examples.addIsEnglishToStory,
        mediacloud.examples.addFleshKincaidGradeLevelToStory):
    pub.subscribe(pre_save_hook, StoryDatabase.EVENT_PRE_STORY_SAVE)

# Store every fetched story in the database.
# NOTE(review): in the code visible here `worked` is not inspected and
# `saved` is never incremented -- confirm whether that is intended.
saved = 0
for story in results:
    worked = db.addStory(story)
# setup a connection to a local DB of articles #db = StoryDatabase('mediacloud', config.get('db','host'), config.get('db','port') ) articles_db = StoryDatabase('articles', config.get('db','host'), config.get('db','port') ) # setup a connection to a local DB of twitter accounts #accounts_db = StoryDatabase('accounts', config.get('db','host'), config.get('db','port') ) server = couchdb.Server() accounts_db = server['accounts'] # setup the mediacloud connection mc = MediaCloud( config.get('api','user'), config.get('api','pass') ) # Must first seed database with latest Story ID, or else it will start at the beginning (2005) max_story_id = articles_db.getMaxStoryId() results = mc.storiesSince( max_story_id, STORIES_TO_FETCH, fetch_raw_text = True ) log.info("Fetched "+str(len(results))+" stories (after "+str(max_story_id)+")") # set up a callback function that adds twitter username occurrences to the story pub.subscribe(mediacloud.examples.addTwitterReferencesToStory, StoryDatabase.EVENT_PRE_STORY_SAVE) # save all the stories in the db saved = 0 for story in results: print 'new story', worked = articles_db.addStory(story) if worked: saved = saved + 1 else: log.warning(" unable to save story "+str(story['stories_id']))