def testStoryExists(self):
    '''
    storyExists() must be True for a story that was added and False for an
    id ('43223535') that was never added to the scratch database.
    '''
    story = self._getFakeStory()
    db = StoryDatabase()
    db.createDatabase(self.TEST_DB_NAME)
    try:
        db.addStory(story)
        # Look the id up after addStory, exactly as the assertions need it.
        story_id = str(story['stories_id'])
        # The fetched document is not inspected here; the call only has to
        # complete without raising for a story that was just stored.
        db.getStory(story_id)
        self.assertTrue(db.storyExists(story_id))
        self.assertFalse(db.storyExists('43223535'))
    finally:
        # Drop the scratch database even when an assertion above fails, so a
        # failing run does not leave it behind for the next test.
        db.deleteDatabase(self.TEST_DB_NAME)
def testGetMaxStoryId(self):
    '''
    getMaxStoryId() must be 0 once the example views are saved into an empty
    database, and 2000 after stories with ids "1000" and "2000" are added.
    '''
    story1 = self._getFakeStory()
    story1['stories_id'] = "1000"
    story2 = self._getFakeStory()
    # This id belongs on story2; assigning it to story1 again would leave
    # story2 with whatever id the fake story carries by default.
    story2['stories_id'] = "2000"
    db = StoryDatabase()
    db.createDatabase(self.TEST_DB_NAME)
    try:
        # The views have to be stored before getMaxStoryId() is queried.
        db._db.save(mediacloud.examples.getAllExampleViews())
        self.assertEqual(db.getMaxStoryId(), 0)
        db.addStory(story1)
        db.addStory(story2)
        self.assertEqual(db.getMaxStoryId(), 2000)
    finally:
        # Drop the scratch database even when an assertion above fails.
        db.deleteDatabase(self.TEST_DB_NAME)
def testAddStory(self):
    '''
    addStory() must report success for a new story and failure when the same
    story is added again; the saved document must use the stringified
    stories_id as its '_id' and carry a 'story_sentences_count' of 2.
    '''
    story = self._getFakeStory()
    db = StoryDatabase()
    db.createDatabase(self.TEST_DB_NAME)
    try:
        worked = db.addStory(story)
        self.assertTrue(worked)
        # Adding the very same story a second time must be rejected.
        worked = db.addStory(story)
        self.assertFalse(worked)
        story_id = str(story['stories_id'])
        saved_story = db.getStory(story_id)
        self.assertEqual(saved_story['_id'], story_id)
        self.assertEqual(saved_story['story_sentences_count'], 2)
    finally:
        # Drop the scratch database even when an assertion above fails.
        db.deleteDatabase(self.TEST_DB_NAME)
to a 'mediacloud' CouchDB database. It adds in the extracted text readability via a pre-save event subscription. To Install: >>> import nltk >>> nltk.download() [ select d for Download ] [ enter "stopwords" as the identifier ] [ enter "punkt" as the identifier ] ''' config = ConfigParser.ConfigParser() config.read('mc-client.config') # set up a connection to a local DB db = StoryDatabase('mediacloud', config.get('db','host'), config.get('db','port') ) # connect to MC and fetch some articles mc = MediaCloud( config.get('api','user'), config.get('api','pass') ) results = mc.recentStories() print "Fetched "+str(len(results))+" stories" # set up my callback function that adds readability score to the story pub.subscribe(mediacloud.examples.addFleshKincaidGradeLevelToStory, StoryDatabase.EVENT_PRE_STORY_SAVE) # save all the stories in the db (this will fire the callback above) saved = 0 for story in results: worked = db.addStory(story) if worked: saved = saved + 1
created after the latest one it has in its db. It saves the metadata for all those to a 'mediacloud' CouchDB database. ''' STORIES_TO_FETCH = 100 config = ConfigParser.ConfigParser() config.read('mc-client.config') # setup logging logging.basicConfig(filename='mc-realtime.log',level=logging.DEBUG) log = logging.getLogger('mc-realtime') log.info("---------------------------------------------------------------------------") # setup a connection to a local DB db = StoryDatabase('mediacloud', config.get('db','host'), config.get('db','port') ) # setup the mediacloud connection mc = MediaCloud( config.get('api','user'), config.get('api','pass') ) max_story_id = db.getMaxStoryId() results = mc.storiesSince( max_story_id, STORIES_TO_FETCH ) log.info("Fetched "+str(len(results))+" stories (after "+str(max_story_id)+")") # set up my callback function that adds word count to the story pub.subscribe(mediacloud.examples.addWordCountToStory, StoryDatabase.EVENT_PRE_STORY_SAVE) # set up my callback function that adds the language guess to the story pub.subscribe(mediacloud.examples.addIsEnglishToStory, StoryDatabase.EVENT_PRE_STORY_SAVE) # set up my callback function that adds the reading grade level to the story
import mediacloud.examples STORIES_TO_FETCH = 10000 config = ConfigParser.ConfigParser() config.read('mc-client.config') # setup logging logging.basicConfig(filename='mc-realtime.log',level=logging.DEBUG) log = logging.getLogger('mc-realtime') log.info("---------------------------------------------------------------------------") # setup a connection to a local DB of articles #db = StoryDatabase('mediacloud', config.get('db','host'), config.get('db','port') ) articles_db = StoryDatabase('articles', config.get('db','host'), config.get('db','port') ) # setup a connection to a local DB of twitter accounts #accounts_db = StoryDatabase('accounts', config.get('db','host'), config.get('db','port') ) server = couchdb.Server() accounts_db = server['accounts'] # setup the mediacloud connection mc = MediaCloud( config.get('api','user'), config.get('api','pass') ) # Must first seed database with latest Story ID, or else it will start at the beginning (2005) max_story_id = articles_db.getMaxStoryId() results = mc.storiesSince( max_story_id, STORIES_TO_FETCH, fetch_raw_text = True ) log.info("Fetched "+str(len(results))+" stories (after "+str(max_story_id)+")") # set up a callback function that adds twitter username occurrences to the story
def testCreateMaxIdView(self):
    '''
    After the example views are saved into a freshly created database,
    getMaxStoryId() must report 0.
    '''
    db = StoryDatabase()
    db.createDatabase(self.TEST_DB_NAME)
    try:
        db._db.save(mediacloud.examples.getAllExampleViews())
        self.assertEqual(db.getMaxStoryId(), 0)
    finally:
        # Drop the scratch database even when the assertion above fails.
        db.deleteDatabase(self.TEST_DB_NAME)
def testManageDatabase(self):
    '''
    Create the scratch test database and delete it again straight away;
    the test passes as long as neither call raises.
    '''
    database = StoryDatabase()
    scratch_name = self.TEST_DB_NAME
    database.createDatabase(scratch_name)
    database.deleteDatabase(scratch_name)