def testStoryExists(self):
     # storyExists() must be true for a story that was just added and false
     # for an id that was never stored.
     story = self._getFakeStory()
     db = StoryDatabase()
     db.createDatabase(self.TEST_DB_NAME)
     try:
         db.addStory(story)
         self.assertTrue(db.storyExists(str(story['stories_id'])))
         self.assertFalse(db.storyExists('43223535'))
     finally:
         # drop the scratch database even when an assertion fails, so a
         # failure here does not leave it behind for later tests
         db.deleteDatabase(self.TEST_DB_NAME)
 def testGetMaxStoryId(self):
     # getMaxStoryId() must be 0 on a database with only the example views
     # saved, and the largest stories_id once stories have been added.
     story1 = self._getFakeStory()
     story1['stories_id'] = "1000"
     story2 = self._getFakeStory()
     # the second id belongs on story2 (it was previously written to story1
     # again, so the two stories never had the intended 1000/2000 ids)
     story2['stories_id'] = "2000"
     db = StoryDatabase()
     db.createDatabase(self.TEST_DB_NAME)
     try:
         # the example views are saved first; getMaxStoryId() is checked
         # only after this
         db._db.save(mediacloud.examples.getAllExampleViews())
         self.assertEqual(db.getMaxStoryId(), 0)
         db.addStory(story1)
         db.addStory(story2)
         self.assertEqual(db.getMaxStoryId(), 2000)
     finally:
         # drop the scratch database even when an assertion fails
         db.deleteDatabase(self.TEST_DB_NAME)
 def testAddStory(self):
     # The first addStory() of a story must report success, a second
     # addStory() of the same story must report failure, and the stored
     # document must be retrievable under the stringified stories_id.
     story = self._getFakeStory()
     db = StoryDatabase()
     db.createDatabase(self.TEST_DB_NAME)
     try:
         worked = db.addStory(story)
         self.assertTrue(worked)
         # adding the very same story again is expected to be refused
         worked = db.addStory(story)
         self.assertFalse(worked)
         story_id = str(story['stories_id'])
         saved_story = db.getStory(story_id)
         self.assertEqual(saved_story['_id'], story_id)
         self.assertEqual(saved_story['story_sentences_count'], 2)
     finally:
         # drop the scratch database even when an assertion fails
         db.deleteDatabase(self.TEST_DB_NAME)
to a 'mediacloud' CouchDB database.  It adds a readability score for the extracted text 
via a pre-save event subscription.

To Install:
>>> import nltk
>>> nltk.download()
[ select d for Download ]
[ enter "stopwords" as the identifier ]
[ enter "punkt" as the identifier ]
'''

config = ConfigParser.ConfigParser()
config.read('mc-client.config')

# open the local story database named in the config file
db_host = config.get('db', 'host')
db_port = config.get('db', 'port')
db = StoryDatabase('mediacloud', db_host, db_port)

# log in to MediaCloud and pull down the recent stories
api_user = config.get('api', 'user')
api_pass = config.get('api', 'pass')
mc = MediaCloud(api_user, api_pass)
results = mc.recentStories()
print("Fetched "+str(len(results))+" stories")

# register the callback that adds a readability score to each story;
# it is subscribed to the pre-save event, so it must be in place before
# any story is added below
pub.subscribe(
    mediacloud.examples.addFleshKincaidGradeLevelToStory,
    StoryDatabase.EVENT_PRE_STORY_SAVE
)

# add every fetched story to the db (this fires the callback above) and
# count the ones addStory() reports as saved
saved = 0
for story in results:
    worked = db.addStory(story)
    if worked:
        saved += 1
created after the latest one it has in its db.  It saves the metadata for all those to 
a 'mediacloud' CouchDB database.
'''

# upper bound passed to storiesSince() for a single run
STORIES_TO_FETCH = 100

config = ConfigParser.ConfigParser()
config.read('mc-client.config')

# log to a file; the dashed line marks the start of this run in the log
logging.basicConfig(filename='mc-realtime.log', level=logging.DEBUG)
log = logging.getLogger('mc-realtime')
log.info("---------------------------------------------------------------------------")

# open the local story database named in the config file
db_host = config.get('db', 'host')
db_port = config.get('db', 'port')
db = StoryDatabase('mediacloud', db_host, db_port)

# log in to MediaCloud
api_user = config.get('api', 'user')
api_pass = config.get('api', 'pass')
mc = MediaCloud(api_user, api_pass)

# ask only for stories after the highest story id already in the db
max_story_id = db.getMaxStoryId()
results = mc.storiesSince(max_story_id, STORIES_TO_FETCH)
log.info("Fetched "+str(len(results))+" stories (after "+str(max_story_id)+")")

# register the pre-save callbacks: first the one that adds the word count
# to the story, then the one that adds the language guess
pub.subscribe(
    mediacloud.examples.addWordCountToStory,
    StoryDatabase.EVENT_PRE_STORY_SAVE
)
pub.subscribe(
    mediacloud.examples.addIsEnglishToStory,
    StoryDatabase.EVENT_PRE_STORY_SAVE
)

# set up my callback function that adds the reading grade level to the story
import mediacloud.examples


# upper bound passed to storiesSince() for a single run
STORIES_TO_FETCH = 10000

config = ConfigParser.ConfigParser()
config.read('mc-client.config')

# log to a file; the dashed line marks the start of this run in the log
logging.basicConfig(filename='mc-realtime.log', level=logging.DEBUG)
log = logging.getLogger('mc-realtime')
log.info("---------------------------------------------------------------------------")

# open the local db of articles (the 'articles' database, using the
# host/port from the config file)
db_host = config.get('db', 'host')
db_port = config.get('db', 'port')
articles_db = StoryDatabase('articles', db_host, db_port)

# open the local db of twitter accounts directly through couchdb rather
# than through a StoryDatabase wrapper
# NOTE(review): couchdb.Server() is called without the configured
# host/port used for articles_db above -- confirm this is intended
server = couchdb.Server()
accounts_db = server['accounts']

# log in to MediaCloud
api_user = config.get('api', 'user')
api_pass = config.get('api', 'pass')
mc = MediaCloud(api_user, api_pass)

# Must first seed database with latest Story ID, or else it will start at the beginning (2005)
max_story_id = articles_db.getMaxStoryId()
results = mc.storiesSince(max_story_id, STORIES_TO_FETCH, fetch_raw_text=True)
log.info("Fetched "+str(len(results))+" stories (after "+str(max_story_id)+")")

# set up a callback function that adds twitter username occurrences to the story
 def testCreateMaxIdView(self):
     # After the example views are saved into an otherwise empty database,
     # getMaxStoryId() must report 0.
     db = StoryDatabase()
     db.createDatabase(self.TEST_DB_NAME)
     try:
         db._db.save(mediacloud.examples.getAllExampleViews())
         self.assertEqual(db.getMaxStoryId(), 0)
     finally:
         # drop the scratch database even when the assertion fails
         db.deleteDatabase(self.TEST_DB_NAME)
 def testManageDatabase(self):
     # Smoke test: creating the scratch database and deleting it again
     # must both complete without raising.
     scratch_name = self.TEST_DB_NAME
     story_db = StoryDatabase()
     story_db.createDatabase(scratch_name)
     story_db.deleteDatabase(scratch_name)