Ejemplo n.º 1
0
 def testStoriesSince(self):
     """Check that storiesSince() returns 15 stories, all newer than the given id.

     NOTE(review): the third positional argument (True) presumably puts the
     client into a canned/offline mode, since no credentials are passed --
     confirm against the MediaCloud constructor.
     """
     story_id = 88848861
     mc = MediaCloud(None, None, True)
     stories = mc.storiesSince(story_id)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(stories), 15)
     for story in stories:
         # assertGreater reports both operands on failure, unlike assertTrue.
         self.assertGreater(int(story["stories_id"]), story_id)
Ejemplo n.º 2
0
 def testStoriesSinceForReal(self):
     """Check storiesSince() using the credentials from the test config.

     Reads the "user" and "pass" options of the "api" section of
     self._config, requests the stories after a fixed id, and verifies that
     the default number of stories comes back, that every story id is
     greater than the requested id, and that no returned id is more than
     DEFAULT_STORY_COUNT past it.
     """
     mc = MediaCloud(self._config.get("api", "user"), self._config.get("api", "pass"))
     story_id = 88848861
     stories = mc.storiesSince(story_id)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(stories), mc.DEFAULT_STORY_COUNT)
     for story in stories:
         fetched_id = int(story["stories_id"])
         # The dedicated comparison asserts show both operands on failure.
         self.assertGreater(fetched_id, story_id)
         self.assertLessEqual(fetched_id - story_id, mc.DEFAULT_STORY_COUNT)
Ejemplo n.º 3
0
# Fetch the stories newer than the newest one already stored locally and
# save them, letting the subscribed callbacks run on the pre-save event.

# load the API credentials and DB connection settings
config = ConfigParser.ConfigParser()
config.read('mc-client.config')

# setup logging
logging.basicConfig(filename='mc-realtime.log',level=logging.DEBUG)
log = logging.getLogger('mc-realtime')
log.info("---------------------------------------------------------------------------")

# setup a connection to a local DB
db = StoryDatabase('mediacloud', config.get('db','host'), config.get('db','port') )

# setup the mediacloud connection
mc = MediaCloud( config.get('api','user'), config.get('api','pass') )

# ask only for stories after the highest story id the local DB reports
max_story_id = db.getMaxStoryId()
results = mc.storiesSince( max_story_id, STORIES_TO_FETCH )
log.info("Fetched %s stories (after %s)", len(results), max_story_id)

# set up my callback function that adds word count to the story
pub.subscribe(mediacloud.examples.addWordCountToStory, StoryDatabase.EVENT_PRE_STORY_SAVE)

# set up my callback function that adds the language guess to the story
pub.subscribe(mediacloud.examples.addIsEnglishToStory, StoryDatabase.EVENT_PRE_STORY_SAVE)

# set up my callback function that adds the reading grade level to the story
pub.subscribe(mediacloud.examples.addFleshKincaidGradeLevelToStory, StoryDatabase.EVENT_PRE_STORY_SAVE)

# save all the stories in the db
saved = 0
for story in results:
    worked = db.addStory(story)
    # Do not drop the result of addStory: count the successes and log every
    # story that could not be saved, so failures are visible in the log.
    if worked:
        saved = saved + 1
    else:
        log.warning("  unable to save story %s", story['stories_id'])
# Fetch the stories newer than the newest one in the local 'articles' DB
# (including their raw text) and save them, with a pre-save callback that
# records twitter username occurrences on each story.

# setup a connection to a local DB of articles
# (earlier variant, kept for reference: same connection against the 'mediacloud' DB)
#db = StoryDatabase('mediacloud', config.get('db','host'), config.get('db','port') )
articles_db = StoryDatabase('articles', config.get('db','host'), config.get('db','port') )

# setup a connection to a local DB of twitter accounts
# (earlier variant, kept for reference: going through StoryDatabase instead of couchdb directly)
#accounts_db = StoryDatabase('accounts', config.get('db','host'), config.get('db','port') )
# couchdb.Server() is called without arguments here, so the host/port from
# the config are not applied to this connection.
server = couchdb.Server()
accounts_db = server['accounts']

# setup the mediacloud connection
mc = MediaCloud( config.get('api','user'), config.get('api','pass') )

# Must first seed database with latest Story ID, or else it will start at the beginning (2005)
max_story_id = articles_db.getMaxStoryId()
# fetch_raw_text = True is passed on top of the id and the fetch limit
results = mc.storiesSince( max_story_id, STORIES_TO_FETCH, fetch_raw_text = True )
log.info("Fetched "+str(len(results))+" stories (after "+str(max_story_id)+")")

# set up a callback function that adds twitter username occurrences to the story
pub.subscribe(mediacloud.examples.addTwitterReferencesToStory, StoryDatabase.EVENT_PRE_STORY_SAVE)

# save all the stories in the db
saved = 0  # number of stories addStory reported as saved
for story in results:
    # Python 2 print statement; the trailing comma suppresses the newline
    print 'new story',
    worked = articles_db.addStory(story)
    if worked:
      saved = saved + 1
    else:
      # a falsy result from addStory is treated as a failed save
      log.warning("  unable to save story "+str(story['stories_id']))