コード例 #1
0
 def retrieveTweets(self,ID,Q,geoCode):
     '''Fetch new tweets for a feed and return how many were fetched.

     Reads the since-id for ``ID`` from ``mongoInt.retrieveSinceID``, asks
     ``twitterInt.retrieveTweets`` for tweets matching ``Q`` / ``geoCode``
     after that id, reports the batch size to
     ``mongoInt.collectionFeedFrequency``, adds a ``created_time`` key to
     every tweet and hands the batch to ``self.posAnalysis``.

     Nothing is written to the feed collection here; the storing calls are
     currently disabled.

     Args:
         ID: feed identifier passed to the ``mongoInt`` helpers.
         Q: search query forwarded to ``twitterInt.retrieveTweets``.
         geoCode: location filter forwarded to ``twitterInt.retrieveTweets``.

     Returns:
         The number of tweets returned by ``twitterInt.retrieveTweets``.
     '''
     since_id = mongoInt.retrieveSinceID(ID)
     logger.debug('retrieve tweets')
     logger.debug(since_id)
     logger.debug('retrieve tweets123456')
     # The since_id fetched above restricts the twitter call to newer tweets.
     twits = twitterInt.retrieveTweets(Q,geoCode, since_id)

     mongoInt.collectionFeedFrequency(len(twits), ID)

     # Explicit loop instead of map(): map() is lazy on Python 3, so a
     # side-effect-only map() would silently never set created_time there.
     # 'created_at' is parsed with a literal "+0000" offset and timegm()
     # turns the resulting struct_time into epoch seconds.
     for tw in twits:
         tw['created_time'] = timegm(time.strptime(tw['created_at'],"%a %b %d %H:%M:%S +0000 %Y"))

     # TODO: called directly instead of through a wrapper; pass only the
     # tweet text and id once the wrapper exists.
     logger.info('tweets fetched are %s',twits)
     # The result of posAnalysis is not used here; the call is kept in case
     # it has side effects.
     self.posAnalysis(twits)
     # TODO: this return value is meant to be derived from MongoDB instead.
     return len(twits)
コード例 #2
0
ファイル: intercom.py プロジェクト: chelladurai89/wowgic
 def retrieveTweetsById(self,ID,feedId,count):
     '''Return a list of tweets for ``ID`` looked up by ``feedId``.

     When ``int(feedId)`` is 0 the id is replaced by the value of
     ``mongoInt.retrieveSinceID(ID)`` and that fact is passed on to
     ``mongoInt.retrieveTweetsById`` as its last argument. If the id is
     still 0 afterwards, an empty list is returned without querying.
     '''
     usedSinceId = int(feedId) == 0
     if usedSinceId:
         feedId = mongoInt.retrieveSinceID(ID)
     if feedId == 0:
         # No usable id: nothing to look up.
         return []
     # Copy the result into a fresh list, as callers always got a new list.
     return list(mongoInt.retrieveTweetsById(ID,feedId,count, usedSinceId))
コード例 #3
0
 def retrieveTweetsById(self, ID, feedId, count):
     '''Look up tweets for ``ID`` starting from ``feedId``.

     A ``feedId`` whose integer value is 0 is swapped for the result of
     ``mongoInt.retrieveSinceID(ID)``; the final argument given to
     ``mongoInt.retrieveTweetsById`` tells it whether that swap happened.
     Returns a new list, which is empty when the id is 0 after the swap.
     '''
     lookedUp = False
     if int(feedId) == 0:
         feedId, lookedUp = mongoInt.retrieveSinceID(ID), True
     if feedId == 0:
         return []
     matches = mongoInt.retrieveTweetsById(ID, feedId, count, lookedUp)
     return list(matches)
コード例 #4
0
    def retrieveTweets(self, ID, Q, geoCode):
        '''Fetch new tweets for a feed, merge them with existing ones and classify.

        Reads the since-id for ``ID`` from ``mongoInt.retrieveSinceID``,
        fetches tweets for ``Q`` / ``geoCode`` through
        ``twitterInt.retrieveTweets`` and reports the batch size to
        ``mongoInt.collectionFeedFrequency``. Retweets are replaced by the
        tweet they wrap, each tweet gets a ``created_time`` key, and the
        batch is appended to the list from ``mongoInt.retrieveParentIdTrue``
        before being passed to ``self.topicModelLSI``.

        Args:
            ID: feed identifier passed to the ``mongoInt`` helpers.
            Q: search query forwarded to ``twitterInt.retrieveTweets``.
            geoCode: location filter forwarded to ``twitterInt.retrieveTweets``.

        Returns:
            An empty list when no tweets were fetched, otherwise the result
            of ``self.runClassifier(ID)``.
        '''
        since_id = mongoInt.retrieveSinceID(ID)
        logger.debug('retrieve tweets')
        logger.debug(since_id)
        logger.debug('retrieve tweets123456')
        # The since_id fetched above restricts the twitter call to newer tweets.
        twits = twitterInt.retrieveTweets(Q, geoCode, since_id)

        mongoInt.collectionFeedFrequency(len(twits), ID)

        def removeRetweets(tweet):
            '''Return the wrapped tweet for a retweet, flagged as retweeted.'''
            if 'retweeted_status' in tweet:
                tweet = tweet['retweeted_status']
                tweet['alreadyRetweeted'] = True
                # Was a Python 2 print statement; routed through the logger
                # so the block also parses on Python 3.
                logger.debug(tweet['text'])
            return tweet

        # Build a real list (map() is lazy on Python 3 and has no len()).
        twits = [removeRetweets(tw) for tw in twits]
        # 'created_at' is parsed with a literal "+0000" offset and timegm()
        # turns the resulting struct_time into epoch seconds.
        for tw in twits:
            tw['created_time'] = timegm(
                time.strptime(tw['created_at'],
                              "%a %b %d %H:%M:%S +0000 %Y"))

        # TODO: called directly instead of through a wrapper; pass only the
        # tweet text and id once the wrapper exists.
        logger.info('tweets fetched for %s are %s', ID, len(twits))
        if not twits:
            return []

        uniqueTweetsFromDB = mongoInt.retrieveParentIdTrue(ID)
        # NOTE(review): these counts are logged at error level although they
        # do not describe a failure; level left unchanged on purpose.
        logger.error('existing uniqueTweetsFromDB :%s',
                     len(uniqueTweetsFromDB))
        logger.debug('total uniqe feeds %s', uniqueTweetsFromDB)
        uniqueTweetsFromDB.extend(twits)
        logger.error('total combined tweets :%s', len(uniqueTweetsFromDB))

        similarTweet = self.topicModelLSI(uniqueTweetsFromDB,
                                          Q)  # new feeds from service
        # updateRatio is skipped when topicModelLSI returns 0.
        if similarTweet != 0:
            self.updateRatio(ID, similarTweet, uniqueTweetsFromDB, Q)
        return self.runClassifier(ID)
コード例 #5
0
ファイル: intercom.py プロジェクト: chelladurai89/wowgic
    def retrieveTweets(self,ID,Q,geoCode):
        '''Fetch new tweets for a feed and merge them with the existing ones.

        Reads the since-id for ``ID`` from ``mongoInt.retrieveSinceID``,
        fetches tweets for ``Q`` / ``geoCode`` through
        ``twitterInt.retrieveTweets`` and reports the batch size to
        ``mongoInt.collectionFeedFrequency``. Retweets are replaced by the
        tweet they wrap, each tweet gets a ``created_time`` key, and the
        batch is appended to the list from ``mongoInt.retrieveParentIdTrue``
        before being passed to ``self.topicModelLSI``.

        Args:
            ID: feed identifier passed to the ``mongoInt`` helpers.
            Q: search query forwarded to ``twitterInt.retrieveTweets``.
            geoCode: location filter forwarded to ``twitterInt.retrieveTweets``.

        Returns:
            The length of the combined list (existing plus newly fetched
            tweets), or 0 when no tweets were fetched. The empty case used
            to fall off the end of the function and return ``None``.
        '''
        since_id = mongoInt.retrieveSinceID(ID)
        logger.debug('retrieve tweets')
        logger.debug(since_id)
        logger.debug('retrieve tweets123456')
        # The since_id fetched above restricts the twitter call to newer tweets.
        twits = twitterInt.retrieveTweets(Q,geoCode, since_id)

        mongoInt.collectionFeedFrequency(len(twits), ID)

        def removeRetweets(tweet):
            '''Return the wrapped tweet for a retweet, flagged as retweeted.'''
            if 'retweeted_status' in tweet:
                tweet = tweet['retweeted_status']
                tweet['alreadyRetweeted'] = True
                # Was a Python 2 print statement; routed through the logger
                # so the block also parses on Python 3.
                logger.debug(tweet['text'])
            return tweet

        # Build a real list (map() is lazy on Python 3 and has no len()).
        twits = [removeRetweets(tw) for tw in twits]
        # Debug dump of the batch, previously written to stdout with print.
        logger.debug(twits)
        # 'created_at' is parsed with a literal "+0000" offset and timegm()
        # turns the resulting struct_time into epoch seconds.
        for tw in twits:
            tw['created_time'] = timegm(time.strptime(tw['created_at'],"%a %b %d %H:%M:%S +0000 %Y"))

        # TODO: called directly instead of through a wrapper; pass only the
        # tweet text and id once the wrapper exists.
        logger.info('tweets fetched are chellaaa %s',len(twits))
        if not twits:
            # Nothing new: keep the return type an int for callers.
            return 0

        uniqueTweetsFromDB = mongoInt.retrieveParentIdTrue(ID)
        logger.debug('existing uniqueTweetsFromDB :%s',len(uniqueTweetsFromDB))
        uniqueTweetsFromDB.extend(twits)
        logger.debug('total combined tweets :%s',len(uniqueTweetsFromDB))
        similarTweet = self.topicModelLSI(uniqueTweetsFromDB, Q) # new feeds from service
        # updateRatio is skipped when topicModelLSI returns 0.
        if similarTweet != 0:
            self.updateRatio(ID,similarTweet,uniqueTweetsFromDB, Q)
        return len(uniqueTweetsFromDB)
コード例 #6
0
 def retrieveTweetsById(self,ID,feedId,count):
     '''Return a new list of tweets for ``ID`` looked up by ``feedId``.

     A ``feedId`` whose integer value is 0 is first replaced by the result
     of ``mongoInt.retrieveSinceID(ID)``; the lookup itself is delegated to
     ``mongoInt.retrieveTweetsById``.
     '''
     startId = feedId
     if int(startId) == 0:
         startId = mongoInt.retrieveSinceID(ID)
     # Copy into a fresh list so the caller never shares the helper's result.
     return list(mongoInt.retrieveTweetsById(ID,startId,count))