def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch new tweets for feed ``ID`` and run POS analysis on them.

    Steps, in order:
      1. Read the feed's since-id via ``mongoInt.retrieveSinceID(ID)``.
      2. Query ``twitterInt.retrieveTweets(Q, geoCode, since_id)``.
      3. Report the number of fetched tweets through
         ``mongoInt.collectionFeedFrequency``.
      4. Add a ``'created_time'`` entry to every tweet, parsed from its
         ``'created_at'`` string.
      5. Hand the tweets to ``self.posAnalysis``.

    No insert is issued from this method directly; whether
    ``self.posAnalysis`` persists anything is not visible from here.

    Args:
        ID: feed identifier passed to the ``mongoInt`` helpers.
        Q: query forwarded to ``twitterInt.retrieveTweets``.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The number of tweets fetched in this call.
    '''
    since_id = mongoInt.retrieveSinceID(ID)
    logger.debug('retrieve tweets')
    logger.debug(since_id)
    logger.debug('retrieve tweets123456')

    # Only ask the service for tweets relative to the stored since-id.
    twits = twitterInt.retrieveTweets(Q, geoCode, since_id)
    mongoInt.collectionFeedFrequency(len(twits), ID)

    # 'created_at' carries a literal "+0000" offset, so the parsed struct_time
    # is handed to timegm (presumably calendar.timegm, which reads it as UTC).
    # An explicit loop is used instead of map(): map() is lazy on Python 3 and
    # would silently skip these updates.
    created_at_format = "%a %b %d %H:%M:%S +0000 %Y"
    for tweet in twits:
        tweet['created_time'] = timegm(
            time.strptime(tweet['created_at'], created_at_format))

    logger.info('tweets fetched are %s', twits)

    # The result of posAnalysis was never used by this method; the call is
    # kept in case it has side effects.
    self.posAnalysis(twits)

    # NOTE(review): the original comment says this return value should
    # eventually come from MongoDB instead of the in-memory list.
    return len(twits)
def retrieveTweetsById(self, ID, feedId, count):
    '''Return tweets of feed ``ID`` looked up through ``mongoInt``.

    When ``int(feedId)`` is 0 the id to start from is taken from
    ``mongoInt.retrieveSinceID(ID)`` and the lookup is told so through its
    last argument. If the id is still 0 after that, nothing is looked up.

    Args:
        ID: feed identifier passed to the ``mongoInt`` helpers.
        feedId: tweet id to look up from; anything ``int()`` turns into 0
            means "use the feed's since-id".
        count: forwarded unchanged to ``mongoInt.retrieveTweetsById``.

    Returns:
        A new list with whatever ``mongoInt.retrieveTweetsById`` yields, or
        an empty list when no usable id is available.
    '''
    resolved_from_since_id = int(feedId) == 0
    if resolved_from_since_id:
        feedId = mongoInt.retrieveSinceID(ID)

    # A since-id of 0 means there is nothing to look up.
    if feedId == 0:
        return []

    return list(
        mongoInt.retrieveTweetsById(ID, feedId, count, resolved_from_since_id))
def retrieveTweetsById(self, ID, feedId, count):
    '''Look up tweets of feed ``ID`` starting from ``feedId``.

    A ``feedId`` that ``int()`` converts to 0 is replaced by the value of
    ``mongoInt.retrieveSinceID(ID)``; in that case the flag passed as the
    last argument of ``mongoInt.retrieveTweetsById`` is True, otherwise
    False. If the id is 0 even after that replacement the lookup is skipped.

    Args:
        ID: feed identifier passed to the ``mongoInt`` helpers.
        feedId: tweet id to look up from, or a zero value.
        count: forwarded unchanged to ``mongoInt.retrieveTweetsById``.

    Returns:
        A list of the items produced by ``mongoInt.retrieveTweetsById``;
        empty when the lookup was skipped.
    '''
    used_since_id = False
    if int(feedId) == 0:
        used_since_id, feedId = True, mongoInt.retrieveSinceID(ID)

    matches = []
    if feedId == 0:
        # No explicit id and no stored since-id: nothing to fetch.
        return matches

    matches += mongoInt.retrieveTweetsById(ID, feedId, count, used_since_id)
    return matches
def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch new tweets for feed ``ID``, merge them with stored ones, classify.

    Steps, in order:
      1. Read the feed's since-id via ``mongoInt.retrieveSinceID(ID)`` and
         query ``twitterInt.retrieveTweets(Q, geoCode, since_id)``.
      2. Report the fetched count through
         ``mongoInt.collectionFeedFrequency``.
      3. Replace every retweet by the tweet under its
         ``'retweeted_status'`` key, flagged with ``'alreadyRetweeted'``.
      4. Add a ``'created_time'`` entry parsed from ``'created_at'``.
      5. If anything was fetched, append the new tweets to the list from
         ``mongoInt.retrieveParentIdTrue(ID)``, run ``self.topicModelLSI``
         on the combined list and, unless it returns 0, pass its result to
         ``self.updateRatio``.

    Args:
        ID: feed identifier passed to the ``mongoInt`` helpers.
        Q: query forwarded to the Twitter call and to the topic model.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        An empty list when no tweets were fetched, otherwise the result of
        ``self.runClassifier(ID)``.
    '''
    since_id = mongoInt.retrieveSinceID(ID)
    logger.debug('retrieve tweets')
    logger.debug(since_id)
    logger.debug('retrieve tweets123456')

    # Only ask the service for tweets relative to the stored since-id.
    twits = twitterInt.retrieveTweets(Q, geoCode, since_id)
    mongoInt.collectionFeedFrequency(len(twits), ID)

    def removeRetweets(tweet):
        # A retweet carries the original tweet under 'retweeted_status';
        # continue with that original and mark it.
        if 'retweeted_status' in tweet:
            tweet = tweet['retweeted_status']
            tweet['alreadyRetweeted'] = True
        # Was a bare print statement: Python 2-only syntax that wrote to
        # stdout and could fail on non-ASCII text. Logged instead.
        logger.debug('%s', tweet.get('text'))
        return tweet

    # Build a real list: on Python 3 map() returns a lazy iterator, which
    # breaks the len() calls below and skips side-effect-only mappings.
    twits = [removeRetweets(tweet) for tweet in twits]

    # 'created_at' carries a literal "+0000" offset, so the parsed struct_time
    # is handed to timegm (presumably calendar.timegm, which reads it as UTC).
    created_at_format = "%a %b %d %H:%M:%S +0000 %Y"
    for tweet in twits:
        tweet['created_time'] = timegm(
            time.strptime(tweet['created_at'], created_at_format))

    logger.info('tweets fetched for %s are %s', ID, len(twits))

    if not twits:
        return []

    uniqueTweetsFromDB = mongoInt.retrieveParentIdTrue(ID)
    # NOTE(review): the two logger.error calls below report normal progress,
    # not failures; the level is kept as found -- confirm whether it should
    # be lowered.
    logger.error('existing uniqueTweetsFromDB :%s', len(uniqueTweetsFromDB))
    logger.debug('total uniqe feeds %s', uniqueTweetsFromDB)
    # Stored tweets first, newly fetched ones appended after them.
    uniqueTweetsFromDB.extend(twits)
    logger.error('total combined tweets :%s', len(uniqueTweetsFromDB))

    similarTweet = self.topicModelLSI(uniqueTweetsFromDB, Q)
    # topicModelLSI returning 0 is treated as "nothing to update".
    if similarTweet != 0:
        self.updateRatio(ID, similarTweet, uniqueTweetsFromDB, Q)
    return self.runClassifier(ID)
def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch new tweets for feed ``ID`` and merge them with stored ones.

    Steps, in order:
      1. Read the feed's since-id via ``mongoInt.retrieveSinceID(ID)`` and
         query ``twitterInt.retrieveTweets(Q, geoCode, since_id)``.
      2. Report the fetched count through
         ``mongoInt.collectionFeedFrequency``.
      3. Replace every retweet by the tweet under its
         ``'retweeted_status'`` key, flagged with ``'alreadyRetweeted'``.
      4. Add a ``'created_time'`` entry parsed from ``'created_at'``.
      5. If anything was fetched, append the new tweets to the list from
         ``mongoInt.retrieveParentIdTrue(ID)``, run ``self.topicModelLSI``
         on the combined list and, unless it returns 0, pass its result to
         ``self.updateRatio``.

    Args:
        ID: feed identifier passed to the ``mongoInt`` helpers.
        Q: query forwarded to the Twitter call and to the topic model.
        geoCode: geo filter forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The length of the combined (stored + new) tweet list, or 0 when no
        tweets were fetched.
    '''
    since_id = mongoInt.retrieveSinceID(ID)
    logger.debug('retrieve tweets')
    logger.debug(since_id)
    logger.debug('retrieve tweets123456')

    # Only ask the service for tweets relative to the stored since-id.
    twits = twitterInt.retrieveTweets(Q, geoCode, since_id)
    mongoInt.collectionFeedFrequency(len(twits), ID)

    def removeRetweets(tweet):
        # A retweet carries the original tweet under 'retweeted_status';
        # continue with that original and mark it.
        if 'retweeted_status' in tweet:
            tweet = tweet['retweeted_status']
            tweet['alreadyRetweeted'] = True
        # Was a bare print statement: Python 2-only syntax that wrote to
        # stdout and could fail on non-ASCII text. Logged instead.
        logger.debug('%s', tweet.get('text'))
        return tweet

    # Build a real list: on Python 3 map() returns a lazy iterator, which
    # breaks the len() calls below and skips side-effect-only mappings.
    twits = [removeRetweets(tweet) for tweet in twits]
    # Replaces a debug print() of the whole list to stdout.
    logger.debug('%s', twits)

    # 'created_at' carries a literal "+0000" offset, so the parsed struct_time
    # is handed to timegm (presumably calendar.timegm, which reads it as UTC).
    created_at_format = "%a %b %d %H:%M:%S +0000 %Y"
    for tweet in twits:
        tweet['created_time'] = timegm(
            time.strptime(tweet['created_at'], created_at_format))

    logger.info('tweets fetched are chellaaa %s', len(twits))

    # Previously the combined list was only bound when tweets were fetched,
    # so an empty fetch had no defined count to return. Report 0 explicitly.
    if not twits:
        return 0

    uniqueTweetsFromDB = mongoInt.retrieveParentIdTrue(ID)
    logger.debug('existing uniqueTweetsFromDB :%s', len(uniqueTweetsFromDB))
    # Stored tweets first, newly fetched ones appended after them.
    uniqueTweetsFromDB.extend(twits)
    logger.debug('total combined tweets :%s', len(uniqueTweetsFromDB))

    similarTweet = self.topicModelLSI(uniqueTweetsFromDB, Q)
    # topicModelLSI returning 0 is treated as "nothing to update".
    if similarTweet != 0:
        self.updateRatio(ID, similarTweet, uniqueTweetsFromDB, Q)
    return len(uniqueTweetsFromDB)
def retrieveTweetsById(self, ID, feedId, count):
    '''Return tweets of feed ``ID`` looked up through ``mongoInt``.

    A ``feedId`` that ``int()`` converts to 0 is replaced by the value of
    ``mongoInt.retrieveSinceID(ID)`` before the lookup.

    Args:
        ID: feed identifier passed to the ``mongoInt`` helpers.
        feedId: tweet id to look up from, or a zero value to use the
            feed's since-id.
        count: forwarded unchanged to ``mongoInt.retrieveTweetsById``.

    Returns:
        A new list with the items produced by
        ``mongoInt.retrieveTweetsById``.
    '''
    start_id = mongoInt.retrieveSinceID(ID) if int(feedId) == 0 else feedId
    return list(mongoInt.retrieveTweetsById(ID, start_id, count))