def fetchInterestFeeds(self,ID):
    '''Fetch all the neo4j interest nodes, returning name & city. Using those tags,
    look for a matching mongoDb collection; if none exists, search twitter & instagram
    and store the output in mongoDb in a collection mapped to the interest node.'''
    recordList = neo4jInt.getInterestNode(graphDB,ID)
    geoDict = {}
    tweets = []
    # parse the recordList and frame the hash tags here
    for record in recordList:
        if record[0]['lat'] is not None:
            geoDict.update({'lat':record[0]['lat']})
            geoDict.update({'lng':record[0]['lng']})
            geoDict.update({'distance':'.5'})  # default radius = 500m
        logger.info('recordList output of neo4j:%s',record[0]['name'])
        if record[0]['city'] is not None:
            Q = record[0]['name'] + ' ' + record[0]['city']
        else:
            Q = record[0]['name']
        ID = record[0]['id']
        logger.debug('fetchInterestFeeds ID:%s Q=%s geo coordinates =%s',ID,Q,geoDict)
        if mongoInt.checkCollExists(ID) > 1:
            tweets.extend(mongoInt.retrieveCollection(ID))
        else:
            tweets.extend(self.retrieveTweets(ID,Q,geoDict))
            tweets.extend(self.retrieveMediaBasedTags(ID,Q,geoDict))
        geoDict = {}  # reset the geo dictionary
    #sparkInt.Parallelized(tweets)
    #feedJson=sparkInt.wowFieldTrueOrFalse(tweets)
    return tweets
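# A minimal sketch of the collection-as-cache decision used above, kept separate for
# clarity. It reuses the module's own mongoInt helpers; 'fetch_live' is a hypothetical
# callable standing in for the twitter/instagram search, not a function defined here.
def _cached_or_live(collName,fetch_live):
    if mongoInt.checkCollExists(collName) > 1:
        # feeds for this interest node are already cached in mongoDb, reuse them
        return mongoInt.retrieveCollection(collName)
    # otherwise fall back to the live social-media search
    return fetch_live(collName)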
def retrieveTwitterAccessTokens(collName):
    '''Retrieve access tokens from the DB and pass them to twitterInt.'''
    if mongoInt.checkCollExists(collName) < 1:
        # if the collection twitter_Access_Tokens is not available, initially
        # populate the document with default tokens read from the cfg files
        logger.debug('default twitter token is:%s',globalS.dictDb['SATHISH_TOKEN'])
        if mongoInt.insertTwitteTokens(collName,globalS.dictDb['SATHISH_TOKEN']):
            logger.warn('twitter_Access_Tokens was empty, added default token now')
    tokens = mongoInt.retrieveTwitterTokens(collName)
    logger.debug('tokens retrieved key secret : %s',tokens)
    return tokens
def retrieveTwitterAccessTokens(collName='twitter_Access_Tokens'):
    '''Retrieve access tokens from the DB and pass them to twitterInt.'''
    if mongoInt.checkCollExists(collName) < 1:
        # if the collection twitter_Access_Tokens is not available, initially
        # populate the document with default tokens read from the cfg files
        logger.debug('default twitter token is:%s',globalS.dictDb['SATHISH_TOKEN'])
        if mongoInt.insertTwitteTokens(collName,globalS.dictDb['SATHISH_TOKEN']):
            logger.warn('twitter_Access_Tokens was empty, added default token now')
    tokens = mongoInt.retrieveTwitterTokens(collName)
    logger.debug('tokens retrieved key secret : %s',tokens)
    return tokens
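# A hypothetical usage sketch for retrieveTwitterAccessTokens, assuming it returns a
# list of token documents and that each document carries 'consumer_key' and
# 'consumer_secret' fields; those field names are illustrative assumptions only.
def _example_token_usage():
    tokens = retrieveTwitterAccessTokens()  # defaults to 'twitter_Access_Tokens'
    first = tokens[0] if len(tokens) else {}
    key = first.get('consumer_key')        # assumed field name
    secret = first.get('consumer_secret')  # assumed field name
    logger.debug('example token key=%s secret=%s',key,secret)
    return key,secret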
def fetchInterestFeeds(self,ID,lastTimeStamp):
    '''Fetch all the neo4j interest nodes, returning name & city. Using those tags,
    look for a matching mongoDb collection; if none exists, search twitter & instagram
    and store the output in mongoDb in a collection mapped to the interest node.'''
    recordList = neo4jInt.getInterestNode(graphDB,ID)
    # initialise the variables
    geoDict = {}
    tweets = []
    jobsArgs = []
    collectionList = []
    # parse the recordList and frame the hash tags here
    for record in recordList:
        geoDict = {}  # reset the geo dictionary
        if record[0]['lat'] is not None:
            geoDict.update({'lat':record[0]['lat']})
            geoDict.update({'lng':record[0]['lng']})
            geoDict.update({'distance':'.5'})  # default radius = 500m
        logger.info('recordList output of neo4j:%s',record[0]['name'])
        if record[0]['city'] is not None:
            Q = record[0]['name'] + ' ' + record[0]['city']
        else:
            Q = record[0]['name']
        ID = record[0]['id']
        logger.debug('fetchInterestFeeds ID:%s Q=%s geo coordinates =%s',ID,Q,geoDict)
        if mongoInt.checkCollExists(ID) > 1:
            collectionList.append(ID)
        else:
            jobsArgs.append([ID,Q,geoDict])
    ## auxiliary function to make it work
    # first-time login logic still to be defined
    if len(collectionList):
        def recCursor(lastTimeStamp):
            '''not an effective method to query across multiple collections'''
            for collName in collectionList:
                logger.debug('collName = %s & time = %s',collName,lastTimeStamp)
                docLists = mongoInt.retrieveCollection(collName,lastTimeStamp,globalS.dictDb['MONGODB_COUNT_LIMIT'])
                if globalS.dictDb['APP_DEBUG']:
                    def insertQueryData(twit,*argv):
                        twit.update({'queryDetails':argv})
                    map(lambda twit: insertQueryData(twit,ID), docLists)
                if len(docLists):
                    logger.info('fetched %s docs from collection:%s appending to tweets',len(docLists),collName)
                    # print the IDs of the feeds so we can verify whether any duplicate feeds were obtained
                    map(lambda twit: logger.debug('doc ID is %s',twit['id']), docLists)
                    tweets.extend(docLists)
            if len(tweets) < 10:
                lastTimeStamp = int(lastTimeStamp)-globalS.dictDb['DELTA_FEEDS_TIME']
                logger.info('Docs are not available so recursively calling %s',lastTimeStamp)
                return recCursor(lastTimeStamp)
            logger.info('collectively returned %s docs for multiple collections %s',len(tweets),collectionList)
            return 1
        recCursor(lastTimeStamp)
    elif (len(tweets) == 0) and len(jobsArgs):
        logger.warn('Collection is empty invoking worker pools:%s',jobsArgs)
        def retrieveMedias_helper(args):
            tweets.extend(self.retrieveMediaBasedTags(*args))
        def retrieveTweets_helper(args):
            '''commented out below as it is taking too much time'''
            tweets.extend(self.retrieveTweets(*args))
        #map(retrieveTweets_helper,jobsArgs)
        map(retrieveMedias_helper,jobsArgs)
        logger.debug('multiprocessing pool has returned %s feeds',len(tweets))
    #tweets = tweets[:20]
    #sparkInt.Parallelized(tweets)
    #feedJson=sparkInt.wowFieldTrueOrFalse(tweets)
    return tweets
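# An iterative sketch of the same time-window widening that recCursor performs
# recursively: step lastTimeStamp back by DELTA_FEEDS_TIME until at least min_docs
# documents are collected. Same mongoInt/globalS helpers as above; max_steps is an
# added guard (an assumption, not in the original) so empty collections cannot loop forever.
def _collect_docs_iteratively(collectionList,lastTimeStamp,min_docs=10,max_steps=20):
    docs = []
    for _ in range(max_steps):
        for collName in collectionList:
            docs.extend(mongoInt.retrieveCollection(
                collName,lastTimeStamp,globalS.dictDb['MONGODB_COUNT_LIMIT']))
        if len(docs) >= min_docs:
            break
        lastTimeStamp = int(lastTimeStamp)-globalS.dictDb['DELTA_FEEDS_TIME']
        logger.info('not enough docs, widening window to %s',lastTimeStamp)
    return docs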
def fetchInterestFeeds(self, ID, lastTimeStamp):
    '''Fetch all the neo4j interest nodes, returning name & city. Using those tags,
    look for a matching mongoDb collection; if none exists, search twitter & instagram
    and store the output in mongoDb in a collection mapped to the interest node.'''
    recordList = neo4jInt.getInterestNode(graphDB, ID)
    geoDict = {}
    tweets = []
    jobsArgs = []
    # parse the recordList and frame the hash tags here
    for record in recordList:
        geoDict = {}  # reset the geo dictionary
        if record[0]['lat'] is not None:
            geoDict.update({'lat': record[0]['lat']})
            geoDict.update({'lng': record[0]['lng']})
            geoDict.update({'distance': '.5'})  # default radius = 500m
        logger.info('recordList output of neo4j:%s', record[0]['name'])
        if record[0]['city'] is not None:
            Q = record[0]['name'] + ' ' + record[0]['city']
        else:
            Q = record[0]['name']
        ID = record[0]['id']
        logger.debug('fetchInterestFeeds ID:%s Q=%s geo coordinates =%s', ID, Q, geoDict)
        if mongoInt.checkCollExists(ID) > 1:
            #docs = mongoInt.retrieveCollection(ID,lastTimeStamp)
            #tweets.extend(docs) if len(docs) else 0
            tweets.extend(
                mongoInt.retrieveCollection(
                    ID, lastTimeStamp, globalS.dictDb['MONGODB_COUNT_LIMIT']))
        else:
            #tweets.extend(self.retrieveTweets(ID,Q,geoDict))
            #tweets.extend(self.retrieveMediaBasedTags(ID,Q,geoDict))
            jobsArgs.append([ID, Q, geoDict])
            #with Pool(processes=4) as pool:
            #    pool.map()
            #jobs = []
            #job.append(Process(target=self.retrieveTweets, args=(ID,Q,geoDict)))
            #job.append(Process(target=self.retrieveMediaBasedTags, args=(ID,Q,geoDict)))
            #feeds = self.retrieveTweets(ID,Q,geoDict)
            #tweets.extend(feeds) if len(feeds) else 0
            #medias = self.retrieveMediaBasedTags(ID,Q,geoDict)
            #tweets.extend(medias) if len(medias) else 0
    ## auxiliary function to make it work
    if len(jobsArgs):
        logger.warn('Collection is empty invoking worker pools:%s', jobsArgs)
        def retrieveMedias_helper(args):
            tweets.extend(self.retrieveMediaBasedTags(*args)[:20])
        def retrieveTweets_helper(args):
            tweets.extend(self.retrieveTweets(*args)[:20])
        #pool = Pool(2)
        #tweets.extend(pool.map(retrieveTweets_helper,jobsArgs))
        #tweets.extend(pool.map(retrieveMedias_helper,jobsArgs))
        map(retrieveTweets_helper, jobsArgs)
        map(retrieveMedias_helper, jobsArgs)
        #pool.close()
        #pool.join()
        logger.debug('multiprocessing pool has returned %s feeds', len(tweets))
    #tweets = tweets[:20]
    if globalS.dictDb['APP_DEBUG']:
        def insertQueryData(twit, *argv):
            twit.update({'queryDetails': argv})
            #return twit
        map(lambda twit: insertQueryData(twit, ID, Q, geoDict), tweets)
    #sparkInt.Parallelized(tweets)
    #feedJson=sparkInt.wowFieldTrueOrFalse(tweets)
    return tweets
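# A sketch of the pooled fan-out that the commented-out Pool code above hints at, using
# a thread-backed pool (multiprocessing.dummy) so no pickling of bound methods is needed.
# 'fetch_fn' is any callable taking (ID,Q,geoDict) and returning a list of feeds; the
# per-job cap of 20 mirrors the helpers above. This is an assumption-laden sketch, not
# the module's actual worker-pool implementation.
def _pooled_fetch(fetch_fn, jobsArgs, workers=4):
    from multiprocessing.dummy import Pool  # thread-backed Pool
    pool = Pool(workers)
    try:
        resultLists = pool.map(lambda args: fetch_fn(*args), jobsArgs)
    finally:
        pool.close()
        pool.join()
    feeds = []
    for lst in resultLists:
        feeds.extend(lst[:20])
    return feeds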
def fetchInterestFeeds(self,ID,lastTimeStamp):
    '''Fetch all the neo4j interest nodes, returning name & city. Using those tags,
    look for a matching mongoDb collection; if none exists, search twitter & instagram
    and store the output in mongoDb in a collection mapped to the interest node.'''
    recordList = neo4jInt.getInterestNode(graphDB,ID)
    # initialise the variables
    geoDict = {}
    tweets = []
    jobsArgs = []
    collectionList = []
    # parse the recordList and frame the hash tags here
    for record in recordList:
        geoDict = {}  # reset the geo dictionary
        if record[0]['lat'] is not None:
            geoDict.update({'lat':record[0]['lat']})
            geoDict.update({'lng':record[0]['lng']})
            geoDict.update({'distance':'.5'})  # default radius = 500m
        logger.info('recordList output of neo4j:%s',record[0]['name'])
        if record[0]['city'] is not None:
            Q = record[0]['name'] + ' ' + record[0]['city']
        else:
            Q = record[0]['name']
        ID = record[0]['id']
        logger.debug('fetchInterestFeeds ID:%s Q=%s geo coordinates =%s',ID,Q,geoDict)
        if mongoInt.checkCollExists(ID) > 1:
            collectionList.append(ID)
        else:
            jobsArgs.append([ID,Q,geoDict])
    if globalS.dictDb['APP_DEBUG']:
        def insertQueryData(twit,*argv):
            twit.update({'queryDetails':argv})
            #return twit
        map(lambda twit: insertQueryData(twit,ID,Q,geoDict), tweets)
    ## auxiliary function to make it work
    # first-time login logic still to be defined
    if len(collectionList):
        def recCursor(lastTimeStamp):
            for collName in collectionList:
                logger.debug('collName = %s & time = %s',collName,lastTimeStamp)
                tweets.extend(
                    mongoInt.retrieveCollection(collName,lastTimeStamp,globalS.dictDb['MONGODB_COUNT_LIMIT']))
            if len(tweets) < 2:
                lastTimeStamp = int(lastTimeStamp)-globalS.dictDb['DELTA_FEEDS_TIME']
                logger.info('Docs are not available so recursively calling %s',lastTimeStamp)
                return recCursor(lastTimeStamp)
            logger.info('collectively returned %s docs for multiple collections',len(tweets))
        recCursor(lastTimeStamp)
    elif len(jobsArgs):
        logger.warn('Collection is empty invoking worker pools:%s',jobsArgs)
        def retrieveMedias_helper(args):
            tweets.extend(self.retrieveMediaBasedTags(*args)[:20])
        def retrieveTweets_helper(args):
            '''commented out below as it is taking too much time'''
            tweets.extend(self.retrieveTweets(*args)[:20])
        ##map(retrieveTweets_helper,jobsArgs)
        ##map(retrieveMedias_helper,jobsArgs)
        logger.debug('multiprocessing pool has returned %s feeds',len(tweets))
    #tweets = tweets[:20]
    #sparkInt.Parallelized(tweets)
    #feedJson=sparkInt.wowFieldTrueOrFalse(tweets)
    return tweets
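# A small sketch of the APP_DEBUG annotation used above: attach the query that produced
# each document under a 'queryDetails' key so duplicate or unexpected feeds can be traced
# back to their ID/Q/geoDict. Written as an explicit loop rather than a side-effect map().
def _annotate_with_query(docs,ID,Q,geoDict):
    for doc in docs:
        doc.update({'queryDetails':(ID,Q,geoDict)})
    return docs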
def fetchInterestFeeds(self,ID,lastTimeStamp):
    '''Fetch all the neo4j interest nodes, returning name & city. Using those tags,
    look for a matching mongoDb collection; if none exists, search twitter & instagram
    and store the output in mongoDb in a collection mapped to the interest node.'''
    recordList = neo4jInt.getInterestNode(graphDB,ID)
    geoDict = {}
    tweets = []
    jobsArgs = []
    # parse the recordList and frame the hash tags here
    for record in recordList:
        geoDict = {}  # reset the geo dictionary
        if record[0]['lat'] is not None:
            geoDict.update({'lat':record[0]['lat']})
            geoDict.update({'lng':record[0]['lng']})
            geoDict.update({'distance':'.5'})  # default radius = 500m
        logger.info('recordList output of neo4j:%s',record[0]['name'])
        if record[0]['city'] is not None:
            Q = record[0]['name'] + ' ' + record[0]['city']
        else:
            Q = record[0]['name']
        ID = record[0]['id']
        logger.debug('fetchInterestFeeds ID:%s Q=%s geo coordinates =%s',ID,Q,geoDict)
        if mongoInt.checkCollExists(ID) > 1:
            #docs = mongoInt.retrieveCollection(ID,lastTimeStamp)
            #tweets.extend(docs) if len(docs) else 0
            tweets.extend(mongoInt.retrieveCollection(ID,lastTimeStamp,globalS.dictDb['MONGODB_COUNT_LIMIT']))
        else:
            #tweets.extend(self.retrieveTweets(ID,Q,geoDict))
            #tweets.extend(self.retrieveMediaBasedTags(ID,Q,geoDict))
            jobsArgs.append([ID,Q,geoDict])
            #with Pool(processes=4) as pool:
            #    pool.map()
            #jobs = []
            #job.append(Process(target=self.retrieveTweets, args=(ID,Q,geoDict)))
            #job.append(Process(target=self.retrieveMediaBasedTags, args=(ID,Q,geoDict)))
            #feeds = self.retrieveTweets(ID,Q,geoDict)
            #tweets.extend(feeds) if len(feeds) else 0
            #medias = self.retrieveMediaBasedTags(ID,Q,geoDict)
            #tweets.extend(medias) if len(medias) else 0
    ## auxiliary function to make it work
    if len(jobsArgs):
        logger.warn('Collection is empty invoking worker pools:%s',jobsArgs)
        def retrieveMedias_helper(args):
            tweets.extend(self.retrieveMediaBasedTags(*args)[:20])
        def retrieveTweets_helper(args):
            tweets.extend(self.retrieveTweets(*args)[:20])
        #pool = Pool(2)
        #tweets.extend(pool.map(retrieveTweets_helper,jobsArgs))
        #tweets.extend(pool.map(retrieveMedias_helper,jobsArgs))
        map(retrieveTweets_helper,jobsArgs)
        map(retrieveMedias_helper,jobsArgs)
        #pool.close()
        #pool.join()
        logger.debug('multiprocessing pool has returned %s feeds',len(tweets))
    #tweets = tweets[:20]
    if globalS.dictDb['APP_DEBUG']:
        def insertQueryData(twit,*argv):
            twit.update({'queryDetails':argv})
            #return twit
        map(lambda twit: insertQueryData(twit,ID,Q,geoDict), tweets)
    #sparkInt.Parallelized(tweets)
    #feedJson=sparkInt.wowFieldTrueOrFalse(tweets)
    return tweets
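# A note in code form on the map() calls above: under Python 2 map() runs eagerly, so
# using it purely for side effects works, but under Python 3 map() is lazy and the
# helpers would never execute. A portable equivalent, using the same helper shape as
# retrieveTweets_helper / retrieveMedias_helper:
def _run_helpers_eagerly(helper,jobsArgs):
    for args in jobsArgs:
        helper(args)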