def retrieveMediaBasedTags(self, ID, Q, geoDict):
    '''Fetch Instagram media for a query and store it in MongoDB.

    Media matching ``Q`` is requested from ``instagramInt``; when ``geoDict``
    is truthy, the results of a location search are appended as well.  Each
    item's ``created_time`` is converted to ``int`` before the whole batch is
    handed to ``mongoInt.insertFeedData``.

    NOTE: the original author flagged this function as hanging; the cause is
    not visible from this function alone.

    Args:
        ID: forwarded to ``mongoInt.insertFeedData`` as its first argument
            (presumably the target collection name -- confirm).
        Q: search query forwarded to ``instagramInt.retrieveMediaBasedTags``.
        geoDict: location parameters; a falsy value skips the location search.

    Returns:
        The media items that were passed to ``mongoInt.insertFeedData``.
    '''
    logger.debug('retrieve instagram medias')
    feedJson = instagramInt.retrieveMediaBasedTags(Q, geoDict)
    if geoDict:
        logger.debug('geoDict for instagram based media retrieve %s', geoDict)
        feedJson.extend(instagramInt.getLocationSearch(geoDict))

    # Explicit loop instead of map(): on Python 3 map() is lazy, so a map()
    # call used only for its side effect would never run the conversion.
    for media in feedJson:
        media['created_time'] = int(media['created_time'])

    logger.debug('store instagram media in mongoDb')
    mongoInt.insertFeedData(ID, feedJson)
    # TODO: stop returning the feed here; callers should read it from MongoDB.
    return feedJson
def retrieveTweets(self, ID, Q, geoCode):
    '''Retrieve tweets from Twitter and store the feed in MongoDB.

    Each tweet gets a ``created_time`` key holding the integer Unix timestamp
    parsed from its ``created_at`` string.  A non-empty result is inserted
    with ``mongoInt.insertFeedData``; an empty one only triggers
    ``mongoInt.createCollection``.

    Args:
        ID: forwarded to ``mongoInt.insertFeedData`` / ``createCollection``
            (presumably the target collection name -- confirm).
        Q: search query forwarded to ``twitterInt.retrieveTweets``.
        geoCode: location argument forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The tweets returned by ``twitterInt.retrieveTweets``, updated in place
        with ``created_time``.
    '''
    logger.debug('retrieve tweets')
    twits = twitterInt.retrieveTweets(Q, geoCode)

    # Explicit loop instead of map(): map() is lazy on Python 3, so the
    # update would never be applied there.
    for tw in twits:
        # The format only accepts a "+0000" offset, so the parsed struct_time
        # is UTC and timegm() turns it straight into epoch seconds.  It must
        # not go through time.gmtime() first: gmtime() expects seconds, not a
        # struct_time, and raises TypeError.
        createdAt = time.strptime(tw['created_at'],
                                  "%a %b %d %H:%M:%S +0000 %Y")
        tw['created_time'] = timegm(createdAt)

    logger.debug(
        'storing tweets of twitter of both location based on keyword mongoDb'
    )
    if twits:
        mongoInt.insertFeedData(ID, twits)
    elif not mongoInt.createCollection(ID):
        # Nothing to insert: still try to make sure the collection exists.
        logger.warn('unable to create collection in mongodb')
    # TODO: stop returning the feed here; callers should read it from MongoDB.
    return twits
def retrieveMediaBasedTags(self, ID, Q, geoDict):
    '''Fetch Instagram media for a query and store it in MongoDB.

    Media matching ``Q`` is requested from ``instagramInt``; when ``geoDict``
    is truthy, the results of a location search are appended as well.  Each
    item's ``created_time`` is converted to ``int`` before the whole batch is
    handed to ``mongoInt.insertFeedData``.

    NOTE: the original author flagged this function as hanging; the cause is
    not visible from this function alone.

    Args:
        ID: forwarded to ``mongoInt.insertFeedData`` as its first argument
            (presumably the target collection name -- confirm).
        Q: search query forwarded to ``instagramInt.retrieveMediaBasedTags``.
        geoDict: location parameters; a falsy value skips the location search.

    Returns:
        The media items that were passed to ``mongoInt.insertFeedData``.
    '''
    logger.debug('retrieve instagram medias')
    feedJson = instagramInt.retrieveMediaBasedTags(Q, geoDict)
    if geoDict:
        logger.debug('geoDict for instagram based media retrieve %s', geoDict)
        feedJson.extend(instagramInt.getLocationSearch(geoDict))

    # Explicit loop instead of map(): on Python 3 map() is lazy, so a map()
    # call used only for its side effect would never run the conversion.
    for media in feedJson:
        media['created_time'] = int(media['created_time'])

    logger.debug('store instagram media in mongoDb')
    mongoInt.insertFeedData(ID, feedJson)
    # TODO: stop returning the feed here; callers should read it from MongoDB.
    return feedJson
def insertFeedData(self, collName, docs):
    '''Insert ``docs`` into ``collName`` through ``mongoInt``.

    Described by its original author as a mock for the mongo integration.
    With no documents to insert, nothing is written; the function only asks
    ``mongoInt.createCollectionIfnot`` for the collection and logs a warning
    when that call returns a falsy value.

    Args:
        collName: collection name forwarded to ``mongoInt``.
        docs: sized container of documents to insert.

    Returns:
        0 when ``docs`` is empty, otherwise 0 plus whatever
        ``mongoInt.insertFeedData`` returns.
    '''
    if len(docs) == 0:
        logger.debug('docList is empty')
        collectionReady = mongoInt.createCollectionIfnot(collName)
        if not collectionReady:
            logger.warn('unable to create collection in mongodb')
        return 0

    insertedCount = 0
    insertedCount += mongoInt.insertFeedData(collName, docs)
    return insertedCount
def retrieveTweets(self, ID, Q, geoCode):
    '''Fetch tweets for a query and hand them to MongoDB.

    The tweets come from ``twitterInt.retrieveTweets(Q, geoCode)`` and are
    passed unchanged to ``mongoInt.insertFeedData`` under ``ID``.

    Args:
        ID: forwarded to ``mongoInt.insertFeedData`` as its first argument
            (presumably the target collection name -- confirm).
        Q: search query forwarded to ``twitterInt.retrieveTweets``.
        geoCode: location argument forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        Whatever ``twitterInt.retrieveTweets`` returned.
    '''
    logger.debug('retrieve tweets')
    tweets = twitterInt.retrieveTweets(Q, geoCode)

    logger.debug('storing tweets of twitter of both location baseed on keyworad mongoDb')
    # The insert result is accumulated into a counter that this function does
    # not use afterwards; the addition is kept so the call behaves as before.
    storedCount = 0
    storedCount += mongoInt.insertFeedData(ID, tweets)

    # TODO: stop returning the feed here; callers should read it from MongoDB.
    return tweets
def retrieveTweets(self, ID, Q, geoCode):
    '''Retrieve tweets from Twitter and store the feed in MongoDB.

    Each tweet gets a ``created_time`` key holding the Unix timestamp (as a
    decimal string) parsed from its ``created_at`` value.  A non-empty result
    is inserted with ``mongoInt.insertFeedData``; an empty one only triggers
    ``mongoInt.createCollection``.

    Args:
        ID: forwarded to ``mongoInt.insertFeedData`` / ``createCollection``
            (presumably the target collection name -- confirm).
        Q: search query forwarded to ``twitterInt.retrieveTweets``.
        geoCode: location argument forwarded to ``twitterInt.retrieveTweets``.

    Returns:
        The tweets returned by ``twitterInt.retrieveTweets``, updated in place
        with ``created_time``.
    '''
    # Local import keeps this block self-contained.
    from calendar import timegm

    logger.debug('retrieve tweets')
    twits = twitterInt.retrieveTweets(Q, geoCode)

    # Explicit loop instead of map(): map() is lazy on Python 3, so the
    # update would never be applied there.
    for tw in twits:
        # The format only accepts a "+0000" offset, i.e. the timestamp is UTC.
        # timegm() converts a UTC struct_time to epoch seconds; time.mktime()
        # would treat it as local time and shift the result by the host's
        # UTC offset.
        createdAt = time.strptime(tw['created_at'],
                                  "%a %b %d %H:%M:%S +0000 %Y")
        # NOTE(review): the value stays a string, as before -- confirm whether
        # consumers would prefer an int.
        tw['created_time'] = str(timegm(createdAt))

    logger.debug('storing tweets of twitter of both location based on keyword mongoDb')
    if twits:
        mongoInt.insertFeedData(ID, twits)
    elif not mongoInt.createCollection(ID):
        # Nothing to insert: still try to make sure the collection exists.
        logger.warn('unable to create collection in mongodb')
    # TODO: stop returning the feed here; callers should read it from MongoDB.
    return twits