def geocodeFromCacheById(cacheId, inMemoryOnly=None):
    """Resolve a place cache ID to a geocode result.

    cacheId      -- a single cache ID, optionally carrying an
                    'importance_rating' entry, or a list of cache IDs from
                    which the first one whose provider is intended for
                    direct use is picked.
    inMemoryOnly -- when true the database is never queried; a miss in the
                    in-memory cache yields None. None is treated as False.

    Returns the geocode result, or None when it cannot be resolved.
    """
    if inMemoryOnly is None:
        inMemoryOnly = False

    importanceRating = None
    if 'importance_rating' in cacheId:
        importanceRating = cacheId['importance_rating']
        # Work on a copy so the caller's object is untouched, and strip the
        # rating so it does not take part in the mongoDB '_id' query.
        cacheId = dict(cacheId)
        cacheId.pop('importance_rating')

    if isinstance(cacheId, list):
        # NOTE(review): a rating stored on a list item is not extracted or
        # stripped here - confirm list items never carry 'importance_rating'.
        for candidate in cacheId:
            if isIntendedForDirectUse(GeocodeResultAbstract.getProviderIdFromCacheId(candidate)):
                cacheId = candidate
                break
        else:
            # No candidate qualified; cacheId is still the full list here.
            logger.error('Could not find useful ID from ID list %s' % (unicode(cacheId)))
            return None

    if not isIntendedForDirectUse(GeocodeResultAbstract.getProviderIdFromCacheId(cacheId)):
        # Provider is not meant for direct use: go through the GNS lookup,
        # bypassing both the in-memory cache and the place collection.
        gnsPlaceId = GeocodeResultAbstract.getPlaceIdFromCacheId(cacheId)
        gnsResult = GeocodeResultAbstract.getGnsByPlaceId(gnsPlaceId)
        if gnsResult is None:
            logger.error('Failed to retrieve GNS data with cache ID: %s' % unicode(cacheId))
        return gnsResult

    memoryKey = GeocodeResultAbstract.buildTupleFromCacheId(cacheId)
    resolved = inMemoryCacheGeocodeData.get(memoryKey, None)

    if resolved is None:
        if inMemoryOnly:
            return None

        db = getDatabase()
        assert isinstance(db, Database)
        document = db.place.find_one({'_id' : cacheId})
        if document is None:
            logger.warn('Could not find place cache ID in database: %s' % unicode(cacheId))
            return None

        resolved = buildGeocodeResult(document['place_data'], cacheId['providerId'], importanceRating)

    # Re-store even on a cache hit. Per the original author's note this is
    # meant to move the entry to the back of the ordered dict, giving a
    # 'least recently used' cache (depends on the cache type - TODO confirm).
    if resolved is not None:
        inMemoryCacheGeocodeData[memoryKey] = resolved

    return resolved
def writeGeocodeResultToCache(query, countryCode, acceptableTypes, results):
    """Persist the geocode results of one query to the database.

    Each result's geocodeData is upserted into the 'place' collection under
    the result's cache ID. Afterwards the list of cache IDs (each extended
    with 'importance_rating' when the result has one) is upserted into the
    'geocode' collection under the key built from query, countryCode and
    acceptableTypes.
    """
    database = getDatabase()
    assert isinstance(database, Database)

    placeEntries = []
    for geocodeResult in results:
        assert isinstance(geocodeResult, GeocodeResultAbstract)

        placeKey = geocodeResult.cache_id

        if geocodeResult.has_importance_rating:
            # Copy before annotating so the result's own cache ID stays clean;
            # the rating is only stored alongside the query mapping.
            placeEntry = dict(placeKey)
            placeEntry['importance_rating'] = geocodeResult.importance_rating
        else:
            placeEntry = placeKey
        placeEntries.append(placeEntry)

        # probably should make result.geocodeData part of GeocodeResultAbstract.
        placeDocument = {'_id' : placeKey, 'place_data': geocodeResult.geocodeData}
        database.place.update({'_id': placeKey}, placeDocument, upsert=True)

    queryKey = buildKey(query, countryCode, acceptableTypes)
    queryDocument = {'_id' : queryKey, 'place': placeEntries}
    database.geocode.update({'_id': queryKey}, queryDocument, upsert=True)
def _geocodeFromCache(query, providerId, countryCode=None, acceptableTypes=None, inMemoryOnly=None, allowPartial=None):
    """Look up the cached geocode results for a query.

    query, countryCode, acceptableTypes -- combined by buildKey() into the
                    ID of the cached query mapping.
    providerId   -- the mapping must contain a place from this provider.
    inMemoryOnly -- when true the database is never queried. None means False.
    allowPartial -- only relevant with inMemoryOnly: when true, places that
                    are missing from memory are skipped instead of failing
                    the whole lookup. None means False.

    Returns a non-empty list of geocode results, or None when the query is
    not cached or none of its places could be resolved.
    """
    if inMemoryOnly is None:
        inMemoryOnly = False

    if allowPartial is None:
        allowPartial = False

    geocodeId = buildKey(query, countryCode, acceptableTypes)
    memoryLookupKey = (geocodeId, countryCode, providerId)

    queryMapping = inMemoryCacheGeocodeQuery.get(memoryLookupKey)
    if queryMapping is None:
        if inMemoryOnly:
            return None

        db = getDatabase()
        queryMapping = db.geocode.find_one({'_id': geocodeId, 'place.providerId' : providerId})
        if queryMapping is None:
            return None

        # Remember the mapping so the next lookup can be served from memory.
        inMemoryCacheGeocodeQuery[memoryLookupKey] = queryMapping

    assert geocodeId == queryMapping['_id']

    geocodeResults = []
    for placeId in queryMapping['place']:
        place = geocodeFromCacheById(placeId, inMemoryOnly)
        if place is not None:
            geocodeResults.append(place)
        elif inMemoryOnly and not allowPartial:
            # Don't return partial result if not in memory, but strictly,
            # we cannot goto database, even if 1 out of 100 records requires it.
            return None

    # The original test was `len(geocodeResults) < 0`, which can never be
    # true; the evident intent is to report "nothing usable" as None.
    if not geocodeResults:
        return None

    return geocodeResults
def geocodeSearch(providerId, placeName, maxResults = 10):
    """Search the cached places of one provider by name.

    providerId -- only places whose '_id.providerId' equals this are searched.
    placeName  -- matched case-insensitively, as a regular expression,
                  against the provider's name field (see
                  getGeocodeSearchNamePath).
    maxResults -- upper bound on the number of results returned.

    Returns a list of at most maxResults geocode results; places for which
    buildGeocodeResult() yields None are skipped and do not count.
    """
    db = getDatabase()
    assert isinstance(db, Database)

    logger.info('Searching for location: %s' % placeName)

    # NOTE(review): placeName is compiled unescaped, so regex metacharacters
    # in it are interpreted (and an invalid pattern raises re.error). If
    # placeName can come from untrusted input consider re.escape() - not
    # changed here because callers may rely on pattern matching.
    regularExpression = re.compile('%s' % placeName,re.IGNORECASE)

    mongoPath = '.'.join(['place_data'] + getGeocodeSearchNamePath(providerId))
    search = {'_id.providerId' : providerId, mongoPath : regularExpression}

    results = list()
    for item in db.place.find(search):
        # Enforce the cap before adding anything: the previous post-append
        # `count > maxResults` test let maxResults + 1 results through.
        if len(results) >= maxResults:
            break

        result = buildGeocodeResult(item['place_data'],providerId)
        if result is not None:
            results.append(result)

    return results
def getInstanceCodeCollection():
    """Return the 'instance_codes' collection of the database from getDatabase()."""
    return getDatabase().instance_codes
def getInstanceLifetimeCollection():
    """Return the 'instance_lifetime' collection of the database from getDatabase()."""
    database = getDatabase()
    return database.instance_lifetime
realtimePerformance], userAnalysers=userAnalysers) tweetQueue = resultDic['tweet_queue'] followerExtractorGateThread = resultDic['follower_extractor_gate_thread'] userEnrichFollowersPage = UserFollowerEnrichPage(webApplication, dataCollection, followerExtractorGateThread) # Load tweet queue into web application so that new instances can be created. webApplication.tweet_queue = tweetQueue assert webApplication.tweet_queue is not None if args.clear_geocode_data: logger.info('Clearing geocode data..') db = getDatabase() db.place.drop() db.geocode.drop() logger.info('Geocode data cleared') if args.setup_instance_code: print 'Running in setup instance code mode' try: while True: print 'Setting up instance code...' result = raw_input('Enter the maximum number of instances that can consume this code at any one time: ') result = parseInteger(result,0,default=1) code = getCode(result) print 'Instance code with ID: \'%s\' setup, with consume limit: %d' % (code, result)
from api.caching.caching_shared import getDatabase

__author__ = 'Michael Pryor'

# Maintenance script: when executed directly (not on import) it calls
# remove() with no filter on the 'place' and 'geocode' collections of the
# database returned by getDatabase() - presumably to wipe the cached
# geocode data; confirm before running against a live database.
if __name__ == '__main__':
    db = getDatabase()
    db.place.remove()
    db.geocode.remove()
userAnalysers=userAnalysers) tweetQueue = resultDic['tweet_queue'] followerExtractorGateThread = resultDic['follower_extractor_gate_thread'] userEnrichFollowersPage = UserFollowerEnrichPage( webApplication, dataCollection, followerExtractorGateThread) # Load tweet queue into web application so that new instances can be created. webApplication.tweet_queue = tweetQueue assert webApplication.tweet_queue is not None if args.clear_geocode_data: logger.info('Clearing geocode data..') db = getDatabase() db.place.drop() db.geocode.drop() logger.info('Geocode data cleared') if args.setup_instance_code: print 'Running in setup instance code mode' try: while True: print 'Setting up instance code...' result = raw_input( 'Enter the maximum number of instances that can consume this code at any one time: ' ) result = parseInteger(result, 0, default=1)