def get(self):
    # Enqueue one deduplication task per country, at most once per day (guarded by a memcache flag).
    allCountries = CountryStats.gql('order by count desc').fetch(5000)
    for c in allCountries:
        memcache_key = 'city_queue' + '_' + c.countryCode + str(datetime.datetime.now().date())
        if memcache.get(memcache_key):
            logging.info('processed country %s, skipping' % c.countryCode)
            continue
        logging.info('adding to queue %s' % c.countryCode)
        taskqueue.add(queue_name='redundant', url='/stats/country/redundant/task', params={'country': c.countryCode})
        memcache.set(memcache_key, 1)
def countryFeed(self, reqId):
    logging.info('Country stats feed')
    countryStats = CountryStats.gql('WHERE count > 10 ORDER BY count desc').fetch(40)
    if not countryStats:
        logging.info('Not enough data for graph')
        self.response.out.write('Not enough data for graph')
        return
    # Drop placeholder/aggregate country codes.
    countryStats = [x for x in countryStats if x.countryCode != 'XX' and x.countryCode != 'EU']
    logging.info('retrieved %s stats' % len(countryStats))
    description = {"countryCode": ("string", "Country Code"), "count": ("number", "Count")}
    columnnames = ["countryCode", "count"]
    data_table = gviz_api.DataTable(description)
    cntrCnt = []
    for countryCnt in countryStats:
        # Log scale keeps the largest countries from dwarfing the rest of the chart.
        entry = {"countryCode": countryCnt.countryCode, "count": math.log(countryCnt.count)}
        cntrCnt.append(entry)
    data_table.LoadData(cntrCnt)
    self.response.headers['Content-Type'] = 'text/plain'
    self.response.out.write(data_table.ToJSonResponse(columns_order=columnnames, req_id=reqId))
def post(self):
    countryCode = self.request.get('country', None)
    memcache_key = 'redundant_country' + '_' + countryCode + '_' + str(datetime.datetime.now().date())
    if memcache.get(memcache_key):
        logging.info('already processed %s' % countryCode)
        return
    redundant = CountryStats.gql('WHERE countryCode = :1', countryCode).fetch(1000)
    if len(redundant) == 1:
        logging.info('no duplicates for %s' % countryCode)
        return
    logging.info('country: %s has duplicates: %s' % (countryCode, len(redundant)))
    # Keep the entity with the highest count and fold the duplicates into it.
    pivot = max(redundant, key=lambda stat: stat.count)
    logging.info('max stats: %s' % pivot.count)
    redundant.remove(pivot)
    for c in redundant:
        pivot.count += c.count
        if c.dateUpdated is not None and (pivot.dateUpdated is None or c.dateUpdated > pivot.dateUpdated):
            pivot.dateUpdated = c.dateUpdated
        c.delete()
    pivot.put()
    memcache.set(memcache_key, '1')
    logging.info('new count %s' % pivot.count)
def aggregateData(self, currentSession, upper_limit_date):
    logging.info("aggregate data for %s" % currentSession.date)
    locationData = RequestUtils.ipResolverAPI(currentSession.ip)
    if len(locationData) == 2:
        logging.info("updating location data")
        city = locationData[0]
        countryCode = locationData[1]
        # self.response.out.write('location api response:<BR> city : %s; country: %s ' % (city, countryCode))
        logging.info("location api response: %s " % locationData)
        userLocation = UserLocationModel()
        userLocation.user = currentSession.instaright_account
        userLocation.city = city
        userLocation.countryCode = countryCode
        userLocation.date = currentSession.date
        userLocation.put()
        # update country stats and city stats
        logging.info("country update")
        existingCountryStat = CountryStats.gql("WHERE countryCode = :1", countryCode).get()
        if existingCountryStat:
            # hack to avoid exception when count was never initialized
            if existingCountryStat.count is None:
                existingCountryStat.count = 1
            else:
                existingCountryStat.count += 1
            logging.info("updating count %s" % existingCountryStat.count)
            existingCountryStat.dateUpdated = upper_limit_date
            existingCountryStat.put()
        else:
            logging.info("new country")
            countryStat = CountryStats()
            countryStat.countryCode = countryCode
            countryStat.count = 1
            countryStat.dateUpdated = upper_limit_date
            countryStat.put()
        logging.info("city update")
        existingCityStat = CityStats.gql("WHERE city = :1 and countryCode = :2", city, countryCode).get()
        if existingCityStat:
            # hack to avoid exception when count was never initialized
            if existingCityStat.count is None:
                existingCityStat.count = 1
            else:
                existingCityStat.count += 1
            existingCityStat.dateUpdated = upper_limit_date
            logging.info("updating count %s" % existingCityStat.count)
            existingCityStat.put()
        else:
            logging.info("new city")
            cityStat = CityStats()
            cityStat.countryCode = countryCode
            cityStat.city = city
            cityStat.count = 1
            cityStat.dateUpdated = upper_limit_date
            cityStat.put()
    logging.info("link stats update")
    existingLinkStat = LinkStats.gql("WHERE link = :1", currentSession.url).get()
    if existingLinkStat:
        logging.info("updating link stats: %s" % currentSession.url)
        existingLinkStat.count = existingLinkStat.count + 1
        existingLinkStat.countUpdated = currentSession.date
        existingLinkStat.lastUpdatedBy = currentSession.instaright_account
        existingLinkStat.put()
    else:
        logging.info("new link %s" % currentSession.url)
        linkStat = LinkStats()
        linkStat.link = currentSession.url
        linkStat.count = 1
        linkStat.countUpdated = currentSession.date
        linkStat.lastUpdatedBy = currentSession.instaright_account
        linkStat.put()
    # domain update: backfill missing domain and date on the session
    shouldUpdateSession = 0
    mode = ""
    if currentSession.domain is None or currentSession.domain == "":
        currentSession.domain = RequestUtils.getDomain(currentSession.url)
        shouldUpdateSession = 1
        mode = "domain change: %s" % currentSession.domain
    if currentSession.date is None or currentSession.date == "":
        date = datetime.datetime.strptime("2009-11-15", "%Y-%m-%d").date()
        currentSession.date = date
        shouldUpdateSession = 2
        mode = "date change: %s" % date
    if shouldUpdateSession > 0:
        logging.info("updating session mode: %s" % mode)
        currentSession.put()
    else:
        logging.info("unchanged session")
    logging.info("done data aggregation")