def post(self):
    """Broadcast a newly-saved link to stream listeners and XMPP subscribers.

    Expects POST params: title, link, domain, user_id (SessionModel key),
    updated, e (embed payload), link_category, subscribers (JSON list).
    Builds a JSON envelope for the comet/channel broadcaster and a Message
    for the XMPP handler. No response body is written.
    """
    broadcaster = BroadcastMessage()
    userUtil = UserUtils()
    title = self.request.get('title', None)
    link = self.request.get('link', None)
    domain = self.request.get('domain', None)
    user_id = self.request.get('user_id', None)
    updated = self.request.get('updated', None)
    embeded = self.request.get('e', None)
    link_category = self.request.get('link_category', None)
    # NOTE(review): simplejson.loads raises TypeError if the 'subscribers'
    # param is absent (loads(None)) — presumably callers always send it; verify.
    subscribers = simplejson.loads(self.request.get('subscribers', None))
    message = Message( title = title, link = link , domain = domain)
    # user_id is the string form of a SessionModel datastore key.
    user = SessionModel.gql('WHERE __key__ = :1', db.Key(user_id)).get()
    if user is None:
        logging.info('can\'t determine user by id: %s' % user_id)
        return
    logging.info('user %s' % user.instaright_account)
    avatar = userUtil.getAvatar(user.instaright_account)
    logging.info('avatar %s' %avatar)
    # Envelope shape mirrors the feed handler's JSON: one {'u': {...}} record.
    # 'n' is the server-side "now" timestamp (seconds since epoch).
    messageAsJSON = [{'u':{'id':user_id, 't':title,'ol':link, 'l':LinkUtils.generate_instaright_link(user.url_encode26, LinkUtils.make_title(title)),'d':domain,'dd': LinkUtils.generate_domain_link(domain), 'a':avatar, 'u':updated, 'source': user.client, 'lc':link_category, 'html_lc':LinkUtils.getLinkCategoryHTML(user), 'e': embeded, 'n': int(time.mktime(datetime.datetime.now().timetuple()))}}]
    logging.info('sending message %s ' %messageAsJSON)
    broadcaster.send_message(messageAsJSON)
    # Fan out the same link to XMPP subscribers.
    xmpp_handler.send_message(subscribers, message)
def getBadge(self):
    """Award the '5' badge when the user was active on each of the last 4 days.

    Returns '5' when the user has a session on every day from yesterday back
    to four days ago, None when the badge was already assigned, the addon
    version is unknown, or any day in the window had no activity.
    """
    returnBadge=5
    existingBadge=UserBadge.gql('WHERE user = :1 and badge = :2', self.user, returnBadge).get()
    if existingBadge is not None:
        logging.info('Already assigned 5 day usage badge. Skipping.')
        return None
    if self.version is None:
        logging.info('Older version of addon not usage badge defined!')
        return None
    # Walk backwards day by day: yesterday .. 4 days ago (inclusive).
    yesterday=datetime.datetime.now().date() - datetime.timedelta(days=1)
    limit=datetime.datetime.now().date() - datetime.timedelta(days=4)
    while yesterday >= limit:
        s=SessionModel.gql('WHERE date = :1 and instaright_account = :2', yesterday, self.user).get()
        if s is None:
            # One inactive day breaks the streak — bail out immediately.
            # (The original kept an `active` flag, but it could never be
            # observed as False after this early return; removed as dead code.)
            logging.info('user %s NOT active for date %s' %(self.user, yesterday))
            return None
        logging.info('user %s active for date %s' %(self.user, yesterday))
        yesterday-=datetime.timedelta(days=1)
    # Every day in the window had activity.
    logging.info('user %s has been active in last %s' %(self.user, returnBadge))
    return '5'
def get(self):
    """Serve the most-recent-links feed as JSON, Atom XML, or valid-XML.

    The 10 newest SessionModel entries are cached in memcache for ~5 minutes
    under 'feed_json_cache'; the cached entities are reused for the JSON
    format. `format` query param selects the output: 'json', 'xml',
    'valid_xml', or unset (defaults to the Atom template).
    """
    memcache_key='feed_json_cache'
    cached_feed= memcache.get(memcache_key)
    format = self.request.get('format', None)
    # Absolute expiry timestamp ~5 minutes from now, for memcache.set(time=...).
    cache_exp = datetime.datetime.now() + datetime.timedelta(minutes=5)
    cache_exp_ts = time.mktime(cache_exp.timetuple())
    userUtil = UserUtils()
    if format == 'json' and cached_feed:
        # Fast path: serialize the cached entities straight to JSON.
        logging.info('getting json from cache')
        self.response.headers['Content-Type'] = "application/json"
        self.response.out.write(simplejson.dumps(cached_feed, default=lambda o: {'u':{'id':str(o.key()), 't':unicode(o.title), 'dd': LinkUtils.generate_domain_link(o.domain), 'd':o.domain, 'user': urllib.unquote(o.instaright_account), 'source': o.client, 'u': int(time.mktime(o.date.timetuple())), 'l':LinkUtils.generate_instaright_link(o.url_encode26,LinkUtils.make_title(o.title)),'a':userUtil.getAvatar(o.instaright_account),'ol':o.url, 'lc':LinkUtils.getLinkCategory(o), 'html_lc':LinkUtils.getLinkCategoryHTML(o), 'e': o.embeded, 'n': int(time.mktime(datetime.datetime.now().timetuple()))}}))
        return
    entries = SessionModel.gql('ORDER by date DESC').fetch(10)
    memcache.set(memcache_key, entries, time = cache_exp_ts)
    if not entries:
        self.response.out.write('Nothing here')
        # FIX: previously fell through and rendered a feed template on top of
        # the 'Nothing here' sentinel, producing a malformed response.
        return
    #now = datetime.datetime.now().strftime("%Y-%m-%dT%H\:%i\:%sZ")
    if format is None or format == 'xml' or format == 'valid_xml':
        # Tuple layout consumed by the feed templates.
        updated_entries = [ (str(o.key()), unicode(o.title), LinkUtils.generate_domain_link(o.domain), LinkUtils.generate_instaright_link(o.url_encode26,LinkUtils.make_title(o.title)),userUtil.getAvatar(o.instaright_account), o.date, LinkUtils.generate_instaright_link(o.url_encode26,LinkUtils.make_title(o.title)) ) for o in entries ]
        template_variables = { 'entries' : updated_entries, 'dateupdated' : datetime.datetime.today()}
        if format == 'valid_xml':
            path= os.path.join(os.path.dirname(__file__), 'templates/feed_valid.html')
        else:
            path= os.path.join(os.path.dirname(__file__), 'templates/feed.html')
        self.response.headers['Content-Type'] = "application/atom+xml"
        self.response.out.write(template.render(path,template_variables))
        return
    if format == 'json':
        self.response.headers['Content-Type'] = "application/json"
        self.response.out.write(simplejson.dumps(entries, default=lambda o: {'u':{'id':str(o.key()), 't':unicode(o.title), 'dd': LinkUtils.generate_domain_link(o.domain), 'd':o.domain, 'user': o.instaright_account, 'u': int(time.mktime(o.date.timetuple())), 'l':LinkUtils.generate_instaright_link(o.url_encode26,LinkUtils.make_title(o.title)), 'a':userUtil.getAvatar(o.instaright_account),'ol':o.url, 'source': o.client, 'e': o.embeded, 'lc':LinkUtils.getLinkCategory(o), 'html_lc':LinkUtils.getLinkCategoryHTML(o), 'n': int(time.mktime(datetime.datetime.now().timetuple()))}}))
        return
def get(self, url_hash, title):
    """Render the article page for *url_hash*.

    Redirects home when the hash is unknown, and redirects to the canonical
    slug when *title* differs from the title generated from the stored model.
    Any failure is logged and answered with a redirect to '/'.
    """
    try:
        self.redirect_perm()
        self.get_user()
        url_hash = urllib.unquote(url_hash)
        logging.info('url hash: %s' % url_hash)
        logging.info('category screen_name %s' %self.screen_name)
        if self.avatar is None:
            self.avatar='/static/images/noavatar.png'
        sessionModel = SessionModel.gql('where url_encode26 = :1', url_hash).get()
        if sessionModel is None:
            logging.info('not article with hash %s ... redirecting' % url_hash)
            self.redirect('/')
            return
        generated_title = LinkUtils.make_title(sessionModel.title)
        if title != generated_title:
            # Canonicalize the URL slug.
            self.redirect('/article/'+url_hash+'/'+generated_title)
            return
        instaright_link = LinkUtils.generate_instaright_link(url_hash, generated_title)
        userUtil = UserUtils()
        # (Removed unused locals: `category` from Links.categories and
        # `sessionTitle` — neither was referenced by the template below.)
        template_variables = {'page_footer': PageUtils.get_footer(), 'user':self.screen_name, 'logout_url':'/account/logout', 'avatar':self.avatar,'story_avatar': userUtil.getAvatar(sessionModel.instaright_account), 'story_user': sessionModel.instaright_account, 'domain': sessionModel.domain, 'title':sessionModel.title, 'link': sessionModel.url, 'updated':sessionModel.date, 'id': str(sessionModel.key()), 'instaright_link': instaright_link, 'category': LinkUtils.getLinkCategoryHTML(sessionModel), 'dd': LinkUtils.generate_domain_link(sessionModel.domain)}
        path = os.path.join(os.path.dirname(__file__), 'templates/article.html')
        self.response.headers["Content-Type"] = "text/html; charset=utf-8"
        self.response.out.write(template.render(path, template_variables))
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Still best-effort: log and fall back to home.
        e,e0 = sys.exc_info()[0], sys.exc_info()[1]
        logging.error('handled error : %s, %s ' %( e, e0 ))
        self.redirect('/')
def getnytbadge(self):
    """Return 'ny' when today's NY-domain saves reach the threshold, else None."""
    start_of_day = datetime.datetime.now().date()
    saves_today = SessionModel.gql(
        'where domain in :1 and date >= :2 and instaright_account = :3',
        self.nyDomains, start_of_day, self.user).count()
    logging.info('site specific badger(NY): fetched stats %s' % saves_today)
    # Guard clause: threshold not reached yet.
    if saves_today < self.ny_tresshold:
        logging.info('for user %s still tresshold of %s still not reached %s' %(self.user, self.ny_tresshold, saves_today))
        return None
    logging.info('setting ny badge for user %s ' %self.user)
    return 'ny'
def get(self, c):
    """Backfill LinkCategory.model_details for up to 50 entries of category *c*.

    For each LinkCategory row without a model reference, look up the matching
    SessionModel by url, then feed_url, then short_url, and store its key.
    """
    logging.info('updates for category %s' % c)
    lc=LinkCategory.gql('WHERE category = :1 order by updated desc', c).fetch(50)
    for l in lc:
        if hasattr(l,'model_details') and l.model_details is not None:
            logging.info('url %s already has details, skipping update' %l.url)
            continue
        logging.info('updating url details %s ' %l.url)
        s=SessionModel.gql('WHERE url = :1', l.url).get()
        if s is None:
            s=SessionModel.gql('WHERE feed_url = :1', l.url).get()
        if s is None:
            # FIX: this fallback duplicated the feed_url query verbatim;
            # the intended last resort is the short_url field.
            s=SessionModel.gql('WHERE short_url = :1', l.url).get()
        if s is None:
            logging.info('ERROR: no session model url for %s' % l.url)
            continue
        logging.info('session model for url %s FOUND' %l.url)
        l.model_details=s.key()
        l.put()
def getmoviebadge(self):
    """Return 'movie' when today's movie-domain saves reach the threshold, else None."""
    start_of_day = datetime.datetime.now().date()
    saves_today = SessionModel.gql(
        'where domain in :1 and date >= :2 and instaright_account = :3',
        self.movieDomains, start_of_day, self.user).count()
    logging.info('site specific badger(movie): fetched stats %s' % saves_today)
    # Guard clause: threshold not reached yet.
    if saves_today < self.movie_tresshold:
        logging.info('for user %s still tresshold of %s still not reached %s' %(self.user, self.movie_tresshold, saves_today))
        return None
    logging.info('setting movie badge for user %s ' %self.user)
    return 'movie'
def get(self, user):
    """Render a plain list of the user's saved link URLs (up to 1000)."""
    if not user:
        logging.error('Empty user. Skipping')
        return
    account = urllib.unquote(user)
    recent_sessions = SessionModel.gql(
        'WHERE instaright_account = :1 ORDER by date desc ', account).fetch(1000)
    template_variables = {'links': [entry.url for entry in recent_sessions]}
    template_path = os.path.join(os.path.dirname(__file__), 'templates/user_links.html')
    self.response.headers["Content-type"] = "text/html"
    self.response.out.write(template.render(template_path, template_variables))
def post(self):
    """Aggregate stats for the session identified by the 'sessionKey' param.

    'update_limit_date' bounds the aggregation window; both are passed to
    self.aggregateData. Missing key or unknown session is logged and skipped.
    """
    key = self.request.get("sessionKey", None)
    upper_limit_date = self.request.get("update_limit_date", None)
    # FIX: db.Key(None) raises before the old `is not None` check could ever
    # matter — validate the request param first.
    if key is None:
        logging.info("Can't process None session model")
        return
    currentSession = SessionModel.gql("WHERE __key__ = :1", db.Key(key)).get()
    if currentSession is None:
        logging.info("Can't process None session model")
        return
    self.aggregateData(currentSession, upper_limit_date)
def get(self, user):
    """Render the user-info page for *user* (URL-encoded account name).

    Flow: validate the account exists (UserDetails by instapaper_account),
    fetch the user's 100 newest links, then serve from a per-day memcache
    entry when available; otherwise look up (or lazily create) the
    UserDetails record and cache it.
    """
    if user is None or len(user) == 0:
        logging.error('Empty user. Skipping')
        return
    user_decoded = urllib.unquote(user)
    logging.info('user: %s' %user_decoded)
    ud=UserDetails.gql('WHERE instapaper_account = :1' , user_decoded).get()
    if ud is None:
        logging.info('non existing user. redirect to home')
        self.redirect('/')
        return
    # sanity check: backfill instaright_account from the legacy instapaper field.
    if ud.instaright_account is None:
        ud.instaright_account = ud.instapaper_account
        ud.put()
    # Cache key includes today's date, so the cache naturally rolls over daily.
    memcache_key ='user_info_' + user_decoded+'_'+str(datetime.datetime.now().date())
    sessions = SessionModel.gql('WHERE instaright_account = :1 ORDER by date desc ' , user_decoded).fetch(100)
    links = [ s for s in sessions if s is not None ]
    cached_info = memcache.get(memcache_key)
    if cached_info:
        # Fast path: render with the cached UserDetails entity.
        logging.info('getting from cache' )
        template_variables = {'user':cached_info,'links':links}
        path= os.path.join(os.path.dirname(__file__), 'templates/user_info.html')
        self.response.headers["Content-type"] = "text/html"
        self.response.out.write(template.render(path,template_variables))
        return
    # NOTE(review): this second lookup matches by mail, not instapaper_account
    # as above — presumably intentional, but verify against the data model.
    user_detail= UserDetails.gql('WHERE mail = :1', user_decoded).get()
    if user_detail is None:
        # Unknown user: enqueue a background fetch and render a stub record.
        logging.info('new user %s added to queue' %user_decoded)
        fetch_url = '/user/'+user+'/fetch'
        taskqueue.add(queue_name='user-info', url= fetch_url)
        ud = UserDetails()
        ud.name = user_decoded
        ud.instapaper_account = user_decoded
        ud.instaright_account = user_decoded
        ud.links_added = SessionModel.countAllForUser(user_decoded)
        # tmp put until we find more info for user
        ud.put()
        template_variables = {'user':ud, 'links': links}
        path= os.path.join(os.path.dirname(__file__), 'templates/user_info.html')
        self.response.headers["Content-type"] = "text/html"
        self.response.headers["Accept-Charset"] = "utf-8"
        self.response.out.write(template.render(path,template_variables))
        return
    # Backfill instaright_account on the mail-matched record too.
    if user_detail.instaright_account is None:
        user_detail.instaright_account = user_decoded
        user_detail.put()
    memcache.set(memcache_key, user_detail)
    template_variables = {'user':user_detail, "links" : links}
    path= os.path.join(os.path.dirname(__file__), 'templates/user_info.html')
    self.response.headers["Content-type"] = "text/html"
    self.response.out.write(template.render(path,template_variables))
def getsportbadge(self):
    """Return 'sport' when today's sport-domain saves plus sport-category links
    reach the threshold, else None.

    Counts direct saves to sport domains plus LinkCategory rows in sport
    categories whose referenced SessionModel belongs to this user.
    """
    midnight = datetime.datetime.now().date()
    currentCount=SessionModel.gql('where domain in :1 and date >= :2 and instaright_account = :3', self.sportDomains, midnight, self.user).count()
    categoryCount = LinkCategory.gql('WHERE category in :1 and date >= :2', self.sportCategories, midnight).fetch(1000)
    # FIX: guard model_details — rows without a model reference previously
    # raised AttributeError here (other handlers explicitly backfill this field).
    categoryRefined = [ lc for lc in categoryCount if lc.model_details is not None and lc.model_details.instaright_account == self.user ]
    cat_user_count = len(categoryRefined)
    logging.info('site specific badger(sport): fetched stats %s and category count %s' % (currentCount, cat_user_count))
    if currentCount + cat_user_count >= self.sport_tresshold:
        # FIX: log message said 'news badge' while awarding the sport badge.
        logging.info('setting sport badge for user %s ' %self.user)
        return 'sport'
    else:
        logging.info('for user %s still tresshold of %s still not reached %s' %(self.user, self.sport_tresshold, currentCount))
        return None
def test_update_encode(self):
    """Dry-run re-encoding of url_counter_id into base-26 for stored sessions.

    Logs before/after values without persisting (put() left commented out).
    """
    encoder = EncodeUtils()
    sessions = SessionModel.gql('ORDER by url_counter_id desc').fetch(1000)
    user_count = SessionModel.countAllForUser('*****@*****.**')
    logging.info('count for user %s' % user_count)
    logging.info("fetch %s " % len(sessions))
    for session in sessions:
        recomputed = encoder.enbase(session.url_counter_id)
        logging.info("e26: before %s after %s" % (session.url_encode26, recomputed))
        session.url_encode26 = recomputed
        # session.put()  -- intentionally disabled: dry run only
    self.assertEquals(True, True)
def get(self):
    """Enqueue user-consolidation tasks for one day's sessions, 1000 at a time.

    The day comes from the 'date' param (YYYY-MM-DD) or defaults to yesterday.
    Each full batch of 1000 triggers a follow-up task carrying the datastore
    cursor so the consumer can resume where this scan left off.
    """
    raw_date = self.request.get("date", None)
    if raw_date is None:
        day = datetime.datetime.now().date() - datetime.timedelta(days=1)
    else:
        day = datetime.datetime.strptime(raw_date, "%Y-%m-%d").date()
    next_day = day + datetime.timedelta(days=1)
    query = SessionModel.gql(" WHERE date >= :1 and date < :2 ", day, next_day)
    batch = query.fetch(1000)
    logging.info("initial fetch got: %s" % len(batch))
    taskqueue.add(queue_name="data-consolidation", url="/user_consolidation", params={"date": day})
    logging.info("added to queue task")
    # Keep paging while the previous fetch came back full.
    while len(batch) == 1000:
        resume_cursor = query.cursor()
        query = SessionModel.gql(" WHERE date >= :1 and date < :2 ", day, next_day).with_cursor(resume_cursor)
        batch = query.fetch(1000)
        logging.info("fetch got: %s" % len(batch))
        taskqueue.add(
            queue_name="data-consolidation",
            url="/user_consolidation",
            params={"date": day, "last_cursor": resume_cursor},
        )
        logging.info("added to queue task")
def getBadge(self):
    """Return the highest daily-save-count badge reached today, or None."""
    start_of_day = datetime.datetime.now().date()
    saves_today = SessionModel.gql(
        'WHERE date >= :1 and instaright_account = :2',
        start_of_day, self.user).count()
    logging.info('current daily user count : %s -> %s' %(self.user, saves_today))
    # Check tiers from highest to lowest; first one reached wins.
    for threshold, badge in ((105, '105'), (65, '65'), (55, '55'), (25, '25')):
        if saves_today >= threshold:
            return badge
    logging.info('speed limit badge %s: not initialized' %self.user)
    return None
def get(self):
    """Enqueue feed-URL expansion tasks for sessions saved via feedproxy.

    Links already handled today are skipped via a per-link, per-day memcache
    marker so repeated cron runs don't re-enqueue the same work.
    """
    feedproxy = "feedproxy.google.com"
    # 00:00:00 today — only links saved before today are considered.
    today = datetime.datetime.now().date()
    pending = SessionModel.gql("WHERE domain= :1 and date < :2", feedproxy, today).fetch(5000)
    logging.info("fetched feedproxy links %s" % len(pending))
    for entry in pending:
        dedup_key = "link_transform" + str(entry.key()) + "_" + str(datetime.datetime.now().date())
        if memcache.get(dedup_key):
            logging.info("allready processed link %s" % entry.url)
            continue
        logging.info("transforming link %s" % entry.url)
        taskqueue.add(queue_name="default", url="/link/transform/feed", params={"key": entry.key()})
        memcache.set(dedup_key, 1)
def get(self):
    """Enqueue short-URL expansion tasks for sessions saved via bit.ly.

    Mirrors the feedproxy handler: a per-link, per-day memcache marker
    prevents re-enqueueing links already processed today.
    """
    short_bitly = "bit.ly"
    # 00:00:00 today — only links saved before today are considered.
    today = datetime.datetime.now().date()
    pending = SessionModel.gql("WHERE domain= :1 and date < :2", short_bitly, today).fetch(5000)
    # TODO identify other shortners
    logging.info("fetched short links %s" % len(pending))
    for entry in pending:
        dedup_key = "link_transform" + str(entry.key()) + "_" + str(datetime.datetime.now().date())
        if memcache.get(dedup_key):
            logging.info("allready processed link %s" % entry.url)
            continue
        logging.info("transforming link %s" % entry.url)
        taskqueue.add(queue_name="link-queue", url="/link/transform/short", params={"key": entry.key()})
        memcache.set(dedup_key, 1)
def get(self):
    """Backfill LinkCategory.model_details across every known category.

    For each distinct category, take the 50 most recently updated rows and
    resolve their SessionModel by url, then feed_url, then short_url.
    """
    allData=LinkCategory.getAll()
    all_categories= [ c.category for c in allData if c is not None ]
    uniq_categories = set(all_categories)
    for c in uniq_categories:
        logging.info('updates for category %s' % c)
        lc=LinkCategory.gql('WHERE category = :1 order by updated desc', c).fetch(50)
        for l in lc:
            if hasattr(l,'model_details') and l.model_details is not None:
                #logging.info('url %s already has details, skipping update' %l.url)
                continue
            logging.info('updating url details %s ' %l.url)
            s=SessionModel.gql('WHERE url = :1 order by date desc', l.url).get()
            if s is None:
                logging.info('no session model for url %s trying feed url' %l.url)
                s=SessionModel.gql('WHERE feed_url = :1', l.url).get()
            if s is None:
                logging.info('no session model for url %s trying shprt url' %l.url)
                # FIX: this fallback repeated the feed_url query verbatim;
                # per the log message the intended field is short_url.
                s=SessionModel.gql('WHERE short_url = :1', l.url).get()
            if s is None:
                logging.info('ERROR: no session model url for %s' % l.url)
                continue
            l.model_details=s.key()
            l.put()
def get(self, domain):
    """Serve the 10 newest links for *domain* as JSON; empty array otherwise.

    Only format=json produces entries; any other format returns "[{}]".
    """
    format=self.request.get('format',None)
    if domain is None or len(domain) == 0:
        logging.info('not category in request. return empty')
        return
    if format == 'json':
        logging.info('domain %s json feed' % domain)
        userUtil = UserUtils()
        entries = SessionModel.gql('WHERE domain = :1 order by date desc', domain).fetch(10)
        self.response.headers['Content-Type'] = "application/json"
        #TODO insert categories for domain's view
        # FIX: timestamp used %I (12-hour clock, no AM/PM marker) inside an
        # ISO-8601-shaped string, making afternoon times ambiguous; use %H.
        self.response.out.write(simplejson.dumps(entries, default=lambda o: {'u':{'id':str(o.key()), 't':unicode(o.title), 'l': LinkUtils.generate_instaright_link(o.url_encode26, LinkUtils.make_title(o.title), o.url), 'user': urllib.unquote(o.instaright_account), 'source': o.client, 'html_lc':LinkUtils.getLinkCategoryHTML(o), 'd': o.domain, 'lc': LinkUtils.getLinkCategory(o), 'dd':LinkUtils.generate_domain_link(o.domain), 'u': o.date.strftime("%Y-%m-%dT%H:%M:%SZ"), 'a':userUtil.getAvatar(o.instaright_account),'ol':o.url}}))
        return
    self.response.headers['Content-Type'] = "application/json"
    self.response.out.write("[{}]")
def get(self):
    """Render the logged-in user's dashboard: links, weekly score, badges.

    Requires session cookies (user_uuid, screen_name, user_detail_key set by
    get_user); otherwise redirects home with a login prompt. The weekly score
    is cached in memcache for one day.
    """
    #redirect from appengine domain
    self.redirect_perm()
    self.get_user()
    if self.user_uuid is None or len(str(self.user_uuid)) == 0 or self.screen_name is None or self.user_detail_key is None:
        logging.info('No cookies, redirecting to home page')
        self.redirect('/?redirect=/user/dashboard&show_login=1')
        return
    logging.info('user: %s' %self.instaright_account)
    sessions = SessionModel.gql('WHERE instaright_account = :1 ORDER by date desc ' , self.instaright_account).fetch(self.link_batch)
    score = 0
    links = None
    if sessions is not None:
        links = [ s for s in sessions if s is not None ]
    ud_key=db.Key(self.user_detail_key)
    logging.info('user detail key %s' % self.user_detail_key)
    template_variables=[]
    now=datetime.datetime.now().date()
    # Monday of the current ISO week, via '%Y %W %w' round-trip.
    #start_of_week= time.asctime(time.strptime('%s %s 1' %(now.year, now.isocalendar()[1]), '%Y %W %w'))
    start_of_week= datetime.datetime.strptime('%s %s 1' %(now.year, now.isocalendar()[1]), '%Y %W %w')
    memcache_key='user_'+self.user_detail_key+'_score'
    cached_score=memcache.get(memcache_key)
    if cached_score is not None:
        logging.info('got score from cache( %s ): %s' %( memcache_key, cached_score ))
        score=cached_score
    else:
        # Sum the user's daily scores since the start of the week, then cache
        # the total for one day.
        logging.info('parameters: start of week %s now %s for user_key %s ' % ( start_of_week,now, ud_key))
        score_entities = ScoreUsersDaily.gql('WHERE user = :1 and date >= :2', ud_key, start_of_week).fetch(100)
        #score_entities = ScoreUsersDaily.gql('WHERE user = :1 and date <= :2 and date >= :3', ud_key, now , start_of_week).fetch(100)
        logging.info('got %s score entities' % len(score_entities))
        if score_entities is not None:
            scores = [ s.score for s in score_entities if s is not None ]
            score=sum(scores)
            logging.info('calculated score : %s' % score )
        exp_ts=time.mktime((datetime.datetime.now() + datetime.timedelta(days=1)).timetuple())
        memcache.set(memcache_key, score, time=exp_ts)
    badges = None
    all_badges = UserBadge.gql('WHERE user = :1 order by date desc', self.instaright_account).fetch(1000)
    if all_badges is not None:
        # Deduplicate (badge, description) pairs; rows lacking badge_property
        # metadata are skipped.
        badges = set([ (b.badge, b.badge_property.badge_desc) for b in all_badges if b is not None and b.badge_property is not None ])
    template_variables = {'user':self.screen_name, 'avatar':self.avatar,'instaright_account':self.instaright_account,'facebook_token':self.facebook_oauth_token,'facebook_profile': self.facebook_profile, 'twitter_profile': self.twitter_profile, 'twitter_token': self.twitter_oauth_token, 'google_profile': self.google_profile, 'google_token':self.google_oauth_token, 'picplz_profile': self.picplz_name, 'picplz_token': self.picplz_oauth_token, 'evernote_profile': self.evernote_name, 'evernote_token': self.evernote_oauth_token, 'links':links, 'score': score, 'visible_items_num': self.link_batch, 'badges': badges,'logout_url':'/account/logout'}
    logging.info('templates %s' %template_variables)
    path= os.path.join(os.path.dirname(__file__), 'templates/user_info.html')
    self.response.headers["Content-type"] = "text/html"
    self.response.out.write(template.render(path,template_variables))
def post(self):
    """Enqueue category-detection tasks for every saved URL under *domain*.

    A memcache marker (2-day TTL) deduplicates repeat requests for the same
    domain.
    """
    domain = self.request.get('domain',None)
    if domain is None:
        logging.info('no domain in request')
        # FIX: previously fell through and queried with domain=None.
        return
    logging.info('fetching categories for domain %s' % domain)
    memcache_key='domain_lookup_%s' % domain
    logging.info('checking cache for key %s' %memcache_key)
    # FIX: the guard was inverted — it returned "already processed" when the
    # key was ABSENT and only did the work when the key was already cached.
    if memcache.get(memcache_key) is not None:
        logging.info('domain already processed skipping. key %s expires %s' % (memcache_key, memcache.get(memcache_key)))
        return
    # Mark this domain as processed for ~2 days before fanning out the tasks.
    next_week=datetime.datetime.now().date() + datetime.timedelta(days=2)
    next_week_ts=time.mktime(next_week.timetuple())
    memcache.set(memcache_key,1,time=next_week_ts)
    sessions = SessionModel.gql('WHERE domain = :1', domain).fetch(1000)
    for s in sessions:
        logging.info('task: determine categories for url %s ( domain: %s)' % (s.url, domain))
        taskqueue.add(queue_name='category-queue', url='/link/category/task', params={'url':s.url, 'domain':domain})
def post(self):
    """Expand one bit.ly-style short link (task target): resolve the long URL,
    swap it into the SessionModel, and refresh link stats.

    iTunes ('itms://') targets and unresolvable links are skipped.
    """
    k = self.request.get('key')
    key = db.Key(k)
    s = SessionModel.gql('WHERE __key__ = :1', key).get()
    # FIX: guard against a stale/deleted key — s.url previously raised
    # AttributeError when the lookup returned nothing.
    if s is None:
        logging.info('no session model for key %s. skipping' % k)
        return
    util = LinkUtils()
    long_url=util.getShortOriginalUrl(s.url)
    if long_url is None:
        logging.info('could not retrieve long link.skipping')
        return
    logging.info('expanded url: %s' % long_url)
    if long_url.startswith('itms://'):
        logging.info('Skipping itunes item: %s' % long_url)
        return
    domain = RequestUtils.getDomain(long_url)
    # Preserve the original short link, then promote the expanded URL.
    s.short_url = s.url
    s.url = long_url
    s.domain = domain
    s.put()
    util.updateStats(s)
def post(self):
    """Push a saved session to the user's enabled external services.

    Currently forwards text selections to Evernote and image links to picplz.
    Requires 'user_details_key' and 'session_key' POST params and a stored
    UserTokens record.
    """
    user_details_key = self.request.get("user_details_key", None)
    if user_details_key is None:
        logging.info("user details key not defined ... skipping services submit")
        return
    session_key = self.request.get("session_key", None)
    if session_key is None:
        logging.info("session key not defined ... skipping services submit")
        return
    session = SessionModel.gql("WHERE __key__ = :1", db.Key(session_key)).get()
    # FIX: session was dereferenced below without a None check.
    if session is None:
        logging.info("no session found for key ... skipping services submit")
        return
    user_token = UserTokens.gql("WHERE user_details = :1", db.Key(user_details_key)).get()
    if user_token is None:
        logging.info("skipping service submit no tokens found")
        return
    service_util = ServiceUtil()
    evernote_token = user_token.evernote_token
    evernote_token_additional_info = user_token.evernote_additional_info
    evernote_enabled = user_token.evernote_enabled
    picplz_token = user_token.picplz_token
    # (Removed unused locals for flickr/facebook/twitter/picplz_enabled —
    # they were fetched but never used by the dispatch below.)
    if (
        evernote_token is not None
        and evernote_enabled == True
        and session.selection is not None
        and session.selection != "None"
    ):
        service_util.send_to_evernote(urllib.unquote(evernote_token), session, evernote_token_additional_info)
    if picplz_token is not None and session.isImage():
        service_util.send_to_picplz(picplz_token, session)
def domainScore(cls, user, domain):
    """Return the points earned for visiting *domain*: the configured
    'new_domain' value on a first visit, 0 on a repeat visit or bad input.

    First-visit state is tracked in memcache and falls back to a datastore
    lookup on a cache miss.
    """
    earned = 0
    if user is None or domain is None:
        logging.info('domain score not enpugh data ... skipping')
        return earned
    logging.info('domain score calc for for user %s' %user)
    # Points value comes from properties/score.ini next to this package.
    parser = ConfigParser.ConfigParser()
    parser.read(os.path.split(os.path.realpath(__file__))[0]+'/../properties/score.ini')
    new_domain_points = int(parser.get('domain_points','new_domain'))
    cache_key = 'visit_' + user + '_domain_' + domain
    seen_before = memcache.get(cache_key)
    if seen_before is None:
        seen_before = SessionModel.gql('WHERE domain = :1 and instaright_account = :2', domain, user).get()
    if seen_before is not None:
        logging.info('user %s already visited domain %s ' %(user, domain))
        return earned
    logging.info('new domain %s score for %s ' %(domain, user))
    memcache.set(cache_key, '1')
    return new_domain_points
def post(self):
    """Expand one feedproxy link (task target): resolve the original article
    URL, swap it into the SessionModel, and refresh link stats."""
    k=self.request.get('key',None)
    if k is None:
        logging.info('error key has not been specified')
        return
    key=db.Key(k)
    # (db.Key raises on malformed input rather than returning None, so the
    # old `if key is None` check could never fire; replaced with a guard on
    # the actual datastore lookup below.)
    s = SessionModel.gql('WHERE __key__ = :1', key).get()
    # FIX: s.url previously raised AttributeError when the entity was gone.
    if s is None:
        logging.info('error not valid key')
        return
    logging.info('feedproxt url %s' % unicode(s.url))
    util = LinkUtils()
    url = util.getFeedOriginalUrl(s.url)
    if url is None:
        logging.info('could not fetch original url. skipping.')
        return
    logging.info('original url %s' % url)
    domain = RequestUtils.getDomain(url)
    # Preserve the proxy URL, then promote the resolved article URL.
    s.domain = domain
    s.feed_url=s.url
    s.url=url
    s.put()
    util.updateStats(s)
def get(self):
    """Return the next page of the user's links, grouped by day, as JSON.

    The user is resolved from the 'cookie' param; 'offset' is a page index
    multiplied by self.link_batch. Output is a date-descending list of
    (date, [link dict, ...]) pairs; '{}' when nothing is found.
    """
    import operator  # hoisted from mid-function; used for the final sort
    logging.info('fetching more user links ...')
    cookie = self.request.get('cookie', None)
    offset = Cast.toInt(self.request.get('offset', None), 0)
    logging.info('row offset %s' % offset)
    offset = offset * self.link_batch
    ud = UserUtils.getUserDetailsFromCookie(cookie)
    # FIX: an unknown/expired cookie previously crashed on
    # ud.instaright_account; answer with the same empty payload instead.
    if ud is None:
        logging.info('no user details for cookie. skipping')
        self.response.headers["Content-type"] = "application/json"
        self.response.out.write('{}')
        return
    sessions = SessionModel.gql('WHERE instaright_account = :1 ORDER by date desc ', ud.instaright_account ).fetch(self.link_batch,offset)
    if sessions is None or len(sessions) == 0:
        logging.info('returned no sessions for offset %s' %offset)
        self.response.headers["Content-type"] = "application/json"
        self.response.out.write('{}')
        return
    logging.info('fetched %s sessions for user %s' %(len(sessions), ud.instaright_account))
    # Group sessions by calendar day (fetch order is date-desc, which
    # itertools.groupby relies on for contiguous groups).
    d = {}
    for d_te, j in itertools.groupby(sessions, key= lambda s: s.date.date()):
        ss = [ {'t':ss.title,'l':ss.url,'d':ss.domain,'h':ss.url_hash} for ss in list(j) ]
        d[str(d_te)] = ss
    #order by dates desc
    dates_sorted=sorted(d.iteritems(), key=operator.itemgetter(0), reverse=True)
    self.response.headers["Content-type"] = "application/json"
    self.response.out.write(simplejson.dumps(dates_sorted))
def linkScore(cls,user, link):
    """Return the points earned for saving *link*: the configured 'new_link'
    value on a first save, 0 on a repeat save or bad input.

    First-save state is tracked in memcache with a datastore fallback.
    """
    score=0
    if user is None or link is None:
        logging.info('link score not enpugh data ... skipping')
        return score
    logging.info('link score ...')
    config=ConfigParser.ConfigParser()
    config.read(os.path.split(os.path.realpath(__file__))[0]+'/../properties/score.ini')
    link_points=int(config.get('link_points','new_link'))
    # NOTE(review): key template says '_domain_' though it stores the full
    # link — the full URL keeps keys distinct, but confirm against the
    # domainScore key space.
    link_memcache_key='visit_'+user+'_domain_'+link
    visitedLink=memcache.get(link_memcache_key)
    if visitedLink is None:
        try:
            visitedLink=SessionModel.gql('WHERE url = :1 and instaright_account = :2', link, user).get()
        # FIX: was a bare `except:` — narrow to Exception so SystemExit/
        # KeyboardInterrupt are not swallowed; lookup stays best-effort.
        except Exception:
            logging.info('expection fetching %s' % link)
    if visitedLink is None:
        logging.info('new link %s score for %s ' %(link, user))
        score=link_points
        memcache.set(link_memcache_key, '1')
    else:
        logging.info('user %s already visited link %s ' %(user, link))
    return score
def post(self):
    """Broadcast one (category, link) pairing to listeners on that category's
    stream path (task target for /category/stream)."""
    category=self.request.get('category', None)
    url_hash=self.request.get('url', None)
    userUtil=UserUtils()
    if category is None or len(category) == 0:
        logging.info('no category in request. skipping ...')
        return
    if url_hash is None:
        logging.info('no url in request. skipping ...')
        return
    model = SessionModel.gql('WHERE url_hash = :1 order by date desc', url_hash).get()
    if model is None:
        logging.error('no session model for url hash %s ' %url_hash)
        return
    category_path='/category/%s' %category
    broadcaster = BroadcastMessage()
    date_published=''
    if model.date is not None:
        # FIX: was %I (12-hour clock without AM/PM) inside an ISO-8601-shaped
        # timestamp; %H keeps afternoon times unambiguous.
        date_published=model.date.strftime("%Y-%m-%dT%H:%M:%SZ")
    messageAsJSON = [{'u':{'id':str(model.key()), 't':unicode(model.title),'l':model.url,'d':model.domain,'u': date_published, 'a':userUtil.getAvatar(model.instaright_account),'ol':model.url,'c':category, 'lc':category}}]
    logging.info('sending category message %s for users on path %s' % (messageAsJSON, category_path))
    broadcaster.send_message(messageAsJSON,category_path)
def post(self):
    """Categorize *url* via the Alchemy API and persist the categories.

    Merges the API's category with any cached and Links-stored categories,
    updates/creates LinkCategory rows, fans each category out to the
    /category/stream broadcaster, and stores the detected language on Links.
    """
    url=self.request.get('url',None)
    url_hash = LinkUtils.getUrlHash(url)
    if url is None:
        logging.info('no link in request. skipping')
        return
    category_api='http://access.alchemyapi.com/calls/url/URLGetCategory?apikey=%s&url=%s&outputMode=json' %(self.alchemy_key, urllib2.quote(url.encode('utf-8')))
    logging.info('trying to fetch shared count info %s' %category_api)
    link=None
    language=None
    category=None
    # Find an existing Links row by hash, then by raw URL.
    try:
        link = Links.gql('WHERE url_hash = :1', url_hash).get()
        if link is None:
            link = Links.gql('WHERE url = :1', url).get()
    except BadValueError:
        logging.info('url property too long')
    if link is None:
        link = Links()
    else:
        link.date_updated = datetime.datetime.now().date()
    json = LinkUtils.getJsonFromApi(category_api)
    if json is None:
        logging.info('alchemy api returned no category.skipping')
        return
    try:
        language=json['language']
        category=json['category']
        score=Cast.toFloat(json['score'],0)
        # Only accept confident categorizations.
        if score is not None and score > 0.5 and category is not None:
            logging.info('category %s score %s' %(category, score))
            cats=category.split("_")
            if cats is None:
                logging.info('no categories. exit')
                return
            # Merge with categories already cached for this link.
            memcache_key=url_hash+'_category'
            current_categories=memcache.get(memcache_key)
            merge_cat=[]
            if current_categories is not None:
                logging.info('merging with existing cats %s' %current_categories)
                merge_cat.extend(current_categories)
                merge_cat.extend(cats)
            else:
                merge_cat=cats
            model=None
            try:
                # FIX: the url_hash query was passed `url` instead of
                # `url_hash`, so the primary lookup could never match.
                model=SessionModel.gql('WHERE url_hash = :1 order by date desc', url_hash).get()
                if model is None:
                    model=SessionModel.gql('WHERE url = :1 order by date desc', url).get()
            except BadValueError:
                logging.info('url too long ... %s' %url)
            if model is None:
                logging.info('model not defined ... skipping')
                return
            # Also merge categories recorded on the Links row.
            linkDetail=Links.gql('WHERE url_hash = :1' , url_hash).get()
            if linkDetail is None:
                linkDetail=Links.gql('WHERE url = :1' , url).get()
            if linkDetail is not None and linkDetail.categories is not None:
                logging.info('category found from link details %s' % linkDetail.categories)
                # SECURITY: eval() on a datastore-stored string — trusted only
                # because this process wrote it; prefer json/ast.literal_eval.
                delic_cats=eval(linkDetail.categories)
                d_cats=[ c for c in delic_cats ]
                merge_cat.extend(d_cats)
            merge_cat=set(merge_cat)
            logging.info('caching cats %s for url %s' %(merge_cat, url))
            memcache.set(memcache_key, list(set(merge_cat))[:4])
            for c in merge_cat:
                # Broadcast this category pairing, then upsert LinkCategory.
                taskqueue.add(queue_name='message-broadcast-queue', url='/category/stream', params={'category':c, 'url': url_hash})
                existingLinkCat = LinkCategory.gql('WHERE url_hash = :1 and category = :2', url_hash, c).get()
                if existingLinkCat is None:
                    existingLinkCat = LinkCategory.gql('WHERE url = :1 and category = :2', url, c).get()
                if existingLinkCat is not None:
                    existingLinkCat.updated=datetime.datetime.now()
                    if existingLinkCat.url_hash is None:
                        existingLinkCat.url_hash = url_hash
                    existingLinkCat.put()
                    logging.info('updated exisitng url(%s) category(%s) update time %s' % (url, c, existingLinkCat.updated))
                else:
                    logging.info('new pair: url%s) category(%s) ' % (url, c))
                    linkCategory=LinkCategory()
                    linkCategory.url=url
                    linkCategory.url_hash = url_hash
                    linkCategory.category=c
                    if model is not None:
                        linkCategory.model_details=model.key()
                    linkCategory.put()
        if language is not None:
            link.language = language
        link.url=url
        link.url_hash=url_hash
        link.put()
    except KeyError:
        e0, e1 = sys.exc_info()[0],sys.exc_info()[1]
        logging.info('key error [[%s, %s]] in %s' %(e0, e1, json))
def post(self):
    """Save a link for a user: resolve title/embed info, update scores,
    fan out badge/traction/recommendation tasks, and upsert SessionModel.

    A link already saved today by the same user is updated in place instead
    of creating a new entity.
    """
    user=self.request.get('user',None)
    user=urllib.unquote(user)
    url=self.request.get('url',None)
    domain=self.request.get('domain',None)
    title=self.request.get('title',None)
    share_mode=self.request.get('share_mode',None)
    if not RequestUtils.checkUrl(url):
        logging.info('skipping since url is not good!')
        return
    lu = LinkUtils()
    link_info = lu.getLinkInfo(url)
    description = link_info["d"]
    embeded = link_info["e"]
    logging.info('got post title %s' % title)
    # Prefer the scraped title when the client sent none.
    title_new = link_info["t"]
    if title is None and title_new is not None and len(title_new) > 0:
        title = title_new
    if title is None or title == 'None' or title == 'null':
        title=LinkUtils.getLinkTitle(url)
    if title is not None:
        title = title[:199]  # datastore string property limit headroom
    logging.info('final link title %s' %title)
    logging.info("link info desc: %s embede: %s" %( description, embeded))
    version=self.request.get('version',None)
    client=self.request.get('client',None)
    selection = self.request.get('selection', None)
    user_agent = self.request.get('user_agent',None)
    UserScoreUtility.updateLinkScore(user,url)
    UserScoreUtility.updateDomainScore(user, domain)
    # Fan out asynchronous work before the (retryable) datastore write.
    taskqueue.add(url='/user/badge/task', queue_name='badge-queue', params={'url':url, 'domain':domain, 'user':user, 'version': version, 'client': client})
    taskqueue.add(url='/link/traction/task', queue_name='link-queue', params={'url':url, 'user': user, 'title': title})
    taskqueue.add(url='/link/recommendation/task', queue_name='default', params={'url':url })
    # Global URL counter feeds the base-26 short code.
    name = "url"
    generic_counter.increment(name)
    url_cnt = generic_counter.get_count(name)
    logging.info("total url count %s " % url_cnt)
    e = EncodeUtils()
    enbased=e.encode(url_cnt)
    url_encode26 = e.enbase(enbased)
    logging.info("url encode: %s and enbase : %s" % (enbased, url_encode26))
    url_hash = LinkUtils.getUrlHash(url)
    today = datetime.datetime.now().date()
    model = SessionModel.gql('WHERE instaright_account = :1 and url_hash = :2 and date > :3', user, url_hash, today).get()
    new_entity=False
    if model is None:
        logging.info('did not find save dafined by: %s %s for date %s', user, url, str(today))
        model = SessionModel()
        new_entity=True
    else:
        logging.info('existing url(key %s) updating certain params' %str(model.key()))
    logging.info('link: %s title: %s' %(url, title))
    try:
        #remove for local testing
        model.ip = self.request.remote_addr
        model.instaright_account = user
        model.date = datetime.datetime.now()
        if new_entity == True:
            # Immutable identity fields are only set on first save.
            model.url = url
            model.url_hash = url_hash
            model.url_counter_id = url_cnt
            model.url_encode26 = url_encode26
        model.title = title
        model.user_agent=user_agent
        model.domain = domain
        model.short_link = None
        model.feed_link = None
        model.version = version
        model.client = client
        model.selection = selection
        model.embeded = embeded
        # FIX: the backoff seed was reset to 100 inside the loop, so
        # `timeout_ms *= 2` never had any effect; initialize it once.
        timeout_ms= 100
        while True:
            try:
                model.put()
                break
            except datastore_errors.Timeout:
                logging.info('model save timeout retrying in %s' % timeout_ms)
                # NOTE(review): despite the _ms name this sleeps `timeout_ms`
                # SECONDS (time.sleep takes seconds) — confirm intended unit.
                time.sleep(timeout_ms)
                timeout_ms *= 2
        logging.info('send link : url_hash %s title %s user_id %s updated %s client: %s' %(model.url_hash, model.title, str(model.key()), str(model.date), model.client))
    except (BadValueError, apiproxy_errors.DeadlineExceededError):
        # FIX: `except A, B:` is Py2 catch-A-bind-to-B syntax — the original
        # caught only BadValueError and ASSIGNED the exception instance to
        # apiproxy_errors.DeadlineExceededError. A tuple catches both.
        e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
        logging.error('error while saving url %s ( %s, %s)' % (url, e0, e1))
from google.appengine.ext import db from models import SessionModel ss = SessionModel.gql('order by short_url desc').fetch(5000) print len(ss) for s in ss: print s.short_url print s.url print s.domain