Ejemplo n.º 1
0
	def get(self, url_hash, title):
		"""Render the article page for the story identified by ``url_hash``.

		Redirects to '/' when no SessionModel matches the hash or when any
		exception is raised, and to the canonical '/article/<hash>/<title>'
		URL when ``title`` differs from the title generated from the stored
		session title.

		Args:
			url_hash: URL-quoted value matched against ``url_encode26``.
			title: title slug taken from the request path.
		"""
		try:
			self.redirect_perm()
			self.get_user()
			url_hash = urllib.unquote(url_hash)
			logging.info('url hash: %s' % url_hash)
			logging.info('category screen_name %s' % self.screen_name)
			if self.avatar is None:
				self.avatar = '/static/images/noavatar.png'

			sessionModel = SessionModel.gql('where url_encode26 = :1', url_hash).get()
			if sessionModel is None:
				logging.info('not article with hash %s ... redirecting' % url_hash)
				self.redirect('/')
				return
			generated_title = LinkUtils.make_title(sessionModel.title)
			if title != generated_title:
				# Send the client to the canonical URL for this article.
				self.redirect('/article/' + url_hash + '/' + generated_title)
				return
			instaright_link = LinkUtils.generate_instaright_link(url_hash, generated_title)
			userUtil = UserUtils()
			template_variables = {
				'page_footer': PageUtils.get_footer(),
				'user': self.screen_name,
				'logout_url': '/account/logout',
				'avatar': self.avatar,
				'story_avatar': userUtil.getAvatar(sessionModel.instaright_account),
				'story_user': sessionModel.instaright_account,
				'domain': sessionModel.domain,
				'title': sessionModel.title,
				'link': sessionModel.url,
				'updated': sessionModel.date,
				'id': str(sessionModel.key()),
				'instaright_link': instaright_link,
				'category': LinkUtils.getLinkCategoryHTML(sessionModel),
				'dd': LinkUtils.generate_domain_link(sessionModel.domain),
			}
			path = os.path.join(os.path.dirname(__file__), 'templates/article.html')
			self.response.headers["Content-Type"] = "text/html; charset=utf-8"
			self.response.out.write(template.render(path, template_variables))
		except Exception:
			# Best-effort page: log the failure and fall back to the home page.
			e, e0 = sys.exc_info()[0], sys.exc_info()[1]
			logging.error('handled error : %s, %s ' % (e, e0))
			self.redirect('/')
Ejemplo n.º 2
0
        def textOldStyle(self, link, prepend_text=''):
            """Compose the legacy "check out this story" tweet into ``self.text``.

            The text starts with the shortened link, then gets one hashtag per
            popularity counter on ``link`` that exceeds its threshold and, room
            permitting, the highest-weighted category and the digg count.
            ``prepend_text`` is appended last, after a space.  ``self.text`` is
            set to None when the link cannot be shortened.

            Args:
                link: object exposing ``url``, the counters read below and
                    ``categories`` (presumably a string holding a dict literal
                    mapping category name to weight).
                prepend_text: extra text for the end of the tweet; None is
                    treated as ''.
            """
            # textNewStyle forwards its own default (None) here; len(None) and
            # str + None below would raise TypeError, so normalise first.
            if prepend_text is None:
                prepend_text = ''
            lu = LinkUtils()
            short_link = lu.shortenLink(link.url)
            if short_link is None:
                self.text = None
                return
            self.text = "check out this story: %s " % short_link
            counters = (
                (link.facebook_like, " #facebooklikes %s"),
                (link.redditups, " #reddit ups %s"),
                (link.delicious_count, " #delicious saves %s"),
                (link.instapaper_count, " #instaright %s"),
                (link.tweets, " #twitter %s #RTs"),
            )
            for value, tag_format in counters:
                if value is not None and value > 5:
                    self.text += tag_format % value
            top_category = None
            if link.categories is not None and len(link.categories) > 0:
                logging.info('init cat : %s' % str(link.categories))
                # NOTE(security): eval() executes whatever is stored in
                # link.categories. ast.literal_eval would be the safe choice
                # once the runtime provides it (Python >= 2.6).
                dicti = eval(link.categories)
                if len(dicti) > 0:
                    import operator
                    logging.info('categories:' + str(dicti))
                    sorteddict = sorted(dicti.items(), key=operator.itemgetter(1))
                    # Highest weight sorts last.
                    top_category = sorteddict[-1]
            if len(self.text) <= 140 - 1 - len(prepend_text):
                if top_category is not None and top_category[0] not in self.text and len(top_category[0]) + len(self.text) + 2 <= 140:
                    self.text += " #%s" % unicode(top_category[0])
                if link.diggs is not None and link.diggs > 4 and 8 + len(self.text) + 2 <= 140:
                    self.text += " #digg %s" % link.diggs
            self.text += " " + prepend_text
            logging.info('self.text: %s' % self.text)
Ejemplo n.º 3
0
 def generate_post_message(cls, session):
     """Return the share message for ``session``, or None if it cannot be built.

     None is returned when ``session`` is None, lacks a title or url, or when
     the url cannot be shortened.  Only the first 59 characters of the title
     are used.
     """
     usable = session is not None and session.title is not None and session.url is not None
     if not usable:
         return None
     short_link = LinkUtils().shortenLink(session.url)
     if short_link is None:
         return None
     headline = session.title[0:59]
     return "%s %s via http://www.bit.ly/instarightapp" % (headline, short_link)
Ejemplo n.º 4
0
 def _test_get_title(self):
     """Check LinkUtils.getLinkTitle against a few live URLs.

     NOTE(review): the leading underscore presumably keeps the test runner
     from collecting this method, since it depends on network access.
     """
     url = "http://www.reddit.com/r/videos/"
     title = LinkUtils.getLinkTitle(url)
     self.assertEqual("videos", title)
     # Page with Japanese characters: only checks that fetching does not
     # raise; no expected title is pinned.
     url = "http://blog.ohmynews.com/booking/168260"
     LinkUtils.getLinkTitle(url)
     url = "https://appengine.google.com/logs?&app_idnstaright&version_id=49.350893459209639378"
     title = LinkUtils.getLinkTitle(url)
     self.assertEqual("Google Accounts", title)
Ejemplo n.º 5
0
        def textNewStyle(self, link, title_from_url, prepend_text=None):
            """Compose a title-based tweet for ``link`` into ``self.text``.

            Sets ``self.text`` to None when the link cannot be shortened.  Falls
            back to ``textOldStyle`` when no title is known at all, or when
            ``title_from_url`` is shorter than 15 characters.

            Args:
                link: object exposing ``url``, ``title`` and ``categories``.
                title_from_url: title extracted from the page, or None.
                prepend_text: extra text handed to the text generator, or None.
            """
            lu = LinkUtils()
            short_link = lu.shortenLink(link.url)
            if short_link is None:
                logging.info('something is wrong with bitly link from %s ... ' % link.url)
                self.text = None
                return
            logging.info('new style title %s' % title_from_url)

            title_unknown = link.title is None and title_from_url is None
            title_too_short = title_from_url is not None and len(title_from_url) < 15
            if title_unknown or title_too_short:
                logging.info('title not known going back to old style')
                # textOldStyle takes len(prepend_text) and concatenates it, so
                # the None default of this method must not be passed through.
                return self.textOldStyle(link, prepend_text or '')
            categories = CategoryUtil.getTwitCategories(link.categories)
            self.text = Twit.generateTwitText(categories, title_from_url, short_link, prepend_text)
Ejemplo n.º 6
0
 def post(self):
     """Refresh the stored statistics for the URL given in the request.

     Reads the ``url`` and ``count`` request parameters, collects link data
     through LinkUtils.getAllData and stores it with ``self.update_link``.
     Requests without a URL, or with a URL that has no domain, are skipped.
     """
     count = self.request.get("count", None)
     url = self.request.get("url", None)
     if url is None:
         # urllib2.unquote(None) would raise; nothing to update without a URL.
         logging.info("no url detected. skipping...")
         return
     url = urllib2.unquote(url)
     domain = RequestUtils.getDomain(url)
     if not domain:
         self.response.out.write("not url: %s skipping!\n" % url)
         return
     logging.info("url %s" % url)
     logging.info("count %s" % count)
     lu = LinkUtils()
     link = lu.getAllData(url, count)
     self.update_link(url, link)
     self.response.out.write("put %s \n " % url)
Ejemplo n.º 7
0
	def get(self):
		"""Serve the ten most recent sessions as an Atom feed or as JSON.

		The ``format`` request parameter selects the output: missing, ``xml``
		or ``valid_xml`` renders an Atom template; ``json`` dumps the entries
		with simplejson, using the memcached copy when one is available.  Any
		other value produces no feed body.
		"""
		memcache_key = 'feed_json_cache'
		cached_feed = memcache.get(memcache_key)
		feed_format = self.request.get('format', None)
		cache_exp = datetime.datetime.now() + datetime.timedelta(minutes=5)
		cache_exp_ts = time.mktime(cache_exp.timetuple())
		userUtil = UserUtils()

		def instaright_link(o):
			# Public instaright URL of a session entry.
			return LinkUtils.generate_instaright_link(o.url_encode26, LinkUtils.make_title(o.title))

		def to_json(o, unquote_user):
			# simplejson ``default`` hook: JSON shape of one session entry.
			# NOTE(review): only the cached path URL-unquotes the account
			# name; kept as found, confirm which form clients expect.
			account = o.instaright_account
			if unquote_user:
				account = urllib.unquote(account)
			return {'u': {
				'id': str(o.key()),
				't': unicode(o.title),
				'dd': LinkUtils.generate_domain_link(o.domain),
				'd': o.domain,
				'user': account,
				'source': o.client,
				'u': int(time.mktime(o.date.timetuple())),
				'l': instaright_link(o),
				'a': userUtil.getAvatar(o.instaright_account),
				'ol': o.url,
				'lc': LinkUtils.getLinkCategory(o),
				'html_lc': LinkUtils.getLinkCategoryHTML(o),
				'e': o.embeded,
				'n': int(time.mktime(datetime.datetime.now().timetuple())),
			}}

		if feed_format == 'json' and cached_feed:
			logging.info('getting json from cache')
			self.response.headers['Content-Type'] = "application/json"
			self.response.out.write(simplejson.dumps(cached_feed, default=lambda o: to_json(o, True)))
			return
		entries = SessionModel.gql('ORDER by date DESC').fetch(10)
		memcache.set(memcache_key, entries, time=cache_exp_ts)
		if not entries:
			# NOTE(review): there is no return here, so this marker text is
			# followed by the (empty) feed output below; confirm the intent.
			self.response.out.write('Nothing here')
		if feed_format is None or feed_format == 'xml' or feed_format == 'valid_xml':
			updated_entries = []
			for o in entries:
				link = instaright_link(o)
				# The templates receive the instaright link in two positions.
				updated_entries.append((
					str(o.key()),
					unicode(o.title),
					LinkUtils.generate_domain_link(o.domain),
					link,
					userUtil.getAvatar(o.instaright_account),
					o.date,
					link,
				))
			template_variables = {'entries': updated_entries, 'dateupdated': datetime.datetime.today()}
			if feed_format == 'valid_xml':
				path = os.path.join(os.path.dirname(__file__), 'templates/feed_valid.html')
			else:
				path = os.path.join(os.path.dirname(__file__), 'templates/feed.html')
			self.response.headers['Content-Type'] = "application/atom+xml"
			self.response.out.write(template.render(path, template_variables))
			return
		if feed_format == 'json':
			self.response.headers['Content-Type'] = "application/json"
			self.response.out.write(simplejson.dumps(entries, default=lambda o: to_json(o, False)))
			return
Ejemplo n.º 8
0
        def get(self):
            """Serve the sitemap, built from the JSON feed when it is reachable.

            When the feed cannot be fetched the static ``sitemap.xml`` template
            is rendered.  Otherwise each feed entry contributes its domain link
            (hourly), its article link (daily) and one link per category
            (hourly) to ``sitemap_dyn.xml``.
            """
            feed = LinkUtils.getJsonFromApi('http://www.instaright.com/feed?format=json')
            # Both variants are XML documents, so both get the XML content type.
            self.response.headers["Content-Type"] = "text/xml; charset=utf-8"
            if feed is None:
                logging.info('default view')
                path = os.path.join(os.path.dirname(__file__), 'templates/sitemap.xml')
                self.response.out.write(template.render(path, {}))
                return

            logging.info('dynamic view')
            links = []
            for j in feed:
                logging.info('json entry: %s' % j)
                entry = j['u']
                dd = entry['dd']
                if dd is not None:
                    links.append((dd, 'hourly'))
                l = entry['l']
                if l is not None:
                    links.append((l, 'daily'))
                lc = entry['lc']
                if lc is not None:
                    # 'lc' is a comma-separated list of category names.
                    for ll in lc.split(','):
                        links.append(('http://www.instaright.com/%s' % ll, 'hourly'))
            logging.info('list of links: %s ' % len(links))
            template_variables = {'links': links}
            path = os.path.join(os.path.dirname(__file__), 'templates/sitemap_dyn.xml')
            self.response.out.write(template.render(path, template_variables))
Ejemplo n.º 9
0
 def post(self):
     """Replace a session's shortened URL with the URL it expands to.

     Looks up the SessionModel named by the ``key`` request parameter,
     resolves its URL through LinkUtils.getShortOriginalUrl, stores the
     original value in ``short_url`` and updates ``url`` and ``domain``.
     Missing keys, unknown sessions, unresolvable links and ``itms://``
     targets are skipped.
     """
     k = self.request.get('key')
     if not k:
         logging.info('error key has not been specified')
         return
     key = db.Key(k)
     s = SessionModel.gql('WHERE __key__ = :1', key).get()
     if s is None:
         # Without this guard the attribute access below raises.
         logging.info('no session found for key %s' % k)
         return
     util = LinkUtils()
     long_url = util.getShortOriginalUrl(s.url)
     if long_url is None:
         logging.info('could not retrieve long link.skipping')
         return
     logging.info('expanded url: %s' % long_url)
     if long_url.startswith('itms://'):
         logging.info('Skipping itunes item: %s' % long_url)
         return
     domain = RequestUtils.getDomain(long_url)
     s.short_url = s.url
     s.url = long_url
     s.domain = domain
     s.put()
     util.updateStats(s)
Ejemplo n.º 10
0
	def send_to_twitter(self, twitter_token, twitter_secret, session):
		"""Tweet about ``session`` on behalf of the given Twitter account.

		Nothing is sent when the link's overall score is 0 or when no tweet
		text could be generated.  Errors raised while posting are logged and
		not propagated.

		Args:
			twitter_token: access token key of the posting account.
			twitter_secret: access token secret of the posting account.
			session: object exposing ``url`` and ``title``.
		"""
		api = twitter.Api(
			consumer_key=self.twitter_consumer_key,
			consumer_secret=self.twitter_consumer_secret,
			access_token_key=twitter_token,
			access_token_secret=twitter_secret
		)
		twit = Twit()
		lu = LinkUtils()
		link = lu.getAllData(session.url)
		if link.overall_score == 0:
			logging.debug('skipping to send twitt since content is not popular %s ' % session.url)
			return
		twit.generate_content(link, session.title, "via:@instaright")
		logging.info('twit: %s' % twit.text)
		if twit.text is None:
			logging.info('twit has no text ... skipping')
			return
		try:
			api.PostUpdate(twit.text)
		except Exception:
			# Posting is best effort; record why it failed and carry on.
			logging.error('Error while sending tweet %s: %s => %s ' % (twit.text, sys.exc_info()[0], sys.exc_info()[1]))
Ejemplo n.º 11
0
        def post(self):
            """Fetch article recommendations for a URL and store them on its link.

            Finds (or creates) the Links entity for the ``url`` request
            parameter, asks the zemanta ``zemanta.suggest`` API for related
            articles and saves the first four (title, url) pairs as JSON in
            ``recommendation``.  Returns without storing recommendations when
            the URL or its hash is missing or the API answer is not usable.
            """
            url = self.request.get('url', None)
            if url is None:
                logging.info('no url no recommendations')
                return
            url = url.encode('utf-8')
            logging.info('getting url hash %s' % url)
            url_hash = LinkUtils.getUrlHash(url)
            if url_hash is None:
                logging.error("can't determing url hash %s" % url)
                return
            try:
                link = Links.gql('WHERE url_hash = :1', url_hash).get()
                if link is None:
                    link = Links.gql('WHERE url = :1', url).get()
            except Exception:
                # A failed lookup is treated like a link that does not exist yet.
                logging.info('link lookup failed for url %s' % url)
                link = None
            if link is None:
                logging.info('no link saved with url %s' % url)
                link = Links()
                link.url = url
                link.url_hash = url_hash
                link.put()
            api_call = 'http://api.zemanta.com/services/rest/0.0/'
            args = {'method': 'zemanta.suggest',
                    'api_key': self.z_key,
                    'text': url,
                    'return_categories': 'dmoz',
                    'format': 'json'}
            args_enc = urllib.urlencode(args)
            data = None
            result = None
            try:
                result = urlfetch.fetch(url=api_call, payload=args_enc, method=urlfetch.POST, headers={'Content-Type': 'application/x-www-form-urlencoded'})
                data = simplejson.loads(result.content)
            except Exception:
                logging.info('bad json data from zemanta: %s' % result)

            # Anything but a dict with status 'ok' counts as a failed call.
            if not isinstance(data, dict) or data.get('status') != 'ok':
                logging.info('error while fetching recommendations')
                return
            articles = data.get('articles') or []
            #TODO apply DMOZ categories
            relevant_articles = [(c["title"], c["url"]) for c in articles]
            link.recommendation = str(simplejson.dumps(relevant_articles[0:4]))
            if link.url_hash is None:
                link.url_hash = url_hash
            link.put()
Ejemplo n.º 12
0
 def post(self):
     """Replace a session's feed-proxy URL with the original article URL.

     Looks up the SessionModel named by the ``key`` request parameter,
     resolves its URL through LinkUtils.getFeedOriginalUrl, keeps the old
     value in ``feed_url`` and updates ``url`` and ``domain``.  Missing or
     invalid keys, unknown sessions and unresolvable URLs are skipped.
     """
     k = self.request.get('key', None)
     if not k:
         logging.info('error key has not been specified')
         return
     try:
         # db.Key raises for a malformed encoded key; it never returns None.
         key = db.Key(k)
     except db.BadKeyError:
         logging.info('error not valid key')
         return
     s = SessionModel.gql('WHERE __key__ = :1', key).get()
     if s is None:
         logging.info('no session found for key %s' % k)
         return
     logging.info('feedproxt url %s' % unicode(s.url))
     util = LinkUtils()
     url = util.getFeedOriginalUrl(s.url)
     if url is None:
         logging.info('could not fetch original url. skipping.')
         return
     logging.info('original url %s' % url)
     domain = RequestUtils.getDomain(url)
     s.domain = domain
     s.feed_url = s.url
     s.url = url
     s.put()
     util.updateStats(s)
Ejemplo n.º 13
0
	def get(self, service, method):
		"""Handle an OAuth2 callback and remember the resulting access token.

		For the ``picplz`` service the ``code`` request parameter is exchanged
		for an access token.  An OAuthAccessToken is stored under a fresh key
		name in every case (with an empty token when none was obtained), the
		key name is set as a cookie and the user is sent to the dashboard.

		Args:
			service: name of the OAuth provider taken from the request path.
			method: unused here; part of the route signature.
		"""
		key_name = create_uuid()
		oauth_code = self.request.get("code")

		oauth_token = ''
		self.service = service
		# request.get() yields '' rather than None for an absent parameter,
		# so test truthiness to avoid calling the API with an empty code.
		if service == 'picplz' and oauth_code:
			logging.info('code %s' % oauth_code)
			# NOTE(security): the client secret is hard-coded in this URL and
			# the token is logged below; both should move out of the source
			# and the logs respectively.
			json = LinkUtils.getJsonFromApi('https://picplz.com/oauth2/access_token?client_id=BnYEDMYMrqaKP7DYvQS55meeMHG6s2CA&client_secret=DjZ7DEjzT273tFHvdRPQ49kTA3XJXKpk&grant_type=authorization_code&redirect_uri=http://www.instaright.com/oauth2/picplz/callback&code=%s' % oauth_code)
			if json is not None:
				logging.info('picplz response %s' % json)
				oauth_token = json['access_token']
				logging.info('just got picplz access token %s' % oauth_token)
		self.token = OAuthAccessToken(key_name=key_name, service=service, oauth_token=oauth_token)
		self.token.put()
		self.set_cookie(key_name)
		self.redirect('/user/dashboard')
Ejemplo n.º 14
0
	def html_snapshot(self, condition=None, type=None):
		"""Write an HTML snapshot of a feed to the response.

		With neither argument the main feed is used; with both, the feed at
		``/<type>/<condition>/feed``.  Returns None without writing anything
		when only one of the two is given or when the feed cannot be fetched.

		Args:
			condition: feed selector value; one trailing slash is removed.
			type: feed selector kind used as the first path segment.
		"""
		if condition is None and type is None:
			url = 'http://www.instaright.com/feed?format=json'
			title = 'Instaright - SocialBookmarking and recommendation engine'
			description = 'Instaright provides most productive tools for reading, sharing and discovering online content. Install Instaright addon for Firefox, Chrome or bookmarlet. Discover content using Instaright Realtime Stream or receive message via Instant Messanger of your choice.'
		elif type is not None and condition is not None:
			#NOTE: condition contains trailing slash
			# endswith() also copes with an empty condition string.
			if condition.endswith('/'):
				condition = condition[:-1]
			url = 'http://www.instaright.com/%s/%s/feed?format=json' % (type, condition)
			title = 'Instaright %s articles - %s ' % (type, condition)
			description = 'Discover, save and share trending stories from %s' % condition
		else:
			# Only one selector given: no feed URL can be built from that.
			logging.info('html snapshot needs both type and condition (type=%s, condition=%s)' % (type, condition))
			return None
		logging.info('feed lookup %s ' % url)
		json = LinkUtils.getJsonFromApi(url)
		if json is None:
			return None
		logging.info('list of links: %s ' % len(json))
		template_variables = {'links': json, 'condition': condition, 'type': type, 'title': title, 'description': description}
		path = os.path.join(os.path.dirname(__file__), 'templates/html_snapshot.html')
		self.response.out.write(template.render(path, template_variables))
Ejemplo n.º 15
0
 def delicious_data(self, url):
     """Return a new Links object filled from the delicious urlinfo feed.

     The first feed item supplies the title, the top tags (stored as JSON
     text in ``categories`` and also queued on "category-queue") and the
     total post count.  A KeyError while reading the item is logged and the
     partially filled object is returned; an empty or missing answer yields
     an untouched Links object.
     """
     delicious_api = "http://feeds.delicious.com/v2/json/urlinfo/data?url=%s&type=json" % url
     logging.info("trying to fetch delicious info %s " % delicious_api)
     payload = LinkUtils.getJsonFromApi(delicious_api)
     result = Links()
     if not payload:
         return result
     try:
         if not result.title:
             result.title = payload[0]["title"].strip()
         tags_json = simplejson.dumps(payload[0]["top_tags"])
         result.categories = db.Text(unicode(tags_json))
         if result.categories is not None:
             task_params = {"url": url, "categories": result.categories}
             taskqueue.add(
                 queue_name="category-queue",
                 url="/link/category/delicious",
                 params=task_params,
             )
         result.delicious_count = Cast.toInt(payload[0]["total_posts"], 0)
         logging.info("delicious count %s" % result.delicious_count)
     except KeyError:
         exc_type, exc_value = sys.exc_info()[0], sys.exc_info()[1]
         logging.info("key error [[%s, %s]] in %s" % (exc_type, exc_value, payload))
     return result
Ejemplo n.º 16
0
        def get(self):
            """Render the category list page from trending categories.

            Categories come from memcache when present; otherwise they are
            fetched with LinkCategory.get_trending() and, when non-empty,
            cached until one hour from now.  If that still yields None,
            LinkCategory.get_trending(24) is used instead.
            """
            logging.info('category list handler ')
            self.redirect_perm()
            self.get_user()
            logging.info('category screen_name %s' % self.screen_name)
            if self.avatar is None:
                self.avatar = '/static/images/noavatar.png'

            memcache_key = 'category_list'
            categories = memcache.get(memcache_key)
            if categories is not None:
                logging.info('picking up cats from cache')
            else:
                logging.info('fetching trending cats')
                categories = LinkCategory.get_trending()
                if categories:
                    # Expiry is passed as an absolute timestamp.
                    next_hour = datetime.datetime.now() + datetime.timedelta(hours=1)
                    next_hour_ts = time.mktime(next_hour.timetuple())
                    memcache.set(memcache_key, categories, time=next_hour_ts)

            if categories is None:
                categories = LinkCategory.get_trending(24)
            else:
                logging.info("got %s categories" % len(categories))
            template_variables = {
                'page_footer': PageUtils.get_footer(),
                'user': self.screen_name,
                'logout_url': '/account/logout',
                'avatar': self.avatar,
                'categories': LinkUtils.getCategoryListHTML(categories),
            }
            path = os.path.join(os.path.dirname(__file__), 'templates/category_list.html')
            self.response.headers["Content-Type"] = "text/html; charset=utf-8"
            self.response.out.write(template.render(path, template_variables))
Ejemplo n.º 17
0
        def post(self):
            """Broadcast a newly saved story to web and XMPP subscribers.

            Reads the story fields from the request, loads the SessionModel
            named by ``user_id``, sends a JSON description of the story through
            BroadcastMessage and a Message through ``xmpp_handler``.  Requests
            without a ``user_id`` or with an unknown one are skipped.
            """
            title = self.request.get('title', None)
            link = self.request.get('link', None)
            domain = self.request.get('domain', None)
            user_id = self.request.get('user_id', None)
            updated = self.request.get('updated', None)
            embeded = self.request.get('e', None)
            link_category = self.request.get('link_category', None)
            raw_subscribers = self.request.get('subscribers', None)
            # simplejson.loads(None) raises; a missing list means no subscribers.
            subscribers = simplejson.loads(raw_subscribers) if raw_subscribers else []

            if not user_id:
                # db.Key() cannot be built without an encoded key.
                logging.info('can\'t determine user by id: %s' % user_id)
                return
            user = SessionModel.gql('WHERE __key__ = :1', db.Key(user_id)).get()
            if user is None:
                logging.info('can\'t determine user by id: %s' % user_id)
                return

            broadcaster = BroadcastMessage()
            userUtil = UserUtils()
            message = Message(title=title, link=link, domain=domain)
            logging.info('user %s' % user.instaright_account)
            avatar = userUtil.getAvatar(user.instaright_account)
            logging.info('avatar %s' % avatar)
            messageAsJSON = [{'u': {
                'id': user_id,
                't': title,
                'ol': link,
                'l': LinkUtils.generate_instaright_link(user.url_encode26, LinkUtils.make_title(title)),
                'd': domain,
                'dd': LinkUtils.generate_domain_link(domain),
                'a': avatar,
                'u': updated,
                'source': user.client,
                'lc': link_category,
                'html_lc': LinkUtils.getLinkCategoryHTML(user),
                'e': embeded,
                'n': int(time.mktime(datetime.datetime.now().timetuple())),
            }}]
            logging.info('sending message %s ' % messageAsJSON)
            broadcaster.send_message(messageAsJSON)
            xmpp_handler.send_message(subscribers, message)
Ejemplo n.º 18
0
    def post(self):
        """Score a saved URL and queue a tweet for it when it is popular enough.

        Collects link data for the ``url`` request parameter, scales the
        score and the tweet margin when a Klout score is available for
        ``user``, and, if the score beats the margin and the link has not
        been shared yet, mails a notice and adds a task to "twit-queue".
        The link data is stored through LinkHandler.update_link, except when
        the generated tweet is empty or the URL is a root domain.
        """
        url = self.request.get("url", None)
        user = self.request.get("user", None)
        title = self.request.get("title", None)

        if url is None:
            logging.info("no url detected. skipping...")
            return
        count = 1
        url = urllib2.unquote(url)
        # Hash only after the None check, and from the unquoted URL: that is
        # the URL handed to update_link() below, which presumably hashes it
        # the same way when storing the link.
        url_hash = LinkUtils.getUrlHash(url)
        domain = RequestUtils.getDomain(url)
        if not domain:
            self.response.out.write("not url: %s skipping!\n" % url)
            return
        if domain in self.skip_domains:
            logging.info("filering out %s" % url)
            return
        lu = LinkUtils()
        link = lu.getAllData(url, count)
        logging.info("link overall score: %s" % link.overall_score)
        existingLink = None
        try:
            existingLink = Links.gql("WHERE url_hash = :1", url_hash).get()
            if existingLink is None:
                existingLink = Links.gql("WHERE url = :1", url).get()
        except BadValueError:
            logging.info("bad value url %s" % url)
        klout_score = UserUtils.getKloutScore(user, self.klout_api_key)
        share_margin = self.tw_margin
        if klout_score is not None:
            link.overall_score = link.overall_score * int(klout_score)
            logging.info("adjusted overall score %s" % link.overall_score)
            share_margin = share_margin * self.klout_correction
            logging.info("adjusting twit margin: %s" % share_margin)

        logging.info("link score %s tweet margin %s ( existing %s )" % (link.overall_score, share_margin, existingLink))
        if link.overall_score > share_margin and (existingLink is None or not existingLink.shared):
            t = Twit()
            t.generate_content(link, title, "")
            # skip tweets is text emtpy and for root domains
            if t.text is None or LinkUtils.isRootDomain(link.url):
                logging.info("twit with no body. aborting")
                return
            # Only logged: the task below is queued without an eta.
            execute_time = TaskUtil.execution_time()
            logging.info("scheduling tweet for %s" % str(execute_time))
            mail.send_mail(
                sender="*****@*****.**",
                to="*****@*****.**",
                subject="Twit to queue!",
                html="Twitt: %s <br> score: %s" % (t.text, link.overall_score),
                body="Twitt: %s <br> score: %s" % (t.text[:500], link.overall_score),
            )

            taskqueue.add(url="/util/twitter/twit/task", queue_name="twit-queue", params={"twit": t.text})
            # update article shared status
            if existingLink is not None:
                existingLink.shared = True
                existingLink.put()
            logging.info("updated link share status")
        else:
            logging.info("not scheduled for tweeting")
        lh = LinkHandler()
        lh.update_link(url, link)
Ejemplo n.º 19
0
 def update_link(self, url, link):
     """Store ``link``'s statistics, updating the existing entity if any.

     An existing Links entity (matched by URL hash, then by URL) gets the
     counters, excerpt, categories, domain, title and score of ``link``; its
     ``shared`` flag is cleared when it had no score or the score grew by at
     least 20%.  Otherwise ``link`` itself is written, retrying on datastore
     timeouts with a doubling delay until the request deadline is hit.

     Args:
         url: URL the statistics belong to.
         link: Links-like object carrying the freshly collected values.
     """
     existingLink = None
     url_hash = LinkUtils.getUrlHash(url)
     link.url_hash = url_hash
     # qfix for title TODO: find proper solution
     if link.title is not None:
         link.title = link.title.strip()[:199]
     try:
         existingLink = Links.gql("WHERE url_hash  = :1", url_hash).get()
         if existingLink is None:
             existingLink = Links.gql("WHERE url = :1", url).get()
     except Exception:
         logging.info("bad value for url %s" % url)
     if existingLink is not None:
         existingLink.date_updated = link.date_updated
         existingLink.influence_score = link.influence_score
         existingLink.instapaper_count = link.instapaper_count
         existingLink.instaright_count = link.instaright_count
         existingLink.redditups = link.redditups
         existingLink.redditdowns = link.redditdowns
         existingLink.tweets = link.tweets
         existingLink.diggs = link.diggs
         existingLink.excerpt = link.excerpt
         existingLink.categories = link.categories
         existingLink.delicious_count = link.delicious_count
         existingLink.facebook_like = link.facebook_like
         existingLink.domain = link.domain
         if existingLink.url_hash is None:
             existingLink.url_hash = url_hash
         if link.title is not None:
             # Already stripped and truncated above.
             existingLink.title = link.title
         # if increase in score is more then 20%
         # float() keeps integer scores from being floor-divided, which would
         # hide every increase smaller than 100%.
         if (
             existingLink.overall_score is None
             or existingLink.overall_score == 0
             or float(link.overall_score) / existingLink.overall_score >= 1.2
         ):
             existingLink.shared = False
         existingLink.overall_score = link.overall_score
         existingLink.put()
     else:
         # greater probability for db timeout of new links
         # The delay starts at 100 ms and doubles after each timeout.
         timeout_ms = 100
         try:
             while True:
                 try:
                     link.put()
                     break
                 except datastore_errors.Timeout:
                     # time.sleep() takes seconds, not milliseconds.
                     time.sleep(timeout_ms / 1000.0)
                     timeout_ms *= 2
         except apiproxy_errors.DeadlineExceededError:
             logging.info("run out of retries for writing to db")
     logging.info(
         "url %s : influence_score %s, instapaper_count %s, redditups %s, redditdowns %s, tweets %s, diggs %s, delicious count %s facebook like %s"
         % (
             url,
             link.influence_score,
             link.instapaper_count,
             link.redditups,
             link.redditdowns,
             link.tweets,
             link.diggs,
             link.delicious_count,
             link.facebook_like,
         )
     )
Ejemplo n.º 20
0
        def post(self):
            """Categorize a link through the Alchemy API and store the result.

            Reads the ``url`` request parameter, calls the Alchemy
            ``URLGetCategory`` endpoint and, when the returned score is above
            0.5, splits the category on ``_`` and merges the parts with the
            categories cached in memcache and those stored on the ``Links``
            entity.  Up to four merged categories are cached, one
            '/category/stream' task is enqueued per category, and a
            ``LinkCategory`` row is updated or created for each.  Finally the
            ``Links`` entity is saved with the url, its hash and the detected
            language.

            Returns early without saving anything when the url is missing,
            the API yields no JSON, or no ``SessionModel`` exists for the url.
            """
            url = self.request.get('url', None)
            # Check the parameter before hashing/quoting it: nothing below is
            # meaningful without a url.
            if url is None:
                logging.info('no link in request. skipping')
                return
            url_hash = LinkUtils.getUrlHash(url)
            category_api = 'http://access.alchemyapi.com/calls/url/URLGetCategory?apikey=%s&url=%s&outputMode=json' % (self.alchemy_key, urllib2.quote(url.encode('utf-8')))
            logging.info('trying to fetch shared count info %s' % category_api)

            link = None
            try:
                link = Links.gql('WHERE url_hash = :1', url_hash).get()
                if link is None:
                    # Fall back to the raw url for entities stored without a hash.
                    link = Links.gql('WHERE url = :1', url).get()
            except BadValueError:
                logging.info('url property too long')
            if link is None:
                link = Links()
            else:
                link.date_updated = datetime.datetime.now().date()

            api_response = LinkUtils.getJsonFromApi(category_api)
            if api_response is None:
                logging.info('alchemy api returned no category.skipping')
                return
            try:
                language = api_response['language']
                category = api_response['category']
                score = Cast.toFloat(api_response['score'], 0)
                if score is not None and score > 0.5 and category is not None:
                    logging.info('category %s score %s' % (category, score))
                    cats = category.split("_")
                    memcache_key = url_hash + '_category'
                    current_categories = memcache.get(memcache_key)
                    if current_categories is not None:
                        logging.info('merging with existing cats %s' % current_categories)
                        merge_cat = []
                        merge_cat.extend(current_categories)
                        merge_cat.extend(cats)
                    else:
                        merge_cat = cats

                    model = None
                    try:
                        # Look up by hash first (bind url_hash, not url), then
                        # fall back to the raw url.
                        model = SessionModel.gql('WHERE url_hash = :1 order by date desc', url_hash).get()
                        if model is None:
                            model = SessionModel.gql('WHERE url = :1 order by date desc', url).get()
                    except BadValueError:
                        logging.info('url too long ... %s' % url)
                    if model is None:
                        logging.info('model not defined ... skipping')
                        return

                    link_detail = None
                    try:
                        link_detail = Links.gql('WHERE url_hash = :1', url_hash).get()
                        if link_detail is None:
                            link_detail = Links.gql('WHERE url = :1', url).get()
                    except BadValueError:
                        # Same guard as the other url lookups: an over-long
                        # url must not abort the whole request.
                        logging.info('url too long ... %s' % url)
                    if link_detail is not None and link_detail.categories is not None:
                        logging.info('category found from link details %s' % link_detail.categories)
                        # NOTE(review): eval() on a stored string executes
                        # arbitrary code if that value can ever be influenced
                        # from outside; consider a literal-only parser once the
                        # stored format is confirmed.
                        delic_cats = eval(link_detail.categories)
                        merge_cat.extend(delic_cats)

                    merge_cat = set(merge_cat)
                    logging.info('caching cats %s for url %s' % (merge_cat, url))
                    memcache.set(memcache_key, list(merge_cat)[:4])

                    for c in merge_cat:
                        taskqueue.add(queue_name='message-broadcast-queue', url='/category/stream', params={'category': c, 'url': url_hash})
                        existing = LinkCategory.gql('WHERE url_hash = :1 and category = :2', url_hash, c).get()
                        if existing is None:
                            existing = LinkCategory.gql('WHERE url = :1 and category = :2', url, c).get()
                        if existing is not None:
                            existing.updated = datetime.datetime.now()
                            if existing.url_hash is None:
                                # Backfill the hash on rows that only had a url.
                                existing.url_hash = url_hash
                            existing.put()
                            logging.info('updated exisitng url(%s) category(%s) update time %s' % (url, c, existing.updated))
                        else:
                            logging.info('new pair: url%s) category(%s) ' % (url, c))
                            link_category = LinkCategory()
                            link_category.url = url
                            link_category.url_hash = url_hash
                            link_category.category = c
                            # model is always set here; a missing model returned above.
                            link_category.model_details = model.key()
                            link_category.put()

                if language is not None:
                    link.language = language
                link.url = url
                link.url_hash = url_hash
                link.put()
            except KeyError:
                # The API response lacked one of the expected fields.
                e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
                logging.info('key error [[%s, %s]] in %s' % (e0, e1, api_response))
Ejemplo n.º 21
0
        def get(self, category):
            """Write the latest links of a category as a JSON feed.

            With ``format=json`` the 50 most recently updated ``LinkCategory``
            rows for ``category`` are fetched, rows without ``model_details``
            are dropped, and the first ten are serialized.  Any other format
            yields the placeholder body ``[{}]``.  Nothing is written when no
            category is given.
            """
            feed_format = self.request.get('format', None)
            # Covers None, 0 and the empty string, matching the emptiness
            # check used by the domain feed handler.
            if not category:
                logging.info('not category in request. return empty')
                return
            if feed_format == 'json':
                logging.info('catefory %s json feed' % category)
                userUtil = UserUtils()
                allentries = LinkCategory.gql('WHERE category = :1 order by updated desc', category).fetch(50)
                entries = [e for e in allentries if hasattr(e, 'model_details') and e.model_details is not None]
                entries = entries[:10]

                def serialize(o):
                    # Called by simplejson for each LinkCategory entity.
                    details = o.model_details
                    return {'u': {
                        'id': str(details.key()),
                        't': unicode(details.title),
                        'dd': LinkUtils.generate_domain_link(details.domain),
                        'l': LinkUtils.generate_instaright_link(details.url_encode26, LinkUtils.make_title(details.title), details.url),
                        'd': details.domain,
                        'user': urllib.unquote(details.instaright_account),
                        'source': details.client,
                        'html_lc': LinkUtils.getLinkCategoryHTML(o),
                        # %H (24-hour clock): %I would make afternoon
                        # timestamps indistinguishable from morning ones.
                        'u': o.updated.strftime("%Y-%m-%dT%H:%M:%SZ"),
                        'a': userUtil.getAvatar(details.instaright_account),
                        'ol': o.url,
                        'c': category,
                        'lc': LinkUtils.getLinkCategory(details),
                    }}

                self.response.headers['Content-Type'] = "application/json"
                self.response.out.write(simplejson.dumps(entries, default=serialize))
                return
            self.response.headers['Content-Type'] = "application/json"
            self.response.out.write("[{}]")
Ejemplo n.º 22
0
        def post(self):
            """Persist a saved link as a SessionModel and fan out follow-up tasks.

            Reads the link details from the request, resolves a title (request
            value, then the title from ``LinkUtils.getLinkInfo``, then
            ``LinkUtils.getLinkTitle``), updates the user's link and domain
            scores, enqueues the badge, traction and recommendation tasks,
            bumps the global "url" counter and derives the encoded id from it.
            The user's SessionModel for this url hash dated after today's date
            is updated if it exists; otherwise a new one is created.

            Datastore timeouts on save are retried with exponential backoff;
            ``BadValueError`` and ``DeadlineExceededError`` are logged and
            swallowed.
            """
            user = self.request.get('user', None)
            user = urllib.unquote(user)
            url = self.request.get('url', None)
            domain = self.request.get('domain', None)
            title = self.request.get('title', None)
            if not RequestUtils.checkUrl(url):
                logging.info('skipping since url is not good!')
                return
            lu = LinkUtils()
            link_info = lu.getLinkInfo(url)
            description = link_info["d"]
            embeded = link_info["e"]
            logging.info('got post title %s' % title)
            title_new = link_info["t"]
            if title is None and title_new is not None and len(title_new) > 0:
                title = title_new
            # Some clients send the literal strings 'None'/'null' for a missing title.
            if title is None or title == 'None' or title == 'null':
                title = LinkUtils.getLinkTitle(url)
            if title is not None:
                title = title[:199]
            logging.info('final link title %s' % title)
            logging.info("link info desc: %s embede: %s" % (description, embeded))
            version = self.request.get('version', None)
            client = self.request.get('client', None)
            selection = self.request.get('selection', None)
            user_agent = self.request.get('user_agent', None)

            UserScoreUtility.updateLinkScore(user, url)
            UserScoreUtility.updateDomainScore(user, domain)

            taskqueue.add(url='/user/badge/task', queue_name='badge-queue', params={'url': url, 'domain': domain, 'user': user, 'version': version, 'client': client})
            taskqueue.add(url='/link/traction/task', queue_name='link-queue', params={'url': url, 'user': user, 'title': title})
            taskqueue.add(url='/link/recommendation/task', queue_name='default', params={'url': url})

            name = "url"
            generic_counter.increment(name)
            url_cnt = generic_counter.get_count(name)
            logging.info("total url count %s " % url_cnt)
            e = EncodeUtils()
            enbased = e.encode(url_cnt)
            url_encode26 = e.enbase(enbased)
            logging.info("url encode: %s and enbase : %s" % (enbased, url_encode26))
            url_hash = LinkUtils.getUrlHash(url)
            today = datetime.datetime.now().date()
            model = SessionModel.gql('WHERE instaright_account = :1 and url_hash = :2 and date > :3', user, url_hash, today).get()
            new_entity = False
            if model is None:
                logging.info('did not find save dafined by: %s %s for date %s', user, url, str(today))
                model = SessionModel()
                new_entity = True
            else:
                logging.info('existing url(key %s) updating certain params' % str(model.key()))
            logging.info('link: %s title: %s' % (url, title))
            try:
                #remove for local testing
                model.ip = self.request.remote_addr
                model.instaright_account = user
                model.date = datetime.datetime.now()
                if new_entity:
                    # Identity fields are only set once, on creation.
                    model.url = url
                    model.url_hash = url_hash
                    model.url_counter_id = url_cnt
                    model.url_encode26 = url_encode26
                model.title = title
                model.user_agent = user_agent
                model.domain = domain
                model.short_link = None
                model.feed_link = None
                model.version = version
                model.client = client
                model.selection = selection
                model.embeded = embeded
                # Backoff state lives outside the loop so doubling takes effect.
                timeout_ms = 100
                while True:
                    try:
                        model.put()
                        break
                    except datastore_errors.Timeout:
                        logging.info('model save timeout retrying in %s' % timeout_ms)
                        # time.sleep takes seconds; the delay is tracked in ms.
                        time.sleep(timeout_ms / 1000.0)
                        timeout_ms *= 2
                logging.info('send link : url_hash %s title %s user_id %s updated %s client: %s' % (model.url_hash, model.title, str(model.key()), str(model.date), model.client))
            except (BadValueError, apiproxy_errors.DeadlineExceededError):
                # A tuple is required to catch both types; the bare
                # "except A, B" form only catches A and binds the error to B.
                e0, e1 = sys.exc_info()[0], sys.exc_info()[1]
                logging.error('error while saving url %s ( %s, %s)' % (url, e0, e1))
Ejemplo n.º 23
0
        def get(self, domain):
            """Write the latest saved links of a domain as a JSON feed.

            With ``format=json`` the ten most recent ``SessionModel`` entities
            for ``domain`` are serialized.  Any other format yields the
            placeholder body ``[{}]``.  Nothing is written when no domain is
            given.
            """
            feed_format = self.request.get('format', None)
            if not domain:
                logging.info('no domain in request. return empty')
                return
            if feed_format == 'json':
                logging.info('domain %s json feed' % domain)
                userUtil = UserUtils()
                entries = SessionModel.gql('WHERE domain = :1 order by date desc', domain).fetch(10)

                def serialize(o):
                    # Called by simplejson for each SessionModel entity.
                    return {'u': {
                        'id': str(o.key()),
                        't': unicode(o.title),
                        'l': LinkUtils.generate_instaright_link(o.url_encode26, LinkUtils.make_title(o.title), o.url),
                        'user': urllib.unquote(o.instaright_account),
                        'source': o.client,
                        'html_lc': LinkUtils.getLinkCategoryHTML(o),
                        'd': o.domain,
                        'lc': LinkUtils.getLinkCategory(o),
                        'dd': LinkUtils.generate_domain_link(o.domain),
                        # %H (24-hour clock): %I would make afternoon
                        # timestamps indistinguishable from morning ones.
                        'u': o.date.strftime("%Y-%m-%dT%H:%M:%SZ"),
                        'a': userUtil.getAvatar(o.instaright_account),
                        'ol': o.url,
                    }}

                self.response.headers['Content-Type'] = "application/json"
                #TODO insert categories for domain's view
                self.response.out.write(simplejson.dumps(entries, default=serialize))
                return
            self.response.headers['Content-Type'] = "application/json"
            self.response.out.write("[{}]")