def get(self, url):
    """Render the admin page named by *url*.

    Admin users get the requested webadmin template (or index.html when no
    page name is given); signed-in non-admins get the unauthorized page;
    anonymous users are redirected to the login screen.
    """
    user = users.get_current_user()
    if user and users.is_current_user_admin():
        context = {'user': user.nickname()}
        context['logout_url'] = users.create_logout_url(self.request.uri)
        if url == 'algorithm.html':
            context['alg'] = model.getOrderingAlgorithmParams()
        elif url == 'background.html':
            activities = model.getActivities(False)
            for i, activity in enumerate(activities):
                activity.index = i
            # Placeholder row so the template can show a blank "new activity" form.
            newActivity = model.ActivityParams()
            newActivity.name = NEW_ACTIVITY_NAME
            newActivity.activity_load = 0
            newActivity.index = len(activities)
            newActivity.enabled = False
            newActivity.threshold_total[model.ActivityTypes.ITEM] = 0
            newActivity.threshold_time_sec[model.ActivityTypes.ITEM] = 0
            activities.append(newActivity)
            context['activities'] = activities
            context['timePeriod'] = activityManager.getActivityPeriod()
        elif url == 'publishers.html':
            context['publishers'] = model.getPublisherSites()
        elif url == 'paymentsconfig.html':
            context['paymentparams'] = model.getPaymentConfig()
        elif url == 'ligerpediaconfig.html':
            context['ligerpediaconfig'] = model.getLigerpediaConfig()
        elif url == 'defaultlinks.html':
            config = model.getDefaultLinksConfig()
            context['publisher'] = model.getPublisherSite(config.default_links_url)
            context['defaultlinksconfig'] = config
            items = defaultItemList.getOrderedItems()
            # Aggregate view count across all default items for the summary.
            views = 0
            for item in items:
                views += item.stats[model.StatType.VIEWS]
            context['items'] = items
            context['totalviews'] = views
        # Fall back to the dashboard when no page name was supplied.
        page = url if url else 'index.html'
        path = os.path.join(os.path.dirname(__file__), 'webadmin', page)
        self.response.out.write(template.render(path, context))
    elif user:
        # Signed in, but not an admin: show the unauthorized page with a
        # login link so they can switch accounts.
        context = {'user': user.nickname()}
        context['login_url'] = users.create_login_url(self.request.uri)
        path = os.path.join(os.path.dirname(__file__), 'webadmin', 'unauthorized.html')
        self.response.out.write(template.render(path, context))
    else:
        self.redirect(users.create_login_url(self.request.uri))
def updateLigerpediaConfig(self):
    """Persist the Ligerpedia settings submitted by the admin form, then
    redirect back to the config page with a status flag."""
    config = model.getLigerpediaConfig()
    # Map each form field onto its config attribute, coercing to int.
    for field, attr in (('links_total', 'embedly_request_links_total'),
                        ('timeout', 'embedly_request_timeout')):
        setattr(config, attr, int(self.request.get(field)))
    config.put()
    self.redirect('ligerpediaconfig.html?status=updated')
def copyWikiLinks(self, url_title):
    """Harvest a Wikipedia article's external links and fetch their embed.ly
    metadata for the publisher page at <host>/wiki/<url_title>/.

    First collects the article's external links into self.links_map (retrying
    once via opensearch if the title's casing was wrong), then issues rounds
    of up to 10 parallel embed.ly oEmbed RPCs until enough links succeed or
    the configured timeout elapses. Refreshes the cached default item list
    when at least one link was retrieved.
    """
    self.publisherUrl = self.request.host + "/wiki/" + url_title
    if self.publisherUrl[-1] != "/":
        self.publisherUrl += "/"
    self.publisherUrl = self.publisherUrl.lower()
    self.url_list = []
    self.errors = 0
    self.successes = 0
    self.exceptions = 0
    config = model.getLigerpediaConfig()
    config_timeout = config.embedly_request_timeout
    # BUG FIX: keep the numeric limit for the success-count comparison below.
    # The original compared self.successes against the *string* form, which
    # under Python 2 silently made the cap a no-op (int < str is always True).
    links_total = config.embedly_request_links_total
    config_ellimit = str(links_total)
    self.links_map.clear()
    attempt = 0
    while attempt < 2 and len(self.links_map) < 1:
        # Query en.wikipedia.org's API for the article's external links (XML).
        # Wikipedia's ellimit can go up to 500; the embed.ly rounds below
        # issue 10 parallel requests of at most 20 links (200 total).
        url = 'http://en.wikipedia.org/w/api.php?action=query&prop=extlinks&ellimit=' + config_ellimit + '&format=xml&titles=' + url_title
        data = urllib2.urlopen(url).read()
        dom = minidom.parseString(data)
        for node in dom.getElementsByTagName('el'):
            # Ignore wiki-internal (non-http) links.
            extlink = node.firstChild.data
            if not extlink.lower().startswith("http"):
                continue
            self.links_map[extlink] = None
        attempt += 1
        if len(self.links_map) < 1 and attempt < 2:
            # Casing possibly incorrect -- search for the right article title.
            url = 'http://en.wikipedia.org/w/api.php?action=opensearch&search=' + urllib.quote_plus(url_title)
            search_results = json.loads(urllib2.urlopen(url).read())
            search_list = search_results[1]
            if len(search_list) > 0:
                url_title = search_list[0].replace(' ', '_')
            else:
                # Search did not return anything -- will not try any more.
                break
    if len(self.links_map) < 1:
        return
    api_url = 'http://api.embed.ly/1/oembed?'
    # Send a round of requests roughly every 2 seconds up to config_timeout.
    # embed.ly caches requests from earlier rounds, so retries can retrieve
    # them later if needed.
    attempt = 0
    while (attempt * 2) <= config_timeout and self.successes < links_total and self.successes < 20:
        unretrieved_links = self.getUnretreivedUrls()
        logging.info('requesting %d links from embedly' % (len(unretrieved_links)))
        # Spread the outstanding links evenly over 10 parallel RPCs.
        urls_per_request = math.ceil(len(unretrieved_links) / 10.0)
        rpcs = []
        links_it = iter(unretrieved_links)
        iteration_stopped = False
        for asynch_request in range(10):
            rpc = urlfetch.create_rpc(deadline=2)
            rpc.callback = self.create_callback(rpc)
            url_list = ""
            j = 0
            try:
                # The last RPC (index 9) takes every remaining link.
                while not (j == urls_per_request and asynch_request < 9):
                    link = str(links_it.next())
                    if len(url_list) > 0:
                        url_list += ","
                    url_list += urllib.quote_plus(link)
                    j += 1
            except StopIteration:
                iteration_stopped = True
            urlfetch.make_fetch_call(rpc, api_url + "key=863cd350298b11e091d0404058088959&urls=" + url_list)
            logging.info('ASYNCH REQUEST %d, requesting %d links' % (asynch_request, j))
            logging.info('ASYNCH REQUEST: %s ' % api_url + "key=863cd350298b11e091d0404058088959&urls=" + url_list)
            rpcs.append(rpc)
            if iteration_stopped:
                break
        # Finish all RPCs, and let callbacks process the results.
        for rpc in rpcs:
            rpc.wait()
        attempt += 1
    logging.info('successes / errors / exceptions: %d %d %d' % (self.successes, self.errors, self.exceptions))
    if self.successes > 0:
        itemList.refreshCacheForDefaultOrderedItems(self.publisherUrl)