from datetime import datetime, timedelta

from diggly.util.diggly_threads import FuncThread
from django.db.models import Max, Q
from diggly.models import Topic, FeaturedTopics, TopicRedirect
from diggly.util.wikipediaAPI.wiki_api import WikipediaHelper
from bs4 import BeautifulSoup
import requests

# Constants; to be moved to another file.
TRENDING_TOPICS_URL = "http://wikipedia.trending.eu/en/__[404]24/"
TRENDING_TOPICS_BACKUP_URL = "http://tools.wmflabs.org/wikitrends/english-most-visited-today.html"
GET_PAGE_ID_URL = "http://rack36.cs.drexel.edu/getpageid/?q="

wiki_help = WikipediaHelper()


def update_featured_object(topic):
    # Get the featured topics instance.
    now = datetime.now()
    earlier_recent = timedelta(minutes=15)   # within the last 15 minutes (applies to recent topics)
    earlier_trending = timedelta(days=1)     # within the last day (applies to trending topics)

    topic.visit_counter += 1
    topic.save()

    try:
        featured_topics = FeaturedTopics.objects.all()[0]
        recent_topics = featured_topics.recent_topics
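# A minimal usage sketch (an assumption, not part of the original source): the
# caller is expected to pass a saved Topic instance, for example looked up by
# primary key in a view. "topic_pk" is a hypothetical name for illustration.
#
#     topic = Topic.objects.get(pk=topic_pk)
#     update_featured_object(topic)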
import os
import requests

# Text_Process, TopicManager, TopicLinkManager, WikipediaHelper and the
# request-format constants used below (eng_entity, jpedia_base_url,
# pageid_convert_url, r_pageid, r_title, arg_sep, title_sep) are assumed to
# be defined elsewhere in the diggly package.


class JsonPediaManager(object):
    # Previous class-level defaults, kept for reference:
    #desc_len = 6
    #summ_len = 1
    #t_processor = Text_Process(desc_len, summ_len)
    #t_mgt = TopicManager()
    #tl_mgt = TopicLinkManager()
    #wk_mgt = WikipediaHelper(description_len, summary_len)

    def __init__(self, desc_len, summ_len):
        # __init__ must be an instance method; the stray @classmethod
        # decorator that was on this method has been dropped.
        self.desc_length = desc_len
        self.summ_length = summ_len
        self.t_processor = Text_Process(self.desc_length, self.summ_length)
        self.wk_mgt = WikipediaHelper(self.desc_length, self.summ_length)
        self.t_creator = TopicManager()
        self.tl_creator = TopicLinkManager()

    def get_article(self, r_args):
        nlinks = self.__count_articles(r_args)
        titles = self.__get_article_titles(r_args, nlinks)
        print "TITLES -->", titles
        for a_title in titles:
            r_entity = eng_entity.format(a_title)
            r_url = jpedia_base_url.format(r_entity)
            self.__fetch_article(r_url)

    def __fetch_article(self, url):
        print "\nURL --->\n", url
        headers = {
            "X-Mashape-Key": os.environ['MASHAPE_KEY'],
            "Accept": "application/json"
        }
        resp = requests.get(url, headers=headers)
        print "RESP STATUS CODE -->", resp.status_code
        print "RESP -->", resp.text
        if resp.status_code != 200:
            # TODO: handle this more gracefully. requests has no ApiError;
            # raise its HTTPError instead.
            raise requests.HTTPError(
                'GET: Jsonpedia api request error\n{}'.format(resp.status_code))
        json_response = resp.json()
        pid = json_response['revid']
        print "JSONPEDIA PID -->", pid

    def __get_article_titles(self, r_args, nlinks):
        resources = self.__format_req(r_args)
        # Assume we receive pageids, e.g. resources = "pageids=26903|26521".
        r_url = pageid_convert_url.format(resources)
        retrieveflag = "title"
        titles = []
        pages = self.wk_mgt.request_pages_plain(r_url, False)
        #print "PAGES IN JSONPE -->", pages
        for pid, page in pages.iteritems():
            a_title = page[retrieveflag].strip()
            titles.append(a_title.replace(" ", title_sep))
        #return arg_sep.join(titles)
        return titles

    def __count_articles(self, r_args):
        if self.__is_seq(r_args):
            return len(r_args)
        # Default: a single article.
        return 1

    def __format_req(self, r_args):
        if self.__is_seq(r_args):  # arg is a list of pageids or titles
            if self.__is_pageid(r_args[0]):
                return r_pageid.format(arg_sep.join(r_args))
            return r_title.format(arg_sep.join(r_args))
        if self.__is_pageid(r_args):
            return r_pageid.format(r_args)
        # Default: a single page title.
        return r_title.format(r_args)

    def __is_pageid(self, arg):
        try:
            int(arg)
            return True
        except ValueError:
            return False

    def __is_seq(self, arg):
        # A sequence but not a string: strings have .strip(), while real
        # sequences expose __getitem__ or __iter__. Parentheses make the
        # and/or precedence explicit.
        return ((not hasattr(arg, "strip") and hasattr(arg, "__getitem__"))
                or hasattr(arg, "__iter__"))
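# A minimal usage sketch (an assumption, not part of the original module):
# the lengths 6 and 1 mirror the commented-out class defaults above, and the
# page ids are the illustrative ones from the __get_article_titles comment.
# Running this requires the MASHAPE_KEY environment variable that
# __fetch_article() reads.
if __name__ == "__main__":
    jp_mgr = JsonPediaManager(6, 1)
    jp_mgr.get_article("26903")               # a single page id
    jp_mgr.get_article(["26903", "26521"])    # a list of page ids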