def GET(self):
    result = {}
    action = web.input(action=None)['action']
    if action == 'quota':
        results = urlfetch.fetch('https://api.stackexchange.com/2.0/info?site=stackoverflow&key=%s' % api_key,
                                 headers={'User-Agent': 'StackPrinter'},
                                 deadline=10)
        response = simplejson.loads(results.content)
        result['result'] = response
    elif action == 'quotaauth':
        results = urlfetch.fetch('https://api.stackexchange.com/2.0/info?site=stackoverflow&key=%s&access_token=%s' % (api_key, TokenManager.get_auth_token()),
                                 headers={'User-Agent': 'StackPrinter'},
                                 deadline=10)
        response = simplejson.loads(results.content)
        result['result'] = response
    elif action == 'authkey':
        result['result'] = TokenManager.get_auth_token()
    elif action == 'memcachestats':
        result = memcache.get_stats()
    elif action == 'memcacheflush':
        result['result'] = memcache.flush_all()
    elif action == 'normalize':
        deferred.defer(worker.deferred_normalize_printed_question)
        result['result'] = True
    elif action == 'delete':
        service = web.input(service=None)['service']
        question_id = web.input(question_id=None)['question_id']
        result['printed_question_deletion'] = dbquestion.delete_printed_question(question_id, service)
        result['question_deletion'] = dbquestion.delete_question(question_id, service)
        result['answers_deletion'] = dbquestion.delete_answers(question_id, service)
    return render.admin(result)
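# Supported admin actions for the GET handler above (assumption: the handler
# is mounted on an admin URL in the web.py application; the path itself is not
# shown in this file). The 'action' values come directly from the handler code:
#
#   ?action=quota          -> remaining API quota for the application key
#   ?action=quotaauth      -> quota for the key plus the stored access token
#   ?action=authkey        -> the stored auth token
#   ?action=memcachestats  -> memcache statistics
#   ?action=memcacheflush  -> flush memcache
#   ?action=normalize      -> enqueue the normalization worker
#   ?action=delete&service=<site>&question_id=<id>
#                          -> delete a stored question, its printed copy and its answers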
def get_sites():
    """
    Get a list of Stack Exchange sites using the Stackauth service
    """
    results = __gae_fetch('https://api.stackexchange.com/%s/sites?pagesize=999&key=%s' % (__api_version, api_key))
    response = simplejson.loads(results.content)
    return response
def invalidate_auth_token(auth_token):
    """
    Invalidate the given auth_token
    """
    results = __gae_fetch('https://api.stackexchange.com/%s/access-tokens/%s/invalidate' % (__api_version, auth_token))
    response = simplejson.loads(results.content)
    return response
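# Usage sketch for get_sites() above. Assumption: the response follows the
# Stack Exchange API 2.x common wrapper, i.e. sites are returned in an "items"
# list whose entries carry a "name" field; list_site_names() is an illustrative
# helper, not part of the original module.
def list_site_names():
    response = get_sites()
    return [site.get('name') for site in response.get('items', [])]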
def check_link_weight(link):
    base_link = get_base_link(link)
    if base_link == 'http://stackoverflow.com':
        question_id = ContentDiscoverer(link).get_id()
        if question_id:
            results = urlfetch.fetch('https://api.stackexchange.com/2.0/questions/%s?order=desc&sort=activity&site=stackoverflow&filter=!-T4d7xQ6' % question_id,
                                     headers={'User-Agent': 'StackPrinter'},
                                     deadline=10)
            response = simplejson.loads(results.content)
            question = response['items'][0]
            return question['score'] >= 3
        else:
            return False
    return True
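# Usage sketch for check_link_weight() above. Assumption: the function guards
# user-submitted links somewhere in the application; accept_submission() is an
# illustrative caller, not part of the original module.
def accept_submission(link):
    # Stack Overflow questions need a score of at least 3 to pass;
    # links to other hosts are accepted unconditionally.
    if check_link_weight(link):
        return link
    return None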
def handle_response(results, url=None):
    """
    Load results in JSON
    """
    # When the request is throttled, the API simply closes the door without any response
    try:
        response = simplejson.loads(results.content)
    except simplejson.JSONDecodeError:
        raise ApiRequestError(url, CODE_API_ERROR_THROTTLING, API_ERROR_THROTTLING)
    if "error" in response:
        error = response["error"]
        code = error["code"]
        message = error["message"]
        raise ApiRequestError(url, code, message)
    return response
def fetch(self, path, results_key, **url_params):
    """
    Fetches all the results for a given path, where path is the API URL path.
    results_key is the key of the results list. If url_params is given, its
    key/value pairs are used to build the API query string.
    """
    base_url = "%s/%s/%s" % (self._name, self._version, path)
    params = {
        "key": self._api_key,
        "pagesize": self._page_size,
        "page": self._start_page
    }
    params.update(url_params)
    while True:
        query = urllib.urlencode(params)
        url = "%s?%s" % (base_url, query)
        data = self._get_response_data(url)
        response = json.loads(data)
        if "error" in response:
            error = response["error"]
            code = error["Code"]
            message = error["Message"]
            raise APIError(url, code, message)
        if results_key:
            results = response[results_key]
        else:
            results = response
        if len(results) < 1:
            break
        for result in results:
            yield result
        if len(results) < params["pagesize"]:
            break
        params["page"] += 1
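# Usage sketch for the paginated fetch() generator above. Assumptions: the
# method belongs to an API client object (here simply called "client"), and
# the "questions" path/results key plus the "tagged" parameter are illustrative
# values, not taken from this file.
def collect_question_titles(client):
    titles = []
    # fetch() yields one result dict at a time and keeps incrementing the
    # "page" parameter until a short (or empty) page comes back.
    for question in client.fetch("questions", "questions", tagged="python"):
        titles.append(question.get("title"))
    return titles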
def handle_response(results, url=None):
    """
    Load results in JSON
    """
    # When the request is throttled, the API simply closes the door without any response
    try:
        response = simplejson.loads(results.content)
    except ValueError:
        raise ApiRequestError(url, CODE_API_ERROR_THROTTLING, API_ERROR_THROTTLING)
    if "backoff" in response:
        logging.info('Backoff warning found! Value: %s Url: %s' % (response["backoff"], url))
        memcache.set('backoff', response["backoff"], response["backoff"])
    if "error_id" in response:
        error = response["error_name"]
        code = response["error_id"]
        message = response["error_message"]
        raise ApiRequestError(url, code, message)
    return response
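# Usage sketch for handle_response() above. Assumption: it is paired with the
# same urlfetch/User-Agent pattern used elsewhere in this file; fetch_info() is
# an illustrative helper, not part of the original module.
def fetch_info(key):
    url = 'https://api.stackexchange.com/2.0/info?site=stackoverflow&key=%s' % key
    results = urlfetch.fetch(url, headers={'User-Agent': 'StackPrinter'}, deadline=10)
    # Raises ApiRequestError on throttling or when the response carries an
    # error_id, and records any "backoff" value in memcache.
    return handle_response(results, url=url)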
def get_tags_of_user(self, username):
    """
    Retrieves user's public tags and their tag counts from Delicious.com.
    The tags represent a user's full public tagging vocabulary.

    DeliciousAPI uses the official JSON feed of the user. We could use
    RSS here, but the JSON feed has proven to be faster in practice.

    @param username: The Delicious.com username.
    @type username: str

    @return: Dictionary mapping tags to their tag counts.
    """
    tags = {}
    path = "/v2/json/tags/%s" % username
    data = self._query(path, host="feeds.delicious.com")
    if data:
        try:
            tags = simplejson.loads(data)
        except TypeError:
            pass
    return tags
def get_urls(self, tag=None, popular=True, max_urls=100, sleep_seconds=1):
    """
    Returns the list of recent URLs (of web documents) tagged with a given tag.

    This is very similar to parsing Delicious' RSS/JSON feeds directly,
    but this function will return up to 2,000 links compared to a maximum
    of 100 links when using the official feeds (with query parameter
    count=100).

    The returned list of links will be sorted by recency in descending order,
    i.e. newest items first.

    Note that even when setting max_urls, get_urls() cannot guarantee that
    it can retrieve *at least* this many URLs. It is really just an upper bound.

    @param tag: Retrieve links which have been tagged with the given tag.
        If tag is not set (default), links will be retrieved from the
        Delicious.com front page (aka "delicious hotlist").
    @type tag: unicode/str

    @param popular: If true (default), retrieve only popular links (i.e.
        /popular/<tag>). Otherwise, the most recent links tagged with
        the given tag will be retrieved (i.e. /tag/<tag>).

        As of January 2009, it seems that Delicious.com modified the list
        of popular tags to contain only up to a maximum of 15 URLs. This
        also means that setting max_urls to values larger than 15 will not
        change the results of get_urls(). So if you are interested in more
        URLs, set the "popular" parameter to false.

        Note that if you set popular to False, the returned list of URLs
        might contain duplicate items. This is due to the way Delicious.com
        creates its /tag/<tag> Web pages. So if you need a certain number
        of unique URLs, you have to take care of that in your own code.
    @type popular: bool

    @param max_urls: Retrieve at most max_urls links. The default is 100,
        which is the maximum number of links that can be retrieved by
        parsing the official JSON feeds. The maximum value of max_urls
        in practice is 2000 (currently). If it is set higher, Delicious
        will return the same links over and over again, giving lots of
        duplicate items.
    @type max_urls: int

    @param sleep_seconds: Optional, default: 1.
        Wait the specified number of seconds between subsequent queries in
        case that there are multiple pages of bookmarks for the given url.
        Must be greater than or equal to 1 to comply with Delicious.com's
        Terms of Use. See also parameter 'max_urls'.
    @type sleep_seconds: int

    @return: The list of recent URLs (of web documents) tagged with a given tag.
    """
    assert sleep_seconds >= 1
    urls = []
    path = None
    if tag is None or (tag is not None and max_urls > 0 and max_urls <= 100):
        # use official JSON feeds
        max_json_count = 100
        if tag:
            # tag-specific JSON feed
            if popular:
                path = "/v2/json/popular/%s?count=%d" % (tag, max_json_count)
            else:
                path = "/v2/json/tag/%s?count=%d" % (tag, max_json_count)
        else:
            # Delicious.com hotlist
            path = "/v2/json/?count=%d" % (max_json_count)
        data = self._query(path, host="feeds.delicious.com")
        if data:
            posts = []
            try:
                posts = simplejson.loads(data)
            except TypeError:
                pass
            for post in posts:
                # url
                try:
                    url = post['u']
                    if url:
                        urls.append(url)
                except KeyError:
                    pass
    else:
        # maximum number of urls/posts Delicious.com will display
        # per page on its website
        max_html_count = 100
        # maximum number of pages that Delicious.com will display;
        # currently, the maximum number of pages is 20. Delicious.com
        # allows to go beyond page 20 via pagination, but page N (for
        # N > 20) will always display the same content as page 20.
        max_html_pages = 20

        if popular:
            path = "/popular/%s?setcount=%d" % (tag, max_html_count)
        else:
            path = "/tag/%s?setcount=%d" % (tag, max_html_count)

        page_index = 1
        urls = []
        while path and page_index <= max_html_pages:
            data = self._query(path)
            path = None
            if data:
                # extract urls from current page
                soup = BeautifulSoup(data)
                links = soup.findAll("a", attrs={"class": re.compile(r"^taggedlink\s*")})
                for link in links:
                    try:
                        url = link['href']
                        if url:
                            urls.append(url)
                    except KeyError:
                        pass

                # check if there are multiple pages of urls
                soup = BeautifulSoup(data)
                paginations = soup.findAll("div", id="pagination")
                if paginations:
                    # find next path
                    nexts = paginations[0].findAll("a", attrs={"class": "pn next"})
                    if nexts and (max_urls == 0 or len(urls) < max_urls) and len(urls) > 0:
                        # e.g. /url/2bb293d594a93e77d45c2caaf120e1b1?show=all&page=2
                        path = nexts[0]['href']
                        path += "&setcount=%d" % max_html_count
                        page_index += 1
            # wait between queries to Delicious.com to be
            # compliant with its Terms of Use
            time.sleep(sleep_seconds)
    if max_urls > 0:
        return urls[:max_urls]
    else:
        return urls
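# Usage sketch for get_urls() above. Assumptions: the method lives on the
# DeliciousAPI class, and the no-argument construction shown here is
# hypothetical (the real class may require configuration such as a contact
# address or proxy settings); recent_python_links() is an illustrative helper.
def recent_python_links():
    api = DeliciousAPI()  # hypothetical construction
    # Popular pages are capped at roughly 15 links by Delicious.com, so ask
    # for the recent (non-popular) links when more URLs are needed.
    return api.get_urls(tag="python", popular=False, max_urls=200)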
def get_questions(self, api_site_parameter, page, **kwargs):
    return simplejson.loads(QUESTIONS)
def get_answers(self, question_id, api_site_parameter, **kwargs):
    if question_id != 0:
        return simplejson.loads(ANSWERS)
    else:
        return simplejson.loads(EMPTY_ANSWERS)
def get_answer(self, answer_id, api_site_parameter, **kwargs):
    if answer_id != 0:
        return simplejson.loads(ANSWER)
    else:
        return simplejson.loads(EMPTY_ANSWERS)
def get_questions_by_tags(self, tags, api_site_parameter, page, **kwargs):
    if tags == 'python':
        return simplejson.loads(QUESTIONS)
    if tags == 'atagthedoesnotexist':
        return simplejson.loads(EMPTY_QUESTIONS)
def get_tags(self, user_id, api_site_parameter, page, **kwargs):
    return simplejson.loads(TAGS)
def get_question(self, question_id, api_site_parameter, **kwargs):
    if question_id != 0:
        return simplejson.loads(QUESTION)
    else:
        return simplejson.loads(EMPTY_QUESTIONS)
def get_network(self, username):
    """
    Returns the user's list of followees and followers.

    Followees are users in his Delicious "network", i.e. those users whose
    bookmark streams he's subscribed to. Followers are his Delicious.com
    "fans", i.e. those users who have subscribed to the given user's
    bookmark stream.

    Example:

        A -------->       -------->  C
        D -------->   B   -------->  E
        F -------->       -------->  F

        followers                    followees
          of B                         of B

    Arrows from user A to user B denote that A has subscribed to B's
    bookmark stream, i.e. A is "following" or "tracking" B.

    Note that user F is both a followee and a follower of B, i.e. F tracks
    B and vice versa. In Delicious.com terms, F is called a "mutual fan" of B.

    Comparing this network concept to information retrieval, one could say
    that followers are incoming links and followees outgoing links of B.

    @param username: Delicious.com username for which network information
        is retrieved.
    @type username: unicode/str

    @return: Tuple of two lists ([<followees>], [<followers>]), where each
        list contains tuples of (username, tracking_since_timestamp).
        If a network is set as private, i.e. hidden from public view,
        (None, None) is returned. If a network is public but empty,
        ([], []) is returned.
    """
    assert username
    followees = followers = None

    # followees (network members)
    path = "/v2/json/networkmembers/%s" % username
    data = None
    try:
        data = self._query(path, host="feeds.delicious.com")
    except DeliciousForbiddenError:
        pass
    if data:
        followees = []

        users = []
        try:
            users = simplejson.loads(data)
        except TypeError:
            pass

        uname = tracking_since = None
        for user in users:
            # followee's username
            try:
                uname = user['user']
            except KeyError:
                pass
            # try to convert uname to Unicode
            if uname:
                try:
                    # we assume UTF-8 encoding
                    uname = uname.decode('utf-8')
                except UnicodeDecodeError:
                    pass
            # time when the given user started tracking this user
            try:
                tracking_since = datetime.datetime.strptime(user['dt'], "%Y-%m-%dT%H:%M:%SZ")
            except KeyError:
                pass
            if uname:
                followees.append((uname, tracking_since))

    # followers (network fans)
    path = "/v2/json/networkfans/%s" % username
    data = None
    try:
        data = self._query(path, host="feeds.delicious.com")
    except DeliciousForbiddenError:
        pass
    if data:
        followers = []

        users = []
        try:
            users = simplejson.loads(data)
        except TypeError:
            pass

        uname = tracking_since = None
        for user in users:
            # fan's username
            try:
                uname = user['user']
            except KeyError:
                pass
            # try to convert uname to Unicode
            if uname:
                try:
                    # we assume UTF-8 encoding
                    uname = uname.decode('utf-8')
                except UnicodeDecodeError:
                    pass
            # time when fan started tracking the given user
            try:
                tracking_since = datetime.datetime.strptime(user['dt'], "%Y-%m-%dT%H:%M:%SZ")
            except KeyError:
                pass
            if uname:
                followers.append((uname, tracking_since))

    return (followees, followers)
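# Usage sketch for get_network() above. Assumption: "api" is an instance of
# the class defining get_network(); mutual_fans() is an illustrative helper,
# not part of the original module.
def mutual_fans(api, username):
    followees, followers = api.get_network(username)
    if followees is None or followers is None:
        # the user's network is private
        return []
    followee_names = set(name for name, _ in followees)
    # a "mutual fan" both tracks and is tracked by the given user
    return [name for name, _ in followers if name in followee_names]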
def get_users(self, filter, api_site_parameter, **kwargs):
    return simplejson.loads(USERS)
def get_users_by_id(self, user_id, api_site_parameter, **kwargs):
    return simplejson.loads(USERS)
def get_url(self, url, max_bookmarks=50, sleep_seconds=1):
    """
    Returns a DeliciousURL instance representing the Delicious.com history of url.

    Generally, this method is what you want for getting title, bookmark, tag,
    and user information about a URL.

    Delicious only returns up to 50 bookmarks per URL. This means that
    we have to do subsequent queries plus parsing if we want to retrieve
    more than 50. Roughly speaking, the processing time of get_url()
    increases linearly with the number of 50-bookmarks-chunks; i.e.
    it will take 10 times longer to retrieve 500 bookmarks than 50.

    @param url: The URL of the web document to be queried for.
    @type url: str

    @param max_bookmarks: Optional, default: 50.
        See the documentation of get_bookmarks() for more information
        as get_url() uses get_bookmarks() to retrieve a url's
        bookmarking history.
    @type max_bookmarks: int

    @param sleep_seconds: Optional, default: 1.
        See the documentation of get_bookmarks() for more information
        as get_url() uses get_bookmarks() to retrieve a url's
        bookmarking history. sleep_seconds must be >= 1 to comply with
        Delicious.com's Terms of Use.
    @type sleep_seconds: int

    @return: DeliciousURL instance representing the Delicious.com history of url.
    """
    # we must wait at least 1 second between subsequent queries to
    # comply with Delicious.com's Terms of Use
    assert sleep_seconds >= 1

    document = DeliciousURL(url)

    m = hashlib.md5()
    m.update(url)
    hash = m.hexdigest()

    path = "/v2/json/urlinfo/%s" % hash
    data = self._query(path, host="feeds.delicious.com")
    if data:
        urlinfo = {}
        try:
            urlinfo = simplejson.loads(data)
            if urlinfo:
                urlinfo = urlinfo[0]
            else:
                urlinfo = {}
        except TypeError:
            pass
        try:
            document.title = urlinfo['title'] or u""
        except KeyError:
            pass
        try:
            top_tags = urlinfo['top_tags'] or {}
            if top_tags:
                document.top_tags = sorted(top_tags.iteritems(), key=itemgetter(1), reverse=True)
            else:
                document.top_tags = []
        except KeyError:
            pass
        try:
            document.total_bookmarks = int(urlinfo['total_posts'])
        except (KeyError, ValueError):
            pass
    document.bookmarks = self.get_bookmarks(url=url, max_bookmarks=max_bookmarks, sleep_seconds=sleep_seconds)
    return document
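# Usage sketch for get_url() above. Assumption: "api" is an instance of the
# class defining get_url(); summarize_url() is an illustrative helper, not
# part of the original module.
def summarize_url(api, url):
    doc = api.get_url(url, max_bookmarks=10)
    return {
        'title': doc.title,
        'total_bookmarks': doc.total_bookmarks,
        # top_tags is a list of (tag, count) pairs sorted by count
        'top_tags': doc.top_tags[:5],
    }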
def get_favorites_questions(self, user_id, api_site_parameter, page, **kwargs):
    return simplejson.loads(QUESTIONS)
def get_user(self, username, password=None, max_bookmarks=50, sleep_seconds=1):
    """Retrieves a user's bookmarks from Delicious.com.

    If a correct username AND password are supplied, a user's *full*
    bookmark collection (which also includes private bookmarks) is
    retrieved. Data communication is encrypted using SSL in this case.

    If no password is supplied, only the *public* bookmarks of the user
    are retrieved. Here, the parameter 'max_bookmarks' specifies how
    many public bookmarks will be retrieved (default: 50). Set the
    parameter to 0 to retrieve all public bookmarks.

    This function can be used to backup all of a user's bookmarks if
    called with a username and password.

    @param username: The Delicious.com username.
    @type username: str

    @param password: Optional, default: None.
        The user's Delicious.com password. If password is set,
        all communication with Delicious.com is SSL-encrypted.
    @type password: unicode/str

    @param max_bookmarks: Optional, default: 50.
        See the documentation of get_bookmarks() for more information
        as get_user() uses get_bookmarks() to retrieve a user's
        bookmarks. The parameter is NOT used when a password is
        specified because in this case the *full* bookmark collection
        of a user will be retrieved.
    @type max_bookmarks: int

    @param sleep_seconds: Optional, default: 1.
        See the documentation of get_bookmarks() for more information
        as get_user() uses get_bookmarks() to retrieve a user's
        bookmarks. sleep_seconds must be >= 1 to comply with
        Delicious.com's Terms of Use.
    @type sleep_seconds: int

    @return: DeliciousUser instance
    """
    assert username
    user = DeliciousUser(username)
    bookmarks = []
    if password:
        # We have username AND password, so we call
        # the official Delicious.com API.
        path = "/v1/posts/all"
        data = self._query(path, host="api.del.icio.us", use_ssl=True, user=username, password=password)
        if data:
            soup = BeautifulSoup(data)
            elements = soup.findAll("post")
            for element in elements:
                url = element["href"]
                title = element["description"] or u""
                comment = element["extended"] or u""
                tags = []
                if element["tag"]:
                    tags = element["tag"].split()
                timestamp = datetime.datetime.strptime(element["time"], "%Y-%m-%dT%H:%M:%SZ")
                bookmarks.append((url, tags, title, comment, timestamp))
            user.bookmarks = bookmarks
    else:
        # We have only the username, so we extract data from
        # the user's JSON feed. However, the feed is restricted
        # to the most recent public bookmarks of the user, which
        # is about 100 if any. So if we need more than 100, we start
        # scraping the Delicious.com website directly.
        if max_bookmarks > 0 and max_bookmarks <= 100:
            path = "/v2/json/%s/stackoverflow?count=100" % username
            data = self._query(path, host="feeds.delicious.com", user=username)
            if data:
                posts = []
                try:
                    posts = simplejson.loads(data)
                except TypeError:
                    pass

                url = timestamp = None
                title = comment = u""
                tags = []

                for post in posts:
                    # url
                    try:
                        url = post['u']
                    except KeyError:
                        pass
                    # title
                    try:
                        title = post['d']
                    except KeyError:
                        pass
                    # tags
                    try:
                        tags = post['t']
                    except KeyError:
                        pass
                    if not tags:
                        tags = [u"system:unfiled"]
                    # comment / notes
                    try:
                        comment = post['n']
                    except KeyError:
                        pass
                    # bookmark creation time
                    try:
                        timestamp = datetime.datetime.strptime(post['dt'], "%Y-%m-%dT%H:%M:%SZ")
                    except KeyError:
                        pass
                    bookmarks.append((url, tags, title, comment, timestamp))
                user.bookmarks = bookmarks[:max_bookmarks]
        else:
            # TODO: retrieve the first 100 bookmarks via JSON before
            # falling back to scraping the delicious.com website
            user.bookmarks = self.get_bookmarks(username=username, max_bookmarks=max_bookmarks, sleep_seconds=sleep_seconds)
    return user
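# Usage sketch for get_user() above. Assumption: "api" is an instance of the
# class defining get_user(); backup_bookmarks() is an illustrative helper, not
# part of the original module.
def backup_bookmarks(api, username, password=None):
    # With a password the full (public + private) collection is fetched over
    # SSL via the official API; without one, only public bookmarks are read
    # from the JSON feed or scraped from the website.
    user = api.get_user(username, password=password, max_bookmarks=0)
    return user.bookmarks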