def iso_to_dt(date):
    """Convert an ISO 8601 timestamp string ('YYYY-MM-DDTHH:MM:SS...')
    into a formatted date string via date_str(), as a UTC datetime.

    Bug fix: the original built an epoch with time.mktime(), which
    interprets the parsed fields as *local* time, and then labelled the
    result UTC with tzutc(); on any host whose timezone is not UTC the
    output was shifted by the local UTC offset. The parsed fields are now
    used directly to build an aware UTC datetime.

    NOTE(review): assumes the incoming string itself denotes UTC (the
    original's use of tzutc() implies this) — confirm against callers.
    """
    # Fixed-position slicing assumes the canonical 'YYYY-MM-DDTHH:MM:SS'
    # layout; anything shorter raises ValueError, as before.
    year, month, day = int(date[:4]), int(date[5:7]), int(date[8:10])
    hour, minute, second = int(date[11:13]), int(date[14:16]), int(date[17:19])
    return date_str(datetime(year, month, day, hour, minute, second,
                             tzinfo=tzutc()))
def get_memento(self, req_uri, accept_datetime):
    """Return the best Memento of req_uri at accept_datetime.

    Scrapes the page at req_uri to discover the MediaWiki API endpoint
    (the <link rel="EditURI"> element) and the page title, then delegates
    to self.query(). Returns None when scraping or parsing fails;
    raises HandlerError(404) when the title or API cannot be found.
    """
    timestamp = date_str(accept_datetime, self.TIMESTAMPFMT)
    params = {
        'rvlimit': 1,          # Only need one
        'rvstart': timestamp,  # Start listing from here
        'rvdir': 'older'       # List in decreasing order
    }

    # Find the API and title by scraping the page's <link> elements.
    api_base_uri = None
    try:
        dom = self.get_xml(req_uri, html=True)
        for link in dom.xpath("//link"):
            if link.attrib['rel'].lower() == "edituri":
                api_base_uri = link.attrib['href'].split("?")[0]
                if api_base_uri.startswith("//"):
                    # Protocol-relative URL: prepend the scheme only.
                    # (The original replace("//", "http://") rewrote
                    # every "//" in the URL, corrupting any later
                    # doubled slash in the path.)
                    api_base_uri = "http:" + api_base_uri
        parsed_url = urlparse.urlparse(req_uri)
        try:
            title = urlparse.parse_qs(parsed_url[4])['title'][0]
        except Exception:
            # No ?title= query parameter; fall back to the last path
            # segment of the request URI.
            title = parsed_url.path.split('/')[-1]
        logging.debug(
            "Mediawiki handler: API found: %s, page title parsed to: %s " % (
                api_base_uri, title))
        if not title:
            raise HandlerError("Cannot find Title", 404)
        if not api_base_uri:
            raise HandlerError("Cannot find mediawiki API on page", 404)
        else:
            title = urllib2.unquote(title)
    except HandlerError as he:
        raise he
    except Exception as e:
        # Best-effort by design: scraping failures yield an empty
        # response instead of an error.
        logging.error(
            "MediaWikiHandler: querying and parsing page for title/api "
            "%s. handler will return empty response" % e
        )
        return None

    # index.php is the endpoint used to build human-readable Memento URIs.
    base_uri = api_base_uri.replace("api.php", "index.php")
    return self.query(req_uri, params, title, api_base_uri, base_uri)
def get_memento(self, req_uri, accept_datetime):
    """Return [first, best] Mementos of req_uri at accept_datetime.

    Scrapes the page at req_uri to discover the MediaWiki API endpoint
    (the <link rel="EditURI"> element) and the page title, queries for
    the best Memento at accept_datetime, and additionally resolves the
    *first* Memento (oldest revision), which is cached per title.
    Returns None when scraping or parsing fails; raises
    HandlerError(404) when the title or API cannot be found.
    """
    timestamp = date_str(accept_datetime, self.TIMESTAMPFMT)
    params = {
        'rvlimit': 1,          # Only need one
        'rvstart': timestamp,  # Start listing from here
        'rvdir': 'older'       # List in decreasing order
    }

    # Find the API and title by scraping the page's <link> elements.
    api_base_uri = None
    try:
        dom = self.get_xml(req_uri, html=True)
        for link in dom.xpath("//link"):
            if link.attrib['rel'].lower() == "edituri":
                api_base_uri = link.attrib['href'].split("?")[0]
                if api_base_uri.startswith("//"):
                    # Protocol-relative URL: prepend the scheme only.
                    # (The original replace("//", "http://") rewrote
                    # every "//" in the URL, corrupting any later
                    # doubled slash in the path.)
                    api_base_uri = "http:" + api_base_uri
        parsed_url = urlparse.urlparse(req_uri)
        try:
            title = urlparse.parse_qs(parsed_url[4])['title'][0]
        except Exception:
            # No ?title= query parameter; fall back to the last path
            # segment of the request URI.
            title = parsed_url.path.split('/')[-1]
        logging.debug(
            "Mediawiki handler: API found: %s, page title parsed to: %s " % (
                api_base_uri, title))
        if not title:
            raise HandlerError("Cannot find Title", 404)
        if not api_base_uri:
            raise HandlerError("Cannot find mediawiki API on page", 404)
        else:
            title = urllib2.unquote(title)
    except HandlerError as he:
        raise he
    except Exception as e:
        # Best-effort by design: scraping failures yield an empty
        # response instead of an error.
        logging.error(
            "MediaWikiHandler: querying and parsing page for title/api %s."
            " Handler will return empty response." % e)
        return None

    # index.php is the endpoint used to build human-readable Memento URIs.
    base_uri = api_base_uri.replace("api.php", "index.php")

    # The best Memento
    memento = self.query(req_uri, params, title, api_base_uri, base_uri)[0]

    # The first Memento: the oldest revision never changes, so it is
    # cached per title once a best Memento was successfully found.
    if title in self.inner_cache and memento:
        logging.debug("Wiki Handler: found cached first for " + title)
        first = self.inner_cache[title]
    else:
        logging.debug("Wiki Handler: Querying first for " + title)
        first_params = {
            'rvlimit': 1,                 # Only need one
            'rvstart': '19900101000000',  # Start listing from 1990
            'rvdir': 'newer'              # List in increasing order
        }
        first = self.query(req_uri, first_params, title,
                           api_base_uri, base_uri)[0]
        if len(self.inner_cache) > self.max_inner_cache_size:
            # Crude eviction: wipe the entire cache once it exceeds
            # the configured cap.
            self.inner_cache = {}
        self.inner_cache[title] = first

    # This handler returns more than only the best Memento:
    # a Link with rel="first memento" is also returned to the client.
    return [first, memento]
def get_memento(self, req_uri, accept_datetime):
    """Return [first, best] Mementos of req_uri at accept_datetime.

    Scrapes the page at req_uri to discover the MediaWiki API endpoint
    (the <link rel="EditURI"> element) and the page title, queries for
    the best Memento at accept_datetime, and additionally resolves the
    *first* Memento (oldest revision), which is cached per title.
    Returns None when scraping or parsing fails; raises
    HandlerError(404) when the title or API cannot be found.
    """
    timestamp = date_str(accept_datetime, self.TIMESTAMPFMT)
    params = {
        "rvlimit": 1,          # Only need one
        "rvstart": timestamp,  # Start listing from here
        "rvdir": "older",      # List in decreasing order
    }

    # Find the API and title by scraping the page's <link> elements.
    api_base_uri = None
    try:
        dom = self.get_xml(req_uri, html=True)
        for link in dom.xpath("//link"):
            if link.attrib["rel"].lower() == "edituri":
                api_base_uri = link.attrib["href"].split("?")[0]
                if api_base_uri.startswith("//"):
                    # Protocol-relative URL: prepend the scheme only.
                    # (The original replace("//", "http://") rewrote
                    # every "//" in the URL, corrupting any later
                    # doubled slash in the path.)
                    api_base_uri = "http:" + api_base_uri
        parsed_url = urlparse.urlparse(req_uri)
        try:
            title = urlparse.parse_qs(parsed_url[4])["title"][0]
        except Exception:
            # No ?title= query parameter; fall back to the last path
            # segment of the request URI.
            title = parsed_url.path.split("/")[-1]
        logging.debug(
            "Mediawiki handler: API found: %s, page title parsed to: %s " % (
                api_base_uri, title))
        if not title:
            raise HandlerError("Cannot find Title", 404)
        if not api_base_uri:
            raise HandlerError("Cannot find mediawiki API on page", 404)
        else:
            title = urllib2.unquote(title)
    except HandlerError as he:
        raise he
    except Exception as e:
        # Best-effort by design: scraping failures yield an empty
        # response instead of an error.
        logging.error(
            "MediaWikiHandler: querying and parsing page for title/api %s."
            " Handler will return empty response." % e
        )
        return None

    # index.php is the endpoint used to build human-readable Memento URIs.
    base_uri = api_base_uri.replace("api.php", "index.php")

    # The best Memento
    memento = self.query(req_uri, params, title, api_base_uri, base_uri)[0]

    # The first Memento: the oldest revision never changes, so it is
    # cached per title once a best Memento was successfully found.
    if title in self.inner_cache and memento:
        logging.debug("Wiki Handler: found cached first for " + title)
        first = self.inner_cache[title]
    else:
        logging.debug("Wiki Handler: Querying first for " + title)
        first_params = {
            "rvlimit": 1,                 # Only need one
            "rvstart": "19900101000000",  # Start listing from 1990
            "rvdir": "newer",             # List in increasing order
        }
        first = self.query(req_uri, first_params, title,
                           api_base_uri, base_uri)[0]
        if len(self.inner_cache) > self.max_inner_cache_size:
            # Crude eviction: wipe the entire cache once it exceeds
            # the configured cap.
            self.inner_cache = {}
        self.inner_cache[title] = first

    # This handler returns more than only the best Memento:
    # a Link with rel="first memento" is also returned to the client.
    return [first, memento]