def get_html(url):
    # type: (str) -> Optional[BeautifulSoup]
    """Gets cached or live HTML from the url"""
    headers = {"Accept": "text/html", "Accept-encoding": "gzip"}
    with Cache() as c:
        cached = c.get(url)
        is_info = url.startswith(MIA_INFO_URI)
        if cached:
            # Always return cached info pages...
            if cached["fresh"] or is_info:
                return BeautifulSoup(cached["blob"], "html.parser")
            headers.update(conditional_headers(cached))
        r = requests.get(url, headers=headers, timeout=SEARCH_TIMEOUT)
        if 200 == r.status_code:
            # Always cache info pages without any query string
            headers = None if is_info else r.headers
            url = url.split("?")[0] if is_info else url
            soup = BeautifulSoup(r.content, "html.parser")
            c.set(url, r.content, headers)
            return soup
        if 304 == r.status_code:
            c.touch(url, r.headers)
            return BeautifulSoup(cached["blob"], "html.parser")
        logger.debug("get_html error: {} {}".format(r.status_code, url))
        return None
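# Every fetcher in this section leans on a shared conditional_headers()
# helper that is not shown here. Below is a minimal sketch of what it
# presumably does, assuming the cache keeps the original response headers
# under a "headers" key (this is an illustration, not the addons' actual
# code): a stored ETag becomes If-None-Match and a stored Last-Modified
# becomes If-Modified-Since, which is what lets the server answer with
# 304 Not Modified instead of resending the body.
def conditional_headers(cached):
    # type: (dict) -> dict
    headers = {}
    etag = cached["headers"].get("ETag")
    last_modified = cached["headers"].get("Last-Modified")
    if etag:
        # Ask the server to send a body only if the entity has changed
        headers["If-None-Match"] = etag
    if last_modified:
        # Ask the server to send a body only if the page is newer than this
        headers["If-Modified-Since"] = last_modified
    return headers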
def callAPI(self, endpoint, method="GET", api_ver=2, params=None, data=None):
    if int(api_ver) == 1:
        complete_url = 'https://f1tv.formula1.com' + endpoint
    elif int(api_ver) == 2:
        complete_url = 'https://f1tv-api.formula1.com/agl/1.0/gbr/en/all_devices/global/' + endpoint
    else:
        xbmc.log("Unable to make an API call with an invalid API version: {}".format(api_ver),
                 xbmc.LOGERROR)
        return

    if method.upper() == 'GET':
        # Check to see if we've cached the response
        with Cache() as c:
            if params:
                url_with_parameters = "{complete_url}?{parameters}".format(
                    complete_url=complete_url,
                    parameters=urllib.urlencode(params))
            else:
                url_with_parameters = complete_url
            cached = c.get(url_with_parameters)
            if cached:
                # If we have a fresh cached version, return it.
                if cached["fresh"]:
                    return json.loads(cached["blob"])
                # Otherwise append applicable "If-None-Match"/"If-Modified-Since" headers
                self.account_manager.getSession().headers.update(conditional_headers(cached))
            # Request a new version of the data
            r = self.account_manager.getSession().get(complete_url, params=params, data=data)
            if 200 == r.status_code:
                # Add the new data and headers to the cache
                c.set(url_with_parameters, r.content, r.headers)
                return r.json()
            if 304 == r.status_code:
                # The data hasn't been modified, so just touch the cache with
                # the new headers and return the existing data
                c.touch(url_with_parameters, r.headers)
                return json.loads(cached["blob"])
    elif method.upper() == 'POST':
        r = self.account_manager.getSession().post(complete_url, params=params, data=data)
        if r.ok:
            return r.json()
        return
    else:
        return
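# Note: urllib.urlencode is the Python 2 spelling; Python 3 moved the same
# function to urllib.parse.urlencode. If this addon has to run under both
# interpreters, a common compatibility shim looks like this:
try:
    from urllib.parse import urlencode  # Python 3
except ImportError:
    from urllib import urlencode  # Python 2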
def get_json(url):
    # type: (str) -> Optional[dict]
    """Gets cached or live JSON from the url"""
    headers = {"Accept": "application/json", "Accept-encoding": "gzip"}
    with Cache() as c:
        cached = c.get(url)
        if cached:
            headers.update(conditional_headers(cached))
            if cached["fresh"]:
                return json.loads(cached["blob"])
        r = requests.get(url, headers=headers, timeout=SEARCH_TIMEOUT)
        if 200 == r.status_code:
            # Cache the raw response body so json.loads() can decode it later
            c.set(url, r.content, r.headers)
            return r.json()
        if 304 == r.status_code:
            c.touch(url, r.headers)
            return json.loads(cached["blob"])
def get_json(url):
    # type: (str) -> Optional[dict]
    """Gets cached or live JSON from the url"""
    headers = {"Accept": "application/json", "Accept-encoding": "gzip"}
    with Cache() as c:
        cached = c.get(url)
        if cached:
            if cached["fresh"]:
                return json.loads(cached["blob"])
            headers.update(conditional_headers(cached))
        r = requests.get(url, headers=headers, timeout=SEARCH_TIMEOUT)
        if 200 == r.status_code:
            # Cache the raw response body so json.loads() can decode it later
            c.set(url, r.content, r.headers)
            return r.json()
        if 304 == r.status_code:
            c.touch(url, r.headers)
            return json.loads(cached["blob"])
        logger.debug("get_json error: {} {}".format(r.status_code, url))
        return None
def get_html(uri):
    # type: (str) -> Optional[BeautifulSoup]
    """Gets cached or live HTML from the url"""
    headers = {"Accept": "text/html", "Accept-encoding": "gzip"}
    with Cache() as c:
        cached = c.get(uri)
        if cached:
            # Return the cached page if it is still fresh
            if cached["fresh"]:
                return BeautifulSoup(cached["blob"], "html.parser")
            headers.update(conditional_headers(cached))
        r = requests.get(uri, headers=headers, timeout=SEARCH_TIMEOUT)
        if 200 == r.status_code:
            soup = BeautifulSoup(r.content, "html.parser")
            c.set(uri, r.content, r.headers)
            return soup
        if 304 == r.status_code:
            c.touch(uri, r.headers)
            return BeautifulSoup(cached["blob"], "html.parser")
        logger.debug("get_html error: {} {}".format(r.status_code, uri))
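# A hypothetical caller, to show the intended contract of these helpers:
# the first call hits the network and populates the cache, repeat calls
# within the freshness window are served from the cache, and callers must
# guard against None (the URL below is illustrative only).
soup = get_html("https://example.com/search?q=kodi")
if soup is not None:
    for link in soup.find_all("a"):
        print(link.get("href"))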
def get_html(uri):
    # type: (str) -> Union[BeautifulSoup, None]
    """Gets cached or live HTML from the url"""
    headers = {"Accept": "text/html", "Accept-encoding": "gzip"}
    with Cache() as c:
        cached = c.get(uri)
        if cached:
            headers.update(conditional_headers(cached))
            if cached["fresh"]:
                return BeautifulSoup(cached["blob"], "html.parser")
        r = requests.get(uri, headers=headers, timeout=SEARCH_TIMEOUT)
        if 200 == r.status_code:
            # Parse only the main content, and cache the pruned markup
            strainer = SoupStrainer("div", {"id": "main-container"})
            soup = BeautifulSoup(r.content, "html.parser", parse_only=strainer)
            c.set(uri, str(soup), r.headers)
            return soup
        if 304 == r.status_code:
            c.touch(uri, r.headers)
            return BeautifulSoup(cached["blob"], "html.parser")
        return None
def get_html(url):
    # type: (str) -> Optional[BeautifulSoup]
    """Gets cached or live HTML from the url"""
    headers = {"Accept": "text/html", "Accept-encoding": "gzip"}
    with Cache() as c:
        cached = c.get(url)
        if cached:
            headers.update(conditional_headers(cached))
            if cached["fresh"]:
                return BeautifulSoup(cached["blob"], "html.parser")
        r = requests.get(url, headers=headers, timeout=SEARCH_TIMEOUT)
        if 200 == r.status_code:
            soup = BeautifulSoup(r.content, "html.parser")
            # Pre-cache clean-up: strip scripts and styles, then cache the
            # cleaned markup so cache hits return the same pruned document
            for x in soup(["script", "style"]):
                x.extract()
            c.set(url, str(soup), r.headers)
            return soup
        elif 304 == r.status_code:
            c.touch(url, r.headers)
            return BeautifulSoup(cached["blob"], "html.parser")
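# Every variant above assumes the same Cache contract: get() returns None or
# a dict with "blob", "fresh", and the stored response headers; set() writes
# an entry; touch() renews one from fresh response headers after a 304. The
# class below is a minimal in-memory sketch of that contract, assuming a
# fixed freshness window (the real caches are presumably disk-backed and
# derive freshness from Cache-Control/Expires; this is an illustration, not
# the actual module).
import time


class Cache(object):
    TTL = 300  # assumed freshness window in seconds
    _store = {}  # shared in-memory store, keyed by URL

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        return False

    def get(self, key):
        entry = self._store.get(key)
        if entry is None:
            return None
        return {
            "blob": entry["blob"],
            "headers": entry["headers"],
            "fresh": (time.time() - entry["stored"]) < self.TTL,
        }

    def set(self, key, blob, headers):
        # headers may be None (see the info-page branch in the first snippet)
        self._store[key] = {
            "blob": blob,
            "headers": dict(headers or {}),
            "stored": time.time(),
        }

    def touch(self, key, headers):
        # Renew the entry's lifetime without replacing the stored blob
        entry = self._store.get(key)
        if entry is not None:
            entry["stored"] = time.time()
            entry["headers"] = dict(headers or {})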