# Imports assumed for these test methods, which are excerpts from a
# unittest.TestCase subclass; CachedSession comes from the package under
# test and file_cache_directory is a module-level test constant
import json
import os
import shutil
import time

import httpretty
import redis
import requests
from freezegun import freeze_time


def test_custom_heuristic(self):
    def request_callback(request, uri, response_headers):
        return [200, response_headers, json.dumps({"epoch": time.time()})]

    httpretty.register_uri(
        httpretty.GET, "https://now.httpbin.org", body=request_callback
    )

    session = CachedSession(
        fallback_cache_duration=2,
        file_cache_directory=file_cache_directory,
    )

    # With a 2s retention and 1s between requests (freezer.tick()
    # advances the clock by one second, and the first request is sent
    # at t=0), the first two requests should return the same epoch,
    # whereas the third gets fresh data
    with freeze_time("2012-01-14 12:00:01") as freezer:
        response_1 = session.get("https://now.httpbin.org")
        freezer.tick()
        response_2 = session.get("https://now.httpbin.org")
        freezer.tick()
        response_3 = session.get("https://now.httpbin.org")

    self.assertEqual(response_1.text, response_2.text)
    self.assertNotEqual(response_2.text, response_3.text)
def test_default_heuristic(self):
    def request_callback(request, uri, response_headers):
        return [200, response_headers, json.dumps({"epoch": time.time()})]

    httpretty.register_uri(
        httpretty.GET, "https://now.httpbin.org", body=request_callback
    )

    session = CachedSession(file_cache_directory=file_cache_directory)

    # With the default cache duration, the second request (2s after
    # the first) should still be served from the cache, while the
    # third (5s after the first) should get fresh data
    with freeze_time("2012-01-14 12:00:01") as freezer:
        response_1 = session.get("https://now.httpbin.org")
        freezer.tick()
        freezer.tick()
        response_2 = session.get("https://now.httpbin.org")
        freezer.tick()
        freezer.tick()
        freezer.tick()
        response_3 = session.get("https://now.httpbin.org")

    self.assertEqual(response_1.text, response_2.text)
    self.assertNotEqual(response_2.text, response_3.text)
def test_redis_cache(self):
    class FakeConnectionPool:
        def __init__(self, name):
            self.name = name

    # Our mock will be called here. Passing a connection_pool with a
    # name makes sure that we can identify the different redis mocks
    redis_mock_1 = redis.Redis(
        connection_pool=FakeConnectionPool(name="test1")
    )
    redis_mock_2 = redis.Redis(
        connection_pool=FakeConnectionPool(name="test2")
    )
    self.assertNotEqual(redis_mock_1, redis_mock_2)

    def request_callback(request, uri, response_headers):
        return [200, response_headers, json.dumps({"epoch": time.time()})]

    httpretty.register_uri(
        httpretty.GET, "https://now.httpbin.org", body=request_callback
    )

    with freeze_time("2012-01-14 12:00:01") as freezer:
        session_1 = CachedSession(
            redis_connection=redis_mock_1, fallback_cache_duration=500
        )
        session_2 = CachedSession(
            redis_connection=redis_mock_2, fallback_cache_duration=1
        )

        resp_1 = session_1.get("https://now.httpbin.org")
        resp_2 = session_2.get("https://now.httpbin.org")
        self.assertNotEqual(resp_1.text, resp_2.text)

        freezer.tick()

        resp_3 = session_2.get("https://now.httpbin.org")
        self.assertNotEqual(resp_2.text, resp_3.text)

        session_3 = CachedSession(
            redis_connection=redis_mock_1, fallback_cache_duration=1
        )
        resp_4 = session_3.get("https://now.httpbin.org")
        self.assertEqual(resp_1.text, resp_4.text)
def test_timeout_adapter(self):
    session = CachedSession(
        timeout=2, file_cache_directory=file_cache_directory
    )

    # This test can be inconsistent on multiple concurrent runs
    # due to the use of time.sleep
    with self.assertRaises(
        (
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
        )
    ):
        session.get("https://httpbin.org/delay/3")

    resp = session.get("https://httpbin.org/delay/1")
    self.assertIsNotNone(resp)
def test_file_cache(self):
    def request_callback(request, uri, response_headers):
        return [200, response_headers, json.dumps({"epoch": time.time()})]

    httpretty.register_uri(
        httpretty.GET, "https://now.httpbin.org", body=request_callback
    )

    cache_dir_1 = ".test1"
    cache_dir_2 = ".test2"

    session_1 = CachedSession(
        file_cache_directory=cache_dir_1, fallback_cache_duration=2000
    )
    session_2 = CachedSession(file_cache_directory=cache_dir_2)

    resp_1 = session_1.get("https://now.httpbin.org")
    self.assertTrue(os.path.isdir(cache_dir_1))

    resp_2 = session_2.get("https://now.httpbin.org")
    self.assertTrue(os.path.isdir(cache_dir_2))
    self.assertNotEqual(resp_1.text, resp_2.text)

    shutil.rmtree(cache_dir_2)
    self.assertFalse(os.path.isdir(cache_dir_2))

    resp_3 = session_2.get("https://now.httpbin.org")
    self.assertTrue(os.path.isdir(cache_dir_2))
    self.assertNotEqual(resp_2.text, resp_3.text)

    session_3 = CachedSession(
        file_cache_directory=cache_dir_1, fallback_cache_duration=2000
    )
    resp_4 = session_3.get("https://now.httpbin.org")
    self.assertEqual(resp_1.text, resp_4.text)

    shutil.rmtree(cache_dir_1)
    shutil.rmtree(cache_dir_2)
def test_cache_control_no_cache_overwrites_custom_heuristic(self):
    def request_callback(request, uri, response_headers):
        return [200, response_headers, json.dumps({"epoch": time.time()})]

    httpretty.register_uri(
        httpretty.GET,
        "https://now.httpbin.org",
        body=request_callback,
        adding_headers={"Cache-Control": "no-cache"},
    )

    session = CachedSession(file_cache_directory=file_cache_directory)

    # With no-cache set, no response should be cached,
    # thus all bodies are different
    response_1 = session.get("https://now.httpbin.org")
    response_2 = session.get("https://now.httpbin.org")
    response_3 = session.get("https://now.httpbin.org")

    self.assertNotEqual(response_1.text, response_2.text)
    self.assertNotEqual(response_2.text, response_3.text)
def test_cache_control_max_age_overwrites_custom_heuristic(self):
    def request_callback(request, uri, response_headers):
        return [200, response_headers, json.dumps({"epoch": time.time()})]

    httpretty.register_uri(
        httpretty.GET,
        "https://now.httpbin.org",
        body=request_callback,
        adding_headers={"Cache-Control": "max-age=2"},
    )

    session = CachedSession(file_cache_directory=file_cache_directory)

    # The server-sent max-age=2 should win over the session's own
    # heuristic: the second request (1s later) hits the cache, while
    # the third (2s later) gets fresh data
    with freeze_time("2012-01-14 12:00:01") as freezer:
        response_1 = session.get("https://now.httpbin.org")
        freezer.tick()
        response_2 = session.get("https://now.httpbin.org")
        freezer.tick()
        response_3 = session.get("https://now.httpbin.org")

    self.assertEqual(response_1.text, response_2.text)
    self.assertNotEqual(response_2.text, response_3.text)
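# Taken together, these tests pin down the behaviour of CachedSession:
# a fallback expiry heuristic (fallback_cache_duration), a file or Redis
# cache backend, server-sent Cache-Control headers taking precedence over
# the heuristic, and a session-wide timeout. Below is a minimal sketch of
# such a session, assuming it is built on requests plus the cachecontrol
# library; the constructor arguments and the 5s default mirror what the
# tests exercise, and the actual implementation may differ.
import requests
from cachecontrol import CacheControlAdapter
from cachecontrol.caches.file_cache import FileCache
from cachecontrol.caches.redis_cache import RedisCache
from cachecontrol.heuristics import ExpiresAfter


class ExpiresAfterIfNoCacheControl(ExpiresAfter):
    """
    Apply the fallback expiry only to responses that carry no
    Cache-Control header of their own, so that server-sent
    no-cache / max-age directives win, as the two tests above expect
    """

    def update_headers(self, response):
        if "cache-control" in response.headers:
            # Returning no updates leaves the response untouched
            return {}

        return super().update_headers(response)


class CachedSession(requests.Session):
    def __init__(
        self,
        fallback_cache_duration=5,
        file_cache_directory=".webcache",
        redis_connection=None,
        timeout=None,
    ):
        super().__init__()

        self.timeout = timeout

        # Pick the cache backend: Redis if a connection was passed,
        # a directory of files otherwise
        if redis_connection is not None:
            cache = RedisCache(redis_connection)
        else:
            cache = FileCache(file_cache_directory)

        adapter = CacheControlAdapter(
            cache=cache,
            heuristic=ExpiresAfterIfNoCacheControl(
                seconds=fallback_cache_duration
            ),
        )
        self.mount("http://", adapter)
        self.mount("https://", adapter)

    def request(self, *args, **kwargs):
        # Apply the session-wide timeout unless the caller passes one
        kwargs.setdefault("timeout", self.timeout)

        return super().request(*args, **kwargs)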
class DiscourseDocs:
    """
    A basic model class for retrieving Documentation content
    from a Discourse installation through the API
    """

    def __init__(self, base_url, frontpage_id, session_class=CachedSession):
        """
        @param base_url: The Discourse URL (e.g. https://discourse.example.com)
        @param frontpage_id: The ID of the frontpage topic in Discourse.
                             This topic should also contain the navigation.
        """

        self.base_url = base_url.rstrip("/")
        self.frontpage_id = frontpage_id
        self.session = session_class(expire_after=300)

    def get_topic(self, path):
        """
        Retrieve topic object by path
        """

        response = self.session.get(
            f"{self.base_url}/t/{path}.json", allow_redirects=False
        )
        response.raise_for_status()

        if response.status_code >= 300:
            raise RedirectFoundError(response=response)

        return response.json()

    def parse_topic(self, topic):
        return {
            "title": topic["title"],
            "body_html": topic["post_stream"]["posts"][0]["cooked"],
            "updated": dateutil.parser.parse(
                topic["post_stream"]["posts"][0]["updated_at"]
            ),
            "forum_link": f"{self.base_url}/t/{topic['slug']}/{topic['id']}",
            "path": f"/t/{topic['slug']}/{topic['id']}",
        }

    def get_frontpage(self):
        # Get topic data
        topic = self.get_topic(self.frontpage_id)
        frontpage = self.parse_topic(topic)

        # Split HTML into nav and body
        frontpage_html = frontpage["body_html"]
        frontpage_soup = BeautifulSoup(frontpage_html, features="html.parser")
        frontpage_splitpoint = frontpage_soup.find(
            re.compile("^h[1-6]$"), text="Content"
        )
        content_elements = frontpage_splitpoint.find_previous_siblings()
        nav_elements = frontpage_splitpoint.find_next_siblings()

        # Update frontpage
        frontpage["body_html"] = "\n".join(
            map(str, reversed(content_elements))
        )
        nav_html = "\n".join(map(str, nav_elements))

        return frontpage, nav_html

    def get_document(self, path):
        """
        Retrieve and return relevant data about a document:
        - Title
        - HTML content
        - Navigation content
        """

        document, nav_html = self.get_frontpage()

        if f"/t/{path}" != document["path"]:
            topic = self.get_topic(path)
            document = self.parse_topic(topic)

        return document, nav_html
class DiscourseDocs:
    """
    A basic model class for retrieving Documentation content
    from a Discourse installation through the API
    """

    def __init__(self, base_url, frontpage_id, session_class=CachedSession):
        """
        @param base_url: The Discourse URL (e.g. https://discourse.example.com)
        @param frontpage_id: The ID of the frontpage topic in Discourse.
                             This topic should also contain the navigation.
        """

        self.base_url = base_url.rstrip("/")
        self.frontpage_id = frontpage_id
        self.session = session_class(expire_after=300)

    def get_topic(self, path):
        """
        Retrieve topic object by path
        """

        response = self.session.get(
            f"{self.base_url}/t/{path}.json", allow_redirects=False
        )
        response.raise_for_status()

        if response.status_code >= 300:
            raise RedirectFoundError(response=response)

        return response.json()

    def parse_topic(self, topic):
        return {
            "title": topic["title"],
            "body_html": topic["post_stream"]["posts"][0]["cooked"],
            "updated": dateutil.parser.parse(
                topic["post_stream"]["posts"][0]["updated_at"]
            ),
            "forum_link": f"{self.base_url}/t/{topic['slug']}/{topic['id']}",
            "path": f"/t/{topic['slug']}/{topic['id']}",
        }

    def get_frontpage(self):
        # Get topic data
        topic = self.get_topic(self.frontpage_id)
        frontpage = self.parse_topic(topic)

        # Split HTML into nav and body
        soup = BeautifulSoup(frontpage["body_html"], features="html.parser")
        splitpoint = soup.find(re.compile("^h[1-6]$"), text="Content")

        if splitpoint:
            body_elements = splitpoint.find_previous_siblings()
            frontpage["body_html"] = "\n".join(
                map(str, reversed(body_elements))
            )

            nav_elements = splitpoint.find_next_siblings()
            nav_html = "\n".join(map(str, nav_elements))
        else:
            nav_html = (
                "<p><em>"
                "Error: Failed to parse navigation from"
                f' <a href="{frontpage["forum_link"]}">'
                "the frontpage topic</a>."
                " Please check the format."
                "</em></p>"
            )

        return frontpage, nav_html

    def process_html(self, html):
        """
        Post-process the HTML output from Discourse to
        remove 'NOTE TO EDITORS' sections
        """

        soup = BeautifulSoup(html, features="html.parser")
        notes_to_editors_spans = soup.find_all(text="NOTE TO EDITORS")

        for span in notes_to_editors_spans:
            container = span.parent.parent.parent.parent

            if (
                container.name == "aside"
                and "quote" in container.attrs["class"]
            ):
                container.decompose()

        return soup.prettify()

    def get_document(self, path):
        """
        Retrieve and return relevant data about a document:
        - Title
        - HTML content
        - Navigation content
        """

        document, nav_html = self.get_frontpage()

        if f"/t/{path}" != document["path"]:
            topic = self.get_topic(path)
            document = self.parse_topic(topic)

        document["body_html"] = self.process_html(document["body_html"])

        return document, nav_html
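# A hypothetical usage sketch of the class above. The Discourse URL and
# topic paths are made up; in a real installation a topic path has the
# form "<slug>/<topic-id>".
discourse_docs = DiscourseDocs(
    base_url="https://discourse.example.com",
    frontpage_id="documentation-outline/100",
)

document, nav_html = discourse_docs.get_document("getting-started/101")
print(document["title"])    # The topic title
print(document["updated"])  # When the topic was last edited
print(nav_html)             # Navigation HTML split out of the frontpage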