Example #1
    def test_custom_heuristic(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET, "https://now.httpbin.org", body=request_callback
        )

        session = CachedSession(
            fallback_cache_duration=2,
            file_cache_directory=file_cache_directory,
        )

        # with a 2s cache duration and requests sent 1s apart, the first two
        # requests should return the same cached epoch, while the third
        # request, sent 2s after the first, gets fresh data

        with freeze_time("2012-01-14 12:00:01") as freezer:
            response_1 = session.get("https://now.httpbin.org")
            freezer.tick()

            response_2 = session.get("https://now.httpbin.org")
            freezer.tick()

            response_3 = session.get("https://now.httpbin.org")

            self.assertEqual(response_1.text, response_2.text)
            self.assertNotEqual(response_2.text, response_3.text)
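For reference, the same fallback behaviour in plain application code. A minimal sketch, assuming only the CachedSession arguments exercised in the test above (the cache directory and URL are illustrative):

# responses are cached for 2 seconds when the server sends no caching headers
session = CachedSession(
    fallback_cache_duration=2,
    file_cache_directory=".http-cache",
)

first = session.get("https://now.httpbin.org")
second = session.get("https://now.httpbin.org")  # within 2s: served from cache
assert first.text == second.text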
Example #2
    def test_default_heuristic(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET, "https://now.httpbin.org", body=request_callback
        )

        session = CachedSession(file_cache_directory=file_cache_directory)

        with freeze_time("2012-01-14 12:00:01") as freezer:

            # requests 1 and 2 are sent 2s apart, which is within the
            # default cache duration, so they share the same cached body
            response_1 = session.get("https://now.httpbin.org")
            freezer.tick()
            freezer.tick()

            response_2 = session.get("https://now.httpbin.org")
            freezer.tick()
            freezer.tick()
            freezer.tick()

            # by now 5s have passed since the cached response was stored,
            # which is beyond the default cache duration, so this request
            # gets fresh data
            response_3 = session.get("https://now.httpbin.org")

            self.assertEqual(response_1.text, response_2.text)
            self.assertNotEqual(response_2.text, response_3.text)
Example #3
    def test_redis_cache(self):
        class FakeConnectionPool:
            def __init__(self, name):
                self.name = name

        # the redis mock will be called here; giving each connection_pool a
        # distinct name lets us tell the two mocked Redis instances apart

        redis_mock_1 = redis.Redis(
            connection_pool=FakeConnectionPool(name="test1")
        )
        redis_mock_2 = redis.Redis(
            connection_pool=FakeConnectionPool(name="test2")
        )

        self.assertNotEqual(redis_mock_1, redis_mock_2)

        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET, "https://now.httpbin.org", body=request_callback
        )

        with freeze_time("2012-01-14 12:00:01") as freezer:
            session_1 = CachedSession(
                redis_connection=redis_mock_1, fallback_cache_duration=500
            )
            session_2 = CachedSession(
                redis_connection=redis_mock_2, fallback_cache_duration=1
            )

            resp_1 = session_1.get("https://now.httpbin.org")
            resp_2 = session_2.get("https://now.httpbin.org")

            self.assertNotEqual(resp_1.text, resp_2.text)

            freezer.tick()

            resp_3 = session_2.get("https://now.httpbin.org")

            self.assertNotEqual(resp_2.text, resp_3.text)

            session_3 = CachedSession(
                redis_connection=redis_mock_1, fallback_cache_duration=1
            )

            resp_4 = session_3.get("https://now.httpbin.org")

            self.assertEqual(resp_1.text, resp_4.text)
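Outside of tests, the redis_connection argument would typically be a real redis-py client rather than a mock. A rough sketch under that assumption (host, port and database are placeholders, not confirmed by the source):

import redis

# connect to a local Redis instance and use it as the cache backend,
# keeping responses for up to 5 minutes when the server sends no
# caching headers
redis_client = redis.Redis(host="localhost", port=6379, db=0)
session = CachedSession(
    redis_connection=redis_client, fallback_cache_duration=300
)

response = session.get("https://example.com/api/data")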
Example #4
    def test_timeout_adapter(self):
        session = CachedSession(
            timeout=2, file_cache_directory=file_cache_directory
        )

        # this test can be flaky when multiple runs happen concurrently,
        # since it depends on real delayed responses from httpbin.org
        with self.assertRaises(
            (
                requests.exceptions.ConnectTimeout,
                requests.exceptions.ReadTimeout,
            )
        ):
            session.get("https://httpbin.org/delay/3")

        resp = session.get("https://httpbin.org/delay/1")

        self.assertIsNotNone(resp)
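A sketch of how the session-wide timeout might be handled in application code, reusing the exception types asserted above (the URL is illustrative):

import requests

session = CachedSession(timeout=2)

try:
    response = session.get("https://example.com/slow-endpoint")
except (
    requests.exceptions.ConnectTimeout,
    requests.exceptions.ReadTimeout,
):
    # the request exceeded the 2s session timeout
    response = None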
Example #5
    def test_file_cache(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET, "https://now.httpbin.org", body=request_callback
        )

        cache_dir_1 = ".test1"
        cache_dir_2 = ".test2"

        session_1 = CachedSession(
            file_cache_directory=cache_dir_1, fallback_cache_duration=2000
        )
        session_2 = CachedSession(file_cache_directory=cache_dir_2)

        resp_1 = session_1.get("https://now.httpbin.org")

        self.assertTrue(os.path.isdir(cache_dir_1))

        resp_2 = session_2.get("https://now.httpbin.org")

        self.assertTrue(os.path.isdir(cache_dir_2))
        self.assertNotEqual(resp_1.text, resp_2.text)

        shutil.rmtree(cache_dir_2)

        self.assertFalse(os.path.isdir(cache_dir_2))

        resp_3 = session_2.get("https://now.httpbin.org")

        self.assertTrue(os.path.isdir(cache_dir_2))
        self.assertNotEqual(resp_2.text, resp_3.text)

        session_3 = CachedSession(
            file_cache_directory=cache_dir_1, fallback_cache_duration=2000
        )

        resp_4 = session_3.get("https://now.httpbin.org")

        self.assertEqual(resp_1.text, resp_4.text)

        shutil.rmtree(cache_dir_1)
        shutil.rmtree(cache_dir_2)
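In application code the cache directory is often a throwaway or dedicated path rather than a hard-coded name. A minimal sketch, assuming only the file_cache_directory and fallback_cache_duration arguments shown above:

import shutil
import tempfile

# create a temporary directory to hold the file-based HTTP cache
cache_dir = tempfile.mkdtemp(prefix="http-cache-")

session = CachedSession(
    file_cache_directory=cache_dir, fallback_cache_duration=300
)
response = session.get("https://example.com/api/data")

# remove the cache once it is no longer needed
shutil.rmtree(cache_dir)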
Example #6
    def test_cache_control_no_cache_overwrites_custom_heuristic(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET,
            "https://now.httpbin.org",
            body=request_callback,
            adding_headers={"Cache-Control": "no-cache"},
        )
        session = CachedSession(file_cache_directory=file_cache_directory)

        # with Cache-Control: no-cache set by the server, no response is
        # served from the cache, so all three bodies are different
        response_1 = session.get("https://now.httpbin.org")
        response_2 = session.get("https://now.httpbin.org")
        response_3 = session.get("https://now.httpbin.org")

        self.assertNotEqual(response_1.text, response_2.text)
        self.assertNotEqual(response_2.text, response_3.text)
Example #7
    def test_cache_control_max_age_overwrites_custom_heuristic(self):
        def request_callback(request, uri, response_headers):
            return [200, response_headers, json.dumps({"epoch": time.time()})]

        httpretty.register_uri(
            httpretty.GET,
            "https://now.httpbin.org",
            body=request_callback,
            adding_headers={"Cache-Control": "max-age=2"},
        )

        session = CachedSession(file_cache_directory=file_cache_directory)

        with freeze_time("2012-01-14 12:00:01") as freezer:

            # with max-age=2 from the server, requests 1 and 2 (1s apart)
            # share the cached body, while request 3, sent 2s after the
            # first, is past max-age and gets fresh data
            response_1 = session.get("https://now.httpbin.org")
            freezer.tick()
            response_2 = session.get("https://now.httpbin.org")
            freezer.tick()
            response_3 = session.get("https://now.httpbin.org")

            self.assertEqual(response_1.text, response_2.text)
            self.assertNotEqual(response_2.text, response_3.text)
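Taken together, the last two tests show that server-sent Cache-Control headers take precedence over the client-side fallback. A usage sketch under that assumption (the endpoint URL is illustrative):

# the fallback only applies to responses without caching headers;
# a response with Cache-Control: max-age=60 is cached for 60 seconds,
# while Cache-Control: no-cache responses are never cached
session = CachedSession(fallback_cache_duration=3600)

first = session.get("https://example.com/api/data")
second = session.get("https://example.com/api/data")  # possibly from cache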
Example #8
class DiscourseDocs:
    """
    A basic model class for retrieving Documentation content
    from a Discourse installation through the API
    """
    def __init__(self, base_url, frontpage_id, session_class=CachedSession):
        """
        @param base_url: The Discourse URL (e.g. https://discourse.example.com)
        @param frontpage_id: The ID of the frontpage topic in Discourse.
                            This topic should also contain the navigation.
        """

        self.base_url = base_url.rstrip("/")
        self.frontpage_id = frontpage_id
        self.session = session_class(expire_after=300)

    def get_topic(self, path):
        """
        Retrieve topic object by path
        """

        response = self.session.get(f"{self.base_url}/t/{path}.json",
                                    allow_redirects=False)
        response.raise_for_status()

        if response.status_code >= 300:
            raise RedirectFoundError(response=response)

        return response.json()

    def parse_topic(self, topic):
        return {
            "title": topic["title"],
            "body_html": topic["post_stream"]["posts"][0]["cooked"],
            "updated": dateutil.parser.parse(
                topic["post_stream"]["posts"][0]["updated_at"]
            ),
            "forum_link": f"{self.base_url}/t/{topic['slug']}/{topic['id']}",
            "path": f"/t/{topic['slug']}/{topic['id']}",
        }

    def get_frontpage(self):
        # Get topic data
        topic = self.get_topic(self.frontpage_id)
        frontpage = self.parse_topic(topic)

        # Split HTML into nav and body
        frontpage_html = frontpage["body_html"]
        frontpage_soup = BeautifulSoup(frontpage_html, features="html.parser")
        frontpage_splitpoint = frontpage_soup.find(re.compile("^h[1-6]$"),
                                                   text="Content")
        content_elements = frontpage_splitpoint.fetchPreviousSiblings()
        nav_elements = frontpage_splitpoint.fetchNextSiblings()

        # Update frontpage
        frontpage["body_html"] = "\n".join(map(str,
                                               reversed(content_elements)))
        nav_html = "\n".join(map(str, nav_elements))

        return frontpage, nav_html

    def get_document(self, path):
        """
        Retrieve and return relevant data about a document:
        - Title
        - HTML content
        - Navigation content
        """

        document, nav_html = self.get_frontpage()

        if f"/t/{path}" != document["path"]:
            topic = self.get_topic(path)
            document = self.parse_topic(topic)

        return document, nav_html
Example #9
class DiscourseDocs:
    """
    A basic model class for retrieving Documentation content
    from a Discourse installation through the API
    """
    def __init__(self, base_url, frontpage_id, session_class=CachedSession):
        """
        @param base_url: The Discourse URL (e.g. https://discourse.example.com)
        @param frontpage_id: The ID of the frontpage topic in Discourse.
                            This topic should also contain the navigation.
        """

        self.base_url = base_url.rstrip("/")
        self.frontpage_id = frontpage_id
        self.session = session_class(expire_after=300)

    def get_topic(self, path):
        """
        Retrieve topic object by path
        """

        response = self.session.get(f"{self.base_url}/t/{path}.json",
                                    allow_redirects=False)
        response.raise_for_status()

        if response.status_code >= 300:
            raise RedirectFoundError(response=response)

        return response.json()

    def parse_topic(self, topic):
        return {
            "title": topic["title"],
            "body_html": topic["post_stream"]["posts"][0]["cooked"],
            "updated": dateutil.parser.parse(
                topic["post_stream"]["posts"][0]["updated_at"]
            ),
            "forum_link": f"{self.base_url}/t/{topic['slug']}/{topic['id']}",
            "path": f"/t/{topic['slug']}/{topic['id']}",
        }

    def get_frontpage(self):
        # Get topic data
        topic = self.get_topic(self.frontpage_id)
        frontpage = self.parse_topic(topic)

        # Split HTML into nav and body
        soup = BeautifulSoup(frontpage["body_html"], features="html.parser")
        splitpoint = soup.find(re.compile("^h[1-6]$"), text="Content")

        if splitpoint:
            body_elements = splitpoint.fetchPreviousSiblings()
            frontpage["body_html"] = "\n".join(
                map(str, reversed(body_elements)))

            nav_elements = splitpoint.fetchNextSiblings()
            nav_html = "\n".join(map(str, nav_elements))
        else:
            nav_html = ("<p><em>"
                        "Error: Failed to parse navigation from"
                        f' <a href="{frontpage["forum_link"]}">'
                        "the frontpage topic</a>."
                        " Please check the format."
                        "</p></em>")

        return frontpage, nav_html

    def process_html(self, html):
        """
        Post-process the HTML output from Discourse to
        remove 'NOTE TO EDITORS' sections
        """

        soup = BeautifulSoup(html, features="html.parser")
        notes_to_editors_spans = soup.find_all(text="NOTE TO EDITORS")

        for span in notes_to_editors_spans:
            container = span.parent.parent.parent.parent

            # only remove the note if it sits inside a quote <aside> block
            if container.name == "aside" and "quote" in container.attrs.get(
                "class", []
            ):
                container.decompose()

        return soup.prettify()

    def get_document(self, path):
        """
        Retrieve and return relevant data about a document:
        - Title
        - HTML content
        - Navigation content
        """

        document, nav_html = self.get_frontpage()

        if f"/t/{path}" != document["path"]:
            topic = self.get_topic(path)
            document = self.parse_topic(topic)

        document["body_html"] = self.process_html(document["body_html"])

        return document, nav_html
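A minimal usage sketch for the class above. The Discourse URL and topic paths are placeholders, and the assumption that RedirectFoundError exposes the redirect response via a .response attribute follows from how it is raised in get_topic:

discourse_docs = DiscourseDocs(
    base_url="https://discourse.example.com",
    frontpage_id="documentation-frontpage/42",
)

try:
    document, nav_html = discourse_docs.get_document("getting-started/101")
except RedirectFoundError as redirect_error:
    # the topic has moved; follow the Location header of the redirect
    new_location = redirect_error.response.headers.get("Location")
else:
    print(document["title"])
    print(document["forum_link"])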