Ejemplo n.º 1
0
    def test_get(self, requests_mock):
        """Check that a repeated ``get`` for one URL hits the network once.

        With ``force_fetch`` switched off, the same URL is requested twice
        through ``request_dispatcher``. The mock must record a single call,
        and the file at ``cache_path_for_url`` must hold the mocked body.
        """
        url = "https://example.com/1"
        body = "example 1"

        config.config["force_fetch"] = False
        requests_mock.get(url, text=body)

        for _ in range(2):
            request_dispatcher.get(url)

        assert requests_mock.call_count == 1

        cache_path = request_dispatcher.cache_path_for_url(url)
        with open(cache_path) as cached_file:
            assert cached_file.read() == body
Ejemplo n.º 2
0
    def fetch_chapters_info(self):
        """Return the chapters listed on this work's AO3 navigation page.

        Fetches ``/works/<ao3_id>/navigate`` through ``request_dispatcher``
        and returns the ``chapters`` attribute of the resulting
        ``AO3NavigationPage``.
        """
        url = f"https://www.archiveofourown.org/works/{self.ao3_id}/navigate"
        return AO3NavigationPage(request_dispatcher.get(url)).chapters
Ejemplo n.º 3
0
 def threadmarks_categories(self):
     """Map each threadmark category name to its category number.

     Any thread page carries the category tabs, but no single page is
     guaranteed to be fetched in every fetcher mode, so the threadmarks
     index page for the default category is used as the source.

     Keys are the tab link texts, stripped and lower-cased; values come
     from ``self.threadmark_category_number``.
     """
     index_url = self.threadmarks_index_url()
     print(f"Getting threadmarks categories from {index_url}")
     soup = bs4.BeautifulSoup(request_dispatcher.get(index_url), "lxml")

     categories = {}
     for link in soup.select(".block-tabHeader--threadmarkCategoryTabs a"):
         # Compute the key first, matching the evaluation order of the
         # equivalent dict comprehension.
         name = link.text.strip().lower()
         categories[name] = self.threadmark_category_number(link)
     return categories
Ejemplo n.º 4
0
    def generate_ffnet_chapters(self):
        """Yield an ``FFNetChapter`` for each chapter, counting up from 1.

        Each chapter page is fetched through ``request_dispatcher``. Its
        reviews page is fetched as well unless the ``comments`` config value
        is ``"none"``, in which case an empty string stands in for the
        reviews. Iteration stops after the chapter that reports itself as
        the last chapter or as a single-chapter story.
        """
        number = 0
        while True:
            number += 1
            url = self.generate_chapter_url(number)

            print(f"Fetching chapter {number} ({url})")
            page_html = request_dispatcher.get(url)

            # Reviews stay empty unless comments are enabled.
            reviews = ""
            if config.get("comments") != "none":
                reviews_url = self.generate_chapter_reviews_url(number)
                print(f"Fetching chapter {number} reviews ({reviews_url})")
                reviews = request_dispatcher.get(reviews_url)

            chapter = FFNetChapter(str(page_html), str(reviews))

            print("OK")
            yield chapter

            # Checked only once the consumer resumes the generator.
            finished = (chapter.is_last_chapter()
                        or chapter.is_single_chapter_story())
            if finished:
                print("Done")
                return
Ejemplo n.º 5
0
    def generate_rr_chapters(self):
        """Yield a ``RoyalroadChapter`` for every URL in the story index.

        Comment fetching is not implemented yet, so the comments HTML is an
        empty string whatever the ``comments`` config value is.
        """
        for url in self.story_index.chapter_urls:
            print(f"Fetching chapter {url}")
            page_html = request_dispatcher.get(url)

            if config.get("comments") == "none":
                comments = ""
            else:
                comments = ""  # TODO: fetch the chapter's comments

            chapter = RoyalroadChapter(str(page_html), str(comments))

            print("OK")
            yield chapter
Ejemplo n.º 6
0
    def generate_threadmarked_posts(self, *, category):
        """Yield a ``SpacebattlesPost`` for each message in ``category``.

        Starts at the threadmarks reader URL for the category's id and
        follows each page's ``next_page_url`` until one is falsy.

        Args:
            category: Key into ``self.threadmarks_categories``.
        """
        print(f"Fetching {category}")
        page_url = self.threadmarks_reader_url(
            self.threadmarks_categories[category])

        # The first page is always fetched, whatever the starting URL is.
        while True:
            print(f"Fetching posts from page {page_url}")
            soup = bs4.BeautifulSoup(request_dispatcher.get(page_url), "lxml")
            reader_page = SpacebattlesPage(soup)

            for msg in reader_page.messages:
                yield SpacebattlesPost(msg)

            page_url = reader_page.next_page_url
            if page_url:
                continue

            print("Done. Reached end of last page")
            return
Ejemplo n.º 7
0
    def generate_ao3_chapters(self):
        """Yield an ``AO3Chapter`` for each entry from ``fetch_chapters_info``.

        Each chapter is built from the HTML fetched at the entry's ``url``
        and carries the entry's ``date`` as ``date_published``. Comment
        fetching is not implemented yet, so the comments HTML is always an
        empty string.
        """
        for info in self.fetch_chapters_info():
            url = info.url

            print(f"Fetching chapter {url}")
            page_html = request_dispatcher.get(url)

            if config.get("comments") == "none":
                comments = ""
            else:
                comments = ""  # TODO: fetch the chapter's comments

            chapter = AO3Chapter(
                str(page_html),
                str(comments),
                date_published=info.date,
            )

            print("OK")
            yield chapter
Ejemplo n.º 8
0
    def generate_next_entries(self, start_url):
        """Yield ``WordpressEntry`` objects by walking "next" links.

        Begins at ``start_url`` and stops after the entry whose URL
        satisfies ``self.is_last_chapter_url``, or after an entry whose
        ``next_url`` is falsy.
        """
        url = start_url
        while True:
            print(f"Fetching chapter {url}")
            entry = WordpressEntry(request_dispatcher.get(url))
            print("OK")

            yield entry

            # The last-chapter check uses the URL of the entry just yielded.
            if self.is_last_chapter_url(url):
                print("Done. Matched last_chapter pattern.")
                break

            url = entry.next_url
            if not url:
                print("Done. Could not find a `next` link.")
                break
Ejemplo n.º 9
0
    def story_index(self):
        """Fetch this fiction's Royal Road page as a ``RoyalroadIndexPage``.

        The page URL is built from ``self.rr_id`` and fetched through
        ``request_dispatcher``.
        """
        url = f"https://www.royalroad.com/fiction/{self.rr_id}"
        return RoyalroadIndexPage(request_dispatcher.get(url))