Example #1
    def crawl_detail(self, manga: Manga) -> None:
        start_url = manga.url
        response = requests.get(start_url)
        if response.status_code == 200:
            tree = html.fromstring(response.content)
            # crawl for manga chapters
            for element in tree.xpath(self.re_chapter_path):
                title = str(element.xpath('text()')[0]).strip().replace('\t', ' ')
                url = urljoin(self.base_url, str(element.xpath('@href')[0]))
                chapter = Chapter(manga, title)
                chapter.url = url
                manga.add_chapter(chapter)
        else:
            raise ConnectionError(
                _(f'Could not connect with {start_url} site, status code: {response.status_code}'))
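A minimal usage sketch of the method above, assuming a hypothetical concrete crawler class (here called ExampleSiteCrawler) that supplies base_url, re_chapter_path and the crawl_detail method; only MangaSite, Manga, crawl_detail, chapters, title and url come from the source:

    # Illustrative only: ExampleSiteCrawler is an assumed subclass providing
    # base_url, re_chapter_path and the crawl_detail method shown above.
    site = MangaSite('example_site')
    manga = Manga('example_manga', 'https://example.invalid/manga/1', site)
    crawler = ExampleSiteCrawler()
    crawler.crawl_detail(manga)            # populates manga with Chapter objects
    for chapter in manga.chapters:
        print(chapter.title, chapter.url)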
Example #2
    def test_make_pdf2(self):
        """
        Make a PDF from pages held in memory - simulated by manually adding pages
        """
        directory_name = os.path.dirname(__file__)
        dummy_manga_site = MangaSite('test_site')
        dummy_manga = Manga('test_manga', 'test_manga_url', dummy_manga_site)
        dummy_chapter = Chapter(dummy_manga, 'test_chapter_title')

        self.assertTrue(dummy_chapter.number_of_pages() == 0)

        source_images_dir = os.path.join(directory_name, 'images')
        test_tmp_dir = os.path.join(directory_name, RESULTS_DIRECTORY)
        for file in os.listdir(source_images_dir):
            file_path = os.path.join(source_images_dir, file)
            if os.path.isfile(file_path):
                with open(file_path, 'rb') as f:
                    dummy_chapter.add_page(f.read())
        result, path_to_pdf = dummy_chapter.make_pdf(test_tmp_dir)

        self.assertTrue(result)
        self.assertTrue(dummy_chapter.number_of_pages() == 4)
        self.assertTrue(os.path.isfile(path_to_pdf))
        self.assertTrue(os.path.getsize(path_to_pdf) > 0)
        os.unlink(path_to_pdf)
        shutil.rmtree(test_tmp_dir, ignore_errors=True)
Example #3
    def test_make_pdf1(self):
        """
        Make a PDF from previously downloaded images - simulated with copied files
        """
        directory_name = os.path.dirname(__file__)
        dummy_manga_site = MangaSite('test_site')
        dummy_manga = Manga('test_manga', 'test_manga_url', dummy_manga_site)
        dummy_chapter = Chapter(dummy_manga, 'test_chapter_title')

        self.assertTrue(dummy_chapter.number_of_pages() == 0)

        source_images_dir = os.path.join(directory_name, 'images')
        test_tmp_dir = os.path.join(directory_name, RESULTS_DIRECTORY)
        images_dir = dummy_chapter.get_download_path(test_tmp_dir)
        os.makedirs(images_dir, exist_ok=True)
        for file in os.listdir(source_images_dir):
            file_path = os.path.join(source_images_dir, file)
            if os.path.isfile(file_path):
                shutil.copy(file_path, images_dir)
        result, path_to_pdf = dummy_chapter.make_pdf(test_tmp_dir)

        self.assertTrue(result)
        self.assertTrue(dummy_chapter.number_of_pages() == 0)
        self.assertTrue(os.path.isfile(path_to_pdf))
        self.assertTrue(os.path.getsize(path_to_pdf) > 0)
        os.unlink(path_to_pdf)
        shutil.rmtree(test_tmp_dir, ignore_errors=True)
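Taken together, the two tests exercise both inputs make_pdf accepts: pages previously added with add_page are kept in memory (so number_of_pages() reports them), while image files already present under get_download_path are read from disk and leave number_of_pages() at zero. A minimal sketch of both paths, assuming an already constructed chapter and placeholder names output_dir and image_bytes:

    # In-memory path: append raw image bytes, then render the PDF.
    chapter.add_page(image_bytes)                  # image_bytes is a placeholder
    result, pdf_path = chapter.make_pdf(output_dir)

    # On-disk path: images were saved earlier under the chapter's download directory.
    images_dir = chapter.get_download_path(output_dir)
    result, pdf_path = chapter.make_pdf(output_dir)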
Example #4
    def crawl_detail(self, manga: Manga) -> None:
        start_url = manga.url
        try:
            with SeleniumDriver() as driver:
                driver.get(start_url)
                wait_for_page(driver, self.re_chapter_path)
                content = driver.find_element_by_xpath('//*').get_attribute('outerHTML')
                tree = html.fromstring(content)
                # crawl for manga chapters
                for element in tree.xpath(self.re_chapter_path):
                    title = str(element.xpath('text()')[0]).strip().replace('\t', ' ')
                    url = urljoin(self.base_url, str(element.xpath('@href')[0]))
                    chapter = Chapter(manga, title)
                    chapter.url = url
                    manga.add_chapter(chapter)
        except Exception as e:
            raise ConnectionError(
                _(f'Could not connect with {start_url} site, error message: {e}'))
Example #5
    def test_manga_1(self):
        """
        Check basic Manga behaviour - downloaded flag, chapters, clear_state,
        download path error and dump
        """
        dummy_manga_site = MangaSite('test_site')
        dummy_manga1 = Manga('test_manga', 'test_manga_url', dummy_manga_site)

        self.assertTrue(dummy_manga1.title == 'test_manga')
        dummy_manga1.downloaded = True
        self.assertTrue(dummy_manga1.downloaded)

        dummy_chapter1 = Chapter(dummy_manga1)
        dummy_manga1.add_chapter(dummy_chapter1)
        self.assertTrue(len(dummy_manga1.chapters) > 0)
        dummy_manga1.clear_state()
        self.assertFalse(dummy_manga1.downloaded)
        self.assertTrue(len(dummy_manga1.chapters) == 0)
        try:
            dummy_manga1.get_download_path(os.getcwd())
        except Exception as e:
            self.assertIsInstance(e, AttributeError)
            self.assertTrue("NoneType" in str(e))
        dump = dummy_manga1.dump()
        self.assertIsInstance(dump, bytes)
        self.assertTrue(len(dump) > 0)
Example #6
    def test_manga_site_1(self):
        """
        Check basic MangaSite behaviour - adding mangas, clear_state and dump
        """
        dummy_manga_site1 = MangaSite()
        self.assertIsNone(dummy_manga_site1.site_name)

        dummy_manga_site2 = MangaSite('test_site')
        dummy_manga1 = Manga('test_manga', 'test_manga_url', dummy_manga_site2)

        dummy_manga_site2.add_manga(dummy_manga1)
        self.assertTrue(len(dummy_manga_site2.mangas) > 0)
        dummy_manga_site2.clear_state()
        self.assertTrue(len(dummy_manga_site2.mangas) == 0)
        dump = dummy_manga_site2.dump()
        self.assertIsInstance(dump, bytes)
        self.assertTrue(len(dump) > 0)
Example #7
    def crawl_index(self, manga_site: MangaSite) -> None:
        start_url = urljoin(self.base_url, self.manga_index)
        response = requests.get(start_url)
        if response.status_code == 200:
            manga_site.url = self.base_url
            tree = html.fromstring(response.content)
            # crawl for mangas listed in the site index
            for element in tree.xpath(self.re_index_path):
                title = str(element.xpath('text()')[0]).strip().replace('\t', ' ')
                url = urljoin(self.base_url, str(element.xpath('@href')[0]))
                manga = Manga(title, url, manga_site)
                manga_site.add_manga(manga)
        else:
            raise ConnectionError(
                _(f'Could not connect with {start_url} site, status code: {response.status_code}'))
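A minimal usage sketch of the index crawler, again assuming a hypothetical concrete class ExampleSiteCrawler that defines base_url, manga_index and re_index_path; MangaSite, crawl_index, mangas, title and url come from the source:

    # Illustrative only: ExampleSiteCrawler is an assumed subclass providing
    # base_url, manga_index, re_index_path and the crawl_index method shown above.
    site = MangaSite('example_site')
    crawler = ExampleSiteCrawler()
    crawler.crawl_index(site)              # populates the site with Manga objects
    for manga in site.mangas:
        print(manga.title, manga.url)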
Example #8
    def test_chapter_1(self):
        """
        Check basic Chapter behaviour - downloaded and in_memory flags, clear_state,
        download path error and dump
        """
        dummy_manga_site = MangaSite('test_site')
        dummy_manga = Manga('test_manga', 'test_manga_url', dummy_manga_site)

        dummy_chapter1 = Chapter(dummy_manga)
        self.assertTrue(dummy_chapter1.number_of_pages() == 0)
        self.assertIsNone(dummy_chapter1.title)

        dummy_chapter2 = Chapter(dummy_manga, 'test_chapter_title')
        self.assertTrue(dummy_chapter2.title == 'test_chapter_title')
        dummy_chapter2.set_downloaded(True)
        self.assertTrue(dummy_chapter2.downloaded)
        self.assertTrue(dummy_chapter2.in_memory)
        dummy_chapter2.clear_state()
        self.assertFalse(dummy_chapter2.downloaded)
        self.assertFalse(dummy_chapter2.in_memory)
        try:
            dummy_chapter2.get_download_path(os.getcwd())
        except Exception as e:
            self.assertIsInstance(e, AttributeError)
            self.assertTrue("NoneType" in str(e))
        dump = dummy_chapter2.dump()
        self.assertIsInstance(dump, bytes)
        self.assertTrue(len(dump) > 0)
Example #9
    def crawl_index(self, manga_site: MangaSite) -> None:
        start_url = urljoin(self.base_url, self.manga_index)
        try:
            with SeleniumDriver() as driver:
                collected_all_pages = False
                driver.get(start_url)
                wait_for_page(driver, self.re_index_path)
                manga_site.url = self.base_url
                while not collected_all_pages:
                    content = driver.find_element_by_xpath('//*').get_attribute('outerHTML')
                    tree = html.fromstring(content)
                    # crawl for mangas listed on the current index page
                    for element in tree.xpath(self.re_index_path):
                        title = str(element.xpath('text()')[0]).strip().replace('\t', ' ')
                        url = urljoin(self.base_url, str(element.xpath('@href')[0]))
                        manga = Manga(title, url, manga_site)
                        manga_site.add_manga(manga)
                    # follow the 'Next' link if present; otherwise the index is exhausted
                    for element2 in tree.xpath(self.re_index_next_page):
                        if 'next' in element2.xpath('text()')[0].lower():
                            driver.get(urljoin(self.base_url, element2.xpath('@href')[0]))
                            break
                    else:
                        collected_all_pages = True
        except Exception as e:
            raise ConnectionError(
                _(f'Could not connect with {start_url} site, error message: {e}'))
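The pagination above leans on Python's for/else: the else branch runs only when the loop over next-page candidates finishes without a break, which ends the outer while loop. A standalone sketch of the same control flow, with a placeholder pages list:

    # for/else: the else block runs only when the loop completed without a break.
    pages = [['chapter list', 'Next'], ['chapter list']]   # placeholder link texts per page
    page_index = 0
    collected_all_pages = False
    while not collected_all_pages:
        for link_text in pages[page_index]:
            if 'next' in link_text.lower():
                page_index += 1              # follow the next page and keep looping
                break
        else:
            collected_all_pages = True       # no next link on this page, stop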