Example #1
0
 def download(self, chapter: Chapter) -> int:
     """Fetch every page of *chapter* by walking the site's "next" links.

     Starts from chapter.url and keeps following the next-button href as
     long as it stays within the same chapter.  Returns the number of
     pages collected; raises ConnectionError on any non-200 response.
     """
     start_url = chapter.url
     url = start_url
     chapter.clear_state()
     done = False
     while not done:
         response = requests.get(url)
         if response.status_code != 200:
             raise ConnectionError(
                 _(F'Could not connect with {start_url} site, status code: {response.status_code}'))
         tree = html.fromstring(response.content)
         page_src = str(tree.xpath(self.re_download_path)[0])
         chapter.add_page(requests.get(page_src, stream=True).content)
         next_link = str(tree.xpath(self.re_download_next_path)[0])
         if next_link.startswith('/'):
             # site-relative link - make it absolute
             next_link = urljoin(self.base_url, next_link)
         if start_url in next_link:
             # still inside the current chapter - keep walking forward
             url = next_link
         else:
             # the next button now points at another chapter: all pages read
             done = True
     return chapter.number_of_pages()
Example #2
0
 def crawl_detail(self, manga: Manga) -> None:
     """Populate *manga* with chapters scraped from its detail page.

     Raises ConnectionError when the page does not answer with HTTP 200.
     """
     start_url = manga.url
     response = requests.get(start_url)
     if response.status_code != 200:
         raise ConnectionError(_(F'Could not connect with {start_url} site, status code: {response.status_code}'))
     tree = html.fromstring(response.content)
     for link in tree.xpath(self.re_chapter_path):
         # normalise the anchor text into a flat, tab-free title
         chapter_title = str(link.xpath('text()')[0]).strip().replace('\t', ' ')
         chapter = Chapter(manga, chapter_title)
         chapter.url = urljoin(self.base_url, str(link.xpath('@href')[0]))
         manga.add_chapter(chapter)
Example #3
0
 def download(self, chapter: Chapter) -> int:
     """Download all pages of *chapter* from a Selenium-rendered DOM.

     Returns the number of pages stored on the chapter.
     Raises ConnectionError when the site cannot be reached or parsed.
     """
     start_url = chapter.url
     try:
         with SeleniumDriver() as driver:
             driver.get(start_url)
             # block until the elements matched by re_download_path exist
             wait_for_page(driver, self.re_download_path)
             chapter.clear_state()
             content = driver.find_element_by_xpath('//*').get_attribute('outerHTML')
             tree = html.fromstring(content)
             for element in tree.xpath(self.re_download_path):
                 image_src = str(element.xpath('@src')[0])
                 image = requests.get(image_src, stream=True).content
                 chapter.add_page(image)
         return chapter.number_of_pages()
     except Exception as e:
         # chain the original exception so the real cause stays in the traceback
         raise ConnectionError(_(F'Could not connect with {start_url} site, error message: {e}')) from e
Example #4
0
 def crawl_detail(self, manga: Manga) -> None:
     """Populate *manga* with chapters scraped via a Selenium-rendered DOM.

     Raises ConnectionError when the site cannot be reached or parsed.
     """
     start_url = manga.url
     try:
         with SeleniumDriver() as driver:
             driver.get(start_url)
             # block until the elements matched by re_chapter_path exist
             wait_for_page(driver, self.re_chapter_path)
             content = driver.find_element_by_xpath('//*').get_attribute('outerHTML')
             tree = html.fromstring(content)
             # crawl for manga chapters
             for element in tree.xpath(self.re_chapter_path):
                 title = str(element.xpath('text()')[0]).strip().replace('\t', ' ')
                 url = urljoin(self.base_url, str(element.xpath('@href')[0]))
                 chapter = Chapter(manga, title)
                 chapter.url = url
                 manga.add_chapter(chapter)
     except Exception as e:
         # chain the original exception so the real cause stays in the traceback
         raise ConnectionError(_(F'Could not connect with {start_url} site, error message: {e}')) from e
Example #5
0
    def test_make_pdf2(self):
        """
        Make pdf from pages in memory - simulated by manually added pages
        """
        directory_name = os.path.dirname(__file__)
        dummy_manga_site = MangaSite('test_site')
        dummy_manga = Manga('test_manga', 'test_manga_url', dummy_manga_site)
        dummy_chapter = Chapter(dummy_manga, 'test_chapter_title')

        self.assertEqual(dummy_chapter.number_of_pages(), 0)

        source_images_dir = os.path.join(directory_name, 'images')
        test_tmp_dir = os.path.join(directory_name, RESULTS_DIRECTORY)
        # ensure the temp dir is removed even when an assertion below fails
        self.addCleanup(shutil.rmtree, test_tmp_dir, ignore_errors=True)
        # sorted() makes the page order deterministic across filesystems
        for file_name in sorted(os.listdir(source_images_dir)):
            file_path = os.path.join(source_images_dir, file_name)
            if os.path.isfile(file_path):
                with open(file_path, 'rb') as f:
                    dummy_chapter.add_page(f.read())
        result, path_to_pdf = dummy_chapter.make_pdf(test_tmp_dir)

        self.assertTrue(result)
        self.assertEqual(dummy_chapter.number_of_pages(), 4)
        self.assertTrue(os.path.isfile(path_to_pdf))
        self.assertGreater(os.path.getsize(path_to_pdf), 0)
        os.unlink(path_to_pdf)
Example #6
0
    def test_make_pdf1(self):
        """
        Make pdf from previously downloaded images - simulated on copied files
        """
        directory_name = os.path.dirname(__file__)
        dummy_manga_site = MangaSite('test_site')
        dummy_manga = Manga('test_manga', 'test_manga_url', dummy_manga_site)
        dummy_chapter = Chapter(dummy_manga, 'test_chapter_title')

        self.assertEqual(dummy_chapter.number_of_pages(), 0)

        source_images_dir = os.path.join(directory_name, 'images')
        test_tmp_dir = os.path.join(directory_name, RESULTS_DIRECTORY)
        # ensure the temp dir is removed even when an assertion below fails
        self.addCleanup(shutil.rmtree, test_tmp_dir, ignore_errors=True)
        images_dir = dummy_chapter.get_download_path(test_tmp_dir)
        os.makedirs(images_dir, exist_ok=True)
        # sorted() makes the copy order deterministic across filesystems
        for file_name in sorted(os.listdir(source_images_dir)):
            file_path = os.path.join(source_images_dir, file_name)
            if os.path.isfile(file_path):
                shutil.copy(file_path, images_dir)
        result, path_to_pdf = dummy_chapter.make_pdf(test_tmp_dir)

        self.assertTrue(result)
        # pages live on disk, not in memory, so the in-memory count stays 0
        self.assertEqual(dummy_chapter.number_of_pages(), 0)
        self.assertTrue(os.path.isfile(path_to_pdf))
        self.assertGreater(os.path.getsize(path_to_pdf), 0)
        os.unlink(path_to_pdf)
Example #7
0
 def download(self, chapter: Chapter) -> int:
     """Download all pages of *chapter* by reconstructing numbered page URLs.

     The first page URL is split into prefix, zero-padded page number and
     extension; the remaining page URLs are generated from that pattern.
     Returns the number of pages collected; raises ConnectionError on a
     non-200 response for the chapter page.
     """
     start_url = chapter.url
     url = start_url
     chapter.clear_state()
     with requests.Session() as session:
         response = session.get(url)
         if response.status_code != 200:
             raise ConnectionError(
                 _(F'Could not connect with {start_url} site, status code: {response.status_code}'))
         tree = html.fromstring(response.content)
         first_page = str(tree.xpath(self.re_download_path)[0])
         # split '<prefix>-<NNN>.<ext>' into its three parts
         dot_idx = first_page.rfind('.')
         extension = first_page[dot_idx:]
         stem = first_page[:dot_idx]
         width = len(stem.split('-')[-1])
         prefix = stem[:stem.rfind('-')]
         # the page-select dropdown tells us how many pages exist
         total_pages = len(tree.xpath('/html/body/div[2]/div[4]/div/div/span/select[2]/option'))
         for number in range(1, total_pages + 1):
             page_src = F'{prefix}-{number:0{width}}{extension}'
             chapter.add_page(session.get(page_src, stream=True).content)
     return chapter.number_of_pages()
Example #8
0
    def test_manga_1(self):
        """Manga state handling: flags, chapters, clear_state, dump, path errors."""
        dummy_manga_site = MangaSite('test_site')
        dummy_manga1 = Manga('test_manga', 'test_manga_url', dummy_manga_site)

        self.assertEqual(dummy_manga1.title, 'test_manga')
        dummy_manga1.downloaded = True
        self.assertTrue(dummy_manga1.downloaded)

        dummy_chapter1 = Chapter(dummy_manga1)
        dummy_manga1.add_chapter(dummy_chapter1)
        self.assertGreater(len(dummy_manga1.chapters), 0)
        dummy_manga1.clear_state()
        self.assertFalse(dummy_manga1.downloaded)
        self.assertEqual(len(dummy_manga1.chapters), 0)
        # the old try/except passed silently when no exception was raised;
        # assertRaises fails the test if get_download_path stops raising
        with self.assertRaises(AttributeError) as ctx:
            dummy_manga1.get_download_path(os.getcwd())
        self.assertIn("NoneType", str(ctx.exception))
        dump = dummy_manga1.dump()
        self.assertIsInstance(dump, bytes)
        self.assertGreater(len(dump), 0)
Example #9
0
 def crawl_chapter(self, chapter: Chapter):
     """Download *chapter* with the crawler registered for the current site."""
     crawler = self.__get_crawler(self.cwd_site.site_name)
     if not crawler:
         # no crawler known for this site - nothing to do
         return
     crawler.download(chapter)
     chapter.set_downloaded(True)
Example #10
0
    def test_chapter_1(self):
        """Chapter defaults, downloaded/in_memory flags, clear_state, dump, path errors."""
        dummy_manga_site = MangaSite('test_site')
        dummy_manga = Manga('test_manga', 'test_manga_url', dummy_manga_site)

        dummy_chapter1 = Chapter(dummy_manga)
        self.assertEqual(dummy_chapter1.number_of_pages(), 0)
        self.assertIsNone(dummy_chapter1.title)

        dummy_chapter2 = Chapter(dummy_manga, 'test_chapter_title')
        self.assertEqual(dummy_chapter2.title, 'test_chapter_title')
        dummy_chapter2.set_downloaded(True)
        self.assertTrue(dummy_chapter2.downloaded)
        self.assertTrue(dummy_chapter2.in_memory)
        dummy_chapter2.clear_state()
        self.assertFalse(dummy_chapter2.downloaded)
        self.assertFalse(dummy_chapter2.in_memory)
        # the old try/except passed silently when no exception was raised;
        # assertRaises fails the test if get_download_path stops raising
        with self.assertRaises(AttributeError) as ctx:
            dummy_chapter2.get_download_path(os.getcwd())
        self.assertIn("NoneType", str(ctx.exception))
        dump = dummy_chapter2.dump()
        self.assertIsInstance(dump, bytes)
        self.assertGreater(len(dump), 0)