def crawl_detail(self, manga: Manga) -> None:
    """Fetch the manga's detail page over HTTP and register its chapters.

    Downloads ``manga.url`` with ``requests``, parses the response body as
    HTML and, for every element matched by the XPath expression
    ``self.re_chapter_path``, builds a ``Chapter`` whose title is the
    element's first text node (stripped, tabs replaced by spaces) and whose
    URL is the element's ``href`` resolved against ``self.base_url``. Each
    chapter is added to ``manga`` via ``manga.add_chapter``.

    Exceptions raised by ``requests.get`` itself (including timeouts) are
    not caught here and propagate to the caller.

    Raises:
        ConnectionError: if the site answers with a status code other
            than 200.
    """
    start_url = manga.url
    # Always bound the request: without a timeout, requests may wait
    # indefinitely on a server that accepts the connection but never replies.
    response = requests.get(start_url, timeout=30)
    if response.status_code != 200:
        raise ConnectionError(_(F'Could not connect with {start_url} site, status code: {response.status_code}'))

    tree = html.fromstring(response.content)
    for element in tree.xpath(self.re_chapter_path):
        title = str(element.xpath('text()')[0]).strip().replace('\t', ' ')
        # Chapter links may be relative; resolve them against the site root.
        url = urljoin(self.base_url, str(element.xpath('@href')[0]))
        chapter = Chapter(manga, title)
        chapter.url = url
        manga.add_chapter(chapter)
def crawl_detail(self, manga: Manga) -> None:
    """Load the manga's detail page in a browser and register its chapters.

    Opens ``manga.url`` through a ``SeleniumDriver`` context, calls
    ``wait_for_page`` with the chapter XPath (presumably blocking until the
    chapter list has rendered -- confirm against that helper), then takes
    the ``outerHTML`` of the first element matched by ``//*`` and parses it.
    For every element matched by ``self.re_chapter_path`` a ``Chapter`` is
    created from the element's first text node and its ``href`` (resolved
    against ``self.base_url``) and added to ``manga``.

    Raises:
        ConnectionError: for any exception raised while loading or parsing
            the page. Parsing failures are reported with the same
            "Could not connect" message; the original exception is
            attached as ``__cause__``.
    """
    start_url = manga.url
    try:
        with SeleniumDriver() as driver:
            driver.get(start_url)
            wait_for_page(driver, self.re_chapter_path)
            content = driver.find_element_by_xpath('//*').get_attribute('outerHTML')
            tree = html.fromstring(content)

            # crawl for manga chapters
            for element in tree.xpath(self.re_chapter_path):
                title = str(element.xpath('text()')[0]).strip().replace('\t', ' ')
                # Chapter links may be relative; resolve them against the site root.
                url = urljoin(self.base_url, str(element.xpath('@href')[0]))
                chapter = Chapter(manga, title)
                chapter.url = url
                manga.add_chapter(chapter)
    except Exception as e:
        # Chain explicitly so the underlying failure stays visible in tracebacks.
        raise ConnectionError(_(F'Could not connect with {start_url} site, error message: {e}')) from e
def test_manga_1(self):
    """Check Manga title, downloaded flag, chapter list, clear_state and dump."""
    site = MangaSite('test_site')
    manga = Manga('test_manga', 'test_manga_url', site)
    self.assertEqual(manga.title, 'test_manga')

    # The downloaded flag is settable and readable.
    manga.downloaded = True
    self.assertTrue(manga.downloaded)

    # Adding a chapter makes the chapter list non-empty.
    chapter = Chapter(manga)
    manga.add_chapter(chapter)
    self.assertGreater(len(manga.chapters), 0)

    # clear_state resets both the flag and the chapter list.
    manga.clear_state()
    self.assertFalse(manga.downloaded)
    self.assertEqual(len(manga.chapters), 0)

    # If get_download_path raises, it must be an AttributeError mentioning
    # NoneType; a call that does not raise is accepted as well.
    try:
        manga.get_download_path(os.getcwd())
    except Exception as error:
        self.assertIsInstance(error, AttributeError)
        self.assertIn("NoneType", str(error))

    # dump() must return a non-empty bytes object.
    serialized = manga.dump()
    self.assertIsInstance(serialized, bytes)
    self.assertGreater(len(serialized), 0)