def run(self):
    """Download the chapter (if not in memory) and save its images to disk.

    Emits progress through ``message`` and always signals completion
    through ``finished`` so the task pool can release this worker.
    """
    try:
        try:
            if not self.chapter.in_memory:
                try:
                    self.message.emit(
                        self.task_id, self.uuid,
                        _(F'Downloading chapter [{self.chapter.title}]...'))
                    self.ddmd.crawl_chapter(self.chapter)
                except Exception as e:
                    msg = _(
                        F'Could not download chapter [{self.chapter.title}], reason {e}'
                    )
                    self.message.emit(self.task_id, self.uuid, msg)
                    logger.error(msg)
                    return
            ret, path = self.chapter.save_images(self.ddmd.sites_location)
            if ret:
                self.message.emit(self.task_id, self.uuid,
                                  _(F'Saved chapter [{self.chapter.title}]'))
            else:
                self.message.emit(
                    self.task_id, self.uuid,
                    _(F'Could not save downloaded chapter [{self.chapter.title}], in path {path}'
                      ))
        finally:
            # Bug fix: the original skipped this emit when the download
            # failed (early return), leaving listeners waiting forever.
            # Mirrors the try/finally pattern of the plain download thread.
            self.finished.emit(self.uuid)
    except Exception as e:
        logger.error(str(e))
def run(self):
    """Download the chapter if needed, then convert it to a PDF file.

    Emits progress through ``message`` and always signals completion
    through ``finished`` so the task pool can release this worker.
    """
    try:
        try:
            if not self.chapter.in_memory and not self.chapter.chapter_images_present(
                    self.ddmd.sites_location):
                try:
                    self.message.emit(
                        self.task_id, self.uuid,
                        _(F'Downloading chapter [{self.chapter.title}]...'))
                    self.ddmd.crawl_chapter(self.chapter)
                except Exception as e:
                    self.message.emit(
                        self.task_id, self.uuid,
                        _(F'Could not download chapter [{self.chapter.title}], reason {e}'
                          ))
                    return
            ret, path = self.chapter.make_pdf(self.ddmd.sites_location)
            if ret:
                self.message.emit(
                    self.task_id, self.uuid,
                    _(F'Converted chapter [{self.chapter.title}]'))
            else:
                self.message.emit(
                    self.task_id, self.uuid,
                    _(F'Could not convert chapter [{self.chapter.title}] to PDF file {path}'
                      ))
        finally:
            # Bug fix: the original skipped this emit when the download
            # failed (early return), leaving listeners waiting forever.
            self.finished.emit(self.uuid)
    except Exception as e:
        logger.error(str(e))
def import_db(self):
    """Ask the user for a directory and load the manga DB from it."""
    chosen_dir = str(
        QFileDialog.getExistingDirectory(self, _('Select Directory')))
    if not chosen_dir:
        # Dialog cancelled - keep the current DB.
        return
    self.config.db_path = chosen_dir
    self.controller.load_db()
    self.main_widget = MangaSiteWidget(self.controller)
    self.main_widget.connect_actions(self.show_msg_on_status_bar,
                                     self.lock_gui, self.unlock_gui)
    self.setCentralWidget(self.main_widget)
    self.show_msg_on_status_bar(_(F'Loaded DB from {self.config.db_path}'))
def update_mangas(self):
    """Re-crawl the current site and refresh the displayed manga list."""
    try:
        self.lock_gui()
        refreshed_site = self.ddmd.crawl_site()
    except Exception as e:
        logger.warning(_(F'Could not refresh site, reason: {e}'))
        self.show_msg_on_status_bar(
            _('Could not refresh site, for more info look into log file.'))
    else:
        # Crawl succeeded - repopulate the list from the fresh data.
        self.load_stored_mangas(site=refreshed_site)
    finally:
        self.unlock_gui()
def init_menu_bar(self):
    """Build the File/Options menus with DB and appearance actions."""
    bar = self.menuBar()
    file_menu = bar.addMenu(_('File'))
    options_menu = bar.addMenu(_('Options'))

    # File -> Manga DB submenu.
    db_menu = QMenu(_('Manga DB'), self)
    file_menu.addMenu(db_menu)

    action_save = QAction(_('Save '), self)
    action_save.triggered.connect(self.save_sites)
    db_menu.addAction(action_save)

    action_import = QAction(_('Import'), self)
    action_import.triggered.connect(self.import_db)
    db_menu.addAction(action_import)

    action_relocate = QAction(_('Relocate'), self)
    action_relocate.triggered.connect(self.relocate_db)
    db_menu.addAction(action_relocate)

    # Options -> checkable toggles, initialised from the stored config.
    action_sot = QAction(_('Stay on top'), self)
    action_sot.setCheckable(True)
    action_sot.triggered.connect(
        lambda: self.change_sot(action_sot.isChecked()))
    options_menu.addAction(action_sot)
    action_sot.setChecked(self.config.sot)

    action_dark = QAction(_('Dark mode'), self)
    action_dark.setCheckable(True)
    action_dark.triggered.connect(
        lambda: self.redraw_palette(action_dark.isChecked()))
    options_menu.addAction(action_dark)
    action_dark.setChecked(self.config.dark_mode)
def update_chapters(self):
    """Re-crawl the current manga and refresh the displayed chapter list."""
    try:
        self.lock_gui()
        refreshed_manga = self.ddmd.crawl_manga()
    except Exception as e:
        logger.warning(
            _(F'Could not download chapters for {self.ddmd.get_current_manga().title}, reason: {e}'
              ))
        self.show_msg_on_status_bar(
            _(F'Could not download chapters for {self.ddmd.get_current_manga().title}'
              ))
    else:
        # Crawl succeeded - repopulate the list from the fresh data.
        self.load_stored_chapters(refreshed_manga)
    finally:
        self.unlock_gui()
def download(self, chapter: Chapter) -> int:
    """Walk the chapter's page-by-page navigation, downloading every image.

    :param chapter: chapter whose pages are fetched into memory
    :return: number of pages collected
    :raises ConnectionError: when a page request does not return HTTP 200
    """
    first_page_url = chapter.url
    current_url = first_page_url
    chapter.clear_state()
    while True:
        response = requests.get(current_url)
        if response.status_code != 200:
            raise ConnectionError(
                _(F'Could not connect with {first_page_url} site, status code: {response.status_code}'))
        tree = html.fromstring(response.content)
        image_src = str(tree.xpath(self.re_download_path)[0])
        chapter.add_page(requests.get(image_src, stream=True).content)
        nav_next = str(tree.xpath(self.re_download_next_path)[0])
        if nav_next.startswith('/'):
            nav_next = urljoin(self.base_url, nav_next)
        if first_page_url not in nav_next:
            # next button navigates to next chapter -> we are done
            break
        # next button navigates to next page of this chapter
        current_url = nav_next
    return chapter.number_of_pages()
def __init__(self, cwd):
    """Create the data directory if needed and read ``ddmd.ini`` from it.

    :param cwd: working directory under which the data directory lives
    """
    data_path = os.path.join(cwd, DATA_DIR)
    if not os.path.isdir(data_path):
        os.mkdir(data_path)
    self.config_path = os.path.join(data_path, 'ddmd.ini')
    self.config = configparser.ConfigParser()
    # Hard-coded fallbacks applied whenever the ini lacks a value.
    self._default_sot = False  # type: bool
    self._default_dark_mode = True  # type: bool
    self._default_db_path = get_sites_path(data_path)  # type: str
    self._default_max_threads = 5  # type: int
    self._default_log_level = 2  # type: int
    self._default_last_site = 0  # type: int
    # Effective values start at the defaults; read_config() overrides them.
    self._sot = self._default_sot  # type: bool
    self._dark_mode = self._default_dark_mode  # type: bool
    self._db_path = self._default_db_path  # type: str
    self._max_threads = self._default_max_threads  # type: int
    self._log_level = self._default_log_level  # type: int
    self._last_site = self._default_last_site  # type: int
    try:
        self.config.read(self.config_path)
        for section in ('Window', 'Manga'):
            if not self.config.has_section(section):
                self.config.add_section(section)
    except Exception as e:
        logger.error(_(F'Could not open config file due to: {e}'))
    self.read_config()
def start_gui(title, config):
    """Run the Qt application and exit the process with its return code."""
    exit_code = -1
    try:
        exit_code = DDMDApplication(title, config, sys.argv).exec_()
    except Exception as e:
        logger.critical(_(F'Application failed due to an error: {e}'))
    finally:
        # Exits with -1 when the application failed to start or crashed.
        sys.exit(exit_code)
def __getstate__(self):
    """Return a picklable copy of the instance state without page data.

    Pages are large binary blobs, so they are dropped from the pickle and
    ``in_memory`` is cleared to reflect that.
    """
    state = self.__dict__.copy()
    # Plain dict assignments cannot raise KeyError, so the original
    # try/except around these two lines was dead code and is removed.
    state['pages'] = []
    state['in_memory'] = False
    return state
def make_pdf(self, base_path: str) -> Tuple[bool, str]:
    """Render the chapter as a one-image-per-page PDF file.

    Uses the in-memory pages when present, otherwise falls back to the
    image files previously saved on disk.

    :param base_path: root directory of the sites storage
    :return: tuple of (success, path of the target PDF file)
    """
    pdf_dir = self.get_pdf_path(base_path)
    if not os.path.isdir(pdf_dir):
        os.makedirs(pdf_dir, exist_ok=True)
    pdf_path = os.path.join(pdf_dir, F'{self.get_title()}.pdf')
    builder = Canvas(pdf_path, pageCompression=False, pagesize=A4)
    builder.setTitle(self.title)
    if self.pages:
        # Pages are already downloaded into memory.
        for page in self.pages:
            image = Image.open(BytesIO(page))
            # Each PDF page matches its image's pixel dimensions.
            builder.setPageSize(image.size)
            builder.drawImage(ImageReader(image), 0, 0)
            builder.showPage()
    else:
        # Fall back to the images written by save_images().
        images_dir = self.get_download_path(base_path)
        if not os.path.isdir(images_dir):
            logger.warning(
                _('Could not convert to PDF, source path with images does not exist or images not downloaded'
                  ))
            return False, pdf_path
        try:
            files = sorted(
                os.path.join(images_dir, f) for f in os.listdir(images_dir)
                if os.path.isfile(os.path.join(images_dir, f)))
            for file in files:
                builder.setPageSize(Image.open(file).size)
                builder.drawImage(file, 0, 0)
                builder.showPage()
        except Exception as e:
            logger.error(
                _(F'Could not save PDF to {pdf_path}\nError message: {e}'))
            return False, pdf_path
    # pdf_dir (the parent of pdf_path) was ensured at the top, so the
    # original second existence check here was redundant and is removed.
    builder.save()
    self.converted = True
    logger.info(_(F'PDF saved to a {pdf_path} file.'))
    return True, pdf_path
def run(self):
    """Download one chapter into memory, reporting progress via signals."""
    try:
        try:
            self.message.emit(
                self.task_id, self.uuid,
                _(F'Downloading chapter [{self.chapter.title}]...'))
            self.ddmd.crawl_chapter(self.chapter)
        except Exception as e:
            failure = _(
                F'Could not download chapter [{self.chapter.title}], reason {e}'
            )
            self.message.emit(self.task_id, self.uuid, failure)
            logger.error(failure)
        else:
            self.message.emit(
                self.task_id, self.uuid,
                _(F'Downloaded chapter [{self.chapter.title}]'))
        finally:
            # Always signal completion so the task pool can move on.
            self.finished.emit(self.uuid)
    except Exception as e:
        logger.error(str(e))
def crawl_detail(self, manga: Manga) -> None:
    """Fetch the manga page and append a Chapter for every listed link.

    :param manga: manga whose chapter list is populated in place
    :raises ConnectionError: when the page does not return HTTP 200
    """
    start_url = manga.url
    response = requests.get(start_url)
    if response.status_code != 200:
        raise ConnectionError(_(F'Could not connect with {start_url} site, status code: {response.status_code}'))
    tree = html.fromstring(response.content)
    for link in tree.xpath(self.re_chapter_path):
        chapter_title = str(link.xpath('text()')[0]).strip().replace('\t', ' ')
        chapter = Chapter(manga, chapter_title)
        chapter.url = urljoin(self.base_url, str(link.xpath('@href')[0]))
        manga.add_chapter(chapter)
def crawl_index(self, manga_site: MangaSite) -> None:
    """Fetch the site index and append a Manga entry for every listed link.

    :param manga_site: site whose manga list is populated in place
    :raises ConnectionError: when the index page does not return HTTP 200
    """
    start_url = urljoin(self.base_url, self.manga_index)
    response = requests.get(start_url)
    if response.status_code != 200:
        raise ConnectionError(
            _(F'Could not connect with {start_url} site, status code: {response.status_code}'))
    manga_site.url = self.base_url
    tree = html.fromstring(response.content)
    for link in tree.xpath(self.re_index_path):
        manga_title = str(link.xpath('text()')[0]).strip().replace('\t', ' ')
        manga_url = urljoin(self.base_url, str(link.xpath('@href')[0]))
        manga_site.add_manga(Manga(manga_title, manga_url, manga_site))
def __init__(self, config) -> None: self.config = config # type: ConfigManager self.ddmd_storage = MangaStorage() # type: MangaStorage set_logger_level(self.config.log_level) self.last_site = self.config.last_site # type: int self.cwd_site = None # type: MangaSite self.cwd_manga = None # type: Manga self.cwd_chapter = None # type: Chapter self.manga_sites = [] # type: List[MangaSite] self.crawlers = {} # type: Dict[str, BaseCrawler] self.load_db() logger.info(_('Program started'))
def download(self, chapter: Chapter) -> int:
    """Render the chapter page in a browser and download every image found.

    :param chapter: chapter whose pages are fetched into memory
    :return: number of pages collected
    :raises ConnectionError: wrapping any failure during crawling
    """
    start_url = chapter.url
    try:
        with SeleniumDriver() as driver:
            driver.get(start_url)
            wait_for_page(driver, self.re_download_path)
            chapter.clear_state()
            rendered = driver.find_element_by_xpath('//*').get_attribute('outerHTML')
            for img in html.fromstring(rendered).xpath(self.re_download_path):
                src = str(img.xpath('@src')[0])
                chapter.add_page(requests.get(src, stream=True).content)
            return chapter.number_of_pages()
    except Exception as e:
        raise ConnectionError(_(F'Could not connect with {start_url} site, error message: {e}'))
def __get_crawler(self, site_name: str) -> Union[BaseCrawler, bool]:
    """Return the crawler for *site_name*, creating and caching it on demand.

    The lookup is case-insensitive.

    :param site_name: String - name of a site
    :return: Appropriate crawler or False if one does not exist
    """
    # Bug fix: the original membership test was case-insensitive but the
    # following dict access was not, raising KeyError whenever the cached
    # key differed from site_name only in case.
    wanted = site_name.lower()
    for cached_name, cached_crawler in self.crawlers.items():
        if cached_name.lower() == wanted:
            return cached_crawler
    crawler = manga_site_2_crawler(site_name)
    if crawler:
        self.crawlers[site_name] = crawler
        return crawler
    logger.error(_(F'Could not get {site_name} crawlers'))
    return False
def crawl_detail(self, manga: Manga) -> None:
    """Render the manga page in a browser and append its chapters.

    :param manga: manga whose chapter list is populated in place
    :raises ConnectionError: wrapping any failure during crawling
    """
    start_url = manga.url
    try:
        with SeleniumDriver() as driver:
            driver.get(start_url)
            wait_for_page(driver, self.re_chapter_path)
            rendered = driver.find_element_by_xpath('//*').get_attribute('outerHTML')
            tree = html.fromstring(rendered)
            # crawl for manga chapters
            for link in tree.xpath(self.re_chapter_path):
                chapter_title = str(link.xpath('text()')[0]).strip().replace('\t', ' ')
                chapter = Chapter(manga, chapter_title)
                chapter.url = urljoin(self.base_url, str(link.xpath('@href')[0]))
                manga.add_chapter(chapter)
    except Exception as e:
        raise ConnectionError(_(F'Could not connect with {start_url} site, error message: {e}'))
def save_images(self, base_path: str) -> Tuple[bool, str]:
    """Write every in-memory page to disk as a numbered image file.

    :param base_path: root directory of the sites storage
    :return: tuple of (success, directory the images were written to)
    """
    images_dir = self.get_download_path(base_path)
    try:
        if not os.path.isdir(images_dir):
            os.makedirs(images_dir, exist_ok=True)
        for idx, page in enumerate(self.pages):
            # Sniff the format so the extension matches the actual bytes.
            extension = imghdr.what(BytesIO(page))
            target = os.path.join(images_dir, F'{idx:0>3d}.{extension}')
            with open(target, 'wb') as image_file:
                image_file.write(page)
    except Exception as e:
        logger.error(
            _(F'Could not save images to {images_dir}\nError message: {e}')
        )
        return False, images_dir
    self.saved_images = True
    return True, images_dir
def open_terminal(self, level: str):
    """Open a system terminal in the download (or PDF) dir of the selection.

    :param level: 'M' for the selected manga, 'C' for the selected chapter
    """
    try:
        base_path = self.ddmd.sites_location
        if level == 'M':
            source_list = self.mangas_list
        elif level == 'C':
            source_list = self.chapters_list
        else:
            # Unknown level - nothing to open.
            return
        list_object = source_list.item(
            source_list.currentRow()).data(QtCore.Qt.UserRole)
        path = list_object.get_download_path(base_path)
        if not os.path.exists(path):
            # No downloaded images - fall back to the PDF directory.
            path = list_object.get_pdf_path(base_path)
        if platform in ('linux', 'linux2'):
            os.system(F'gnome-terminal --working-directory="{path}"')
        elif platform == 'win32':
            os.system(F'start cmd /K "cd /d {path}"')
    except Exception as e:
        logger.warning(
            _(F'There was a problem while opening manga: [{e}]'))
def download(self, chapter: Chapter) -> int:
    """Derive the image URL pattern from the first page, then fetch all pages.

    The page count comes from the page-selector dropdown; image URLs are
    built as ``<prefix>-<zero-padded page number><extension>``.

    :param chapter: chapter whose pages are fetched into memory
    :return: number of pages collected
    :raises ConnectionError: when the chapter page does not return HTTP 200
    """
    start_url = chapter.url
    chapter.clear_state()
    with requests.Session() as session:
        response = session.get(start_url)
        if response.status_code != 200:
            raise ConnectionError(
                _(F'Could not connect with {start_url} site, status code: {response.status_code}'))
        tree = html.fromstring(response.content)
        pages_count = len(tree.xpath(
            '/html/body/div[2]/div[4]/div/div/span/select[2]/option'))
        # Split "<prefix>-<number><ext>" into its pattern components.
        first_image_url = str(tree.xpath(self.re_download_path)[0])
        dot = first_image_url.rfind('.')
        image_ext = first_image_url[dot:]
        stem = first_image_url[:dot]
        digits_for_page_number = len(stem.split('-')[-1])
        prefix = stem[:stem.rfind('-')]
        for page_number in range(1, 1 + pages_count):
            page_url = F'{prefix}-{page_number:0{digits_for_page_number}}{image_ext}'
            chapter.add_page(session.get(page_url, stream=True).content)
    return chapter.number_of_pages()
def crawl_index(self, manga_site: MangaSite) -> None:
    """Crawl the paginated site index with a browser, adding every manga.

    Follows "Next" pagination links until no more are found.

    :param manga_site: site whose manga list is populated in place
    :raises ConnectionError: wrapping any failure during crawling
    """
    start_url = urljoin(self.base_url, self.manga_index)
    try:
        with SeleniumDriver() as driver:
            collected_all_pages = False
            driver.get(start_url)
            wait_for_page(driver, self.re_index_path)
            manga_site.url = self.base_url
            while collected_all_pages is False:
                # Grab the fully rendered DOM of the current index page.
                content = driver.find_element_by_xpath('//*').get_attribute('outerHTML')
                tree = html.fromstring(content)
                for element in tree.xpath(self.re_index_path):
                    title = str(element.xpath('text()')[0]).strip().replace('\t', ' ')
                    url = urljoin(self.base_url, str(element.xpath('@href')[0]))
                    manga = Manga(title, url, manga_site)
                    manga_site.add_manga(manga)
                # Look for a "Next" pagination link; the for/else fires only
                # when no such link was found (no break), ending the loop.
                for element2 in tree.xpath(self.re_index_next_page):
                    if 'Next'.lower() in element2.xpath('text()')[0].lower():
                        driver.get(urljoin(self.base_url, element2.xpath('@href')[0]))
                        break
                else:
                    collected_all_pages = True
    except Exception as e:
        raise ConnectionError(_(F'Could not connect with {start_url} site, error message: {e}'))
def dump(self):
    """Serialize this site with pickle, logging its in-memory footprint."""
    debug_msg = _(
        F'Dumped {self.site_name} site with {len(self.mangas)} mangas - size in memory = {get_object_mem_size(self)} bytes.'
    )
    logger.debug(debug_msg)
    return pickle.dumps(self)
def start_chapters_save(self, *args):
    """Queue save-to-disk worker threads for the selected chapters."""
    self.start_working_threads(SingleChapterSaveThread,
                               _('Saving {} chapters...'))
def relocate_db(self):
    """Ask the user for a new directory and store the manga DB there."""
    chosen_dir = str(
        QFileDialog.getExistingDirectory(self, _('Select Directory')))
    if not chosen_dir:
        # Dialog cancelled - keep the current location.
        return
    self.config.db_path = chosen_dir
    self.controller.store_sites()
def __init__(self, apply_filter):
    """Line edit that forwards every text change to *apply_filter*.

    :param apply_filter: callable invoked with the new filter text
    """
    QLineEdit.__init__(self)
    self.setPlaceholderText(_('Search manga...'))
    self.setToolTip(_('Filter'))
    self.textChanged.connect(apply_filter)
def __init__(self, default_string=None):
    """List widget that shows *default_string* when it has no items.

    :param default_string: placeholder text; defaults to the translated
        'No Items'.
    """
    QListWidget.__init__(self)
    # Bug fix: the original used default_string=_('No Items'), which
    # evaluated the translation once at import time - before the locale
    # could be configured. The None sentinel defers it to call time and
    # stays backward-compatible for callers passing an explicit value.
    self.default_string = _('No Items') if default_string is None else default_string
def start_chapters_convert(self, *args):
    """Queue PDF-conversion worker threads for the selected chapters."""
    self.start_working_threads(SingleChapterConvertThread,
                               _('Converting {} chapters...'))
def save_sites(self):
    """Persist the manga DB and confirm the save on the status bar."""
    # Bug fix: the original had a stray trailing comma after
    # store_sites(), silently building and discarding a tuple.
    self.controller.store_sites()
    self.show_msg_on_status_bar(_(F'DB saved to {self.config.db_path}'))
def start_chapters_download(self, *args):
    """Queue download worker threads for the selected chapters."""
    self.start_working_threads(SingleChapterDownloadThread,
                               _('Downloading {} chapters...'))