def start_download(self) -> None:
    """Download a ZIP archive from a generic HTTP server link and register its metadata.

    Derives a filename from the Content-Disposition header (falling back to the
    last URL path segment), streams the file into MEDIA_ROOT and fills in the
    gallery's title, filename, filesize, filecount and CRC32 on success.
    """
    if not self.gallery or not self.gallery.link:
        return
    logger.info(
        "Downloading an archive from a generic HTTP server: {}".format(
            self.gallery.link))
    request_dict = construct_request_dict(self.settings, self.own_settings)
    # FIX: stream must be the boolean True, not the string 'True' (any truthy
    # value happens to work in requests, but the string form is a latent bug).
    request_file = requests.get(self.gallery.link, stream=True, **request_dict)
    filename = get_filename_from_cd(
        request_file.headers.get('content-disposition'))
    if not filename:
        # FIX: str.find returns -1 when the separator is absent (truthy!) and 0
        # when it is the first character (falsy), so the original truthiness
        # test was inverted for both edge cases. Compare against -1 explicitly.
        if self.gallery.link.find('/') != -1:
            filename = self.gallery.link.rsplit('/', 1)[1]
    if not filename:
        logger.error("Could not find a filename for link: {}".format(
            self.gallery.link))
        self.return_code = 0
        # FIX: without this return the method fell through and crashed below
        # on an empty/None filename.
        return
    self.gallery.title = filename.replace(".zip", "")
    self.gallery.filename = replace_illegal_name(
        available_filename(
            self.settings.MEDIA_ROOT,
            os.path.join(self.own_settings.archive_dl_folder, filename)))
    filepath = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    with open(filepath, 'wb') as fo:
        for chunk in request_file.iter_content(4096):
            fo.write(chunk)
    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(filepath)
    if self.gallery.filesize > 0:
        self.crc32 = calc_crc32(filepath)
        self.fileDownloaded = 1
        self.return_code = 1
    else:
        logger.error("Could not download archive")
        self.return_code = 0
def start_download(self) -> None:
    """Stream an archive from a Panda Backup-like source into MEDIA_ROOT.

    Builds a safe target filename from the gallery data, downloads the
    temp-archive link with retries, then records filesize/filecount/CRC32.
    """
    if not self.gallery or not self.gallery.temp_archive:
        return
    logger.info(
        "Downloading an archive: {} from a Panda Backup-like source: {}".format(
            self.gallery.title, self.gallery.temp_archive['link']))
    base_name = replace_illegal_name(
        get_base_filename_string_from_gallery_data(self.gallery))
    # TODO: File could be cbz.
    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder, base_name + '.zip'))
    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['stream'] = True
    response = request_with_retries(
        self.gallery.temp_archive['link'],
        request_dict,
    )
    if not response:
        logger.error("Could not download archive")
        self.return_code = 0
        return
    target_path = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    with open(target_path, 'wb') as out_file:
        for data_chunk in response.iter_content(4096):
            out_file.write(data_chunk)
    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(target_path)
    if self.gallery.filesize <= 0:
        logger.error("Could not download archive")
        self.return_code = 0
        return
    self.crc32 = calc_crc32(target_path)
    self.fileDownloaded = 1
    self.return_code = 1
def start_download(self) -> None:
    """Download the gallery ZIP via its archiver key URL and register file metadata.

    On failure (zero-size result) the partial file is deleted so a retry can
    reuse a clean filename.
    """
    if not self.gallery or not self.gallery.link or not self.gallery.archiver_key:
        return
    to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)
    to_use_filename = replace_illegal_name(to_use_filename)
    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder,
                     to_use_filename + '.zip'))
    request_dict = construct_request_dict(self.settings, self.own_settings)
    # FIX: stream must be the boolean True, not the string 'True' (any truthy
    # value happens to work in requests, but the string form is a latent bug).
    request_file = requests.get(self.gallery.archiver_key,
                                stream=True,
                                **request_dict)
    filepath = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    with open(filepath, 'wb') as fo:
        for chunk in request_file.iter_content(4096):
            fo.write(chunk)
    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(filepath)
    if self.gallery.filesize > 0:
        self.crc32 = calc_crc32(filepath)
        self.fileDownloaded = 1
        self.return_code = 1
    else:
        logger.error("Could not download archive")
        # Remove the empty/invalid file so a later retry gets a clean slot.
        os.remove(filepath)
        self.return_code = 0
def start_download(self) -> None:
    """Download the archive from a Panda Backup-like source via its archiver key link.

    Sanitizes the gallery title for use as a filename, streams the ZIP into
    MEDIA_ROOT and records filesize/filecount/CRC32 on success.
    """
    if not self.gallery:
        return
    self.logger.info(
        "Downloading an archive: {} from a Panda Backup-like source: {}".format(
            self.gallery.title, self.gallery.archiver_key['link']))
    self.gallery.title = replace_illegal_name(self.gallery.title)
    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder,
                     self.gallery.title + '.zip'))
    # FIX: stream must be the boolean True, not the string 'True' (any truthy
    # value happens to work in requests, but the string form is a latent bug).
    request_file = requests.get(self.gallery.archiver_key['link'],
                                stream=True,
                                headers=self.settings.requests_headers,
                                timeout=self.settings.timeout_timer,
                                cookies=self.own_settings.cookies)
    filepath = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    with open(filepath, 'wb') as fo:
        for chunk in request_file.iter_content(4096):
            fo.write(chunk)
    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(filepath)
    if self.gallery.filesize > 0:
        self.crc32 = calc_crc32(filepath)
        self.fileDownloaded = 1
        self.return_code = 1
    else:
        self.logger.error("Could not download archive")
        self.return_code = 0
def start_download(self) -> None:
    """Request an archive download link from the remote archiver page and download the ZIP.

    Validates that root/gid/token/archiver_key are present, fetches the
    archiver page, extracts the real archive link from its HTML, then streams
    the ZIP into MEDIA_ROOT and records filesize/filecount/CRC32.
    """
    if not self.gallery:
        return
    to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)
    to_use_filename = replace_illegal_name(to_use_filename)
    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder,
                     to_use_filename + '.zip'))
    if not (self.gallery.root and self.gallery.gid and self.gallery.token
            and self.gallery.archiver_key):
        logger.error(
            'Missing required data -> root: {}, gid: {}, token: {}, archiver_key: {}.'
            .format(
                self.gallery.root,
                self.gallery.gid,
                self.gallery.token,
                self.gallery.archiver_key,
            ))
        self.return_code = 0
        return
    r = self.request_archive_download(self.gallery.root, self.gallery.gid,
                                      self.gallery.token,
                                      self.gallery.archiver_key)
    if not r:
        logger.error('Could not get download link.')
        self.return_code = 0
        return
    r.encoding = 'utf-8'
    if 'Invalid archiver key' in r.text:
        logger.error("Invalid archiver key received.")
        self.return_code = 0
    else:
        archive_link = get_archive_link_from_html_page(r.text)
        if archive_link == '':
            logger.error(
                'Could not find archive link, page text: {}'.format(r.text))
            self.return_code = 0
        else:
            # Strip any query string from the link before appending our own.
            m = re.match(r"(.*?)(\?.*?)", archive_link)
            if m:
                archive_link = m.group(1)
            logger.info('Got link: {}, from url: {}'.format(
                archive_link, r.url))
            request_dict = construct_request_dict(self.settings,
                                                  self.own_settings)
            # FIX: stream must be the boolean True, not the string 'True'.
            request_file = requests.get(archive_link + '?start=1',
                                        stream=True,
                                        **request_dict)
            # FIX: check the download response itself; the original re-checked
            # the archiver page response (r), which was already known to be
            # usable, so failed downloads went undetected.
            if request_file and request_file.status_code == 200:
                logger.info(
                    'Downloading gallery: {}.zip'.format(to_use_filename))
                filepath = os.path.join(self.settings.MEDIA_ROOT,
                                        self.gallery.filename)
                with open(filepath, 'wb') as fo:
                    for chunk in request_file.iter_content(4096):
                        fo.write(chunk)
                self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(
                    filepath)
                if self.gallery.filesize > 0:
                    self.crc32 = calc_crc32(filepath)
                    self.fileDownloaded = 1
                    self.return_code = 1
            else:
                logger.error("Could not download archive")
                self.return_code = 0
def start_download(self) -> None:
    """Download the gallery link through the gallery-dl CLI and store the result as a ZIP.

    Resolves the gallery-dl executable, runs it with --zip into a temp
    directory, picks the downloaded file, moves it into MEDIA_ROOT and
    records filesize/filecount/CRC32.
    """
    if not self.gallery or not self.gallery.link:
        return
    # Prefer an explicitly configured path over the bare executable name.
    if self.settings.gallery_dl.executable_path:
        exe_path_to_use = shutil.which(
            self.settings.gallery_dl.executable_path)
    else:
        exe_path_to_use = shutil.which(
            self.settings.gallery_dl.executable_name)
    if not exe_path_to_use:
        self.return_code = 0
        logger.error("The gallery-dl executable was not found")
        return
    directory_path = mkdtemp()
    arguments = ["--zip", "--dest", "{}".format(directory_path)]
    if self.own_settings.proxy:
        arguments.append("--proxy")
        arguments.append("{}".format(self.own_settings.proxy))
    if self.settings.gallery_dl.config_file:
        arguments.append("--config")
        arguments.append("{}".format(self.settings.gallery_dl.config_file))
    if self.settings.gallery_dl.extra_arguments:
        arguments.append("{}".format(
            self.settings.gallery_dl.extra_arguments))
    arguments.append("{}".format(self.gallery.link))
    logger.info("Calling gallery-dl: {}.".format(" ".join(
        [exe_path_to_use, *arguments])))
    process_result = subprocess.run([exe_path_to_use, *arguments],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
    # NOTE(review): any stderr output is treated as fatal; gallery-dl may
    # print warnings there too — confirm this is intended.
    if process_result.stderr:
        self.return_code = 0
        logger.error(
            "An error was captured when running gallery-dl: {}".format(
                process_result.stderr))
        return
    if process_result.returncode != 0:
        self.return_code = 0
        logger.error("Return code was not 0: {}".format(
            process_result.returncode))
        return
    # If we downloaded more than one file, get the latest one
    output_path = ''
    file_name = ''
    for (dir_path, dir_names, filenames) in os.walk(directory_path):
        for current_file in filenames:
            file_name = current_file
            output_path = os.path.join(dir_path, current_file)
    # FIX: the original performed two redundant checks (empty path, then
    # empty-or-missing); a single existence check covers both cases.
    if not output_path or not os.path.isfile(output_path):
        self.return_code = 0
        logger.error(
            "The resulting download file was not found: {}".format(file_name))
        return
    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder,
                     replace_illegal_name(file_name)))
    self.gallery.title = os.path.splitext(file_name)[0]
    filepath = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    shutil.move(output_path, filepath)
    shutil.rmtree(directory_path, ignore_errors=True)
    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(filepath)
    if self.gallery.filesize > 0:
        self.crc32 = calc_crc32(filepath)
        self.fileDownloaded = 1
        self.return_code = 1
    else:
        logger.error("Could not download archive")
        self.return_code = 0
def start_download(self) -> None:
    """Download the gallery link through the megadl CLI tool and store the result.

    Resolves the megadl binary, downloads into a temp directory, takes the
    first reported file name, moves it into MEDIA_ROOT and records
    filesize/filecount/CRC32.
    """
    if not self.gallery or not self.gallery.link:
        return
    # An explicitly configured path wins over the bare executable name.
    configured_executable = (self.own_settings.megadl_executable_path
                             or self.own_settings.megadl_executable_name)
    megadl_binary = shutil.which(configured_executable)
    if not megadl_binary:
        self.return_code = 0
        self.logger.error("The megadl tools was not found")
        return
    temp_dir = mkdtemp()
    cmd_args = ["--no-progress", "--print-names", "--path", str(temp_dir)]
    if self.own_settings.proxy:
        cmd_args += ["--proxy", str(self.own_settings.proxy)]
    if self.own_settings.extra_megadl_arguments:
        cmd_args.append(str(self.own_settings.extra_megadl_arguments))
    cmd_args.append(str(self.gallery.link))
    self.logger.info("Calling megadl: {}.".format(
        " ".join([megadl_binary] + cmd_args)))
    run_result = subprocess.run([megadl_binary] + cmd_args,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
    stdout_text = run_result.stdout
    if not stdout_text:
        self.return_code = 0
        self.logger.error(
            "The link could not be downloaded, no output was generated after running megadl"
        )
        return
    if run_result.stderr:
        self.return_code = 0
        self.logger.error(
            "An error was captured when running megadl: {}".format(
                run_result.stderr))
        return
    if "WARNING: Skipping invalid" in stdout_text:
        self.return_code = 0
        self.logger.error(
            "The link could not be downloaded: {}".format(stdout_text))
        return
    # If we downloaded a folder, just take the first result
    downloaded_name = stdout_text.splitlines()[0]
    source_path = os.path.join(temp_dir, downloaded_name)
    if not os.path.isfile(source_path):
        self.return_code = 0
        self.logger.error(
            "The resulting download file was not found: {}".format(
                downloaded_name))
        return
    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder,
                     replace_illegal_name(downloaded_name)))
    self.gallery.title = os.path.splitext(downloaded_name)[0]
    final_path = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    shutil.move(source_path, final_path)
    shutil.rmtree(temp_dir, ignore_errors=True)
    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(final_path)
    if self.gallery.filesize <= 0:
        self.logger.error("Could not download archive")
        self.return_code = 0
        return
    self.crc32 = calc_crc32(final_path)
    self.fileDownloaded = 1
    self.return_code = 1
def start_download(self) -> None:
    """Scrape a reader page image-by-image and pack the images into a ZIP archive.

    Walks the gallery's read pages sequentially, downloading each image until a
    terminal condition (404 past the first page, repeated image, or no next
    page), then zips the images into MEDIA_ROOT and records file metadata.
    """
    if not self.gallery or not self.gallery.link:
        return
    to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)
    to_use_filename = replace_illegal_name(to_use_filename)
    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(
            self.own_settings.archive_dl_folder,
            to_use_filename + '.zip'))
    if self.gallery.content:
        soup_1 = BeautifulSoup(self.gallery.content, 'html.parser')
    else:
        request_dict = construct_request_dict(self.settings, self.own_settings)
        gallery_page = requests.get(
            self.gallery.link,
            **request_dict
        )
        soup_1 = BeautifulSoup(gallery_page.content, 'html.parser')
    gallery_read = soup_1.find("a", {"class": "x-btn-rounded"})['href']

    # Some URLs are really bad formatted
    gallery_read = re.sub(
        r'.*(' + re.escape(constants.main_page) + r'/manga/read/.+/0/1/).*',
        r'\1',
        gallery_read,
        flags=re.DOTALL
    )

    if not gallery_read or gallery_read in constants.bad_urls or not gallery_read.startswith(constants.main_page):
        logger.warning("Reading gallery page not available, trying to guess the name.")
        gallery_read = guess_gallery_read_url(self.gallery.link, self.gallery)

    if not gallery_read.endswith('page/1'):
        gallery_read += 'page/1'

    page_regex = re.compile(r"(.*?page/)(\d+)/*$", re.IGNORECASE)

    last_image = ''

    directory_path = mkdtemp()

    logger.info('Downloading gallery: {}'.format(self.gallery.title))

    second_pass = False
    while True:
        try:
            request_dict = construct_request_dict(self.settings, self.own_settings)
            gallery_read_page = requests.get(
                gallery_read,
                **request_dict
            )
        except requests.exceptions.MissingSchema:
            logger.error("Malformed URL: {}, skipping".format(gallery_read))
            self.return_code = 0
            shutil.rmtree(directory_path, ignore_errors=True)
            return
        if gallery_read_page.status_code == 404:
            if gallery_read.endswith('page/1'):
                # A 404 on the very first page: retry once with the alternate
                # guessed URL before giving up.
                if not second_pass:
                    gallery_read = guess_gallery_read_url(self.gallery.link, self.gallery, False)
                    second_pass = True
                    continue
                logger.error("Last page was the first one: {}, stopping".format(gallery_read))
                self.return_code = 0
                shutil.rmtree(directory_path, ignore_errors=True)
                return
            # Got to last gallery page, stopping
            break
        soup_2 = BeautifulSoup(gallery_read_page.content, 'html.parser')
        img_find = soup_2.find("img", {"class": "open"})
        if not img_find:
            logger.error("Gallery not available, skipping")
            self.return_code = 0
            shutil.rmtree(directory_path, ignore_errors=True)
            return
        img = img_find['src']
        if last_image != '' and last_image == img:
            # Current image is the same as previous: the reader is looping.
            break
        last_image = img
        img_name = os.path.basename(img)
        request_dict = construct_request_dict(self.settings, self.own_settings)
        request_file = requests.get(
            img,
            **request_dict
        )
        if request_file.status_code == 404:
            # Got to last image, stopping
            break
        with open(os.path.join(directory_path, img_name), "wb") as fo:
            for chunk in request_file.iter_content(4096):
                fo.write(chunk)
        page_match = page_regex.search(gallery_read)
        if page_match:
            gallery_read = page_match.group(1) + str(int(page_match.group(2)) + 1)
        else:
            # Could not match to change page, stopping
            break

    file_path = os.path.join(
        self.settings.MEDIA_ROOT,
        self.gallery.filename
    )

    with ZipFile(file_path, 'w') as archive:
        for (root_path, _, file_names) in os.walk(directory_path):
            for current_file in file_names:
                archive.write(
                    os.path.join(root_path, current_file),
                    arcname=os.path.basename(current_file))
    shutil.rmtree(directory_path, ignore_errors=True)

    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(file_path)
    if self.gallery.filesize > 0:
        self.crc32 = calc_crc32(file_path)
        self.fileDownloaded = 1
        self.return_code = 1
    else:
        # FIX: a zero-size archive previously left return_code unset and
        # logged nothing; report failure explicitly like the other downloaders.
        logger.error("Could not download archive")
        self.return_code = 0
def start_download(self) -> None:
    """Collect the image URLs from a reader page and pack the images into a ZIP archive.

    Resolves the read URL from the gallery page (guessing a fallback on
    failure), extracts all image links at once, downloads them into a temp
    directory, zips them into MEDIA_ROOT and records file metadata.
    """
    if not self.gallery or not self.gallery.link:
        return
    to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)
    to_use_filename = replace_illegal_name(to_use_filename)
    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(
            self.own_settings.archive_dl_folder,
            to_use_filename + '.zip'))
    if self.gallery.content:
        soup_1 = BeautifulSoup(self.gallery.content, 'html.parser')
    else:
        request_dict = construct_request_dict(self.settings, self.own_settings)
        gallery_page = requests.get(
            self.gallery.link,
            **request_dict
        )
        soup_1 = BeautifulSoup(gallery_page.content, 'html.parser')
    gallery_read = soup_1.find("a", {"class": "x-btn-rounded"})['href']

    # Some URLs are really bad formatted
    gallery_read = re.sub(
        r'.*(' + re.escape(constants.main_page) + r'/manga/read/.+/0/1/).*',
        r'\1',
        gallery_read,
        flags=re.DOTALL
    )

    if not gallery_read or gallery_read in constants.bad_urls or not gallery_read.startswith(constants.main_page):
        logger.warning("Reading gallery page not available, trying to guess the name.")
        gallery_read = guess_gallery_read_url(self.gallery.link, self.gallery)

    if not gallery_read.endswith('page/1'):
        gallery_read += 'page/1'

    logger.info('Downloading gallery: {}'.format(self.gallery.title))

    try:
        request_dict = construct_request_dict(self.settings, self.own_settings)
        gallery_read_page = requests.get(
            gallery_read,
            **request_dict
        )
    except requests.exceptions.MissingSchema:
        logger.error("Malformed URL: {}, skipping".format(gallery_read))
        self.return_code = 0
        return

    if gallery_read_page.status_code != 200:
        # First attempt failed: retry once with the alternate guessed URL.
        gallery_read = guess_gallery_read_url(self.gallery.link, self.gallery, False)
        try:
            request_dict = construct_request_dict(self.settings, self.own_settings)
            gallery_read_page = requests.get(
                gallery_read,
                **request_dict
            )
        except requests.exceptions.MissingSchema:
            logger.error("Malformed URL: {}, skipping".format(gallery_read))
            self.return_code = 0
            return

    if gallery_read_page.status_code == 200:
        image_urls = self.get_img_urls_from_gallery_read_page(gallery_read_page.text)
        if not image_urls:
            logger.error("Could not find image links, archive not downloaded")
            self.return_code = 0
            return
        directory_path = mkdtemp()
        for image_url in image_urls:
            img_name = os.path.basename(image_url)
            request_dict = construct_request_dict(self.settings, self.own_settings)
            request_file = requests.get(
                image_url,
                **request_dict
            )
            if request_file.status_code == 404:
                logger.warning("Image link reported 404 error, stopping")
                break
            with open(os.path.join(directory_path, img_name), "wb") as fo:
                for chunk in request_file.iter_content(4096):
                    fo.write(chunk)
        file_path = os.path.join(
            self.settings.MEDIA_ROOT,
            self.gallery.filename
        )
        with ZipFile(file_path, 'w') as archive:
            for (root_path, _, file_names) in os.walk(directory_path):
                for current_file in file_names:
                    archive.write(
                        os.path.join(root_path, current_file),
                        arcname=os.path.basename(current_file))
        shutil.rmtree(directory_path, ignore_errors=True)
        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(file_path)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(file_path)
            self.fileDownloaded = 1
            self.return_code = 1
        else:
            # FIX: a zero-size archive previously left return_code unset and
            # logged nothing; report failure explicitly.
            logger.error("Could not download archive")
            self.return_code = 0
    else:
        logger.error("Wrong HTML code returned, could not download, link: {}".format(gallery_read))
        self.return_code = 0