def parse_domains(self, list):
    """
    Pull the host part out of every URL in *list*.

    Matches either a dotted-quad IP or a hostname (optionally with a
    port), normalizes each hit to lowercase, and hands the de-duplicated
    result to ``self.replace_domains``.
    """
    host_re = re.compile(
        r"^(?:https?://)?(?:www\.)?(?:\w+\.)*((?:(?:\d{1,3}\.){3}\d{1,3}|[\w\-^_]{3,63}(?:\.[a-zA-Z]{2,}){1,2})(?:\:\d+)?)",
        re.I | re.U,
    )

    found = []
    for url in list:
        for hit in host_re.findall(url):
            found.append(decode(hit).strip().lower())

    return self.replace_domains(uniquify(found))
def add_password(self, password):
    """
    Adds a password to saved list and persists the list to the
    configured password file (one password per line).

    :param password: password string to remember; it is prepended so the
                     most recently used password is tried first.
    """
    try:
        self.passwords = uniquify([password] + self.passwords)

        file = os.fsdecode(self.config.get("passwordfile"))
        # Explicit utf-8: the platform default encoding may not be able
        # to encode non-ASCII passwords, which would lose the whole file.
        with open(file, mode="w", encoding="utf-8") as fp:
            fp.writelines(pw + "\n" for pw in self.passwords)

    except IOError as exc:
        self.log_error(exc)
def get_links(self):
    """
    Collect all image links of the gallery.

    Direct links are scraped from the page HTML; galleries may
    lazy-load further images, which are requested separately.
    Returns a link list, or an empty list when a dedicated package
    was appended instead.
    """

    def to_fullsize(url):
        # drop the thumbnail marker ("s" after the 7-char id), force http
        return "http://" + re.sub(r"(\w{7})s\.", r"\1.", url)

    matches = re.findall(self.LINK_PATTERN, self.data)
    direct_links = uniquify(to_fullsize(m) for m in matches)

    #: Imgur galleries may contain more images than initially shown - fetch the rest now
    try:
        indirect_links = self.get_indirect_links(direct_links)
        self.log_debug(f"Found {len(indirect_links)} additional links")

    except (TypeError, KeyError, ValueError) as exc:
        #: fail gracefully, we already had some success
        self.log_error(
            self._("Processing of additional links unsuccessful - {}: {}").format(
                type(exc).__name__, exc
            )
        )
        indirect_links = []

    all_links = direct_links + indirect_links

    #: Inform the user if not every advertised image was found
    if len(all_links) < self.total_num_images:
        self.log_error(
            self._("Could not save all images of this gallery: {}/{}").format(
                len(all_links), self.total_num_images
            )
        )

    #: If we could extract a name, use this to create a specific package
    if self.gallery_name:
        self.packages.append((self.gallery_name, all_links, self.gallery_name))
        return []

    return all_links
def send_request_to_server(self):
    """
    Send torrent/magnet to the server.

    Uploads a .torrent file (remote URL or local container) or a magnet
    URL, then selects the files to download according to the
    include/exclude filter settings.

    :return: the server-side torrent id.
    """
    if self.pyfile.url.endswith(".torrent"):
        #: torrent URL
        if self.pyfile.url.startswith("http"):
            #: remote URL, download the torrent to tmp directory
            torrent_content = self.load(self.pyfile.url, decode=False)
            torrent_filename = safejoin(
                "tmp", "tmp_{}.torrent".format(self.pyfile.package().name)
            )  #: `tmp_` files are deleted automatically
            with open(torrent_filename, "wb") as f:
                f.write(torrent_content)

        else:
            #: URL is local torrent file (uploaded container)
            torrent_filename = urllib.request.url2pathname(
                self.pyfile.url[7:]
            )  #: trim the starting `file://`
            if not exists(torrent_filename):
                self.fail(self._("Torrent file does not exist"))

        #: Check if the torrent file path is inside pyLoad's config directory
        if os.path.abspath(torrent_filename).startswith(self.pyload.tempdir + os.sep):
            #: two attempts: the second one runs after a token refresh
            for _i in range(2):
                try:
                    #: send the torrent content to the server
                    json_data = self.upload(
                        torrent_filename,
                        self.API_URL + "/torrents/addTorrent",
                        get={'auth_token': self.api_token},
                    )
                except BadHeader as exc:
                    json_data = exc.content

                api_data = json.loads(json_data) if len(json_data) > 0 else {}

                if "error_code" in api_data:
                    if api_data["error_code"] == 8:
                        #: token expired, refresh the token and retry
                        self.account.relogin()
                        if not self.account.info["login"]["valid"]:
                            self.fail(self._("Token refresh has failed"))
                        else:
                            # FIX: dict views are not subscriptable in Python 3,
                            # `accounts.keys()[0]` raised TypeError here
                            self.api_token = self.account.accounts[
                                next(iter(self.account.accounts))
                            ]["api_token"]
                    else:
                        error_msg = api_data["error"]
                        self.fail(error_msg)
                else:
                    break

            else:
                self.fail(self._("Token refresh has failed"))

        else:
            self.fail(self._("Illegal URL"))  #: We don't allow files outside pyLoad's config directory

    else:
        #: magnet URL, send to the server
        api_data = self.api_response(
            "/torrents/addMagnet",
            get={"auth_token": self.api_token},
            post={"magnet": self.pyfile.url},
        )

    torrent_id = api_data["id"]
    torrent_info = self.api_response(
        "/torrents/info/" + torrent_id, get={'auth_token': self.api_token}
    )
    if "error" in torrent_info:
        self.fail(
            "{} (code: {})".format(
                torrent_info["error"], torrent_info.get("error_code", -1)
            )
        )

    #: Filter and select files for downloading
    exclude_filters = self.config.get("exclude_filter").split(';')
    excluded_ids = []
    for _filter in exclude_filters:
        excluded_ids.extend(
            [
                _file["id"]
                for _file in torrent_info["files"]
                if fnmatch.fnmatch(os.path.basename(_file["path"]), _filter)
            ]
        )
    excluded_ids = uniquify(excluded_ids)

    include_filters = self.config.get("include_filter").split(";")
    included_ids = []
    for _filter in include_filters:
        included_ids.extend(
            [
                _file["id"]
                for _file in torrent_info["files"]
                if fnmatch.fnmatch(os.path.basename(_file["path"]), _filter)
            ]
        )
    included_ids = uniquify(included_ids)

    selected_ids = ",".join(
        [str(_id) for _id in included_ids if _id not in excluded_ids]
    )
    self.api_response(
        "/torrents/selectFiles/" + torrent_id,
        get={"auth_token": self.api_token},
        post={"files": selected_ids},
    )

    return torrent_id
def _extract(self, pyfile, archive, password):
    """
    Verify, optionally repair, and extract a single archive.

    :param pyfile: pyfile object used for status/progress reporting.
    :param archive: extractor instance bound to the archive on disk.
    :param password: package password (may be empty); candidates from the
                     password file are also tried when `usepasswordfile` is set.
    :return: list of extracted file names on success.
    :raises Exception: generic "Extract failed" after logging the specific cause.
    """
    name = os.path.basename(archive.filename)

    pyfile.set_status("processing")

    encrypted = False
    try:
        self.log_debug(f"Password: {password or None}")
        # Candidate passwords: package password first, then the saved list
        # (only when the password file feature is enabled).
        passwords = (
            uniquify([password] + self.get_passwords(False))
            if self.config.get("usepasswordfile")
            else [password]
        )

        # --- verification phase: find a working password / detect damage ---
        for pw in passwords:
            try:
                pyfile.set_custom_status(self._("archive testing"))
                pyfile.set_progress(0)
                archive.verify(pw)
                pyfile.set_progress(100)

            except PasswordError:
                # wrong password: remember the archive is encrypted, try next one
                if not encrypted:
                    self.log_info(name, self._("Password protected"))
                    encrypted = True

            except CRCError as exc:
                self.log_debug(name, exc)
                self.log_info(name, self._("CRC Error"))

                if not self.repair:
                    raise CRCError("Archive damaged")

                else:
                    self.log_warning(name, self._("Repairing..."))

                    pyfile.set_custom_status(self._("archive repairing"))
                    pyfile.set_progress(0)
                    repaired = archive.repair()
                    pyfile.set_progress(100)

                    if not repaired and not self.config.get("keepbroken"):
                        raise CRCError("Archive damaged")

                    else:
                        # repair succeeded (or broken archives are kept):
                        # remember the password and stop probing
                        self.add_password(pw)
                        break

            except ArchiveError as exc:
                raise ArchiveError(exc)

            else:
                # verification succeeded with this password
                self.add_password(pw)
                break

        # --- extraction phase ---
        pyfile.set_custom_status(self._("archive extracting"))
        pyfile.set_progress(0)

        if not encrypted or not self.config.get("usepasswordfile"):
            self.log_debug("Extracting using password: {}".format(
                password or "None"))
            archive.extract(password)
        else:
            # archive is encrypted: retry extraction with every non-empty
            # candidate until one works; `for...else` raises if none did
            for pw in [
                    f for f in uniquify([password] + self.get_passwords(False))
                    if f
            ]:
                try:
                    self.log_debug(f"Extracting using password: {pw}")
                    archive.extract(pw)
                    self.add_password(pw)
                    break

                except PasswordError:
                    self.log_debug("Password was wrong")
            else:
                raise PasswordError

        pyfile.set_progress(100)
        pyfile.set_status("processing")

        extracted_files = archive.files or archive.list()

        # Volume/chunk files that are candidates for deletion after extraction.
        delfiles = archive.chunks()
        self.log_debug("Would delete: " + ", ".join(delfiles))

        if self.config.get("delete"):
            self.log_info(
                self._("Deleting {} files").format(len(delfiles)))

            deltotrash = self.config.get("deltotrash")
            for f in delfiles:
                file = os.fsdecode(f)
                if not exists(file):
                    continue

                if not deltotrash:
                    os.remove(file)

                else:
                    try:
                        send2trash.send2trash(file)

                    except NameError:
                        # send2trash module is not importable on this install
                        self.log_warning(
                            self._("Unable to move {} to trash").format(
                                os.path.basename(f)),
                            self._("Send2Trash lib not installed"),
                        )

                    except Exception as exc:
                        self.log_warning(
                            self._("Unable to move {} to trash").format(
                                os.path.basename(f)),
                            exc,
                        )

                    else:
                        self.log_info(
                            self._("Moved {} to trash").format(
                                os.path.basename(f)))

        self.log_info(name, self._("Extracting finished"))

        return extracted_files

    except PasswordError:
        self.log_error(
            name, self._("Wrong password" if password else "No password found"))

    except CRCError as exc:
        self.log_error(name, self._("CRC mismatch"), exc)

    except ArchiveError as exc:
        self.log_error(name, self._("Archive error"), exc)

    except Exception as exc:
        self.log_error(name, self._("Unknown error"), exc)

    # Any failure path ends here: notify listeners and signal the caller.
    self.m.dispatch_event("archive_extract_failed", pyfile, archive)

    raise Exception(self._("Extract failed"))
def extract(
    self, ids, thread=None
):  # TODO: Use pypack, not pid to improve method usability
    """
    Extract the archives of the given packages.

    Iterates packages -> extractors -> target archives, extracting each
    match (recursively re-queueing extracted archives when `recursive`
    is enabled) and applying the configured post-actions (permissions,
    folder cleanup, events).

    :param ids: package ids to process.
    :param thread: addon thread used to track the active pyfile.
    :return: True when every matched package extracted successfully.
    """
    if not ids:
        return False

    extracted = []
    failed = []

    def to_list(value):
        # split a user config string on spaces/commas/semicolons
        return value.replace(" ", "").replace(",", "|").replace(";", "|").split("|")

    destination = self.config.get("destination")
    subfolder = self.config.get("subfolder")
    fullpath = self.config.get("fullpath")
    overwrite = self.config.get("overwrite")
    priority = self.config.get("priority")
    recursive = self.config.get("recursive")
    keepbroken = self.config.get("keepbroken")

    extensions = [
        x.lstrip(".").lower() for x in to_list(self.config.get("extensions"))
    ]
    excludefiles = to_list(self.config.get("excludefiles"))

    if extensions:
        self.log_debug(f"Use for extensions: .{'|.'.join(extensions)}")

    #: Reload from txt file
    self.reload_passwords()

    dl_folder = self.pyload.config.get("general", "storage_folder")

    #: Iterate packages -> extractors -> targets
    for pid in ids:
        pypack = self.pyload.files.get_package(pid)
        if not pypack:
            # package vanished; drop it from the work queue
            self.queue.remove(pid)
            continue

        self.log_info(self._("Check package: {}").format(pypack.name))

        pack_dl_folder = os.path.join(
            dl_folder, pypack.folder, ""
        )  #: Force trailing slash

        #: Determine output folder
        extract_folder = os.path.join(
            pack_dl_folder, destination, ""
        )  #: Force trailing slash

        if subfolder:
            extract_folder = os.path.join(
                extract_folder,
                pypack.folder or safename(pypack.name.replace("http://", "")),
            )

        os.makedirs(extract_folder, exist_ok=True)
        if subfolder:
            self.set_permissions(extract_folder)

        matched = False
        success = True
        # (id, source path, output folder) per unique file name
        files_ids = list(
            {
                fdata["name"]: (
                    fdata["id"],
                    (os.path.join(pack_dl_folder, fdata["name"])),
                    extract_folder,
                )
                for fdata in pypack.get_children().values()
            }.values()
        )  #: : Remove duplicates

        #: Check as long there are unseen files
        while files_ids:
            new_files_ids = []

            if extensions:
                #: Include only specified archive types
                files_ids = [
                    file_id
                    for file_id in files_ids
                    if any(
                        [
                            Extractor.archivetype(file_id[1]) in extensions
                            for Extractor in self.extractors
                        ]
                    )
                ]

            #: Sort by filename to ensure (or at least try) that a multivolume archive is targeted by its first part
            #: This is important because, for example, UnRar ignores preceding parts in listing mode
            files_ids.sort(key=lambda file_id: file_id[1])

            for Extractor in self.extractors:
                targets = Extractor.get_targets(files_ids)
                if targets:
                    self.log_debug(
                        "Targets for {}: {}".format(Extractor.__name__, targets)
                    )
                    matched = True

                for fid, fname, fout in targets:
                    name = os.path.basename(fname)

                    if not exists(fname):
                        self.log_debug(name, "File not found")
                        continue

                    self.log_info(name, self._("Extract to: {}").format(fout))
                    try:
                        pyfile = self.pyload.files.get_file(fid)
                        archive = Extractor(
                            pyfile,
                            fname,
                            fout,
                            fullpath,
                            overwrite,
                            excludefiles,
                            priority,
                            keepbroken,
                        )

                        thread.add_active(pyfile)
                        archive.init()

                        #: Save for removal from file processing list, which happens after deletion.
                        #: So archive.chunks() would just return an empty list.
                        chunks = archive.chunks()

                        try:
                            new_files = self._extract(
                                pyfile, archive, pypack.password
                            )

                        finally:
                            pyfile.set_progress(100)
                            thread.finish_file(pyfile)

                    except Exception as exc:
                        # one failed archive marks the package as failed but
                        # does not stop the remaining targets
                        self.log_error(name, exc)
                        success = False
                        continue

                    #: Remove processed file and related multiparts from list
                    files_ids = [
                        (fid, fname, fout)
                        for fid, fname, fout in files_ids
                        if fname not in chunks
                    ]

                    self.log_debug(f"Extracted files: {new_files}")

                    new_folders = uniquify(
                        os.path.dirname(f) for f in new_files
                    )
                    for foldername in new_folders:
                        self.set_permissions(
                            os.path.join(extract_folder, foldername)
                        )

                    for filename in new_files:
                        self.set_permissions(
                            os.path.join(extract_folder, filename)
                        )

                    for filename in new_files:
                        file = os.fsdecode(
                            os.path.join(
                                os.path.dirname(archive.filename), filename
                            )
                        )
                        if not exists(file):
                            self.log_debug(
                                "New file {} does not exists".format(filename)
                            )
                            continue

                        if recursive and os.path.isfile(file):
                            new_files_ids.append(
                                (fid, filename, os.path.dirname(filename))
                            )  #: Append as new target

                    self.m.dispatch_event("archive_extracted", pyfile, archive)

            files_ids = new_files_ids  #: Also check extracted files

        if matched:
            if success:
                #: Delete empty pack folder if extract_folder resides outside download folder
                if self.config.get("delete") and self.pyload.config.get(
                    "general", "folder_per_package"
                ):
                    if not extract_folder.startswith(pack_dl_folder):
                        if len(os.listdir(pack_dl_folder)) == 0:
                            try:
                                os.rmdir(pack_dl_folder)

                                self.log_debug(
                                    "Successfully deleted pack folder {}".format(
                                        pack_dl_folder
                                    )
                                )

                            except OSError:
                                self.log_warning(
                                    "Unable to delete pack folder {}".format(
                                        pack_dl_folder
                                    )
                                )

                        else:
                            self.log_warning(
                                "Not deleting pack folder {}, folder not empty".format(
                                    pack_dl_folder
                                )
                            )

                extracted.append(pid)
                self.m.dispatch_event("package_extracted", pypack)

            else:
                failed.append(pid)
                self.m.dispatch_event("package_extract_failed", pypack)

                self.failed.add(pid)
        else:
            self.log_info(self._("No files found to extract"))

        # NOTE(review): condition parses as `not matched or (not success and subfolder)`
        # due to operator precedence — confirm this is the intended cleanup rule.
        if not matched or not success and subfolder:
            try:
                os.rmdir(extract_folder)

            except OSError:
                pass

        self.queue.remove(pid)

    return True if not failed else False
def send_request_to_server(self):
    """
    Send torrent/magnet to the server.

    Uploads a .torrent file (remote URL or local container) or submits a
    magnet URL to the debrid.link seedbox, waits for the metadata, then
    marks unwanted files according to the include/exclude filters.

    :return: tuple (torrent id, list of download URLs of the selected files).
    """
    if self.pyfile.url.endswith(".torrent"):
        #: torrent URL
        if self.pyfile.url.startswith("http"):
            #: remote URL, download the torrent to tmp directory
            torrent_content = self.load(self.pyfile.url, decode=False)
            torrent_filename = safejoin(
                "tmp", "tmp_{}.torrent".format(self.pyfile.package().name)
            )  #: `tmp_` files are deleted automatically
            with open(torrent_filename, "wb") as f:
                f.write(torrent_content)

        else:
            #: URL is local torrent file (uploaded container)
            torrent_filename = urllib.request.url2pathname(
                self.pyfile.url[7:]
            ).encode('latin1').decode('utf8')  #: trim the starting `file://`
            if not exists(torrent_filename):
                self.fail(self._("Torrent file does not exist"))

        #: Check if the torrent file path is inside pyLoad's config directory
        if os.path.abspath(torrent_filename).startswith(
                os.path.abspath(os.getcwd()) + os.sep):
            try:
                #: send the torrent content to the server
                api_data = json.loads(
                    self.load(
                        "https://up1.debrid.link/seedbox",
                        post={
                            'file': FormFile(
                                torrent_filename,
                                mimetype="application/x-bittorrent",
                            )
                        },
                        multipart=True,
                    )
                )
                if api_data["result"] != "OK":
                    self.fail(api_data["ERR"])

                api_data = self.api_response_safe(
                    "v2/seedbox/add",
                    post={"url": api_data["link"], "wait": True, "async": True},
                )

            except NameError:
                # FormFile is unavailable on this pyLoad version
                self.fail(
                    self._(
                        "Posting file attachments is not supported by HTTPRequest, please update your pyLoad installation"
                    )
                )

        else:
            self.fail(
                self._("Illegal URL")
            )  #: We don't allow files outside pyLoad's config directory

    else:
        #: magnet URL, send to the server
        api_data = self.api_response_safe(
            "v2/seedbox/add",
            post={"url": self.pyfile.url, "wait": True, "async": True},
        )

    if not api_data["success"]:
        self.fail(
            "{} (code: {})".format(
                api_data.get(
                    "error_description", error_description(api_data["error"])
                ),
                api_data["error"],
            )
        )

    torrent_id = api_data["value"]["id"]

    # snake_case pyfile API, consistent with the rest of this file
    self.pyfile.set_custom_status("metadata")
    self.pyfile.set_progress(0)

    #: Get the file list of the torrent, polling until the metadata is ready
    page = 0
    files = []
    while True:
        api_data = self.api_response_safe(
            "v2/seedbox/list",
            get={"ids": torrent_id, "page": page, "perPage": 50},
        )
        if not api_data['success']:
            self.fail(
                "{} (code: {})".format(
                    api_data.get(
                        "error_description", error_description(api_data["error"])
                    ),
                    api_data["error"],
                )
            )

        if api_data["value"][0]["status"] == 1:
            files.extend(
                [
                    {
                        "id": _file["id"],
                        "name": _file["name"],
                        "size": _file["size"],
                        "url": _file["downloadUrl"],
                    }
                    for _file in api_data["value"][0]["files"]
                ]
            )
            page = api_data["pagination"]["next"]
            if page == -1:
                break
            else:
                continue

        #: metadata not ready yet, wait and poll again
        self.sleep(5)

    self.pyfile.name = api_data["value"][0]["name"]

    #: Filter and select files for downloading
    exclude_filters = self.config.get("exclude_filter").split(";")
    excluded_ids = []
    for _filter in exclude_filters:
        excluded_ids.extend(
            [
                _file["id"]
                for _file in files
                if fnmatch.fnmatch(_file["name"], _filter)
            ]
        )
    excluded_ids = uniquify(excluded_ids)

    include_filters = self.config.get("include_filter").split(";")
    included_ids = []
    for _filter in include_filters:
        included_ids.extend(
            [
                _file["id"]
                for _file in files
                if fnmatch.fnmatch(_file["name"], _filter)
            ]
        )
    included_ids = uniquify(included_ids)

    selected_ids = [_id for _id in included_ids if _id not in excluded_ids]
    unwanted_ids = [
        _file["id"] for _file in files if _file["id"] not in selected_ids
    ]

    self.pyfile.size = sum(
        [_file["size"] for _file in files if _file["id"] in selected_ids]
    )

    # FIX: `post=` was previously passed to str.format() (which silently
    # ignores unused kwargs), so the unwanted-files config was never sent.
    api_data = self.api_response_safe(
        "v2/seedbox/{}/config".format(torrent_id),
        post={'files-unwanted': json.dumps(unwanted_ids)},
    )
    if not api_data["success"]:
        self.fail(
            "{} (code: {})".format(
                api_data.get(
                    "error_description", error_description(api_data["error"])
                ),
                api_data["error"],
            )
        )

    return torrent_id, [
        _file["url"] for _file in files if _file["id"] in selected_ids
    ]