Example #1
0
    def parse_domains(self, list):
        """
        Pull the host part out of every URL in `list`.

        The pattern captures either a dotted IPv4 address or a domain name
        (optionally followed by `:port`), skipping an optional scheme, `www.`
        and leading sub-domain labels. Each capture is decoded, stripped and
        lower-cased; the collected values go through `uniquify` and then
        `replace_domains`, whose result is returned.
        """
        host_pattern = re.compile(
            r"^(?:https?://)?(?:www\.)?(?:\w+\.)*((?:(?:\d{1,3}\.){3}\d{1,3}|[\w\-^_]{3,63}(?:\.[a-zA-Z]{2,}){1,2})(?:\:\d+)?)",
            re.I | re.U,
        )

        domains = []
        for url in list:
            for host in host_pattern.findall(url):
                domains.append(decode(host).strip().lower())

        return self.replace_domains(uniquify(domains))
Example #2
0
    def add_password(self, password):
        """
        Merge `password` into the saved passwords and rewrite the password file.

        The new password is prepended to the current list, the list is passed
        through `uniquify`, and every entry is written on its own line to the
        file named by the `passwordfile` config value. An `IOError` is logged
        instead of being raised.
        """
        try:
            self.passwords = uniquify([password] + self.passwords)

            password_path = os.fsdecode(self.config.get("passwordfile"))
            with open(password_path, mode="w") as fp:
                fp.writelines(entry + "\n" for entry in self.passwords)

        except IOError as exc:
            self.log_error(exc)
Example #3
0
    def get_links(self):
        """
        Collect the links matched by `LINK_PATTERN` in the loaded page together
        with any additional links reported by `get_indirect_links`.

        If a gallery name is known, the links are queued as a dedicated package
        and an empty list is returned; otherwise the links are returned directly.
        """

        def normalize(url):
            # Drop the `s` that follows a 7-character word right before a dot
            # and prepend the scheme
            return "http://" + re.sub(r"(\w{7})s\.", r"\1.", url)

        direct_links = uniquify(
            normalize(match) for match in re.findall(self.LINK_PATTERN, self.data)
        )

        # Imgur galleries may contain more images than initially shown. Find
        # the rest now!
        try:
            indirect_links = self.get_indirect_links(direct_links)
            self.log_debug(f"Found {len(indirect_links)} additional links")

        except (TypeError, KeyError, ValueError) as exc:
            # Fail gracefully as we already had some success
            message = self._("Processing of additional links unsuccessful - {}: {}")
            self.log_error(message.format(type(exc).__name__, exc))
            indirect_links = []

        # Check if all images were found and inform the user
        num_images_found = len(direct_links) + len(indirect_links)
        if num_images_found < self.total_num_images:
            message = self._("Could not save all images of this gallery: {}/{}")
            self.log_error(message.format(num_images_found, self.total_num_images))

        # Without a gallery name the links are simply handed back to the caller
        if not self.gallery_name:
            return direct_links + indirect_links

        # If we could extract a name, use this to create a specific package
        self.packages.append(
            (self.gallery_name, direct_links + indirect_links, self.gallery_name)
        )
        return []
Example #4
0
    def send_request_to_server(self):
        """
        Send torrent/magnet to the server and select the files to download.

        A URL ending in `.torrent` is either downloaded into the tmp directory
        (remote `http` URL) or resolved to a local path (`file://` URL) and then
        uploaded to the `/torrents/addTorrent` endpoint. The upload is attempted
        at most twice so that an expired token (error code 8) can be refreshed
        once. Any other URL is submitted as a magnet link.

        Afterwards the torrent's file list is matched against the
        `include_filter` and `exclude_filter` config values (`;` separated
        fnmatch patterns) and the resulting selection is posted to the server.

        :return: id of the torrent as reported by the server
        """

        if self.pyfile.url.endswith(".torrent"):
            #: torrent URL
            if self.pyfile.url.startswith("http"):
                #: remote URL, download the torrent to tmp directory
                torrent_content = self.load(self.pyfile.url, decode=False)
                torrent_filename = safejoin(
                    "tmp", "tmp_{}.torrent".format(self.pyfile.package().name)
                )  #: `tmp_` files are deleted automatically
                with open(torrent_filename, "wb") as f:
                    f.write(torrent_content)

            else:
                #: URL is local torrent file (uploaded container)
                torrent_filename = urllib.request.url2pathname(
                    self.pyfile.url[7:]
                )  #: trim the starting `file://`
                if not exists(torrent_filename):
                    self.fail(self._("Torrent file does not exist"))

            #: Check if the torrent file path is inside pyLoad's temp directory
            if os.path.abspath(torrent_filename).startswith(self.pyload.tempdir + os.sep):
                for _i in range(2):
                    try:
                        #: send the torrent content to the server
                        json_data = self.upload(
                            torrent_filename,
                            self.API_URL + "/torrents/addTorrent",
                            get={"auth_token": self.api_token},
                        )
                    except BadHeader as exc:
                        #: error responses carry the JSON payload in the exception
                        json_data = exc.content

                    api_data = json.loads(json_data) if len(json_data) > 0 else {}

                    if "error_code" in api_data:
                        if api_data["error_code"] == 8:  #: token expired, refresh the token and retry
                            self.account.relogin()
                            if not self.account.info["login"]["valid"]:
                                self.fail(self._("Token refresh has failed"))

                            else:
                                #: `dict.keys()` is a view in Python 3 and cannot be indexed
                                first_account = list(self.account.accounts.keys())[0]
                                self.api_token = self.account.accounts[first_account]["api_token"]

                        else:
                            error_msg = api_data["error"]
                            self.fail(error_msg)

                    else:
                        break

                else:
                    #: both attempts ended with an expired token
                    self.fail(self._("Token refresh has failed"))

            else:
                self.fail(self._("Illegal URL"))  #: We don't allow files outside pyLoad's temp directory

        else:
            #: magnet URL, send to the server
            api_data = self.api_response(
                "/torrents/addMagnet",
                get={"auth_token": self.api_token},
                post={"magnet": self.pyfile.url},
            )

        torrent_id = api_data["id"]

        torrent_info = self.api_response(
            "/torrents/info/" + torrent_id, get={"auth_token": self.api_token}
        )

        if "error" in torrent_info:
            self.fail(
                "{} (code: {})".format(
                    torrent_info["error"], torrent_info.get("error_code", -1)
                )
            )

        #: Filter and select files for downloading
        exclude_filters = self.config.get("exclude_filter").split(';')
        excluded_ids = []
        for _filter in exclude_filters:
            excluded_ids.extend(
                _file["id"]
                for _file in torrent_info["files"]
                if fnmatch.fnmatch(os.path.basename(_file["path"]), _filter)
            )

        excluded_ids = uniquify(excluded_ids)

        include_filters = self.config.get("include_filter").split(";")
        included_ids = []
        for _filter in include_filters:
            included_ids.extend(
                _file["id"]
                for _file in torrent_info["files"]
                if fnmatch.fnmatch(os.path.basename(_file["path"]), _filter)
            )

        included_ids = uniquify(included_ids)

        #: A file is selected when it matches an include filter and no exclude filter
        selected_ids = ",".join(
            str(_id) for _id in included_ids if _id not in excluded_ids
        )
        self.api_response(
            "/torrents/selectFiles/" + torrent_id,
            get={"auth_token": self.api_token},
            post={"files": selected_ids},
        )

        return torrent_id
Example #5
0
    def _extract(self, pyfile, archive, password):
        """
        Verify, optionally repair, and extract a single archive.

        The archive is first verified with the candidate passwords, then
        extracted. When the `delete` config value is set, the files returned by
        `archive.chunks()` are removed or moved to trash afterwards.

        :param pyfile: file object whose status and progress are updated
        :param archive: extractor instance providing `verify`, `repair`,
            `extract`, `chunks`, `files` and `list`
        :param password: password to try first (may be empty)
        :return: `archive.files`, or `archive.list()` when that is empty
        :raises Exception: with the "Extract failed" message after any error has
            been logged and the `archive_extract_failed` event dispatched
        """
        name = os.path.basename(archive.filename)

        pyfile.set_status("processing")

        encrypted = False
        try:
            self.log_debug(f"Password: {password or None}")
            #: Candidates: the given password plus the saved ones, or only the
            #: given password when the password file is disabled
            passwords = (uniquify([password] + self.get_passwords(False))
                         if self.config.get("usepasswordfile") else [password])

            #: Verification pass: stop at the first password that verifies
            for pw in passwords:
                try:
                    pyfile.set_custom_status(self._("archive testing"))
                    pyfile.set_progress(0)
                    archive.verify(pw)
                    pyfile.set_progress(100)

                except PasswordError:
                    #: Wrong password, remember the archive is protected and
                    #: go on with the next candidate (logged only once)
                    if not encrypted:
                        self.log_info(name, self._("Password protected"))
                        encrypted = True

                except CRCError as exc:
                    self.log_debug(name, exc)
                    self.log_info(name, self._("CRC Error"))

                    if not self.repair:
                        raise CRCError("Archive damaged")

                    else:
                        self.log_warning(name, self._("Repairing..."))
                        pyfile.set_custom_status(self._("archive repairing"))
                        pyfile.set_progress(0)
                        repaired = archive.repair()
                        pyfile.set_progress(100)

                        #: A failed repair is tolerated only when broken files are kept
                        if not repaired and not self.config.get("keepbroken"):
                            raise CRCError("Archive damaged")

                        else:
                            self.add_password(pw)
                            break

                except ArchiveError as exc:
                    raise ArchiveError(exc)

                else:
                    #: Verification succeeded, remember the working password
                    self.add_password(pw)
                    break

            pyfile.set_custom_status(self._("archive extracting"))
            pyfile.set_progress(0)

            #: Extraction pass: use the given password directly unless the
            #: archive was found to be protected and the password file is enabled
            if not encrypted or not self.config.get("usepasswordfile"):
                self.log_debug("Extracting using password: {}".format(
                    password or "None"))
                archive.extract(password)
            else:
                #: Try every non-empty candidate until one extracts
                for pw in [
                        f for f in uniquify([password] +
                                            self.get_passwords(False)) if f
                ]:
                    try:
                        self.log_debug(f"Extracting using password: {pw}")

                        archive.extract(pw)
                        self.add_password(pw)
                        break

                    except PasswordError:
                        self.log_debug("Password was wrong")
                else:
                    #: No candidate worked
                    raise PasswordError

            pyfile.set_progress(100)
            pyfile.set_status("processing")

            extracted_files = archive.files or archive.list()

            delfiles = archive.chunks()
            self.log_debug("Would delete: " + ", ".join(delfiles))

            if self.config.get("delete"):
                self.log_info(
                    self._("Deleting {} files").format(len(delfiles)))

                deltotrash = self.config.get("deltotrash")
                for f in delfiles:
                    file = os.fsdecode(f)
                    if not exists(file):
                        continue

                    if not deltotrash:
                        os.remove(file)

                    else:
                        try:
                            send2trash.send2trash(file)

                        #: NameError is raised when the name `send2trash` is not
                        #: defined, presumably because an optional import failed
                        #: elsewhere in the file; verify
                        except NameError:
                            self.log_warning(
                                self._("Unable to move {} to trash").format(
                                    os.path.basename(f)),
                                self._("Send2Trash lib not installed"),
                            )

                        except Exception as exc:
                            self.log_warning(
                                self._("Unable to move {} to trash").format(
                                    os.path.basename(f)),
                                exc,
                            )

                        else:
                            self.log_info(
                                self._("Moved {} to trash").format(
                                    os.path.basename(f)))

            self.log_info(name, self._("Extracting finished"))

            return extracted_files

        #: Every handler below only logs; the failure event and the final
        #: exception are raised after the try statement
        except PasswordError:
            self.log_error(
                name,
                self._("Wrong password" if password else "No password found"))

        except CRCError as exc:
            self.log_error(name, self._("CRC mismatch"), exc)

        except ArchiveError as exc:
            self.log_error(name, self._("Archive error"), exc)

        except Exception as exc:
            self.log_error(name, self._("Unknown error"), exc)

        #: Reached only on failure, the success path returns inside the try
        self.m.dispatch_event("archive_extract_failed", pyfile, archive)

        raise Exception(self._("Extract failed"))
Example #6
0
    def extract(
        self, ids, thread=None
    ):  # TODO: Use pypack, not pid to improve method usability
        """
        Extract the archives contained in the given packages.

        For every package id, the package's files are offered to each class in
        `self.extractors`; every target an extractor reports is extracted via
        `_extract`. With the `recursive` config value set, newly extracted files
        are queued as further targets. Events are dispatched per archive
        (`archive_extracted`) and per package (`package_extracted` /
        `package_extract_failed`), and every processed id is removed from
        `self.queue`.

        :param ids: package ids to process
        :param thread: object whose `add_active` / `finish_file` are called
            around each extraction.
            NOTE(review): defaults to None but is dereferenced unconditionally;
            with None the resulting error is caught per target and the package
            is marked as failed.
        :return: False when `ids` is empty or at least one package failed,
            True otherwise
        """
        if not ids:
            return False

        extracted = []
        failed = []

        def to_list(value):
            #: Split a config string on `,`, `;` or `|` after removing all spaces
            return value.replace(" ", "").replace(",", "|").replace(";", "|").split("|")

        destination = self.config.get("destination")
        subfolder = self.config.get("subfolder")
        fullpath = self.config.get("fullpath")
        overwrite = self.config.get("overwrite")
        priority = self.config.get("priority")
        recursive = self.config.get("recursive")
        keepbroken = self.config.get("keepbroken")

        extensions = [
            x.lstrip(".").lower() for x in to_list(self.config.get("extensions"))
        ]
        excludefiles = to_list(self.config.get("excludefiles"))

        if extensions:
            self.log_debug(f"Use for extensions: .{'|.'.join(extensions)}")

        #: Reload from txt file
        self.reload_passwords()

        dl_folder = self.pyload.config.get("general", "storage_folder")

        #: Iterate packages -> extractors -> targets
        for pid in ids:
            pypack = self.pyload.files.get_package(pid)

            if not pypack:
                #: Package is gone, just drop it from the queue
                self.queue.remove(pid)
                continue

            self.log_info(self._("Check package: {}").format(pypack.name))

            pack_dl_folder = os.path.join(
                dl_folder, pypack.folder, ""
            )  #: Force trailing slash

            #: Determine output folder
            extract_folder = os.path.join(
                pack_dl_folder, destination, ""
            )  #: Force trailing slash

            if subfolder:
                extract_folder = os.path.join(
                    extract_folder,
                    pypack.folder or safename(pypack.name.replace("http://", "")),
                )

            os.makedirs(extract_folder, exist_ok=True)
            if subfolder:
                self.set_permissions(extract_folder)

            matched = False
            success = True
            #: Entries are (file id, absolute file path, output folder) tuples
            files_ids = list(
                {
                    fdata["name"]: (
                        fdata["id"],
                        (os.path.join(pack_dl_folder, fdata["name"])),
                        extract_folder,
                    )
                    for fdata in pypack.get_children().values()
                }.values()
            )  #: Remove duplicates (files sharing a name collapse to one entry)

            #: Check as long there are unseen files
            while files_ids:
                new_files_ids = []

                if extensions:  #: Include only specified archive types
                    files_ids = [
                        file_id
                        for file_id in files_ids
                        if any(
                            [
                                Extractor.archivetype(file_id[1]) in extensions
                                for Extractor in self.extractors
                            ]
                        )
                    ]

                #: Sort by filename to ensure (or at least try) that a multivolume archive is targeted by its first part
                #: This is important because, for example, UnRar ignores preceding parts in listing mode
                files_ids.sort(key=lambda file_id: file_id[1])

                for Extractor in self.extractors:
                    targets = Extractor.get_targets(files_ids)
                    if targets:
                        self.log_debug(
                            "Targets for {}: {}".format(Extractor.__name__, targets)
                        )
                        matched = True

                        for fid, fname, fout in targets:
                            name = os.path.basename(fname)

                            if not exists(fname):
                                self.log_debug(name, "File not found")
                                continue

                            self.log_info(name, self._("Extract to: {}").format(fout))
                            try:
                                pyfile = self.pyload.files.get_file(fid)
                                archive = Extractor(
                                    pyfile,
                                    fname,
                                    fout,
                                    fullpath,
                                    overwrite,
                                    excludefiles,
                                    priority,
                                    keepbroken,
                                )

                                #: NOTE(review): if `archive.init()` raises,
                                #: `thread.finish_file` below is never reached
                                #: for this pyfile; confirm this is intended
                                thread.add_active(pyfile)
                                archive.init()

                                #: Save for removal from file processing list, which happens after deletion.
                                #: So archive.chunks() would just return an empty list.
                                chunks = archive.chunks()

                                try:
                                    new_files = self._extract(
                                        pyfile, archive, pypack.password
                                    )

                                finally:
                                    pyfile.set_progress(100)
                                    thread.finish_file(pyfile)

                            except Exception as exc:
                                #: One failing archive marks the whole package as
                                #: failed but the remaining targets are still tried
                                self.log_error(name, exc)
                                success = False
                                continue

                            #: Remove processed file and related multiparts from list
                            files_ids = [
                                (fid, fname, fout)
                                for fid, fname, fout in files_ids
                                if fname not in chunks
                            ]
                            self.log_debug(f"Extracted files: {new_files}")

                            new_folders = uniquify(
                                os.path.dirname(f) for f in new_files
                            )
                            for foldername in new_folders:
                                self.set_permissions(
                                    os.path.join(extract_folder, foldername)
                                )

                            for filename in new_files:
                                self.set_permissions(
                                    os.path.join(extract_folder, filename)
                                )

                            for filename in new_files:
                                file = os.fsdecode(
                                    os.path.join(
                                        os.path.dirname(archive.filename), filename
                                    )
                                )
                                if not exists(file):
                                    self.log_debug(
                                        "New file {} does not exists".format(filename)
                                    )
                                    continue

                                #: NOTE(review): the existence check uses `file`
                                #: (joined with the archive's folder) while the new
                                #: target is built from `filename`; confirm that
                                #: `filename` is expected to be an absolute path
                                if recursive and os.path.isfile(file):
                                    new_files_ids.append(
                                        (fid, filename, os.path.dirname(filename))
                                    )  #: Append as new target

                            self.m.dispatch_event("archive_extracted", pyfile, archive)

                files_ids = new_files_ids  #: Also check extracted files

            if matched:
                if success:
                    #: Delete empty pack folder if extract_folder resides outside download folder
                    if self.config.get("delete") and self.pyload.config.get(
                        "general", "folder_per_package"
                    ):
                        if not extract_folder.startswith(pack_dl_folder):
                            if len(os.listdir(pack_dl_folder)) == 0:
                                try:
                                    os.rmdir(pack_dl_folder)
                                    self.log_debug(
                                        "Successfully deleted pack folder {}".format(
                                            pack_dl_folder
                                        )
                                    )

                                except OSError:
                                    self.log_warning(
                                        "Unable to delete pack folder {}".format(
                                            pack_dl_folder
                                        )
                                    )

                            else:
                                self.log_warning(
                                    "Not deleting pack folder {}, folder not empty".format(
                                        pack_dl_folder
                                    )
                                )

                    extracted.append(pid)
                    self.m.dispatch_event("package_extracted", pypack)

                else:
                    failed.append(pid)
                    self.m.dispatch_event("package_extract_failed", pypack)

                    self.failed.add(pid)
            else:
                self.log_info(self._("No files found to extract"))

            #: NOTE(review): `and` binds tighter than `or`, so this reads as
            #: `not matched or (not success and subfolder)`; confirm that the
            #: folder should also be removed when `subfolder` is off
            if not matched or not success and subfolder:
                try:
                    #: Only succeeds when the folder is empty
                    os.rmdir(extract_folder)

                except OSError:
                    pass

            self.queue.remove(pid)

        return True if not failed else False
    def send_request_to_server(self):
        """
        Send torrent/magnet to the server and choose which files to download.

        A URL ending in `.torrent` is either downloaded into the tmp directory
        (remote `http` URL) or resolved to a local path (`file://` URL), uploaded,
        and the returned link is added to the seedbox. Any other URL is added
        to the seedbox directly as a magnet link.

        The method then polls the seedbox listing until the torrent reports
        status 1, collecting its files page by page. The files are matched
        against the `include_filter` and `exclude_filter` config values (`;`
        separated fnmatch patterns) and the ids of the unselected files are
        posted to the torrent's config endpoint as unwanted.

        :return: tuple of (torrent id, list of download URLs of the selected files)
        """

        def fail_on_error(response):
            #: Abort with the error description of an unsuccessful API response
            if not response["success"]:
                self.fail("{} (code: {})".format(
                    response.get("error_description",
                                 error_description(response["error"])),
                    response["error"]))

        if self.pyfile.url.endswith(".torrent"):
            #: torrent URL
            if self.pyfile.url.startswith("http"):
                #: remote URL, download the torrent to tmp directory
                torrent_content = self.load(self.pyfile.url, decode=False)
                torrent_filename = safejoin("tmp", "tmp_{}.torrent".format(
                    self.pyfile.package(
                    ).name))  #: `tmp_` files are deleted automatically
                with open(torrent_filename, "wb") as f:
                    f.write(torrent_content)

            else:
                #: URL is local torrent file (uploaded container)
                #: `url2pathname` already returns decoded text on Python 3, so
                #: no latin1/utf8 round trip is applied to the result
                torrent_filename = urllib.request.url2pathname(
                    self.pyfile.url[7:])  #: trim the starting `file://`
                if not exists(torrent_filename):
                    self.fail(self._("Torrent file does not exist"))

            #: Check if the torrent file path is inside the current working directory
            if os.path.abspath(torrent_filename).startswith(
                    os.path.abspath(os.getcwd()) + os.sep):
                try:
                    #: send the torrent content to the server
                    api_data = json.loads(
                        self.load("https://up1.debrid.link/seedbox",
                                  post={
                                      'file':
                                      FormFile(
                                          torrent_filename,
                                          mimetype="application/x-bittorrent")
                                  },
                                  multipart=True))
                    if api_data["result"] != "OK":
                        self.fail(api_data["ERR"])

                    api_data = self.api_response_safe("v2/seedbox/add",
                                                      post={
                                                          "url":
                                                          api_data["link"],
                                                          "wait": True,
                                                          "async": True
                                                      })

                except NameError:
                    #: Raised when `FormFile` is not defined
                    self.fail(
                        self.
                        _("Posting file attachments is not supported by HTTPRequest, please update your pyLoad installation"
                          ))
            else:
                self.fail(
                    self._("Illegal URL")
                )  #: We don't allow files outside the current working directory

        else:
            #: magnet URL, send to the server
            api_data = self.api_response_safe("v2/seedbox/add",
                                              post={
                                                  "url": self.pyfile.url,
                                                  "wait": True,
                                                  "async": True
                                              })

        fail_on_error(api_data)

        torrent_id = api_data["value"]["id"]

        self.pyfile.set_custom_status("metadata")
        self.pyfile.set_progress(0)

        #: Get the file list of the torrent
        page = 0
        files = []
        while True:
            api_data = self.api_response_safe("v2/seedbox/list",
                                              get={
                                                  "ids": torrent_id,
                                                  "page": page,
                                                  "perPage": 50
                                              })

            fail_on_error(api_data)

            if api_data["value"][0]["status"] == 1:
                files.extend({
                    "id": _file["id"],
                    "name": _file["name"],
                    "size": _file["size"],
                    "url": _file["downloadUrl"]
                } for _file in api_data["value"][0]["files"])

                #: A next page of -1 marks the end of the listing
                page = api_data["pagination"]["next"]
                if page == -1:
                    break
                else:
                    continue

            #: File list is not available yet, poll again later
            self.sleep(5)

        self.pyfile.name = api_data["value"][0]["name"]

        #: Filter and select files for downloading
        exclude_filters = self.config.get("exclude_filter").split(";")
        excluded_ids = []
        for _filter in exclude_filters:
            excluded_ids.extend(
                _file["id"] for _file in files
                if fnmatch.fnmatch(_file["name"], _filter)
            )

        excluded_ids = uniquify(excluded_ids)

        include_filters = self.config.get("include_filter").split(";")
        included_ids = []
        for _filter in include_filters:
            included_ids.extend(
                _file["id"] for _file in files
                if fnmatch.fnmatch(_file["name"], _filter)
            )

        included_ids = uniquify(included_ids)

        #: A file is selected when it matches an include filter and no exclude filter
        selected_ids = [_id for _id in included_ids if _id not in excluded_ids]

        unwanted_ids = [
            _file["id"] for _file in files if _file["id"] not in selected_ids
        ]

        self.pyfile.size = sum(
            _file["size"] for _file in files if _file["id"] in selected_ids)

        #: `post` belongs to the API call, not to `str.format()`, otherwise the
        #: unwanted files are never transmitted
        api_data = self.api_response_safe(
            "v2/seedbox/{}/config".format(torrent_id),
            post={'files-unwanted': json.dumps(unwanted_ids)})

        fail_on_error(api_data)

        return torrent_id, [
            _file["url"] for _file in files if _file["id"] in selected_ids
        ]