コード例 #1
0
    def start_download(self) -> None:
        """Download the gallery's archive from a generic HTTP server.

        The target filename is taken from the ``Content-Disposition`` response
        header, falling back to the last path segment of the link. The body is
        streamed into ``archive_dl_folder`` under ``MEDIA_ROOT`` and the
        stored file is then inspected with ``get_zip_fileinfo``.

        Sets ``self.return_code`` to 1 (together with ``self.fileDownloaded``
        and ``self.crc32``) when the stored file reports a size above zero,
        and to 0 when no filename can be determined or the file reports no
        size. Returns immediately when there is no gallery or no link.
        """
        if not self.gallery or not self.gallery.link:
            return

        logger.info(
            "Downloading an archive from a generic HTTP server: {}".format(
                self.gallery.link))

        request_dict = construct_request_dict(self.settings, self.own_settings)

        # stream=True defers the body so it can be written in chunks below.
        request_file = requests.get(self.gallery.link,
                                    stream=True,
                                    **request_dict)

        filename = get_filename_from_cd(
            request_file.headers.get('content-disposition'))

        # str.find() returns -1 (truthy) on a miss and 0 (falsy) for a leading
        # match, so test membership before taking the last path segment.
        if not filename and '/' in self.gallery.link:
            filename = self.gallery.link.rsplit('/', 1)[1]

        if not filename:
            logger.error("Could not find a filename for link: {}".format(
                self.gallery.link))
            self.return_code = 0
            # There is no name to save the download under; release the
            # still-unread streamed response and stop.
            request_file.close()
            return

        self.gallery.title = filename.replace(".zip", "")
        self.gallery.filename = replace_illegal_name(
            available_filename(
                self.settings.MEDIA_ROOT,
                os.path.join(self.own_settings.archive_dl_folder, filename)))

        filepath = os.path.join(self.settings.MEDIA_ROOT,
                                self.gallery.filename)
        with open(filepath, 'wb') as fo:
            for chunk in request_file.iter_content(4096):
                fo.write(chunk)

        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(
            filepath)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(filepath)

            self.fileDownloaded = 1
            self.return_code = 1

        else:
            logger.error("Could not download archive")
            self.return_code = 0
コード例 #2
0
    def start_download(self) -> None:
        """Download the gallery's temporary archive link and store it as a zip.

        The file name is derived from the gallery data, sanitised, and made
        unique inside ``archive_dl_folder`` under ``MEDIA_ROOT``. The response
        body is streamed to disk and inspected with ``get_zip_fileinfo``.

        Sets ``self.return_code`` to 1 (plus ``self.fileDownloaded`` and
        ``self.crc32``) when the stored file reports a size above zero, and to
        0 when the request yields nothing usable or the file reports no size.
        Returns immediately when there is no gallery or no ``temp_archive``.
        """
        gallery = self.gallery
        if not gallery or not gallery.temp_archive:
            return

        logger.info(
            "Downloading an archive: {} from a Panda Backup-like source: {}".format(
                gallery.title, gallery.temp_archive['link']))

        base_name = replace_illegal_name(
            get_base_filename_string_from_gallery_data(gallery))

        # TODO: File could be cbz.
        relative_target = os.path.join(
            self.own_settings.archive_dl_folder, base_name + '.zip')
        gallery.filename = available_filename(
            self.settings.MEDIA_ROOT, relative_target)

        request_options = construct_request_dict(
            self.settings, self.own_settings)
        request_options['stream'] = True
        response = request_with_retries(
            gallery.temp_archive['link'], request_options)
        if not response:
            logger.error("Could not download archive")
            self.return_code = 0
            return

        destination = os.path.join(self.settings.MEDIA_ROOT, gallery.filename)
        with open(destination, 'wb') as archive_file:
            # Write the body in 4096-byte pieces as they arrive.
            archive_file.writelines(response.iter_content(4096))

        gallery.filesize, gallery.filecount = get_zip_fileinfo(destination)
        if gallery.filesize <= 0:
            logger.error("Could not download archive")
            self.return_code = 0
            return

        self.crc32 = calc_crc32(destination)
        self.fileDownloaded = 1
        self.return_code = 1
コード例 #3
0
    def start_download(self) -> None:
        """Download the archive located at the gallery's ``archiver_key`` URL.

        The file name is derived from the gallery data, sanitised, and made
        unique inside ``archive_dl_folder`` under ``MEDIA_ROOT``. The response
        body is streamed to disk and inspected with ``get_zip_fileinfo``.

        Sets ``self.return_code`` to 1 (plus ``self.fileDownloaded`` and
        ``self.crc32``) when the stored file reports a size above zero.
        Otherwise the stored file is deleted and ``self.return_code`` is set
        to 0. Returns immediately when the gallery, its link, or its
        ``archiver_key`` is missing.
        """
        if not self.gallery or not self.gallery.link or not self.gallery.archiver_key:
            return

        to_use_filename = get_base_filename_string_from_gallery_data(
            self.gallery)

        to_use_filename = replace_illegal_name(to_use_filename)

        self.gallery.filename = available_filename(
            self.settings.MEDIA_ROOT,
            os.path.join(self.own_settings.archive_dl_folder,
                         to_use_filename + '.zip'))

        request_dict = construct_request_dict(self.settings, self.own_settings)

        # ``stream`` is a boolean flag; the previous string 'True' only worked
        # because any non-empty string is truthy.
        request_file = requests.get(self.gallery.archiver_key,
                                    stream=True,
                                    **request_dict)

        filepath = os.path.join(self.settings.MEDIA_ROOT,
                                self.gallery.filename)
        with open(filepath, 'wb') as fo:
            for chunk in request_file.iter_content(4096):
                fo.write(chunk)

        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(
            filepath)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(filepath)

            self.fileDownloaded = 1
            self.return_code = 1

        else:
            logger.error("Could not download archive")
            # Do not leave an unusable file behind in the download folder.
            os.remove(filepath)
            self.return_code = 0
コード例 #4
0
    def start_download(self) -> None:
        """Download the archive referenced by ``gallery.archiver_key['link']``.

        The gallery title is sanitised in place and used as the zip file name,
        made unique inside ``archive_dl_folder`` under ``MEDIA_ROOT``. The
        response body is streamed to disk and inspected with
        ``get_zip_fileinfo``.

        Sets ``self.return_code`` to 1 (plus ``self.fileDownloaded`` and
        ``self.crc32``) when the stored file reports a size above zero, and to
        0 otherwise. Returns immediately when there is no gallery or no
        ``archiver_key`` to read the link from.
        """
        # archiver_key is subscripted below, so it has to be present as well.
        if not self.gallery or not self.gallery.archiver_key:
            return

        self.logger.info(
            "Downloading an archive: {} from a Panda Backup-like source: {}".
            format(self.gallery.title, self.gallery.archiver_key['link']))

        self.gallery.title = replace_illegal_name(self.gallery.title)
        self.gallery.filename = available_filename(
            self.settings.MEDIA_ROOT,
            os.path.join(self.own_settings.archive_dl_folder,
                         self.gallery.title + '.zip'))

        # ``stream`` is a boolean flag; the previous string 'True' only worked
        # because any non-empty string is truthy.
        request_file = requests.get(self.gallery.archiver_key['link'],
                                    stream=True,
                                    headers=self.settings.requests_headers,
                                    timeout=self.settings.timeout_timer,
                                    cookies=self.own_settings.cookies)

        filepath = os.path.join(self.settings.MEDIA_ROOT,
                                self.gallery.filename)
        with open(filepath, 'wb') as fo:
            for chunk in request_file.iter_content(4096):
                fo.write(chunk)

        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(
            filepath)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(filepath)

            self.fileDownloaded = 1
            self.return_code = 1

        else:
            self.logger.error("Could not download archive")
            self.return_code = 0
コード例 #5
0
    def start_download(self) -> None:
        """Request an archive link through the archiver and download the zip.

        Steps:
          1. Derive a unique zip file name from the gallery data.
          2. Call ``self.request_archive_download`` with the gallery's root,
             gid, token and archiver_key.
          3. Extract the archive link from the returned page, strip its query
             string and request it with ``?start=1`` appended.
          4. Stream the body to disk and inspect it with
             ``get_zip_fileinfo``.

        Sets ``self.return_code`` to 1 (plus ``self.fileDownloaded`` and
        ``self.crc32``) when the stored file reports a size above zero. Every
        failure is logged and sets ``self.return_code`` to 0. Returns
        immediately when there is no gallery.
        """
        if not self.gallery:
            return

        to_use_filename = get_base_filename_string_from_gallery_data(
            self.gallery)

        to_use_filename = replace_illegal_name(to_use_filename)

        self.gallery.filename = available_filename(
            self.settings.MEDIA_ROOT,
            os.path.join(self.own_settings.archive_dl_folder,
                         to_use_filename + '.zip'))

        if not (self.gallery.root and self.gallery.gid and self.gallery.token
                and self.gallery.archiver_key):
            logger.error(
                'Missing required data -> root: {}, gid: {}, token: {}, archiver_key: {}.'
                .format(
                    self.gallery.root,
                    self.gallery.gid,
                    self.gallery.token,
                    self.gallery.archiver_key,
                ))
            self.return_code = 0
            return

        r = self.request_archive_download(self.gallery.root, self.gallery.gid,
                                          self.gallery.token,
                                          self.gallery.archiver_key)

        if not r:
            logger.error('Could not get download link.')
            self.return_code = 0
            return

        r.encoding = 'utf-8'

        if 'Invalid archiver key' in r.text:
            logger.error("Invalid archiver key received.")
            self.return_code = 0
            return

        archive_link = get_archive_link_from_html_page(r.text)

        if archive_link == '':
            logger.error(
                'Could not find archive link, page text: {}'.format(r.text))
            self.return_code = 0
            return

        # Keep only the part of the link that precedes the first '?'.
        m = re.match(r"(.*?)(\?.*?)", archive_link)
        if m:
            archive_link = m.group(1)

        logger.info('Got link: {}, from url: {}'.format(archive_link, r.url))

        request_dict = construct_request_dict(self.settings, self.own_settings)

        request_file = requests.get(archive_link + '?start=1',
                                    stream=True,
                                    **request_dict)

        # The status that matters is the archive response's own status; the
        # archiver page response ``r`` was already validated above.
        if request_file.status_code != 200:
            logger.error("Could not download archive")
            self.return_code = 0
            # Release the unread streamed response.
            request_file.close()
            return

        logger.info('Downloading gallery: {}.zip'.format(to_use_filename))
        filepath = os.path.join(self.settings.MEDIA_ROOT,
                                self.gallery.filename)
        with open(filepath, 'wb') as fo:
            for chunk in request_file.iter_content(4096):
                fo.write(chunk)

        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(
            filepath)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(filepath)

            self.fileDownloaded = 1
            self.return_code = 1
        else:
            # An unusable file is a failed download as well.
            logger.error("Could not download archive")
            self.return_code = 0
コード例 #6
0
    def start_download(self) -> None:
        """Download the gallery link by running the gallery-dl executable.

        gallery-dl is invoked with ``--zip`` and a fresh temporary directory
        as destination. The resulting file is moved into ``archive_dl_folder``
        under ``MEDIA_ROOT`` and inspected with ``get_zip_fileinfo``. The
        temporary directory is removed on every exit path.

        Sets ``self.return_code`` to 1 (plus ``self.fileDownloaded`` and
        ``self.crc32``) when the stored file reports a size above zero. Sets
        it to 0 when the executable is missing, gallery-dl writes to stderr or
        exits non-zero, no output file is found, or the file reports no size.
        Returns immediately when there is no gallery or no link.
        """
        if not self.gallery or not self.gallery.link:
            return

        if self.settings.gallery_dl.executable_path:
            exe_path_to_use = shutil.which(
                self.settings.gallery_dl.executable_path)
        else:
            exe_path_to_use = shutil.which(
                self.settings.gallery_dl.executable_name)

        if not exe_path_to_use:
            self.return_code = 0
            logger.error("The gallery-dl executable was not found")
            return

        directory_path = mkdtemp()

        # mkdtemp() leaves cleanup to the caller, so the finally clause makes
        # sure the error returns below do not leak the directory.
        try:
            arguments = ["--zip", "--dest", "{}".format(directory_path)]

            if self.own_settings.proxy:
                arguments.append("--proxy")
                arguments.append("{}".format(self.own_settings.proxy))

            if self.settings.gallery_dl.config_file:
                arguments.append("--config")
                arguments.append("{}".format(
                    self.settings.gallery_dl.config_file))

            # NOTE(review): passed as a single argv entry; confirm whether
            # several space-separated options are expected here.
            if self.settings.gallery_dl.extra_arguments:
                arguments.append("{}".format(
                    self.settings.gallery_dl.extra_arguments))

            arguments.append("{}".format(self.gallery.link))

            logger.info("Calling gallery-dl: {}.".format(" ".join(
                [exe_path_to_use, *arguments])))

            process_result = subprocess.run([exe_path_to_use, *arguments],
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE,
                                            universal_newlines=True)

            # Any stderr output counts as a failure, whatever the exit code.
            if process_result.stderr:
                self.return_code = 0
                logger.error(
                    "An error was captured when running gallery-dl: {}".format(
                        process_result.stderr))
                return

            if process_result.returncode != 0:
                self.return_code = 0
                logger.error("Return code was not 0: {}".format(
                    process_result.returncode))
                return

            # If more than one file was produced, keep the last one that
            # os.walk yields.
            output_path = ''
            file_name = ''
            for dir_path, _, filenames in os.walk(directory_path):
                for current_file in filenames:
                    file_name = current_file
                    output_path = os.path.join(dir_path, current_file)

            if not output_path:
                self.return_code = 0
                logger.error("The resulting download file was not found")
                return

            if not os.path.isfile(output_path):
                self.return_code = 0
                logger.error(
                    "The resulting download file was not found: {}".format(
                        file_name))
                return

            self.gallery.filename = available_filename(
                self.settings.MEDIA_ROOT,
                os.path.join(self.own_settings.archive_dl_folder,
                             replace_illegal_name(file_name)))

            self.gallery.title = os.path.splitext(file_name)[0]

            filepath = os.path.join(self.settings.MEDIA_ROOT,
                                    self.gallery.filename)

            shutil.move(output_path, filepath)
        finally:
            shutil.rmtree(directory_path, ignore_errors=True)

        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(
            filepath)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(filepath)

            self.fileDownloaded = 1
            self.return_code = 1

        else:
            logger.error("Could not download archive")
            self.return_code = 0
コード例 #7
0
ファイル: downloaders.py プロジェクト: Kadantte/pandachaika
    def start_download(self) -> None:
        """Download the gallery link by running the megadl executable.

        megadl is invoked with ``--no-progress --print-names`` and a fresh
        temporary directory as ``--path``. The first name printed on stdout is
        taken as the downloaded file, which is moved into
        ``archive_dl_folder`` under ``MEDIA_ROOT`` and inspected with
        ``get_zip_fileinfo``. The temporary directory is removed on every exit
        path.

        Sets ``self.return_code`` to 1 (plus ``self.fileDownloaded`` and
        ``self.crc32``) when the stored file reports a size above zero. Sets
        it to 0 when the executable is missing, megadl prints nothing, writes
        to stderr or reports an invalid link, the named file does not exist,
        or the file reports no size. Returns immediately when there is no
        gallery or no link.
        """
        if not self.gallery or not self.gallery.link:
            return

        if self.own_settings.megadl_executable_path:
            exe_path_to_use = shutil.which(
                self.own_settings.megadl_executable_path)
        else:
            exe_path_to_use = shutil.which(
                self.own_settings.megadl_executable_name)

        if not exe_path_to_use:
            self.return_code = 0
            self.logger.error("The megadl tools was not found")
            return

        directory_path = mkdtemp()

        # mkdtemp() leaves cleanup to the caller, so the finally clause makes
        # sure the error returns below do not leak the directory.
        try:
            arguments = [
                "--no-progress", "--print-names", "--path",
                "{}".format(directory_path)
            ]

            if self.own_settings.proxy:
                arguments.append("--proxy")
                arguments.append("{}".format(self.own_settings.proxy))

            # NOTE(review): passed as a single argv entry; confirm whether
            # several space-separated options are expected here.
            if self.own_settings.extra_megadl_arguments:
                arguments.append("{}".format(
                    self.own_settings.extra_megadl_arguments))

            arguments.append("{}".format(self.gallery.link))

            self.logger.info("Calling megadl: {}.".format(" ".join(
                [exe_path_to_use, *arguments])))

            process_result = subprocess.run([exe_path_to_use, *arguments],
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE,
                                            universal_newlines=True)

            message_text = process_result.stdout

            if not message_text:
                self.return_code = 0
                self.logger.error(
                    "The link could not be downloaded, no output was generated after running megadl"
                )
                return

            # Any stderr output counts as a failure.
            if process_result.stderr:
                self.return_code = 0
                self.logger.error(
                    "An error was captured when running megadl: {}".format(
                        process_result.stderr))
                return

            if "WARNING: Skipping invalid" in message_text:
                self.return_code = 0
                self.logger.error(
                    "The link could not be downloaded: {}".format(
                        message_text))
                return

            # If we downloaded a folder, just take the first result
            file_names = message_text.splitlines()
            file_name = file_names[0]

            output_path = os.path.join(directory_path, file_name)

            if not os.path.isfile(output_path):
                self.return_code = 0
                self.logger.error(
                    "The resulting download file was not found: {}".format(
                        file_name))
                return

            self.gallery.filename = available_filename(
                self.settings.MEDIA_ROOT,
                os.path.join(self.own_settings.archive_dl_folder,
                             replace_illegal_name(file_name)))

            self.gallery.title = os.path.splitext(file_name)[0]

            filepath = os.path.join(self.settings.MEDIA_ROOT,
                                    self.gallery.filename)

            shutil.move(output_path, filepath)
        finally:
            shutil.rmtree(directory_path, ignore_errors=True)

        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(
            filepath)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(filepath)

            self.fileDownloaded = 1
            self.return_code = 1

        else:
            self.logger.error("Could not download archive")
            self.return_code = 0
コード例 #8
0
    def start_download(self) -> None:
        """Scrape a gallery page by page and pack the images into a zip.

        The reader URL is taken from the ``x-btn-rounded`` anchor of the
        gallery page (``self.gallery.content`` when present, otherwise fetched
        from ``self.gallery.link``), or guessed with
        ``guess_gallery_read_url`` when the anchor is missing or unusable.
        Starting at ``page/1``, each reader page's ``img.open`` source is
        saved into a temporary directory and the page number is incremented.
        The loop stops on a 404, when an image repeats, or when the URL no
        longer matches the page pattern. The collected files are then zipped
        into ``archive_dl_folder`` under ``MEDIA_ROOT``.

        Sets ``self.return_code`` to 1 (plus ``self.fileDownloaded`` and
        ``self.crc32``) when the archive reports a size above zero. Sets it to
        0 on a malformed URL, a missing first page, a missing image element,
        or an archive with no size. Returns immediately when there is no
        gallery or no link.
        """
        if not self.gallery or not self.gallery.link:
            return

        to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)

        to_use_filename = replace_illegal_name(to_use_filename)

        self.gallery.filename = available_filename(
            self.settings.MEDIA_ROOT,
            os.path.join(
                self.own_settings.archive_dl_folder,
                to_use_filename + '.zip'))
        if self.gallery.content:
            soup_1 = BeautifulSoup(self.gallery.content, 'html.parser')
        else:
            request_dict = construct_request_dict(self.settings, self.own_settings)
            gallery_page = requests.get(
                self.gallery.link,
                **request_dict
            )
            soup_1 = BeautifulSoup(gallery_page.content, 'html.parser')

        # find() returns None when the anchor is absent. Use an empty URL in
        # that case so the guess fallback below takes over instead of raising.
        read_anchor = soup_1.find("a", {"class": "x-btn-rounded"})
        gallery_read = read_anchor.get('href', '') if read_anchor is not None else ''

        # Some URLs are really bad formatted
        gallery_read = re.sub(
            r'.*(' + re.escape(constants.main_page) + r'/manga/read/.+/0/1/).*', r'\1',
            gallery_read,
            flags=re.DOTALL
        )

        if not gallery_read or gallery_read in constants.bad_urls or not gallery_read.startswith(constants.main_page):
            logger.warning("Reading gallery page not available, trying to guess the name.")
            gallery_read = guess_gallery_read_url(self.gallery.link, self.gallery)

        if not gallery_read.endswith('page/1'):
            gallery_read += 'page/1'

        # Splits a reader URL into everything up to "page/" and the number.
        page_regex = re.compile(r"(.*?page/)(\d+)/*$", re.IGNORECASE)

        last_image = ''

        directory_path = mkdtemp()

        logger.info('Downloading gallery: {}'.format(self.gallery.title))

        # Set once the alternative guessed URL has been tried for page 1.
        second_pass = False
        while True:

            try:
                request_dict = construct_request_dict(self.settings, self.own_settings)
                gallery_read_page = requests.get(
                    gallery_read,
                    **request_dict
                )
            except requests.exceptions.MissingSchema:
                logger.error("Malformed URL: {}, skipping".format(gallery_read))
                self.return_code = 0
                shutil.rmtree(directory_path, ignore_errors=True)
                return

            if gallery_read_page.status_code == 404:
                if gallery_read.endswith('page/1'):
                    if not second_pass:
                        # First page missing: retry once with the guess made
                        # with the third argument set to False.
                        gallery_read = guess_gallery_read_url(self.gallery.link, self.gallery, False)
                        second_pass = True
                        continue
                    logger.error("Last page was the first one: {}, stopping".format(gallery_read))
                    self.return_code = 0
                    shutil.rmtree(directory_path, ignore_errors=True)
                    return
                # A 404 after the first page means the previous page was the
                # last one.
                break

            soup_2 = BeautifulSoup(gallery_read_page.content, 'html.parser')
            img_find = soup_2.find("img", {"class": "open"})

            if not img_find:
                logger.error("Gallery not available, skipping")
                self.return_code = 0
                shutil.rmtree(directory_path, ignore_errors=True)
                return

            img = img_find['src']

            if last_image != '' and last_image == img:
                # Same image as on the previous page: stop.
                break
            last_image = img
            img_name = os.path.basename(img)
            request_dict = construct_request_dict(self.settings, self.own_settings)
            request_file = requests.get(
                img,
                **request_dict
            )
            if request_file.status_code == 404:
                # The image itself is missing: stop.
                break
            with open(os.path.join(directory_path, img_name), "wb") as fo:
                for chunk in request_file.iter_content(4096):
                    fo.write(chunk)

            page_match = page_regex.search(gallery_read)

            if page_match:
                gallery_read = page_match.group(1) + str(int(page_match.group(2)) + 1)
            else:
                # The URL cannot be advanced to the next page: stop.
                break

        file_path = os.path.join(
            self.settings.MEDIA_ROOT,
            self.gallery.filename
        )

        # Archive members are stored flat, by base name only.
        with ZipFile(file_path, 'w') as archive:
            for (root_path, _, file_names) in os.walk(directory_path):
                for current_file in file_names:
                    archive.write(
                        os.path.join(root_path, current_file), arcname=os.path.basename(current_file))
        shutil.rmtree(directory_path, ignore_errors=True)

        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(file_path)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(file_path)
            self.fileDownloaded = 1
            self.return_code = 1
        else:
            # An archive with no size is a failed download as well.
            logger.error("Could not download archive")
            self.return_code = 0
コード例 #9
0
    def start_download(self) -> None:
        """Download every image listed on the reader page and zip them.

        The reader URL is taken from the ``x-btn-rounded`` anchor of the
        gallery page (``self.gallery.content`` when present, otherwise fetched
        from ``self.gallery.link``), or guessed with
        ``guess_gallery_read_url`` when the anchor is missing or unusable. If
        the reader page does not answer 200, one more guessed URL is tried.
        Image URLs come from ``self.get_img_urls_from_gallery_read_page``.
        Each is saved into a temporary directory, and the directory is zipped
        into ``archive_dl_folder`` under ``MEDIA_ROOT``.

        Sets ``self.return_code`` to 1 (plus ``self.fileDownloaded`` and
        ``self.crc32``) when the archive reports a size above zero. Sets it to
        0 on a malformed URL, a non-200 reader page, an empty image list, or
        an archive with no size. Returns immediately when there is no gallery
        or no link.
        """
        if not self.gallery or not self.gallery.link:
            return

        to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)

        to_use_filename = replace_illegal_name(to_use_filename)

        self.gallery.filename = available_filename(
            self.settings.MEDIA_ROOT,
            os.path.join(
                self.own_settings.archive_dl_folder,
                to_use_filename + '.zip'))
        if self.gallery.content:
            soup_1 = BeautifulSoup(self.gallery.content, 'html.parser')
        else:
            request_dict = construct_request_dict(self.settings, self.own_settings)
            gallery_page = requests.get(
                self.gallery.link,
                **request_dict
            )
            soup_1 = BeautifulSoup(gallery_page.content, 'html.parser')

        # find() returns None when the anchor is absent. Use an empty URL in
        # that case so the guess fallback below takes over instead of raising.
        read_anchor = soup_1.find("a", {"class": "x-btn-rounded"})
        gallery_read = read_anchor.get('href', '') if read_anchor is not None else ''

        # Some URLs are really bad formatted
        gallery_read = re.sub(
            r'.*(' + re.escape(constants.main_page) + r'/manga/read/.+/0/1/).*', r'\1',
            gallery_read,
            flags=re.DOTALL
        )

        if not gallery_read or gallery_read in constants.bad_urls or not gallery_read.startswith(constants.main_page):
            logger.warning("Reading gallery page not available, trying to guess the name.")
            gallery_read = guess_gallery_read_url(self.gallery.link, self.gallery)

        if not gallery_read.endswith('page/1'):
            gallery_read += 'page/1'

        logger.info('Downloading gallery: {}'.format(self.gallery.title))

        try:
            request_dict = construct_request_dict(self.settings, self.own_settings)
            gallery_read_page = requests.get(
                gallery_read,
                **request_dict
            )
        except requests.exceptions.MissingSchema:
            logger.error("Malformed URL: {}, skipping".format(gallery_read))
            self.return_code = 0
            return

        if gallery_read_page.status_code != 200:
            # Retry once with the guess made with the third argument set to
            # False.
            gallery_read = guess_gallery_read_url(self.gallery.link, self.gallery, False)
            try:
                request_dict = construct_request_dict(self.settings, self.own_settings)
                gallery_read_page = requests.get(
                    gallery_read,
                    **request_dict
                )
            except requests.exceptions.MissingSchema:
                logger.error("Malformed URL: {}, skipping".format(gallery_read))
                self.return_code = 0
                return

        if gallery_read_page.status_code != 200:
            logger.error("Wrong HTML code returned, could not download, link: {}".format(gallery_read))
            self.return_code = 0
            return

        image_urls = self.get_img_urls_from_gallery_read_page(gallery_read_page.text)

        if not image_urls:
            logger.error("Could not find image links, archive not downloaded")
            self.return_code = 0
            return

        directory_path = mkdtemp()

        for image_url in image_urls:
            img_name = os.path.basename(image_url)

            request_dict = construct_request_dict(self.settings, self.own_settings)
            request_file = requests.get(
                image_url,
                **request_dict
            )
            if request_file.status_code == 404:
                logger.warning("Image link reported 404 error, stopping")
                break
            with open(os.path.join(directory_path, img_name), "wb") as fo:
                for chunk in request_file.iter_content(4096):
                    fo.write(chunk)

        file_path = os.path.join(
            self.settings.MEDIA_ROOT,
            self.gallery.filename
        )

        # Archive members are stored flat, by base name only.
        with ZipFile(file_path, 'w') as archive:
            for (root_path, _, file_names) in os.walk(directory_path):
                for current_file in file_names:
                    archive.write(
                        os.path.join(root_path, current_file), arcname=os.path.basename(current_file))
        shutil.rmtree(directory_path, ignore_errors=True)

        self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(file_path)
        if self.gallery.filesize > 0:
            self.crc32 = calc_crc32(file_path)
            self.fileDownloaded = 1
            self.return_code = 1
        else:
            # An archive with no size is a failed download as well.
            logger.error("Could not download archive")
            self.return_code = 0