コード例 #1
0
    def _download_submission(self, submission: praw.models.Submission):
        """Resolve a submission to its resources and write each one to disk.

        Nothing is returned; progress and failures are reported through the
        module logger. The method gives up on the submission (returns early)
        when the object is not a ``praw.models.Submission``, when the URL is
        rejected by ``self.download_filter``, when no downloader matches the
        URL, when the site downloader fails, when a resource fails to
        download, or when a duplicate hash is found and either
        ``self.args.no_dupes`` or ``self.args.make_hard_links`` is set.

        :param submission: the submission whose linked content is downloaded.
        """
        if not isinstance(submission, praw.models.Submission):
            logger.warning(f'{submission.id} is not a submission')
            return

        url_allowed = self.download_filter.check_url(submission.url)
        if not url_allowed:
            logger.debug(
                f'Download filter removed submission {submission.id} with URL {submission.url}'
            )
            return

        # Choosing and instantiating the site downloader share one handler,
        # so a NotADownloadableLinkError from either step is reported alike.
        try:
            site_cls = DownloadFactory.pull_lever(submission.url)
            site = site_cls(submission)
            logger.debug(
                f'Using {site_cls.__name__} with url {submission.url}')
        except errors.NotADownloadableLinkError as err:
            logger.error(f'Could not download submission {submission.id}: {err}')
            return

        try:
            resources = site.find_resources(self.authenticator)
        except errors.SiteDownloaderError as err:
            logger.error(
                f'Site {site_cls.__name__} failed to download submission {submission.id}: {err}'
            )
            return

        path_pairs = self.file_name_formatter.format_resource_paths(
            resources, self.download_directory)
        for target, resource in path_pairs:
            if target.exists():
                logger.debug(f'File {target} already exists, continuing')
                continue

            try:
                resource.download(self.args.max_wait_time)
            except errors.BulkDownloaderException as err:
                logger.error(
                    f'Failed to download resource {resource.url} with downloader {site_cls.__name__}: {err}'
                )
                # A failed resource abandons the rest of the submission.
                return

            digest = resource.hash.hexdigest()
            target.parent.mkdir(parents=True, exist_ok=True)

            already_seen = digest in self.master_hash_list
            if already_seen and self.args.no_dupes:
                logger.info(
                    f'Resource hash {digest} from submission {submission.id} downloaded elsewhere'
                )
                return
            if already_seen and self.args.make_hard_links:
                # Link to the previously written file instead of writing again.
                self.master_hash_list[digest].link_to(target)
                logger.info(
                    f'Hard link made linking {target} to {self.master_hash_list[digest]}'
                )
                return

            with target.open('wb') as out_file:
                out_file.write(resource.content)
            logger.debug(f'Written file to {target}')

            # Record the hash so later resources can be recognised as duplicates.
            self.master_hash_list[digest] = target
            logger.debug(f'Hash added to master list: {digest}')
            logger.info(
                f'Downloaded submission {submission.id} from {submission.subreddit.display_name}'
            )
コード例 #2
0
def test_is_web_resource(test_url: str, expected: bool):
    """Check that ``DownloadFactory.is_web_resource`` yields ``expected`` for ``test_url``."""
    assert DownloadFactory.is_web_resource(test_url) == expected
コード例 #3
0
def test_sanitise_url(test_url: str, expected: str):
    """Check that ``DownloadFactory.sanitise_url`` turns ``test_url`` into ``expected``."""
    assert DownloadFactory.sanitise_url(test_url) == expected
コード例 #4
0
def test_factory_lever_bad(test_url: str):
    """Check that ``DownloadFactory.pull_lever`` rejects ``test_url``.

    The call must raise ``NotADownloadableLinkError``.
    """
    expected_error = NotADownloadableLinkError
    with pytest.raises(expected_error):
        DownloadFactory.pull_lever(test_url)
コード例 #5
0
def test_factory_lever_good(test_submission_url: str,
                            expected_class: BaseDownloader,
                            reddit_instance: praw.Reddit):
    """Check that ``DownloadFactory.pull_lever`` picks the expected downloader.

    The result is compared by identity, so ``expected_class`` is the very
    object the factory must return. ``reddit_instance`` is not used in the
    body; presumably it is requested as a fixture -- confirm against the
    test configuration.
    """
    chosen = DownloadFactory.pull_lever(test_submission_url)
    assert chosen is expected_class
コード例 #6
0
    def _download_submission(self, submission: praw.models.Submission):
        """Download every resource of one submission into the download directory.

        Nothing is returned; outcomes are reported through the module logger.
        The submission is skipped before any network access when:

        * it is not a ``praw.models.Submission``;
        * its ID is in ``self.excluded_submission_ids``;
        * its subreddit (lower-cased) is in ``self.args.skip_subreddit``;
        * its author's name is in ``self.args.ignore_user``;
        * its URL is rejected by ``self.download_filter.check_url``.

        A submission whose ``author`` is ``None`` cannot match the ignored-user
        list and is processed normally instead of raising ``AttributeError``.

        After that, a downloader class is picked from the URL. The submission
        is abandoned if no downloader matches, if the downloader's class name
        (lower-cased) is in ``self.args.disable_module``, or if the downloader
        fails to find resources.

        For each resource, files that already exist or that fail
        ``self.download_filter.check_resource`` are skipped individually. A
        download error, a duplicate hash (with ``no_dupes`` or
        ``make_hard_links`` set) or a write error ends processing of the whole
        submission.

        :param submission: the submission whose linked content is downloaded.
        """
        # The type guard runs first: every later check reads attributes that
        # are only meaningful once the object is known to be a Submission.
        if not isinstance(submission, praw.models.Submission):
            logger.warning(f'{submission.id} is not a submission')
            return
        elif submission.id in self.excluded_submission_ids:
            logger.debug(f'Object {submission.id} in exclusion list, skipping')
            return
        elif submission.subreddit.display_name.lower() in self.args.skip_subreddit:
            logger.debug(f'Submission {submission.id} in {submission.subreddit.display_name} in skip list')
            return
        elif submission.author is not None and submission.author.name in self.args.ignore_user:
            # ``author`` can be None (no Redditor attached); only dereference
            # ``.name`` when there is an author to compare against.
            logger.debug(
                f'Submission {submission.id} in {submission.subreddit.display_name} skipped'
                f' due to {submission.author.name} being an ignored user')
            return
        elif not self.download_filter.check_url(submission.url):
            logger.debug(f'Submission {submission.id} filtered due to URL {submission.url}')
            return

        logger.debug(f'Attempting to download submission {submission.id}')
        try:
            downloader_class = DownloadFactory.pull_lever(submission.url)
            downloader = downloader_class(submission)
            logger.debug(f'Using {downloader_class.__name__} with url {submission.url}')
        except errors.NotADownloadableLinkError as e:
            logger.error(f'Could not download submission {submission.id}: {e}')
            return
        if downloader_class.__name__.lower() in self.args.disable_module:
            logger.debug(f'Submission {submission.id} skipped due to disabled module {downloader_class.__name__}')
            return
        try:
            content = downloader.find_resources(self.authenticator)
        except errors.SiteDownloaderError as e:
            logger.error(f'Site {downloader_class.__name__} failed to download submission {submission.id}: {e}')
            return
        for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory):
            # Per-resource skips: move on to the next resource.
            if destination.exists():
                logger.debug(f'File {destination} from submission {submission.id} already exists, continuing')
                continue
            elif not self.download_filter.check_resource(res):
                logger.debug(f'Download filter removed {submission.id} file with URL {submission.url}')
                continue
            try:
                res.download({'max_wait_time': self.args.max_wait_time})
            except errors.BulkDownloaderException as e:
                logger.error(f'Failed to download resource {res.url} in submission {submission.id} '
                             f'with downloader {downloader_class.__name__}: {e}')
                # A failed resource abandons the remaining resources as well.
                return
            resource_hash = res.hash.hexdigest()
            destination.parent.mkdir(parents=True, exist_ok=True)
            if resource_hash in self.master_hash_list:
                if self.args.no_dupes:
                    logger.info(
                        f'Resource hash {resource_hash} from submission {submission.id} downloaded elsewhere')
                    return
                elif self.args.make_hard_links:
                    # Link to the previously written file instead of writing again.
                    self.master_hash_list[resource_hash].link_to(destination)
                    logger.info(
                        f'Hard link made linking {destination} to {self.master_hash_list[resource_hash]}'
                        f' in submission {submission.id}')
                    return
            try:
                with open(destination, 'wb') as file:
                    file.write(res.content)
                logger.debug(f'Written file to {destination}')
            except OSError as e:
                logger.exception(e)
                logger.error(f'Failed to write file in submission {submission.id} to {destination}: {e}')
                return
            # Stamp the file's access and modification times with the
            # submission's creation time (converted via local time to epoch seconds).
            creation_time = time.mktime(datetime.fromtimestamp(submission.created_utc).timetuple())
            os.utime(destination, (creation_time, creation_time))
            # Record the hash so later resources can be recognised as duplicates.
            self.master_hash_list[resource_hash] = destination
            logger.debug(f'Hash added to master list: {resource_hash}')
        # Reached once the loop finishes without an early return.
        logger.info(f'Downloaded submission {submission.id} from {submission.subreddit.display_name}')