def _download_video(self, ytdl_options: dict) -> Resource:
    """Download the post's media with youtube-dl and return it as a Resource.

    The media is downloaded into a temporary directory, read fully into
    memory, and attached to a new ``Resource`` whose hash is then computed.

    Args:
        ytdl_options: Base youtube-dl options. The dict is copied, so the
            caller's object is left untouched.

    Returns:
        A ``Resource`` for ``self.post.url`` holding the downloaded bytes and
        using the downloaded file's suffix as its extension.

    Raises:
        SiteDownloaderError: If youtube-dl raises ``DownloadError``.
        NotADownloadableLinkError: If the download produced no file.
    """
    # Silence youtube-dl: only CRITICAL records pass through its logger.
    yt_logger = logging.getLogger('youtube-dl')
    yt_logger.setLevel(logging.CRITICAL)

    # Copy instead of mutating the dict the caller handed in.
    options = {**ytdl_options, 'quiet': True, 'logger': yt_logger}

    with tempfile.TemporaryDirectory() as temp_dir:
        download_path = Path(temp_dir).resolve()
        options['outtmpl'] = str(download_path / 'test.%(ext)s')
        try:
            with youtube_dl.YoutubeDL(options) as ydl:
                ydl.download([self.post.url])
        except youtube_dl.DownloadError as e:
            raise SiteDownloaderError(f'Youtube download failed: {e}') from e

        # Take the first file found in the temporary directory, if any.
        downloaded_file = next(download_path.iterdir(), None)
        if downloaded_file is None:
            raise NotADownloadableLinkError(f"No media exists in the URL {self.post.url}")

        extension = downloaded_file.suffix
        # Must be read before the temporary directory is removed.
        content = downloaded_file.read_bytes()

    out = Resource(self.post, self.post.url, extension)
    out.content = content
    out.create_hash()
    return out
def find_resources(
    self,
    authenticator: Optional[SiteAuthenticator] = None,
) -> list[Resource]:
    """Return the post's exported text as a single '.txt' resource.

    The string from ``export_to_string()`` is UTF-8 encoded, stored as the
    resource content, and hashed. ``authenticator`` is not used.
    """
    encoded_text = self.export_to_string().encode('utf-8')
    text_resource = Resource(self.post, self.post.url, '.txt')
    text_resource.content = encoded_text
    text_resource.create_hash()
    return [text_resource]
def _compute_image_url(self, image: dict) -> Resource:
    """Build the i.imgur.com Resource for one image record.

    The record's 'ext' is always validated; when 'prefer_video' is truthy
    the '.mp4' extension is used in place of the validated one.
    """
    validated_ext = self._validate_extension(image['ext'])
    suffix = '.mp4' if image.get('prefer_video', False) else validated_ext
    base_url = 'https://i.imgur.com/' + image['hash']
    image_url = base_url + suffix
    download_function = Resource.retry_download(image_url)
    return Resource(self.post, image_url, download_function)
def find_resources(
    self,
    authenticator: Optional[SiteAuthenticator] = None,
) -> list[Resource]:
    """Collect one Resource per link returned by ``get_links`` for the post URL.

    Raises:
        SiteDownloaderError: If ``get_links`` raises ``AttributeError`` or
            returns nothing.
    """
    try:
        links = self.get_links(self.post.url)
    except AttributeError:
        raise SiteDownloaderError(f'Could not read page at {self.post.url}')

    if not links:
        raise SiteDownloaderError(rf'No resources found at {self.post.url}')

    resources = []
    for link in links:
        resources.append(Resource(self.post, link, Resource.retry_download(link)))
    return resources
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
    """Return one Resource per image found in the post's gallery data.

    The post's own ``gallery_data`` is tried first; if that fails, the first
    entry of ``crosspost_parent_list`` is used instead.

    Raises:
        SiteDownloaderError: If neither source yields gallery items, or the
            resulting link list is empty.
    """
    try:
        own_items = self.post.gallery_data['items']
        image_urls = self._get_links(own_items)
    except (AttributeError, TypeError):
        # Fall back to the gallery of the first crosspost parent.
        try:
            parent = self.post.crosspost_parent_list[0]
            image_urls = self._get_links(parent['gallery_data']['items'])
        except (AttributeError, IndexError, TypeError):
            logger.error(f'Could not find gallery data in submission {self.post.id}')
            logger.exception('Gallery image find failure')
            raise SiteDownloaderError('No images found in Reddit gallery')

    if not image_urls:
        raise SiteDownloaderError('No images found in Reddit gallery')

    resources = []
    for url in image_urls:
        resources.append(Resource(self.post, url, Resource.retry_download(url)))
    return resources
def test_format_full(
    format_string_directory: str,
    format_string_file: str,
    expected: str,
    reddit_submission: praw.models.Submission,
):
    """Check that the formatted path, as a string, equals ``expected``."""
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
    formatter = FileNameFormatter(format_string_file, format_string_directory)
    formatted_path = formatter.format_path(resource, Path('test'))
    assert str(formatted_path) == expected
def test_format_full_conform(
    format_string_directory: str,
    format_string_file: str,
    reddit_submission: praw.models.Submission,
):
    """Run ``format_path`` for the given schemes; the result is not asserted."""
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    formatter.format_path(resource, Path('test'))
def find_resources(
    self,
    authenticator: Optional[SiteAuthenticator] = None,
) -> list[Resource]:
    """Return one Resource per link that ``_get_links`` finds for the post URL.

    Raises:
        SiteDownloaderError: If no links are found.
    """
    image_urls = self._get_links(self.post.url)
    if image_urls:
        return [Resource(self.post, url) for url in image_urls]
    raise SiteDownloaderError('No images found in Reddit gallery')
def test_format_full(
    format_string_directory: str,
    format_string_file: str,
    expected: str,
    reddit_submission: praw.models.Submission,
):
    """Check the formatted path against ``expected`` via ``do_test_path_equality``."""
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    formatted_path = formatter.format_path(resource, Path('test'))
    assert do_test_path_equality(formatted_path, expected)
def erome_download(url: str) -> Callable:
    """Return a download callable for ``url`` that adds Erome-specific headers.

    The returned callable takes the global download parameters, merges the
    fixed user-agent/Referer headers over them, and passes the result to
    ``Resource.http_download``.
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
        ' Chrome/88.0.4324.104 Safari/537.36'
    )
    erome_headers = {
        'user-agent': user_agent,
        'Referer': 'https://www.erome.com/',
    }
    download_parameters = {'headers': erome_headers}

    def download(global_params):
        # Keys in download_parameters win over the global ones.
        return Resource.http_download(url, global_params | download_parameters)

    return download
def test_shorten_filename_real(submission: MagicMock, tmp_path: Path):
    """Format a path for a 500-character title and create it on disk."""
    submission.title = 'A' * 500
    submission.author.name = 'test'
    submission.subreddit.display_name = 'test'
    submission.id = 'BBBBBB'

    resource = Resource(submission, 'www.example.com/empty', lambda: None, '.jpeg')
    formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}', 'ISO')
    target = formatter.format_path(resource, tmp_path)

    # Creating the directory and file raises if the name cannot be created on this filesystem.
    target.parent.mkdir(parents=True)
    target.touch()
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
    """Return a single Resource for the video at the post URL.

    The value returned by ``_download_video`` is passed to the Resource as
    its download function, and the extension comes from the 'ext' entry of
    ``get_video_attributes``.
    """
    ytdl_options = dict(format='best', playlistend=1, nooverwrites=True)
    download_function = self._download_video(ytdl_options)
    video_extension = self.get_video_attributes(self.post.url)['ext']
    return [Resource(self.post, self.post.url, download_function, video_extension)]
def _download_video(self, ytdl_options: dict) -> Resource:
    """Download the post's media with youtube-dl and return it as a Resource.

    The media is downloaded into a temporary directory, read fully into
    memory, and attached to a new ``Resource`` whose hash is then computed.

    Args:
        ytdl_options: Base youtube-dl options. The dict is copied, so the
            caller's object is left untouched.

    Returns:
        A ``Resource`` for ``self.post.url`` holding the downloaded bytes and
        using the downloaded file's suffix as its extension.

    Raises:
        SiteDownloaderError: If youtube-dl raises ``DownloadError`` or the
            download produced no file.
    """
    # Copy instead of mutating the dict the caller handed in.
    options = {**ytdl_options, 'quiet': True}

    with tempfile.TemporaryDirectory() as temp_dir:
        download_path = Path(temp_dir).resolve()
        options['outtmpl'] = str(download_path / 'test.%(ext)s')
        try:
            with youtube_dl.YoutubeDL(options) as ydl:
                ydl.download([self.post.url])
        except youtube_dl.DownloadError as e:
            raise SiteDownloaderError(f'Youtube download failed: {e}') from e

        # An empty directory previously surfaced as a bare IndexError.
        downloaded_file = next(download_path.iterdir(), None)
        if downloaded_file is None:
            raise SiteDownloaderError(f'No media was downloaded from {self.post.url}')

        extension = downloaded_file.suffix
        # Must be read before the temporary directory is removed.
        content = downloaded_file.read_bytes()

    out = Resource(self.post, self.post.url, extension)
    out.content = content
    out.create_hash()
    return out
def find_resources(
    self,
    authenticator: Optional[SiteAuthenticator] = None,
) -> list[Resource]:
    """Return a single Resource for the post URL using the parent class helpers.

    The parent's ``_download_video`` is called with an empty options dict and
    the extension is the 'ext' entry of the parent's ``get_video_attributes``.
    """
    download_function = super()._download_video({})
    video_extension = super().get_video_attributes(self.post.url)['ext']
    resource = Resource(self.post, self.post.url, download_function, video_extension)
    return [resource]
def test_format_full_with_index_suffix(
    format_string_directory: str,
    format_string_file: str,
    index: Optional[int],
    expected: str,
    reddit_submission: praw.models.Submission,
):
    """Check the formatted path string when an index is passed to ``format_path``."""
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
    formatter = FileNameFormatter(format_string_file, format_string_directory)
    formatted_path = formatter.format_path(resource, Path('test'), index)
    assert str(formatted_path) == expected
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
    """Return one Resource per link that ``_get_links`` finds for the post URL.

    Links that do not start with an http(s) scheme are prefixed with
    'https://'. Each Resource downloads through ``erome_download``.

    Raises:
        SiteDownloaderError: If no links are found.
    """
    found_links = self._get_links(self.post.url)
    if not found_links:
        raise SiteDownloaderError('Erome parser could not find any links')

    # Lazy generator: each link is normalised just before its Resource is built.
    full_urls = (
        raw if re.match(r'https?://.*', raw) else 'https://' + raw
        for raw in found_links
    )
    return [Resource(self.post, url, self.erome_download(url)) for url in full_urls]
def test_format_archive_entry_comment(
    test_file_scheme: str,
    test_folder_scheme: str,
    test_comment_id: str,
    expected_name: str,
    tmp_path: Path,
    reddit_instance: praw.Reddit,
):
    """Check the path formatted for a '.json' comment entry via ``do_test_string_equality``."""
    comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO')
    entry_resource = Resource(comment, '', lambda: None, '.json')
    formatted_path = formatter.format_path(entry_resource, tmp_path)
    assert do_test_string_equality(formatted_path, expected_name)
def test_format_archive_entry_comment(
    test_file_scheme: str,
    test_folder_scheme: str,
    test_comment_id: str,
    expected_name: str,
    tmp_path: Path,
    reddit_instance: praw.Reddit,
):
    """Check that the file name formatted for a '.json' comment entry equals ``expected_name``."""
    comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
    entry_resource = Resource(comment, '', '.json')
    formatted_path = formatter.format_path(entry_resource, tmp_path)
    assert formatted_path.name == expected_name
def _compute_image_url(self, image: dict) -> Resource:
    """Build the i.imgur.com Resource for one image record.

    The URL is the image's 'hash' followed by its validated 'ext'.
    """
    base_url = 'https://i.imgur.com/' + image['hash']
    image_url = base_url + self._validate_extension(image['ext'])
    return Resource(self.post, image_url)
def _write_entry_json(self, entry: BaseArchiveEntry):
    """Serialise the compiled entry with ``json.dumps`` and write it as a '.json' resource."""
    json_resource = Resource(entry.source, '', lambda: None, '.json')
    serialised = json.dumps(entry.compile())
    self._write_content_to_disk(json_resource, serialised)
def _write_entry_yaml(self, entry: BaseArchiveEntry):
    """Serialise the compiled entry with ``yaml.dump`` and write it as a '.yaml' resource."""
    yaml_resource = Resource(entry.source, '', lambda: None, '.yaml')
    serialised = yaml.dump(entry.compile())
    self._write_content_to_disk(yaml_resource, serialised)
def _write_entry_xml(self, entry: BaseArchiveEntry):
    """Serialise the compiled entry with ``dict2xml`` (wrapped in 'root') and write it as '.xml'."""
    xml_resource = Resource(entry.source, '', lambda: None, '.xml')
    serialised = dict2xml.dict2xml(entry.compile(), wrap='root')
    self._write_content_to_disk(xml_resource, serialised)
def test_filter_all(test_url: str, expected: bool, download_filter: DownloadFilter):
    """Check that ``check_resource`` returns ``expected`` for a resource at ``test_url``."""
    resource = Resource(MagicMock(), test_url)
    outcome = download_filter.check_resource(resource)
    assert outcome == expected
def test_filter_empty_filter(test_url: str):
    """Check that a default-constructed ``DownloadFilter`` returns True for the resource."""
    empty_filter = DownloadFilter()
    resource = Resource(MagicMock(), test_url)
    outcome = empty_filter.check_resource(resource)
    assert outcome is True
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
    """Return a single '.mp4' Resource for the link ``_get_link`` finds for the post URL."""
    media_link = self._get_link(self.post.url)
    video_resource = Resource(self.post, media_link, '.mp4')
    return [video_resource]
def test_resource_get_extension(test_url: str, expected: str):
    """Check that ``_determine_extension`` returns ``expected`` for ``test_url``."""
    resource = Resource(MagicMock(), test_url, lambda: None)
    extension = resource._determine_extension()
    assert extension == expected
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
    """Return a single Resource for the post URL, downloaded via ``Resource.retry_download``."""
    download_function = Resource.retry_download(self.post.url)
    direct_resource = Resource(self.post, self.post.url, download_function)
    return [direct_resource]
def test_download_online_resource(test_url: str, expected_hash: str):
    """Download ``test_url`` and check the resource's hash digest against ``expected_hash``."""
    download_function = Resource.retry_download(test_url)
    resource = Resource(MagicMock(), test_url, download_function)
    resource.download()
    assert resource.hash.hexdigest() == expected_hash
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.youtube import Youtube

logger = logging.getLogger(__name__)


class PornHub(Youtube):
    """Site downloader that reuses the ``Youtube`` downloader's helpers."""

    def __init__(self, post: Submission):
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return a single Resource for the video at the post URL.

        Args:
            authenticator: Not used by this downloader.

        Returns:
            A one-element list holding the Resource for ``self.post.url``.

        Raises:
            SiteDownloaderError: If ``get_video_attributes`` returns a falsy
                value for the post URL.
        """
        ytdl_options = {
            'format': 'best',
            'nooverwrites': True,
        }

        video_attributes = super().get_video_attributes(self.post.url)
        if not video_attributes:
            # The error previously had no message, which made it undiagnosable.
            raise SiteDownloaderError(f'Could not read video attributes for {self.post.url}')
        extension = video_attributes['ext']

        out = Resource(
            self.post,
            self.post.url,
            super()._download_video(ytdl_options),
            extension,
        )
        return [out]