def _download_video(self, ytdl_options: dict) -> Resource:
    """Download the post's video with youtube-dl and wrap it in a Resource.

    The video is fetched into a temporary directory and read fully into
    memory; the directory is removed when the ``with`` block exits.

    Args:
        ytdl_options: Options forwarded to ``youtube_dl.YoutubeDL``. The
            mapping is copied before ``quiet``, ``logger`` and ``outtmpl``
            are set, so the caller's dict is not modified.

    Returns:
        A Resource for ``self.post.url`` carrying the downloaded bytes in
        ``content``, with ``create_hash()`` already called.

    Raises:
        SiteDownloaderError: If youtube-dl raises ``DownloadError``.
        NotADownloadableLinkError: If the download finished without
            producing any file.
    """
    yt_logger = logging.getLogger('youtube-dl')
    yt_logger.setLevel(logging.CRITICAL)
    with tempfile.TemporaryDirectory() as temp_dir:
        download_path = Path(temp_dir).resolve()
        # Build a fresh options dict instead of writing into the argument,
        # so a dict shared by the caller is never polluted with temp paths.
        options = {
            **ytdl_options,
            'quiet': True,
            'logger': yt_logger,
            'outtmpl': str(download_path / 'test.%(ext)s'),
        }
        try:
            with youtube_dl.YoutubeDL(options) as ydl:
                ydl.download([self.post.url])
        except youtube_dl.DownloadError as e:
            # Chain explicitly so the youtube-dl failure is kept as the cause.
            raise SiteDownloaderError(f'Youtube download failed: {e}') from e

        downloaded_file = next(download_path.iterdir(), None)
        if downloaded_file is None:
            raise NotADownloadableLinkError(
                f"No media exists in the URL {self.post.url}")
        extension = downloaded_file.suffix
        # Read before leaving the context manager: the file is deleted with it.
        content = downloaded_file.read_bytes()
    out = Resource(self.post, self.post.url, extension)
    out.content = content
    out.create_hash()
    return out
 def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
     """Return the post's exported text as a single ``.txt`` resource.

     The string from ``self.export_to_string()`` is UTF-8 encoded, stored as
     the resource content, and the resource hash is created before returning.
     ``authenticator`` is not used here.
     """
     text_resource = Resource(self.post, self.post.url, '.txt')
     exported_text = self.export_to_string()
     text_resource.content = exported_text.encode('utf-8')
     text_resource.create_hash()
     return [text_resource]
    def _compute_image_url(self, image: dict) -> Resource:
        """Build the ``i.imgur.com`` resource for one image entry.

        ``image`` must provide ``'hash'`` and ``'ext'``. The extension goes
        through ``self._validate_extension`` and is replaced by ``.mp4`` when
        the entry has a truthy ``'prefer_video'`` value.
        """
        validated_ext = self._validate_extension(image['ext'])
        extension = '.mp4' if image.get('prefer_video', False) else validated_ext
        direct_url = ''.join(('https://i.imgur.com/', image['hash'], extension))
        return Resource(self.post, direct_url, Resource.retry_download(direct_url))
 def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
     """Collect the links found at the post URL as downloadable resources.

     Args:
         authenticator: Accepted for interface compatibility; not used here.

     Returns:
         One Resource per link returned by ``self.get_links``, each paired
         with ``Resource.retry_download`` for that link.

     Raises:
         SiteDownloaderError: If ``self.get_links`` raises ``AttributeError``
             (reported as an unreadable page) or returns nothing.
     """
     try:
         links = self.get_links(self.post.url)
     except AttributeError as e:
         # Chain explicitly so the original AttributeError is kept as the cause.
         raise SiteDownloaderError(f'Could not read page at {self.post.url}') from e
     if not links:
         raise SiteDownloaderError(f'No resources found at {self.post.url}')
     return [Resource(self.post, link, Resource.retry_download(link)) for link in links]
Esempio n. 5
0
    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return one resource per image in the post's Reddit gallery.

        The gallery items are read from ``self.post.gallery_data``; if that
        fails, the first entry of ``self.post.crosspost_parent_list`` is tried.

        Args:
            authenticator: Accepted for interface compatibility; not used here.

        Returns:
            A Resource for every URL from ``self._get_links``, each paired
            with ``Resource.retry_download`` for that URL.

        Raises:
            SiteDownloaderError: If no gallery data can be located or no image
                URLs are produced.
        """
        try:
            image_urls = self._get_links(self.post.gallery_data['items'])
        except (AttributeError, TypeError):
            try:
                image_urls = self._get_links(self.post.crosspost_parent_list[0]['gallery_data']['items'])
            except (AttributeError, IndexError, KeyError, TypeError) as e:
                # KeyError is included because the crosspost parent is subscripted
                # by key; a parent without gallery data should be reported as a
                # SiteDownloaderError rather than escape as a raw KeyError.
                logger.error(f'Could not find gallery data in submission {self.post.id}')
                logger.exception('Gallery image find failure')
                raise SiteDownloaderError('No images found in Reddit gallery') from e

        if not image_urls:
            raise SiteDownloaderError('No images found in Reddit gallery')
        return [Resource(self.post, url, Resource.retry_download(url)) for url in image_urls]
def test_format_full(
        format_string_directory: str,
        format_string_file: str,
        expected: str,
        reddit_submission: praw.models.Submission,
):
    """Check that the formatted path, as a string, equals ``expected``."""
    formatter = FileNameFormatter(format_string_file, format_string_directory)
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
    formatted_path = formatter.format_path(resource, Path('test'))
    assert str(formatted_path) == expected
Esempio n. 7
0
def test_format_full_conform(
        format_string_directory: str,
        format_string_file: str,
        reddit_submission: praw.models.Submission,
):
    """Check that formatting a path with the given schemes does not raise."""
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
    # No assertion: the call itself completing is the test.
    formatter.format_path(resource, Path('test'))
 def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
     """Return one resource per URL that ``self._get_links`` finds for the post.

     Raises ``SiteDownloaderError`` when no URLs are found. ``authenticator``
     is not used here.
     """
     found_urls = self._get_links(self.post.url)
     if found_urls:
         return [Resource(self.post, found_url) for found_url in found_urls]
     raise SiteDownloaderError('No images found in Reddit gallery')
Esempio n. 9
0
def test_format_full(
        format_string_directory: str,
        format_string_file: str,
        expected: str,
        reddit_submission: praw.models.Submission,
):
    """Check the formatted path against ``expected`` via ``do_test_path_equality``."""
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
    formatted_path = formatter.format_path(resource, Path('test'))
    assert do_test_path_equality(formatted_path, expected)
Esempio n. 10
0
 def erome_download(url: str) -> Callable:
     """Build a download callable for ``url`` that adds Erome request headers.

     The returned callable takes the global download parameters, merges them
     with a fixed ``headers`` entry (user-agent and Erome ``Referer``; this
     entry wins on a key clash) and passes the result to
     ``Resource.http_download``.
     """
     erome_headers = {
         'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                       ' Chrome/88.0.4324.104 Safari/537.36',
         'Referer': 'https://www.erome.com/',
     }

     def download(global_params):
         return Resource.http_download(url, global_params | {'headers': erome_headers})

     return download
Esempio n. 11
0
def test_shorten_filename_real(submission: MagicMock, tmp_path: Path):
    """Check that a path built from a 500-character title can be created on disk."""
    submission.id = 'BBBBBB'
    submission.title = 'A' * 500
    submission.author.name = 'test'
    submission.subreddit.display_name = 'test'
    formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}', 'ISO')
    resource = Resource(submission, 'www.example.com/empty', lambda: None, '.jpeg')
    target = formatter.format_path(resource, tmp_path)
    # Creating the directory and file fails if the name is still too long.
    target.parent.mkdir(parents=True)
    target.touch()
Esempio n. 12
0
 def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
     """Return a single resource for the video at the post URL.

     The resource is built from the value returned by
     ``self._download_video`` and the ``'ext'`` entry of
     ``self.get_video_attributes``. ``authenticator`` is not used here.
     """
     ytdl_options = dict(format='best', playlistend=1, nooverwrites=True)
     download_function = self._download_video(ytdl_options)
     video_attributes = self.get_video_attributes(self.post.url)
     return [Resource(self.post, self.post.url, download_function, video_attributes['ext'])]
    def _download_video(self, ytdl_options: dict) -> Resource:
        """Download the post's video with youtube-dl and wrap it in a Resource.

        The video is fetched into a temporary directory and read fully into
        memory; the directory is removed when the ``with`` block exits.

        Args:
            ytdl_options: Options forwarded to ``youtube_dl.YoutubeDL``. The
                mapping is copied before ``quiet`` and ``outtmpl`` are set, so
                the caller's dict is not modified.

        Returns:
            A Resource for ``self.post.url`` carrying the downloaded bytes in
            ``content``, with ``create_hash()`` already called.

        Raises:
            SiteDownloaderError: If youtube-dl raises ``DownloadError``, or if
                the download finished without producing any file (previously
                an unhandled ``IndexError``).
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            download_path = Path(temp_dir).resolve()
            # Build a fresh options dict instead of writing into the argument.
            options = {
                **ytdl_options,
                'quiet': True,
                'outtmpl': str(download_path / 'test.%(ext)s'),
            }
            try:
                with youtube_dl.YoutubeDL(options) as ydl:
                    ydl.download([self.post.url])
            except youtube_dl.DownloadError as e:
                # Chain explicitly so the youtube-dl failure is kept as the cause.
                raise SiteDownloaderError(f'Youtube download failed: {e}') from e

            downloaded_file = next(download_path.iterdir(), None)
            if downloaded_file is None:
                raise SiteDownloaderError(f'No media was downloaded from {self.post.url}')
            extension = downloaded_file.suffix
            # Read before leaving the context manager: the file is deleted with it.
            content = downloaded_file.read_bytes()
        out = Resource(self.post, self.post.url, extension)
        out.content = content
        out.create_hash()
        return out
Esempio n. 14
0
 def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
     """Return a single resource for the post URL using the parent class helpers.

     The resource is built from the parent's ``_download_video`` (called with
     empty options) and the ``'ext'`` entry of the parent's
     ``get_video_attributes``. ``authenticator`` is not used here.
     """
     downloaded = super()._download_video({})
     extension = super().get_video_attributes(self.post.url)['ext']
     return [Resource(self.post, self.post.url, downloaded, extension)]
def test_format_full_with_index_suffix(
    format_string_directory: str,
    format_string_file: str,
    index: Optional[int],
    expected: str,
    reddit_submission: praw.models.Submission,
):
    """Check the formatted path when an optional index is passed to ``format_path``."""
    formatter = FileNameFormatter(format_string_file, format_string_directory)
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
    formatted_path = formatter.format_path(resource, Path('test'), index)
    assert str(formatted_path) == expected
Esempio n. 16
0
    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return one resource per link that ``self._get_links`` finds for the post.

        Links that do not start with ``http://`` or ``https://`` get
        ``https://`` prepended. Each resource is paired with
        ``self.erome_download`` for its link. Raises ``SiteDownloaderError``
        when no links are found. ``authenticator`` is not used here.
        """
        links = self._get_links(self.post.url)
        if not links:
            raise SiteDownloaderError('Erome parser could not find any links')

        # Lazy generator: each link is normalised just before its resource is built.
        absolute_links = (
            link if re.match(r'https?://.*', link) else 'https://' + link
            for link in links
        )
        return [Resource(self.post, url, self.erome_download(url)) for url in absolute_links]
Esempio n. 17
0
def test_format_archive_entry_comment(
        test_file_scheme: str,
        test_folder_scheme: str,
        test_comment_id: str,
        expected_name: str,
        tmp_path: Path,
        reddit_instance: praw.Reddit,
):
    """Check the formatted path of a comment's ``.json`` entry via ``do_test_string_equality``."""
    comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO')
    entry = Resource(comment, '', lambda: None, '.json')
    formatted_path = formatter.format_path(entry, tmp_path)
    assert do_test_string_equality(formatted_path, expected_name)
def test_format_archive_entry_comment(
    test_file_scheme: str,
    test_folder_scheme: str,
    test_comment_id: str,
    expected_name: str,
    tmp_path: Path,
    reddit_instance: praw.Reddit,
):
    """Check the file name of the formatted path for a comment's ``.json`` entry."""
    comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme)
    entry = Resource(comment, '', '.json')
    formatted_path = formatter.format_path(entry, tmp_path)
    assert formatted_path.name == expected_name
Esempio n. 19
0
 def _compute_image_url(self, image: dict) -> Resource:
     """Build the ``i.imgur.com`` resource for one image entry.

     The URL is the image's ``'hash'`` followed by its ``'ext'`` after the
     extension has passed through ``self._validate_extension``.
     """
     url_without_ext = 'https://i.imgur.com/' + image['hash']
     direct_url = url_without_ext + self._validate_extension(image['ext'])
     return Resource(self.post, direct_url)
 def _write_entry_json(self, entry: BaseArchiveEntry):
     """Serialise the compiled entry with ``json.dumps`` and write it as a ``.json`` resource."""
     json_resource = Resource(entry.source, '', lambda: None, '.json')
     self._write_content_to_disk(json_resource, json.dumps(entry.compile()))
 def _write_entry_yaml(self, entry: BaseArchiveEntry):
     """Serialise the compiled entry with ``yaml.dump`` and write it as a ``.yaml`` resource."""
     yaml_resource = Resource(entry.source, '', lambda: None, '.yaml')
     self._write_content_to_disk(yaml_resource, yaml.dump(entry.compile()))
 def _write_entry_xml(self, entry: BaseArchiveEntry):
     """Serialise the compiled entry with ``dict2xml`` under a ``root`` element and write it as ``.xml``."""
     xml_resource = Resource(entry.source, '', lambda: None, '.xml')
     xml_text = dict2xml.dict2xml(entry.compile(), wrap='root')
     self._write_content_to_disk(xml_resource, xml_text)
def test_filter_all(
        test_url: str,
        expected: bool,
        download_filter: DownloadFilter,
):
    """Check that ``check_resource`` returns ``expected`` for a resource at ``test_url``."""
    resource = Resource(MagicMock(), test_url)
    verdict = download_filter.check_resource(resource)
    assert verdict == expected
def test_filter_empty_filter(test_url: str):
    """Check that a default-constructed ``DownloadFilter`` accepts the resource."""
    empty_filter = DownloadFilter()
    resource = Resource(MagicMock(), test_url)
    verdict = empty_filter.check_resource(resource)
    assert verdict is True
Esempio n. 25
0
 def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
     """Return a single ``.mp4`` resource for the link ``self._get_link`` resolves from the post URL.

     ``authenticator`` is not used here.
     """
     direct_link = self._get_link(self.post.url)
     mp4_resource = Resource(self.post, direct_link, '.mp4')
     return [mp4_resource]
def test_resource_get_extension(test_url: str, expected: str):
    """Check that ``_determine_extension`` returns ``expected`` for ``test_url``."""
    resource = Resource(MagicMock(), test_url, lambda: None)
    extension = resource._determine_extension()
    assert extension == expected
 def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
     """Return a single resource for the post URL, paired with ``Resource.retry_download``.

     ``authenticator`` is not used here.
     """
     direct_resource = Resource(
         self.post,
         self.post.url,
         Resource.retry_download(self.post.url),
     )
     return [direct_resource]
def test_download_online_resource(test_url: str, expected_hash: str):
    """Download ``test_url`` and check the hex digest of the resulting hash."""
    download_function = Resource.retry_download(test_url)
    resource = Resource(MagicMock(), test_url, download_function)
    resource.download()
    actual_hash = resource.hash.hexdigest()
    assert actual_hash == expected_hash
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.youtube import Youtube

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class PornHub(Youtube):
    """Site downloader for PornHub posts that reuses the ``Youtube`` helpers."""

    def __init__(self, post: Submission):
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return a single resource for the video at the post URL.

        Args:
            authenticator: Accepted for interface compatibility; not used here.

        Returns:
            A one-element list with a Resource built from the parent's
            ``_download_video`` and the ``'ext'`` video attribute.

        Raises:
            SiteDownloaderError: If the parent's ``get_video_attributes``
                returns nothing for the post URL.
        """
        ytdl_options = {
            'format': 'best',
            'nooverwrites': True,
        }
        video_attributes = super().get_video_attributes(self.post.url)
        if not video_attributes:
            # The original raised with no message, which made the failure
            # impossible to diagnose from logs.
            raise SiteDownloaderError(f'Could not retrieve video attributes for {self.post.url}')
        extension = video_attributes['ext']

        out = Resource(
            self.post,
            self.post.url,
            super()._download_video(ytdl_options),
            extension,
        )
        return [out]