Esempi in Python per download_binary, esempi in Python per processing.wrappers.http_downloader.download_binary

Esempio n. 1

0

Mostra file

File: generic_newspaper.py Progetto: vyterran/RedditDownloader

def handle(task, progress):
	"""Download the top image of the article page found at ``task.url``.

	The page HTML is fetched through ``http_downloader.page_text`` and handed
	to an ``Article`` for parsing; the article's ``top_image`` is then saved
	into ``task.file`` via ``http_downloader.download_binary``.

	:param task: Object exposing ``url`` (the page address) and ``file`` (the download target).
	:param progress: Progress reporter; ``set_status`` is called on it and it is
		forwarded to ``download_binary`` as ``prog``.
	:return: ``False`` when the page text could not be retrieved, ``None`` when
		the parsed article has no usable top image, otherwise whatever
		``http_downloader.download_binary`` returns.
	"""
	# Local import so this block does not depend on the file's import header.
	from urllib.parse import urljoin

	url = task.url
	progress.set_status("Requesting page...")
	resp = http_downloader.page_text(url, json=False)
	if not resp:
		return False

	config = Config()
	config.memoize_articles = False
	config.verbose = False
	article = Article(url='', config=config)

	# The HTML fetched above is injected with set_html(); the call order of
	# download()/set_html()/parse() is kept exactly as it was.
	article.download()
	article.set_html(resp)
	article.parse()
	if not article.top_image:
		return None

	src = article.top_image.strip()
	if not src:
		return None
	# Resolve the image against the page URL. Absolute URLs pass through
	# unchanged, protocol-relative ones ("//host/x.png") inherit the page's
	# scheme, and relative paths ("/img/x.png") are joined onto the page's
	# host. Previously a substring test for "http" decided whether to glue a
	# scheme onto the stripped path, which produced URLs such as
	# "http://img/x.png" for site-relative images.
	src = urljoin(url, src)

	progress.set_status("Downloading image...")

	return http_downloader.download_binary(src, task.file, prog=progress, handler_id=tag)

Esempio n. 2

0

Mostra file

File: imgur.py Progetto: DrMaxis/RedditDownloader

def handle(task, progress):
    """Resolve an Imgur link from ``task.url`` into an album or a direct download.

    :param task: Object exposing ``url`` and ``file``.
    :param progress: Progress reporter; ``set_status`` is called on it and it
        is forwarded to ``download_binary`` as ``prog``.
    :return: ``False`` when the URL cannot be cleaned or no direct link can be
        resolved; a successful ``HandlerResponse`` carrying ``album_urls`` when
        the link is read as an album; otherwise the result of
        ``http_downloader.download_binary``.
    """
    cleaned = clean_imgur_url(task.url)
    if not cleaned:
        return False

    # Candidate single-image address: the same URL with the gallery/album
    # path segments removed.
    direct_url = cleaned.replace('/gallery/', '/').replace('/a/', '/')

    progress.set_status("Parsing url & verifying format...")

    album_exception = None
    looks_like_album = 'gallery' in cleaned or '/a/' in cleaned
    if looks_like_album:
        # Imgur redirects the "i." form, but it is stripped here regardless.
        album_url = cleaned.replace('i.', '')
        try:
            downloader = ImgurAlbumDownloader(album_url)
            return HandlerResponse(
                success=True, handler=tag, album_urls=downloader.get_urls())
        except ImgurAlbumException as ex:
            # A single image can carry a gallery-style location; remember the
            # error and fall through to the direct-download attempt.
            album_exception = ex

    resolved = get_direct_link(direct_url)
    if resolved:
        return http_downloader.download_binary(
            resolved, task.file, prog=progress, handler_id=tag)

    # Unable to parse a proper URL; surface the earlier album error, if any.
    if album_exception:
        print("ImgurAlbumException:", album_exception)
    return False

Esempio n. 3

0

Mostra file

def handle(task, progress):
    """Download a gfycat clip referenced by ``task.url``.

    The clip id is taken from the first run of ASCII letters following
    ``com/`` in the URL, looked up through the gfycat API, and the first entry
    of ``format_opts`` that has a non-empty value in the returned ``gfyItem``
    is downloaded into ``task.file``.

    :param task: Object exposing ``url`` and ``file``.
    :param progress: Progress reporter; ``set_status`` is called on it and it
        is forwarded to ``download_binary`` as ``prog``.
    :return: ``False`` when the URL is not a gfycat link, no id can be
        extracted, the API lookup yields nothing, or no listed format is
        available; otherwise the result of ``http_downloader.download_binary``.
    """
    page_url = task.url
    if 'gfycat.com/' not in page_url:
        return False

    progress.set_status("Checking for direct gfycat url...")
    id_match = re.search(r"com/([a-zA-Z]+)", page_url)
    if id_match is None:
        return False
    clip_id = id_match.group(1)

    api_url = 'https://api.gfycat.com/v1/gfycats/%s' % clip_id
    payload = http_downloader.page_text(api_url, True)
    if not payload:
        return False
    item = payload["gfyItem"]

    # First preferred format that is present with a non-empty value.
    chosen = next((fmt for fmt in format_opts if fmt in item and item[fmt]), None)
    if not chosen:
        return False

    progress.set_status("Downloading gfycat %s..." % chosen)
    return http_downloader.download_binary(
        item[chosen], task.file, prog=progress, handler_id=tag)

Esempio n. 4

0

Mostra file

File: direct_link.py Progetto: vyterran/RedditDownloader

def handle(task, progress):
	"""Download ``task.url`` directly when it points at a media file.

	:param task: Object exposing ``url`` and ``file``.
	:param progress: Progress reporter; ``set_status`` is called on it and it
		is forwarded to ``download_binary`` as ``prog``.
	:return: A failed ``HandlerResponse`` when the reported status is not 200,
		``False`` when no media extension was detected, otherwise the result
		of ``http_downloader.download_binary``.
	"""
	link = task.url
	progress.set_status("Checking for direct url...")
	media_ext, status_code = http_downloader.is_media_url(link, return_status=True)

	# The status check comes first: a non-200 answer is reported with a reason.
	if status_code != 200:
		reason = "URL Responded: %s" % status_code
		return HandlerResponse(success=False, handler=tag, failure_reason=reason)

	if media_ext:
		progress.set_status("Downloading direct media...")
		return http_downloader.download_binary(link, task.file, prog=progress, handler_id=tag)
	return False

Esempio n. 5

0

Mostra file

File: test_http_downloader.py Progetto: vyterran/RedditDownloader

 def test_binary(self):
     """ Fetch a known PNG and verify the result, the file and the progress. """
     target = rel.SanitizedRelFile(self.dir, 'test_file')
     tracker = DownloaderProgress()
     outcome = http.download_binary(
         url='https://i.imgur.com/8770jp0.png',
         rel_file=target,
         prog=tracker,
         handler_id='test-run',
     )
     # The download must report success and leave a file behind.
     self.assertTrue(outcome.success, "The test file failed to download!")
     self.assertTrue(target.exists(), "Failed to download the test binary!")
     # The absolute path is expected to contain the PNG extension.
     saved_path = target.absolute()
     self.assertIn('.png', saved_path, "Downloaded invalid filetype!")
     # Progress is compared against the string '100'.
     self.assertEqual('100', tracker.get_percent(), 'Download did not reach 100%!')

Esempio n. 6

0

Mostra file

File: imgur.py Progetto: vyterran/RedditDownloader

def _best_imgur_link(obj):
	"""Return the first non-empty link on *obj*, preferring mp4, gifv, gif, then link.

	Returns ``None`` when none of those attributes holds a usable value.
	"""
	for attr in ('mp4', 'gifv', 'gif', 'link'):
		value = getattr(obj, attr, None)
		if value:
			return value
	return None


def handle(task, progress):
	"""Resolve an Imgur link from ``task.url`` into an album or a direct download.

	:param task: Object exposing ``url`` and ``file``.
	:param progress: Progress reporter, forwarded to ``download_binary`` as ``prog``.
	:return: ``False`` when the URL is not an Imgur link; a ``HandlerResponse``
		(successful with ``album_urls`` for multi-item albums, failed with a
		``failure_reason`` when the album or filetype cannot be resolved);
		otherwise the result of ``http_downloader.download_binary``.
	"""
	url = task.url
	if not is_imgur(url):
		return False

	# Check for an album/gallery.
	if is_gallery(url):
		if 'i.' in url:
			# Imgur redirects this, but we correct for posterity.
			url = url.replace('i.', '')
		urls = []
		try:
			urls = ImgurAlbumDownloader(url).get_urls() or []
		except ImgurAlbumException:
			# It's possible an image incorrectly has a Gallery location
			# prepended; ignore and try the fallbacks below.
			pass

		if not urls:  # Try using the imgur API to locate this album.
			try:
				# Fallback to imgur API client, if enabled, for hidden albums.
				client = make_api_client()
				if not client:
					return HandlerResponse(success=False,
										   handler=tag,
										   failure_reason="Could not locate hidden album, and API client is disabled.")
				items = client.get_album_images(extract_id(url))
				links = (_best_imgur_link(i) for i in items if not getattr(i, 'is_ad', False))
				# Items without any usable link are dropped so that no None
				# entry reaches the album list or the direct-link builder.
				urls = [link for link in links if link]
			except Exception as e:
				# Best-effort fallback: report the failure and continue with
				# the direct-link attempt below.
				print('Imgur API:', e)
		if len(urls) == 1:
			url = urls[0]  # For single-image albums, set up to directly download the image.
		elif urls:
			return HandlerResponse(success=True, handler=tag, album_urls=urls)

	url = build_direct_link(url)
	# Do some pre-processing, mostly to screen filetypes.
	ext, stat = http_downloader.is_media_url(url, return_status=True)
	if not ext or stat != 200:
		return HandlerResponse(success=False,
							   handler=tag,
							   failure_reason="Unable to determine imgur filetype: HTTP %s: %s" % (stat, url))
	if ext in imgur_animation_exts:
		# Swap the final extension for .mp4 on animated formats.
		url = '.'.join(url.split('.')[:-1]) + '.mp4'
	return http_downloader.download_binary(url, task.file, prog=progress, handler_id=tag)

Esempio n. 7

0

Mostra file

File: tumblr.py Progetto: github-userx/RedditDownloader

def handle(task, progress):
	"""Download the media attached to the Tumblr post at ``task.url``.

	:param task: Object exposing ``url`` and ``file``.
	:param progress: Progress reporter; ``set_status`` is called on it and it
		is passed positionally to ``download_binary``.
	:return: ``False`` when the URL does not match ``regex``, contains
		``.media.tumblr``, or an exception is raised while resolving it;
		``None`` when no media URLs are found; a successful ``HandlerResponse``
		with ``album_urls`` for several URLs; otherwise the result of
		``http_downloader.download_binary`` for the single URL.
	"""
	match = re.match(regex, task.url)
	if match is None or '.media.tumblr' in task.url:
		return False

	captured = match.groups()
	progress.set_status("Parsing Tumblr page...")
	try:
		media = get_media_urls(captured[0], captured[1])
		if not media:
			return None
		if len(media) <= 1:
			# Exactly one item: download it directly.
			return http_downloader.download_binary(media[0], task.file, progress, tag)
		return HandlerResponse(success=True, handler=tag, album_urls=media)
	except Exception as ex:
		# Any failure is reported on stderr and treated as "not handled".
		print('Tumblr: ERROR:', ex, task.url, file=sys.stderr, flush=True)
		return False