def handle(task, progress):
    """Download the lead image of the web page at ``task.url``.

    The page HTML is fetched through ``http_downloader.page_text``, parsed
    with ``Article``, and the ``top_image`` it reports is downloaded into
    ``task.file``.

    :param task: object exposing ``url`` (page to inspect) and ``file``
        (download destination, passed straight to ``download_binary``).
    :param progress: progress reporter; only ``set_status`` is called here.
    :return: ``False`` when the page could not be fetched, ``None`` when the
        parsed article has no usable top image, otherwise whatever
        ``http_downloader.download_binary`` returns.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from urllib.parse import urljoin

    url = task.url
    progress.set_status("Requesting page...")
    resp = http_downloader.page_text(url, json=False)
    if not resp:
        return False

    config = Config()
    config.memoize_articles = False
    config.verbose = False
    # The HTML is already in hand, so the Article is created with an empty
    # URL and fed the markup directly through set_html().
    article = Article(url='', config=config)
    article.download()
    article.set_html(resp)
    article.parse()

    src = (article.top_image or '').strip()
    if not src:
        return None

    # Resolve the image location against the page URL. This covers
    # protocol-relative ("//host/a.png"), root-relative ("/a.png") and
    # path-relative references, and leaves absolute URLs untouched. The
    # previous substring test for 'http' mis-handled relative paths that
    # merely contained that text, and dropped the page host for
    # root-relative paths.
    src = urljoin(url, src)

    progress.set_status("Downloading image...")
    return http_downloader.download_binary(src, task.file, prog=progress, handler_id=tag)
def handle(task, progress):
    """Handle an Imgur URL: report an album's URLs or download one image.

    :param task: object exposing ``url`` and ``file``.
    :param progress: progress reporter; only ``set_status`` is called here.
    :return: ``False`` when the URL cannot be cleaned or resolved to a direct
        link, a successful ``HandlerResponse`` carrying ``album_urls`` when
        the album lookup works, otherwise the result of
        ``http_downloader.download_binary``.
    """
    url = clean_imgur_url(task.url)
    if not url:
        return False

    # Computed before any album rewriting, from the cleaned URL.
    direct_url = url.replace('/gallery/', '/').replace('/a/', '/')

    progress.set_status("Parsing url & verifying format...")

    album_exception = None
    looks_like_album = 'gallery' in url or '/a/' in url
    if looks_like_album:
        # Imgur redirects "i." links itself; they are normalised here anyway.
        # (replace() is a no-op when the text is absent.)
        url = url.replace('i.', '')
        try:
            downloader = ImgurAlbumDownloader(url)
            return HandlerResponse(success=True, handler=tag, album_urls=downloader.get_urls())
        except ImgurAlbumException as err:
            # An image may wrongly carry a gallery location that Imgur can
            # still resolve, so remember the error and try a direct download.
            album_exception = err

    resolved = get_direct_link(direct_url)
    if resolved:
        return http_downloader.download_binary(resolved, task.file, prog=progress, handler_id=tag)

    # Unable to parse a proper URL; surface the album error if there was one.
    if album_exception:
        print("ImgurAlbumException:", album_exception)
    return False
def handle(task, progress):
    """Download a gfycat clip in the first available preferred format.

    The clip id is taken from the first run of ASCII letters after ``com/``
    in ``task.url`` and looked up through the gfycat API endpoint. The
    module-level ``format_opts`` is walked in order and the first format with
    a non-empty entry in the API's ``gfyItem`` object is downloaded.

    :param task: object exposing ``url`` and ``file``.
    :param progress: progress reporter; only ``set_status`` is called here.
    :return: ``False`` when the URL is not a gfycat link, no id can be
        extracted, the API response is empty or lacks ``gfyItem``, or none of
        the preferred formats is present; otherwise the result of
        ``http_downloader.download_binary``.
    """
    url = task.url
    if 'gfycat.com/' not in url:
        return False

    progress.set_status("Checking for direct gfycat url...")
    match = re.search(r"com/([a-zA-Z]+)", url)
    if not match:
        return False
    uid = match.group(1)

    data = http_downloader.page_text('https://api.gfycat.com/v1/gfycats/%s' % uid, True)
    if not data:
        return False

    # A response without a "gfyItem" object used to raise here; treat it
    # like every other "cannot handle this URL" case instead.
    files = data.get("gfyItem") if isinstance(data, dict) else None
    if not files:
        return False

    # First preferred format that exists and has a non-empty value.
    opt = next((fm for fm in format_opts if fm in files and files[fm]), None)
    if not opt:
        return False

    progress.set_status("Downloading gfycat %s..." % opt)
    return http_downloader.download_binary(files[opt], task.file, prog=progress, handler_id=tag)
def handle(task, progress):
    """Download ``task.url`` directly when it points at a media file.

    :param task: object exposing ``url`` and ``file``.
    :param progress: progress reporter; only ``set_status`` is called here.
    :return: a failed ``HandlerResponse`` when the reported status is not
        200, ``False`` when no media extension is reported, otherwise the
        result of ``http_downloader.download_binary``.
    """
    target = task.url
    progress.set_status("Checking for direct url...")

    extension, status_code = http_downloader.is_media_url(target, return_status=True)

    # The status check comes first: a non-200 answer is reported with its
    # code even if an extension was detected.
    if status_code != 200:
        reason = "URL Responded: %s" % status_code
        return HandlerResponse(success=False, handler=tag, failure_reason=reason)

    if extension:
        progress.set_status("Downloading direct media...")
        return http_downloader.download_binary(target, task.file, prog=progress, handler_id=tag)

    return False
def test_binary(self):
    """ A binary download succeeds, lands on disk as a PNG, and reports 100% """
    target = rel.SanitizedRelFile(self.dir, 'test_file')
    tracker = DownloaderProgress()

    result = http.download_binary(
        url='https://i.imgur.com/8770jp0.png',
        rel_file=target,
        prog=tracker,
        handler_id='test-run',
    )

    self.assertTrue(result.success, "The test file failed to download!")
    self.assertTrue(target.exists(), "Failed to download the test binary!")
    # The source is a PNG, so the resolved path is expected to contain that extension.
    self.assertIn('.png', target.absolute(), "Downloaded invalid filetype!")
    self.assertEqual('100', tracker.get_percent(), 'Download did not reach 100%!')
def handle(task, progress):
    """Handle an Imgur URL: expand an album or download a single image.

    For gallery/album URLs the album page is tried first through
    ``ImgurAlbumDownloader``. If that yields nothing, the Imgur API client
    (when enabled) is used as a fallback. Albums with several items are
    returned as ``album_urls``; a single-item album falls through to the
    direct-download path below.

    :param task: object exposing ``url`` and ``file``.
    :param progress: progress reporter, forwarded to ``download_binary``.
    :return: ``False`` when the URL is not an Imgur URL, a ``HandlerResponse``
        (success with ``album_urls``, or failure with a reason), or the
        result of ``http_downloader.download_binary``.
    """
    url = task.url
    if not is_imgur(url):
        return False

    # Check for an album/gallery.
    if is_gallery(url):
        if 'i.' in url:
            # Imgur redirects this, but we correct for posterity.
            url = url.replace('i.', '')

        urls = []
        try:
            album = ImgurAlbumDownloader(url)
            urls = album.get_urls()
        except ImgurAlbumException:
            pass  # It's possible an image incorrectly has a Gallery location prepended. Ignore error.

        if not urls:
            # Try using the imgur API to locate this album.
            try:
                # Fall back to the imgur API client, if enabled, for hidden albums.
                client = make_api_client()
                if not client:
                    return HandlerResponse(success=False, handler=tag, failure_reason="Could not locate hidden album, and API client is disabled.")
                items = client.get_album_images(extract_id(url))

                def best(o):
                    """Return the best-quality non-empty link on an Imgur object, or None."""
                    for b in ['mp4', 'gifv', 'gif', 'link']:
                        link = getattr(o, b, None)
                        if link:
                            return link
                    return None

                candidates = (best(i) for i in items if not getattr(i, 'is_ad', False))
                # Drop items with no usable link, so None never reaches
                # album_urls or build_direct_link().
                urls = [link for link in candidates if link]
            except Exception as e:
                # Deliberate best-effort: report the API problem and fall
                # through to the direct-link attempt.
                print('Imgur API:', e)

        if urls and len(urls) == 1:
            url = urls[0]  # For single-image albums, set up to directly download the image.
        elif urls:
            return HandlerResponse(success=True, handler=tag, album_urls=urls)

    url = build_direct_link(url)
    # Do some pre-processing, mostly to screen filetypes.
    ext, stat = http_downloader.is_media_url(url, return_status=True)
    if not ext or stat != 200:
        return HandlerResponse(success=False, handler=tag, failure_reason="Unable to determine imgur filetype: HTTP %s: %s" % (stat, url))
    if ext in imgur_animation_exts:
        # Swap the final extension for .mp4 on animated formats.
        url = '.'.join(url.split('.')[:-1]) + '.mp4'
    return http_downloader.download_binary(url, task.file, prog=progress, handler_id=tag)
def handle(task, progress):
    """Handle a Tumblr post URL: report its media as an album or download it.

    :param task: object exposing ``url`` and ``file``.
    :param progress: progress reporter; ``set_status`` is called and the
        object is forwarded to ``download_binary``.
    :return: ``False`` when the URL does not match the module ``regex``,
        contains ``.media.tumblr``, or an exception occurs while processing;
        ``None`` when no media URLs are found; a successful
        ``HandlerResponse`` with ``album_urls`` for several URLs; otherwise
        the result of ``http_downloader.download_binary`` for the single URL.
    """
    matched = re.match(regex, task.url)
    if matched is None:
        return False
    if '.media.tumblr' in task.url:
        return False

    groups = matched.groups()
    progress.set_status("Parsing Tumblr page...")

    try:
        found = get_media_urls(groups[0], groups[1])
        if not found:
            return None
        if len(found) <= 1:
            # Exactly one media item: download it directly.
            return http_downloader.download_binary(found[0], task.file, progress, tag)
        return HandlerResponse(success=True, handler=tag, album_urls=found)
    except Exception as err:
        # Any failure (lookup or download) is logged to stderr and reported
        # as "not handled".
        print('Tumblr: ERROR:', err, task.url, file=sys.stderr, flush=True)
        return False