def get_files(url, session, multi_post=False, cw=None):
    """Collect the downloadable files of the post at *url*.

    The post is classified by looking inside the container three levels
    above its ``<h1 class="title">`` heading:

    * a ``<div class="embedded-video">`` makes it a ``'youtube'`` post; the
      single file points at the embedded iframe's ``src``;
    * otherwise a ``<video>`` tag makes it a ``'video'`` post; the video URI
      is read from the ``/api/video/<id>`` JSON endpoint and the poster image
      is downloaded into an in-memory thumbnail;
    * otherwise it is an ``'image'`` post; every ``<img>`` whose parent link
      resolves to a URL containing ``/files/`` becomes one file.

    Args:
        url: Address of the post page; also used as base URL and referer.
        session: Session object forwarded to ``read_html``.
        multi_post: Forwarded to ``File`` for image posts.
        cw: Handle passed to ``get_print`` to obtain the logger.

    Returns:
        A list of ``File`` objects (empty when an image post has no
        matching images).

    Raises:
        Exception: ``'Invalid video'`` when the resolved video URL is
            rejected by ``downloader.ok_url``.
    """
    print_ = get_print(cw)

    html = read_html(url, session=session)
    soup = Soup(html)
    h = soup.find('h1', class_='title')
    content = h.parent.parent.parent
    title = h.text.strip()

    youtube = content.find('div', class_='embedded-video')
    video = content.find('video')
    if youtube:
        type_ = 'youtube'
    elif video:
        type_ = 'video'
    else:
        type_ = 'image'
    print_(('type: {}').format(type_))

    files = []
    if type_ == 'image':
        seen = set()
        for img in content.findAll('img'):
            # Images that are not wrapped in a link carry no 'href' on their
            # parent; skip them instead of failing the whole post.
            href = img.parent.attrs.get('href')
            if not href:
                continue
            img_url = urljoin(url, href)
            if '/files/' not in img_url:
                continue
            if img_url in seen:
                print('duplicate')
                continue
            seen.add(img_url)
            # len(files) is the running index of this image within the post.
            files.append(
                File(type_, img_url, title, url, len(files),
                     multi_post=multi_post))
    elif type_ == 'youtube':
        src = urljoin(url, youtube.find('iframe').attrs['src'])
        files.append(File(type_, src, title, url))
    else:  # type_ == 'video'
        url_thumb = urljoin(url, video.attrs['poster'])
        print('url_thumb:', url_thumb)
        video_id = re.find(PATTERN_ID, url, err='no video id')
        url_data = urljoin(url, '/api/video/{}'.format(video_id))
        s_json = read_html(url_data, url, session=session)
        data = json.loads(s_json)
        # The first entry of the API response is the one that gets used.
        video_info = data[0]
        url_video = urljoin(url, video_info['uri'])
        if not downloader.ok_url(url_video, url):  #4287
            print_('invalid video')
            raise Exception('Invalid video')
        video_file = File(type_, url_video, title, url)
        video_file.url_thumb = url_thumb
        video_file.thumb = BytesIO()
        downloader.download(url_thumb, buffer=video_file.thumb, referer=url)
        files.append(video_file)
    return files
def get(self, _=None):
    """Resolve, cache and return the media URL for this video.

    A previously resolved value in ``self._url_cache`` is returned as is.
    Otherwise the formats reported by ``ytdl.YoutubeDL`` for
    ``self.referer`` are tried from the greatest height downwards and the
    first one accepted by ``downloader.ok_url`` is used. A URL whose
    extension is ``.m3u8`` is wrapped in ``ffmpeg.Stream`` with ``_live``
    set to ``False``.

    The whole procedure is attempted up to ``self.try_n`` times, sleeping
    between failed attempts; the error of the last attempt is re-raised.

    Args:
        _: Ignored.

    Returns:
        The direct URL string, or an ``ffmpeg.Stream`` for ``.m3u8`` media.

    Raises:
        Exception: ``'no valid videos'`` when no format passes validation,
            or whatever the extraction raised on the final attempt.
    """
    if self._url_cache:
        return self._url_cache
    print_ = get_print(self.cw)

    def key(f):
        # 'height' can be missing *or* present with a None value; both must
        # sort as 0, otherwise the arithmetic/comparison below would raise
        # TypeError.
        h = f.get('height') or 0
        # At equal height, rank non-http protocols just below http ones.
        if not f['protocol'].startswith('http'):
            h -= .1
        return h

    for try_ in range(self.try_n):
        try:
            d = ytdl.YoutubeDL(cw=self.cw)
            info = d.extract_info(self.referer)

            fs = info['formats']
            for f in fs:
                print_('{} {} - {}'.format(
                    f.get('height'), f['protocol'], f['url']))

            for f in sorted(fs, key=key, reverse=True):
                if downloader.ok_url(f['url'], self.referer):  #4185
                    break
                print_('invalid video: {}'.format(f['url']))
            else:
                raise Exception('no valid videos')

            url = f['url']
            ext = get_ext(url)
            self.ext = ext
            print_('get_video: {} {}'.format(url, ext))
            if ext.lower() == '.m3u8':
                url = ffmpeg.Stream(url)
                url._live = False
            self._url_cache = url
            return url
        except Exception as e:
            msg = print_error(e)[0]
            print_('\nTwitter video Error:\n{}'.format(msg))
            if try_ < self.try_n - 1:
                sleep(10, self.cw)
            else:
                # Out of attempts: surface the last error to the caller.
                raise
def get(self, url, force=False):
    """Select the stream to download for *url* and return its direct URL.

    A previously resolved ``self._url`` is returned immediately. Otherwise
    the method:

    1. builds ``ytdl.YouTube(url)`` (up to 8 attempts);
    2. filters the streams according to ``self.type`` (``'video'`` or
       ``'audio'``) and the ``only_mp4`` / ``audio_included`` / ``max_res``
       / ``max_abr`` settings read from ``self``;
    3. repeatedly picks the best remaining candidate and drops it if
       ``downloader.ok_url`` rejects its URL;
    4. for a video stream without an audio codec, additionally selects the
       audio stream with the highest bitrate;
    5. downloads a thumbnail and fills in ``self.title``, ``self.filename``
       and the other result attributes.

    Args:
        url: Address of the video page.
        force: When true, the bitrate target is reset to 0 so that no audio
            stream is filtered out by bitrate. Used internally as a
            fallback for audio downloads (#1776).

    Returns:
        The direct URL of the selected stream (also stored in
        ``self._url``).

    Raises:
        Exception: when ``self.type`` is unsupported, when no usable stream
            remains (``'No videos'``), when a required audio stream cannot
            be found (``'No audio'``), or the last error of the failed
            ``ytdl.YouTube`` attempts.
    """
    if self._url:
        return self._url

    type_ = self.type
    only_mp4 = self.only_mp4
    audio_included = self.audio_included
    max_res = self.max_res
    max_abr = self.max_abr
    cw = self.cw
    print_ = get_print(cw)

    if force:
        max_abr = 0

    print('max_res: {}'.format(max_res))
    for try_ in range(8):
        try:
            yt = ytdl.YouTube(url)
            break
        except Exception as e:
            e_ = e
            s = print_error(e)[-1]
            print_('### youtube retry...\n{}'.format(s))
            sleep(try_ / 2, cw)
    else:
        raise e_

    streams = yt.streams.all()
    print_streams(streams, cw)

    #3528
    uploaded = datetime.strptime(yt.info['upload_date'], '%Y%m%d')
    self.utime = (uploaded - datetime(1970, 1, 1)).total_seconds()
    print_('utime: {}'.format(self.utime))

    # All filters below rewrite the list in place via slice assignment.
    if type_ == 'video':
        streams[:] = [s for s in streams if s.video_codec is not None]
        # Only mp4
        if only_mp4:
            streams[:] = [s for s in streams if s.subtype == 'mp4']
        # Audio included; Non-HD
        if audio_included:
            streams[:] = [s for s in streams if s.audio_codec is not None]
        # Maximum resolution
        streams[:] = [
            s for s in streams
            if s.resolution is not None
            and (max_res is None
                 or int(s.resolution.replace('p', '')) <= max_res)
        ]
        print_('')
    elif type_ == 'audio':
        streams[:] = [s for s in streams if s.abr]
        # Maximum abr: clamp the target to what is actually available.
        # Skipped when there is nothing to clamp against (no streams) or no
        # target at all, so neither max([]) nor min(x, None) can raise.
        abrs = [s.abr for s in streams]
        if abrs and max_abr is not None:
            max_abr = min(max(abrs), max_abr)
        streams[:] = [
            s for s in streams
            if max_abr is None or s.abr >= max_abr
        ]
    else:
        raise Exception(u'type "{}" is not supported'.format(type_))

    # Pick the best
    while streams:
        if type_ == 'video':
            ress = [int_(s.resolution.replace('p', '')) for s in streams]
            m = max(ress)
            prefer_format = 'mp4'
        elif type_ == 'audio':
            ress = [s.abr for s in streams]
            # Lowest bitrate among those that passed the filter above.
            m = min(ress)
            prefer_format = 'webm'
        print('Resolutions:', ress)
        stream_final = None
        for stream, res in zip(streams, ress):
            if res != m:
                continue
            if type_ == 'video':
                # True when this candidate would add audio to a pick that
                # currently has none.
                gains_audio = (
                    (stream_final is not None)
                    and (stream_final.audio_codec is None)
                    and bool(stream.audio_codec))
            elif type_ == 'audio':
                gains_audio = False
            if stream_final is None or (
                    stream_final.fps <= stream.fps and (
                        gains_audio
                        or (stream_final.subtype.lower() != prefer_format
                            and stream.subtype.lower() == prefer_format)
                        or stream_final.fps < stream.fps)):
                print_(u'# stream_final {} {} {} {} {} {}fps'.format(
                    stream, stream.format, stream.resolution,
                    stream.subtype, stream.audio_codec, stream.fps))
                stream_final = stream

        # Only string URLs are validated; anything else is accepted as is.
        if isinstance(stream_final.url, str):
            ok = downloader.ok_url(stream_final.url, referer=url)
        else:
            ok = True
        if ok:
            break
        print_('stream is not valid')
        streams.remove(stream_final)
    else:
        # Nothing left to try.
        if type_ == 'audio' and not force:
            return self.get(url, force=True)  # 1776
        raise Exception('No videos')

    stream = stream_final

    self.yt = yt
    self.id = yt.video_id

    self.stream = stream
    self.username = yt.info['uploader']
    self.stream_audio = None
    self.audio = None
    self.thumb = None
    self.thumb_url = None
    self.subtitles = yt.subtitles

    if type_ == 'audio' and 'DASH' in self.stream.format:
        self.stream.setDashType('audio')

    # Audio
    if type_ == 'video' and stream.audio_codec is None:
        print('audio required')
        audio_streams = [s for s in yt.streams.all() if s.abr]
        print_streams(audio_streams, cw)

        # only mp4; https://github.com/KurtBestor/Hitomi-Downloader-issues/issues/480
        mp4_streams = [
            s for s in audio_streams
            if s.audio_codec.lower().startswith('mp4')
        ]
        if mp4_streams:
            audio_streams = mp4_streams
            print_streams(audio_streams, cw)

        # only audio?
        if any(s.resolution is None for s in audio_streams):
            audio_streams = [
                s for s in audio_streams if s.resolution is None
            ]
            print_streams(audio_streams, cw)

        best_audio = None
        best_abr = 0
        for s in audio_streams:
            if s.abr > best_abr:
                best_abr = s.abr
                best_audio = s
        if best_audio is None:
            raise Exception('No audio')
        print(best_audio)
        self.stream_audio = best_audio
        if 'DASH' in self.stream_audio.format:
            self.stream_audio.setDashType('audio')
        self.audio = best_audio.url
        if callable(self.audio):
            self.audio = self.audio()

    # Thumbnail: try the variants in order and keep the first usable one.
    for quality in ['sddefault', 'hqdefault', 'mqdefault', 'default']:
        print('####', yt.thumbnail_url)
        self.thumb_url = yt.thumbnail_url.replace('default', quality)
        f = BytesIO()
        try:
            downloader.download(self.thumb_url, buffer=f)
            data = f.read()
            if len(data) == 0:
                raise AssertionError('Zero thumbnail')
            if data == empty_thumbnail:
                raise AssertionError('Empty thumbnail')
            f.seek(0)
            break
        except Exception as e:
            print(print_error(e)[-1])
    # If every variant failed, this is the buffer of the last attempt.
    self.thumb = f

    _url = self.stream.url
    if callable(_url):
        _url = _url()
    self._url = _url
    title = yt.title
    self.title = title
    ext = u'.' + self.stream.subtype
    self.filename = format_filename(title, self.id, ext)

    print_(u'Resolution: {}'.format(stream.resolution))
    print_(u'Codec: {} / {}'.format(stream.video_codec, stream.audio_codec))
    print_(u'Abr: {}'.format(stream.abr))
    print_(u'Subtype: {}'.format(stream.subtype))
    print_(u'FPS: {}\n'.format(stream.fps))

    return self._url