def test_there_should_not_be_absolute_uris_with_loads():
    """loads() has no base_uri, so resolving key.absolute_uri must raise."""
    with open(playlists.RELATIVE_PLAYLIST_FILENAME) as playlist_file:
        parsed = m3u8.loads(playlist_file.read())
    with pytest.raises(ValueError) as exc_info:
        parsed.key.absolute_uri
    assert str(exc_info.value) == 'There can not be `absolute_uri` with no `base_uri` set'
def run(self):
    """Download every flavor of a recording, concatenate its TS chunks,
    and convert all results to MP4 via the external convertor tool."""
    # The convertor command line grows incrementally: one
    # "<ts_path> <mp4_path> <language>" triple is appended per flavor.
    command = self.ts_to_mp4_convertor + ' '
    token = self.tokenize_url(self.token_url)
    self.url_base_entry = self.nginx_url.format(token)
    self.url_master = os.path.join(self.url_base_entry, 'master.m3u8')
    flavors_list = self.extract_flavor_dict()
    # Flag forwarded to get_flavor_id; its exact effect is defined there.
    single_flavor = len(flavors_list) == 1
    for obj in flavors_list:
        url_postfix = obj.url.rsplit('/', 1)[1]
        flavor_id = self.get_flavor_id(url_postfix, single_flavor)
        ts_output_filename = self.get_output_filename(flavor_id)
        output_full_path = os.path.join(self.recording_path, ts_output_filename)
        mp4_full_path = output_full_path.replace('.ts', '.mp4')
        command = command + ' ' + output_full_path + ' ' + mp4_full_path + ' ' + obj.language
        # Skip flavors whose concatenated TS file already exists on disk;
        # they are still included in the conversion command above.
        if os.path.isfile(output_full_path):
            self.logger.warn("file [%s] already exist", output_full_path)
            continue
        playlist = self.download_file(obj.url)
        self.logger.debug("load recording manifest : \n %s ", playlist)
        chunks = m3u8.loads(playlist).files
        self.download_chunks_and_concat(chunks, output_full_path)
        self.logger.info("Successfully concat %d files into %s", len(chunks), output_full_path)
    # Conversion runs once, over all flavors accumulated in `command`.
    self.convert_ts_to_mp4(command)
def downloadM3u8(url, filmId) : # url = 'http://s3.amazonaws.com/_bc_dml/example-content/tears-of-steel/playlist.m3u8' # url = 'http://plist.vn-hd.com/mp4v3/4055ec62f2d3860ca57a174c121fee03/8502305be2574cf3bbd82fb3f122334f/035c135af6eb46338a22f9c4642d9e6a/10115_e1_320_1280_ivdc.smil/playlist.m3u8' fileList = [] print url filePath = '/Users/giaule/Crawler/'; filePathFilm = filePath + 'films/'; lines = url.split("/") lastLine = lines[-1] urlPath = url.replace(lastLine, '') print urlPath data = urlopen(url).read() if data.find('m3u8') <> -1 : line = data.split("\n") if line[-1] == '': m3u8Link = line[len(line) - 2] else: m3u8Link = line[-1] if m3u8Link.find('http://') <> -1: url = m3u8Link else: url = urlPath + m3u8Link print url data = urlopen(url).read() lines = url.split("/") lastLine = lines[-1] urlPath = url.replace(lastLine, '') print data obj = m3u8.loads(data) now = (date.today() - timedelta(1)).strftime('%Y%m%d') for val in obj.segments: uri = val.uri urlCrawler = urlPath + val.uri fileName = filePath + val.uri if uri.find('http://') <> -1: urlCrawler = uri uris = uri.split("/") fileName = filePath + uris[-1] else: urlCrawler = urlPath + uri fileName = filePath + uri fileList.append(fileName) with open(fileName, 'w') as f: url = urlPath + val.uri print url f.write(urlopen(urlCrawler).read()) print val.uri fileName = filePathFilm + '%s.mp4' % filmId cmd = '/Users/giaule/Softs/ffmpeg -i "concat:%s" -y -c copy -bsf:a aac_adtstoasc %s' % ('|'.join(fileList), fileName) print cmd cmd = shlex.split(cmd) result = Popen(cmd, stdout=PIPE)
def get_authorized_stream_url(self, game, m3u8_url, from_start=False):
    """Resolve `m3u8_url` into a playable, authorized stream URL.

    If the playlist is key-protected, the key URI is fetched so the server
    issues auth cookies, which are then appended to the URL as protocol
    headers. With `from_start` and a configured hls_server, a time-shifted
    playlist URL is built instead.

    Raises self.NetworkError on non-200 responses or connection errors.
    """
    fn_name = 'get_authorized_stream_url'
    try:
        r = requests.get(m3u8_url)
        if r.status_code != 200:
            raise self.NetworkError(fn_name, self.NETWORK_ERR_NON_200, r.status_code)
        m3u8_obj = m3u8.loads(r.text)
        protocol_headers = {}
        if m3u8_obj.key is not None:
            # Fetch the key URI with the playlist cookies so the server
            # issues the session cookies needed for playback.
            r = requests.get(m3u8_obj.key.uri, cookies=r.cookies)
            if r.status_code != 200:
                raise self.NetworkError(fn_name, self.NETWORK_ERR_NON_200, r.status_code)
            protocol_headers = {
                'Cookie': '',
                'User-Agent': self.DEFAULT_USER_AGENT,
            }
            for cookie in r.cookies:
                protocol_headers['Cookie'] += '%s=%s; ' % (cookie.name, cookie.value)
            # nlqptid is carried in the original URL's query string.
            protocol_headers['Cookie'] += 'nlqptid=' + m3u8_url.split('?', 1)[1]
        if from_start and game['start_time'] is not None and self.hls_server is not None:
            # Build a time-shifted URL through the configured HLS server.
            m3u8_url = self.hls_server + \
                '/playlist?url=' + urllib.quote_plus(m3u8_url) + \
                '&start_at=' + game['start_time'].strftime('%Y%m%d%H%M%S')
            if len(protocol_headers) > 0:
                m3u8_url += '&headers=' + urllib.quote(urllib.urlencode(protocol_headers))
        elif len(protocol_headers) > 0:
            # "url|headers" suffix — presumably consumed by the player; verify.
            # (urllib.quote/urlencode: Python 2 APIs.)
            m3u8_url += '|' + urllib.urlencode(protocol_headers)
    except requests.exceptions.ConnectionError as error:
        raise self.NetworkError(fn_name, error)
    return m3u8_url
def get_authorized_stream_url(self, game, m3u8_url, from_start=False):
    """Resolve `m3u8_url` into an authorized, playable stream URL.

    Fetches the playlist and, if key-protected, fetches the key URI to
    collect auth cookies which are appended to the returned URL as protocol
    headers.  Raises self.NetworkError on non-200 responses or connection
    failures.
    """
    fn_name = "get_authorized_stream_url"
    try:
        playlist_response = requests.get(m3u8_url)
        if playlist_response.status_code != 200:
            raise self.NetworkError(fn_name, self.NETWORK_ERR_NON_200, playlist_response.status_code)
        playlist = m3u8.loads(playlist_response.text)
        protocol_headers = {}
        if playlist.key is not None:
            key_response = requests.get(playlist.key.uri, cookies=playlist_response.cookies)
            if key_response.status_code != 200:
                raise self.NetworkError(fn_name, self.NETWORK_ERR_NON_200, key_response.status_code)
            # Collect the cookies issued by the key request, plus nlqptid
            # taken from the original URL's query string.
            cookie_parts = ["%s=%s; " % (c.name, c.value) for c in key_response.cookies]
            cookie_parts.append("nlqptid=" + m3u8_url.split("?", 1)[1])
            protocol_headers = {"Cookie": "".join(cookie_parts), "User-Agent": self.DEFAULT_USER_AGENT}
        if from_start and game["start_time"] is not None and self.__hls_server is not None:
            m3u8_url = "%s/playlist?url=%s&start_at=%s" % (
                self.__hls_server,
                urllib.quote_plus(m3u8_url),
                game["start_time"].strftime("%Y%m%d%H%M%S"),
            )
            if protocol_headers:
                m3u8_url += "&headers=" + urllib.quote(urllib.urlencode(protocol_headers))
        elif protocol_headers:
            m3u8_url += "|" + urllib.urlencode(protocol_headers)
    except requests.exceptions.ConnectionError as error:
        raise self.NetworkError(fn_name, error)
    return m3u8_url
def get_live_stream(channel):
    """Fetch and parse the live variant playlist for *channel* from the usher API."""
    token, sig = get_token_and_signature(channel)
    cache_buster = random.randint(0, 1E7)
    usher_url = USHER_API.format(channel=channel, sig=sig, token=token, random=cache_buster)
    response = requests.get(usher_url)
    return m3u8.loads(response.text)
def getSegmentedPlaylist(self):
    """Return the parsed segmented playlist, fetching and caching its text once."""
    if not self._playlist:
        # Lazily fetch the playlist body on first use.
        self._playlist = requests.get(self.url).text
    parsed = m3u8.loads(self._playlist)
    parsed.base_path = self.base
    return parsed
def get_live_stream(self, channel):
    """Build the usher URL from the access token and return the parsed live playlist."""
    token, sig = self.get_token_and_signature(channel)
    cache_buster = random.randint(0, 1E7)
    token_data = json.loads(token)
    usher_url = USHER_API_LIVE.format(channel=token_data['channel'], sig=sig,
                                      token=token, random=cache_buster)
    response = requests.get(usher_url)
    return m3u8.loads(response.text)
def get_live_stream(self):
    """Return (parsed variant playlist, usher URL) for this streamer's live stream."""
    import m3u8  # function-local import kept from the original
    token, sig = self.get_token_and_signature()
    cache_buster = random.randint(0, 1E7)
    usher_url = USHER_API.format(channel=self.streamer, sig=sig,
                                 token=token, random=cache_buster)
    response = requests.get(usher_url)
    return m3u8.loads(response.text), usher_url
def test_there_should_not_be_absolute_uris_with_loads():
    # m3u8.loads() parses text without a base_uri, so resolving the key's
    # absolute URI must fail with a descriptive ValueError.
    with open(playlists.RELATIVE_PLAYLIST_FILENAME) as f:
        content = f.read()
    obj = m3u8.loads(content)
    # `sure`-style fluent assertion: calling get_absolute_uri should raise.
    obj.key.get_absolute_uri.when.called.should.have.raised(
        ValueError, 'There can not be `absolute_uri` with no `base_uri` set'
    )
def getPlaylistURL(self, url):
    """Record `url`, derive its base URL, and store the first variant's URI as self.url."""
    self.originalPlaylistUrl = url
    parts = urlparse.urlparse(url)
    directory = parts.path.rsplit('/', 1)[0]
    self.base = '{0}://{1}{2}'.format(parts.scheme, parts.netloc, directory)
    variant = m3u8.loads(requests.get(url).text)
    # The first playlist entry's URI is treated as host-relative.
    self.url = '{0}://{1}{2}'.format(parts.scheme, parts.netloc, variant.playlists[0].uri)
def _load_playlist_m3u8(playlist_url):
    """Fetch and parse a playlist with a single request.

    m3u8.load() is avoided on purpose: it performs two subsequent requests
    when loading playlists, while fetch + loads() needs only one.
    """
    response = urlfetch.fetch(playlist_url)
    parsed = m3u8.loads(response.content)
    parsed.base_uri = _base_uri(playlist_url)
    return parsed
def parse_m3u8_novar(url, string=None):
    """Parse m3u8 without variable bitrates and return durations and URLs.

    The return value is a list of (float, str) tuples of duration and
    absolute segment URL.
    """
    if not string:
        m3u8_obj = m3u8.load(url)
    else:
        m3u8_obj = m3u8.loads(string)
        # loads() has no base_uri, so derive it from the playlist URL.
        m3u8_obj.base_uri = url.rsplit('/', maxsplit=1)[0] + '/'
    return [(seg.duration, m3u8_obj.base_uri + seg.uri) for seg in m3u8_obj.segments]
def get_video_url(self, content_id, bit_rate):
    """Return a playable stream URL (with Cookie suffix) for `content_id`.

    Picks the variant whose bandwidth best matches `bit_rate` (e.g. "1200K").
    Python 2 only: uses list.sort(cmp=...).

    Raises ValueError (message looked up in SOAPCODES) when the service
    reports a status code other than "1".
    """
    if not self.logged_in:
        self.login()
    query_values = {
        'contentId': content_id,
        'fingerprint': unquote(self.cookies['fprt']),
        'identityPointId': self.cookies['ipid'],
        'playbackScenario': 'HTTP_CLOUD_WIRED',
        'platform': 'WEB_MEDIAPLAYER_5',
    }
    with requests.Session() as s:
        s.cookies = self.cookies
        response = s.get(VIDEO_URL, params=query_values).content
        parsed_response = parseString(response)
        status_code = parsed_response.getElementsByTagName('status-code')[0].childNodes[0].data
        if status_code != "1":
            raise ValueError(SOAPCODES[status_code])
        # Auth token buried inside the session-info element of the XML reply.
        media_auth_v2 = (parsed_response.getElementsByTagName("session-info")[0]
                         .childNodes[0]
                         .childNodes[0]
                         .attributes.get("value").nodeValue)
        cookies_string = ';'.join([x + '=' + y for x, y in self.cookies.items()])
        cookies_string += ';mediaAuth_v2=' + media_auth_v2
        # TODO: Where does this come from... and is it important? Currently
        # it is just copy and pasted from what I see in my local cookie!
        cookies_string += ';actionxCookie=%7B%22fired%22%3Atrue%7D'
        m3u8_url = parsed_response.getElementsByTagName('url')[0].childNodes[0].data
        m3u8_object = m3u8.loads(s.get(m3u8_url).content)
        bandwidth_and_uri = []
        for playlist in m3u8_object.playlists:
            bandwidth_and_uri.append((playlist.stream_info.bandwidth / 1000, playlist.uri))
        # Sort descending by bandwidth (Python 2 cmp= style).
        bandwidth_and_uri.sort(cmp=lambda a, b: b[0] - a[0])
        # Default to the highest bandwidth, then walk the descending list;
        # the final overwrite leaves the LOWEST bandwidth still above the
        # requested bit rate.
        uri = bandwidth_and_uri[0][1]
        bit_rate_int = int(bit_rate.replace("K", ""))
        for bandwidth, potential_uri in bandwidth_and_uri:
            if bandwidth > bit_rate_int:
                uri = potential_uri
        # "url|Cookie:..." suffix — presumably consumed by the player; verify.
        stream_url = (m3u8_url[:m3u8_url.rfind('/') + 1] + uri + '|' +
                      'Cookie:' + cookies_string)
    return stream_url
def parse_m3u8_manifest(self, manifest_url):
    """Return the manifest URL along with its bitrate."""
    header_suffix = urllib.urlencode({'Connection': 'keep-alive', 'User-Agent': self.user_agent})
    streams = {'manifest_url': manifest_url + '|' + header_suffix, 'bitrates': {}}
    manifest_body = self.make_request(manifest_url, 'get')
    variant = m3u8.loads(manifest_body)
    # Variant URIs are resolved against the '/manifest' directory and keep
    # the original query string plus the header suffix.
    base = manifest_url[:manifest_url.rfind('/manifest') + 1]
    query = manifest_url.split('?')[1]
    for playlist in variant.playlists:
        kbps = int(playlist.stream_info.bandwidth) / 1000
        streams['bitrates'][kbps] = base + playlist.uri + '?' + query + '|' + header_suffix
    return streams
def m3u8_to_dict(self, manifest_url):
    """Return a dict of available bitrates and their respective stream.

    This is especially useful if you need to pass a URL to a player that
    doesn't support adaptive streaming."""
    header_suffix = urllib.urlencode({'Connection': 'keep-alive', 'User-Agent': self.user_agent})
    base = manifest_url[:manifest_url.rfind('/manifest') + 1]
    query = manifest_url.split('?')[1]
    variant = m3u8.loads(self.make_request(manifest_url, 'get'))
    return {
        int(p.stream_info.bandwidth) / 1000: base + p.uri + '?' + query + '|' + header_suffix
        for p in variant.playlists
    }
def resolve_video_url(self, video_id):
    """Resolve `video_id` to a playable URL.

    Builds a signed playlist URL, then — for variant playlists — returns
    the best-bitrate sub-playlist URL (via util.getBestBitrateUrl); for a
    plain media playlist the signed URL itself is returned.
    """
    # get video info
    data = self._get_video_info(video_id)
    self.plugin.log.debug('resolving video: %s' % video_id)
    # this method assumes there's no children
    if 'children' in data:
        raise Exception('Invalid video id: %s' % video_id)
    # find playlist in resources list
    for res in data['resources']:
        if '.m3u8' in res['url']:
            break
    # get hashes
    hashes, data_hashes = self._get_hashes(video_id, [res['_id']], 'html5')
    signed_hashes = util.get_signed_hashes(hashes)
    # resolve query string template: "{{name}}" placeholders -> "%(name)s"
    query_string = re.sub(r'{{(\w*)}}', r'%(\1)s', res['query_string_template'])
    try:
        query_string = query_string % {
            'hash': signed_hashes[0],
            'key': 'html5'
        }
    except KeyError:
        # live videos (template needs extra openClosed/user fields)
        query_string = query_string % {
            'hash': signed_hashes[0],
            'key': 'html5',
            'openClosed': 'F',
            'user': data_hashes['user']
        }
    # build resolved url
    url = '?'.join([res['url'], query_string])
    self.plugin.log.debug('video playlist url: %s' % url)
    session = requests.Session()
    req = session.get(url)
    m3u8_header = {
        'Cookie': '; '.join(['%s=%s' % (key, value) for (key, value) in req.cookies.items()])
    }
    m3u8_obj = m3u8.loads(req.text.strip())
    streams = {}
    if m3u8_obj.is_variant:
        # if this m3u8 contains links to other m3u8s
        for playlist in m3u8_obj.playlists:
            # NOTE(review): slicing bandwidth up to the first space assumes an
            # older m3u8 release where bandwidth was a string; current versions
            # return an int, which would make this raise — confirm pinned version.
            bitrate = str(int(playlist.stream_info.bandwidth[:playlist.stream_info.bandwidth.find(' ')])/100)
            streams[bitrate] = url[:url.rfind('/') + 1] + playlist.uri + '?' + url.split('?')[1] + '|' + urllib.urlencode(m3u8_header)
    else:
        return url
    return util.getBestBitrateUrl(self.plugin, streams)
def parse_m3u8_manifest(self, manifest_url):
    """Return the stream URL along with its bitrate."""
    streams = {}
    manifest_body = self.make_request(manifest_url, 'get')
    auth_headers = {
        'Authorization': self.get_credentials()['auth_header'],
        'User-Agent': 'FOX Sports GO/2836 CFNetwork/711.1.16 Darwin/14.0.0'
    }
    for playlist in m3u8.loads(manifest_body).playlists:
        kbps = int(playlist.stream_info.bandwidth) / 1000
        # Relative variant URIs are resolved against the manifest's directory.
        if playlist.uri.startswith('http'):
            absolute_url = playlist.uri
        else:
            absolute_url = manifest_url[:manifest_url.rfind('/') + 1] + playlist.uri
        streams[str(kbps)] = absolute_url + '|' + urlencode(auth_headers)
    return streams
def get_variant_playlist(video_id, headers=None):
    """Fetch and parse the variant (master) playlist for a Twitch VOD.

    `headers` defaults to an empty dict. (Fix: the previous `headers=dict()`
    mutable default was shared across calls.)
    """
    if headers is None:
        headers = dict()
    token, sig = get_session(video_id, headers)
    params = {
        "player": "twitchweb",
        "p": int(random() * 999999),
        "type": "any",
        "allow_source": "true",
        "allow_audio_only": "true",
        "nauth": token,
        "nauthsig": sig
    }
    r = requests.get(twitch_usher_url + "/vod/{}".format(video_id), params=params)
    r.encoding = 'utf-8'
    logging.debug('get_variant_playlist for video_id {} got data {}'.format(video_id, r.content))
    # Some playlists have bandwidth set to none which is not valid m3u8 and will crash the parser
    # we dont care about the value so fix it by setting it something
    text = r.text.replace(',BANDWIDTH=None,', ',BANDWIDTH=1,')
    return m3u8.loads(text)
def get_publishpoint_streams(self, video_id, stream_type=None, game_type=None):
    """Return the URL for a stream.

    Queries the publishpoint servlet for the m3u8 path of a channel/game
    stream and, for variant playlists, expands it into a dict keyed by
    bitrate (kbps, as str); otherwise returns the single URL found.

    Fix: the bare `except:` around the manifest request now catches only
    `Exception`, so SystemExit/KeyboardInterrupt are no longer swallowed.
    """
    streams = {}
    self.get_current_season_and_week()  # set cookies
    url = self.servlets_url + '/publishpoint'
    if video_id == 'nfl_network':
        post_data = {'id': '1', 'type': 'channel', 'nt': '1'}
    elif video_id == 'redzone':
        post_data = {'id': '2', 'type': 'channel', 'nt': '1'}
    elif stream_type == 'game':
        post_data = {'id': video_id, 'type': stream_type, 'nt': '1', 'gt': game_type}
    else:
        post_data = {'id': video_id, 'type': stream_type, 'nt': '1'}
    headers = {'User-Agent': 'iPad'}
    m3u8_data = self.make_request(url=url, method='post', payload=post_data, headers=headers)
    m3u8_dict = xmltodict.parse(m3u8_data)['result']
    self.log('NFL Dict %s' % m3u8_dict)
    m3u8_url = m3u8_dict['path'].replace('_ipad', '')
    m3u8_param = m3u8_url.split('?', 1)[-1]
    # I /hate/ lying with User-Agent.
    # Huge points for making this work without lying.
    m3u8_header = {'Cookie': 'nlqptid=' + m3u8_param,
                   'User-Agent': 'Safari/537.36 Mozilla/5.0 AppleWebKit/537.36 Chrome/31.0.1650.57',
                   'Accept-encoding': 'identity, gzip, deflate',
                   'Connection': 'keep-alive'}
    try:
        m3u8_manifest = self.make_request(url=m3u8_url, method='get')
    except Exception:
        m3u8_manifest = False
    if m3u8_manifest:
        m3u8_obj = m3u8.loads(m3u8_manifest)
        if m3u8_obj.is_variant:
            # this m3u8 contains links to other m3u8s
            for playlist in m3u8_obj.playlists:
                bitrate = int(playlist.stream_info.bandwidth) / 1000
                streams[str(bitrate)] = m3u8_url[:m3u8_url.rfind('/') + 1] + playlist.uri + '?' + m3u8_url.split('?')[1] + '|' + urllib.urlencode(m3u8_header)
        else:
            streams['sole available'] = m3u8_url
    return streams
def parse_m3u8_manifest(self, manifest_url, auth_cookie=None):
    """Return the stream URL along with its bitrate."""
    streams = {}
    response = requests.get(manifest_url)
    manifest_body = response.content
    self.log('HLS manifest: \n %s' % manifest_body)
    cookie_header = {'Cookie': auth_cookie}
    for playlist in m3u8.loads(manifest_body).playlists:
        kbps = int(playlist.stream_info.bandwidth) / 1000
        # Relative variant URIs are resolved against the manifest's directory.
        if playlist.uri.startswith('http'):
            stream_url = playlist.uri
        else:
            stream_url = manifest_url[:manifest_url.rfind('/') + 1] + playlist.uri
        streams[str(kbps)] = stream_url + '|' + urlencode(cookie_header)
    return streams
def get_and_parse_m3u8(url): """Getting and parsing m3u8 playlist. In case of root playlist returns URL of playlist with choosen bandwidth. I case of playlist returns dict of URLs of *.ts and playlist itself""" #m3u8_obj = m3u8.load(url) base_url = extract_url_base(url) playlist = get_m3u8(url) m3u8_obj = m3u8.loads(playlist) m3u8_obj.base_url = base_url if m3u8_obj.is_variant: for playlist in m3u8_obj.playlists: if playlist.stream_info.bandwidth > BANDWIDTH-100 and playlist.stream_info.bandwidth < BANDWIDTH+100: return m3u8_obj.base_url+'/'+playlist.uri else: m3u8_obj.base_uri = base_url ts = {} for segm in m3u8_obj.segments: ts[segm.uri] = segm.absolute_uri return ts,playlist
def get_playlists_from_m3u8_url(self, m3u8_url, fn_name=None):
    """Map bitrate (kbps, str) -> playlist URL for a variant m3u8; '0' -> url otherwise."""
    if fn_name is None:
        fn_name = 'get_playlists_from_m3u8_url'
    playlists = {}
    try:
        response = self.session.get(m3u8_url)
        if response.status_code != 200:
            raise self.NetworkError(fn_name, self.NETWORK_ERR_NON_200, response.status_code)
        variant = m3u8.loads(response.text)
        if not variant.is_variant:
            playlists['0'] = m3u8_url
        else:
            base = m3u8_url[:m3u8_url.rfind('/') + 1]
            query = m3u8_url.split('?', 1)[1]
            for playlist in variant.playlists:
                bitrate = str(int(playlist.stream_info.bandwidth) / 1000)
                playlists[bitrate] = base + playlist.uri + '?' + query
    except requests.exceptions.ConnectionError as error:
        raise self.NetworkError(fn_name, error)
    return playlists
def get_playlist_details(m3u8_url, timeout, success):
    """Fetch `m3u8_url` and return its parsed playlist, tallying outcomes.

    `success` is a dict-like counter keyed True/False; each call increments
    exactly one of the two keys.  Returns the parsed playlist on success,
    None on any failure.

    Fixes: the nested `try: counter += 1 / except: counter = 1` blocks are
    replaced with `dict.get`, and the bare `except:` clauses are narrowed
    to `Exception`.
    """
    try:
        r = requests.get(m3u8_url, verify=False, allow_redirects=True,
                         timeout=(timeout['connect'], timeout['read']))
    except Exception:
        # Network-level failure (DNS, connect, read timeout, ...).
        success[False] = success.get(False, 0) + 1
        return
    if r.status_code not in [200, 201, 302, 307]:
        success[False] = success.get(False, 0) + 1
        return
    try:
        playlist = m3u8.loads(r.text)
    except Exception:
        # Body was not a parseable m3u8 document.
        success[False] = success.get(False, 0) + 1
        return
    success[True] = success.get(True, 0) + 1
    return playlist
def play(connect_handle, id_number):
    """Resolve a channel id to its highest-bandwidth stream and start playback in Kodi."""
    channel_url = urls.base_url + urls.channel_url % (plugin_info.get_setting('language'), id_number)
    content = connect_handle.json(connect_handle.url(channel_url))
    title = content['items'][0]['title']
    thumb = content['items'][0]['landscape_large']
    stream_page_url = content['playlist_items'][0]['url']
    body = connect_handle.text(connect_handle.url(stream_page_url), encoding=False)
    variant_m3u8 = m3u8.loads(body)
    # bandwidth -> uri; the maximum bandwidth wins.
    streams = {p.stream_info.bandwidth: p.uri for p in variant_m3u8.playlists}
    best_url = streams[max(streams.keys())]
    list_item = xbmcgui.ListItem(title)
    playlist = xbmc.PlayList(xbmc.PLAYLIST_VIDEO)
    playlist.clear()
    playlist.add(best_url, list_item)
    xbmc.Player().play(playlist)
def get_stream_playlist(self, master_url):
    """Map bitrate (kbps, str) -> 'url|headers' for a variant playlist; '0' otherwise."""
    fn_name = "get_stream_playlist"
    playlists = {}
    try:
        response = self.__session.get(master_url)
        if response.status_code != 200:
            raise self.NetworkError(fn_name, self.NETWORK_ERR_NON_200, response.status_code)
        header_suffix = "|" + urllib.urlencode(self.__playlist_headers)
        master = m3u8.loads(response.text)
        if not master.is_variant:
            playlists["0"] = master_url + header_suffix
        else:
            base = master_url[: master_url.rfind("/") + 1]
            for playlist in master.playlists:
                bitrate = str(int(playlist.stream_info.bandwidth) / 1000)
                playlists[bitrate] = base + playlist.uri + header_suffix
    except requests.exceptions.ConnectionError as error:
        raise self.NetworkError(fn_name, error)
    return playlists
def parse_m3u8_manifest(self, manifest_url):
    """Return the stream URL along with its bitrate.

    Also extracts the CDN auth cookie (hdntl/hdnts or lvlt_tk) from the
    manifest response and appends it to each stream URL as a header suffix.

    Bug fix: the original tested `if 'hdntl' and 'hdnts' in req.cookies.keys()`,
    which only checks 'hdnts' because 'hdntl' is a truthy literal; both
    cookies are now checked explicitly.
    """
    streams = {}
    auth_cookie = None
    req = requests.get(manifest_url)
    m3u8_manifest = req.content
    self.log('HLS manifest: \n %s' % m3u8_manifest)
    if req.cookies:
        self.log('Cookies: %s' % req.cookies)
        # the auth cookie differs depending on the CDN
        if 'hdntl' in req.cookies and 'hdnts' in req.cookies:
            hdntl_cookie = req.cookies['hdntl']
            hdnts_cookie = req.cookies['hdnts']
            auth_cookie = 'hdntl=%s; hdnts=%s' % (hdntl_cookie, hdnts_cookie)
        elif 'hdntl' in req.cookies:
            hdntl_cookie = req.cookies['hdntl']
            auth_cookie = 'hdntl=%s' % hdntl_cookie
        elif 'lvlt_tk' in req.cookies:
            lvlt_tk_cookie = req.cookies['lvlt_tk']
            auth_cookie = 'lvlt_tk=%s' % lvlt_tk_cookie
        else:
            self.log('No auth cookie found.')
    else:
        self.log('Stream request didn\'t contain any cookies.')
    m3u8_header = {'Cookie': auth_cookie}
    m3u8_obj = m3u8.loads(m3u8_manifest)
    for playlist in m3u8_obj.playlists:
        bitrate = int(playlist.stream_info.bandwidth) / 1000
        # Relative variant URIs are resolved against the manifest's directory.
        if playlist.uri.startswith('http'):
            stream_url = playlist.uri
        else:
            stream_url = manifest_url[:manifest_url.rfind('/') + 1] + playlist.uri
        streams[str(bitrate)] = stream_url + '|' + urlencode(m3u8_header)
    return streams
def _download_video(video_id, args):
    """Download a Twitch VOD: fetch its playlist, download the selected
    segment range concurrently, rewrite the playlist to the local files,
    and (unless --no-join) join them into the target video file."""
    if args.start and args.end and args.end <= args.start:
        raise ConsoleError("End time must be greater than start time")
    print_out("<dim>Looking up video...</dim>")
    video = twitch.get_video(video_id)
    print_out("Found: <blue>{}</blue> by <yellow>{}</yellow>".format(
        video['title'], video['channel']['display_name']))
    print_out("<dim>Fetching access token...</dim>")
    access_token = twitch.get_access_token(video_id)
    print_out("<dim>Fetching playlists...</dim>")
    playlists_m3u8 = twitch.get_playlists(video_id, access_token)
    playlists = list(_parse_playlists(playlists_m3u8))
    # Quality either named on the command line or chosen interactively.
    playlist_uri = (_get_playlist_by_name(playlists, args.quality)
                    if args.quality else _select_playlist_interactive(playlists))
    print_out("<dim>Fetching playlist...</dim>")
    response = requests.get(playlist_uri)
    response.raise_for_status()
    playlist = m3u8.loads(response.text)
    # Base URI = playlist URI with the trailing filename stripped.
    base_uri = re.sub("/[^/]+$", "/", playlist_uri)
    target_dir = _crete_temp_dir(base_uri)
    vod_paths = _get_vod_paths(playlist, args.start, args.end)
    # Save playlists for debugging purposes
    with open(path.join(target_dir, "playlists.m3u8"), "w") as f:
        f.write(playlists_m3u8)
    with open(path.join(target_dir, "playlist.m3u8"), "w") as f:
        f.write(response.text)
    print_out("\nDownloading {} VODs using {} workers to {}".format(
        len(vod_paths), args.max_workers, target_dir))
    path_map = download_files(base_uri, target_dir, vod_paths, args.max_workers)
    # Make a modified playlist which references downloaded VODs
    # Keep only the downloaded segments and skip the rest
    org_segments = playlist.segments.copy()
    playlist.segments.clear()
    for segment in org_segments:
        if segment.uri in path_map:
            segment.uri = path_map[segment.uri]
            playlist.segments.append(segment)
    playlist_path = path.join(target_dir, "playlist_downloaded.m3u8")
    playlist.dump(playlist_path)
    if args.no_join:
        print_out("\n\n<dim>Skipping joining files...</dim>")
        print_out("VODs downloaded to:\n<blue>{}</blue>".format(target_dir))
        return
    print_out("\n\nJoining files...")
    target = _video_target_filename(video, args.format)
    _join_vods(playlist_path, target, args.overwrite)
    if args.keep:
        print_out(
            "\n<dim>Temporary files not deleted: {}</dim>".format(target_dir))
    else:
        print_out("\n<dim>Deleting temporary files...</dim>")
        shutil.rmtree(target_dir)
    print_out("\nDownloaded: <green>{}</green>".format(target))
def run(self):
    """Record a live channel: poll chunks.m3u8, persist every new segment,
    and finally write a VOD playlist plus a 'Completed' manifest entry.

    Bug fix vs. the original: skipped-segment indices are now deleted in
    reverse order, so earlier deletions no longer shift the positions of
    the remaining indices (which previously deleted the wrong elements or
    raised IndexError).
    """
    logger.info('Starting recording\n'
                'Channel name => {0}\n'
                'Channel number => {1}\n'
                'Program title => {2}\n'
                'Start date & time => {3}\n'
                'End date & time => {4}'.format(self._recording.channel_name,
                                                self._recording.channel_number,
                                                self._recording.program_title,
                                                self._recording.start_date_time_in_utc.astimezone(
                                                    get_localzone()).strftime('%Y-%m-%d %H:%M:%S'),
                                                self._recording.end_date_time_in_utc.astimezone(
                                                    get_localzone()).strftime('%Y-%m-%d %H:%M:%S')))
    actual_start_date_time_in_utc = datetime.now(pytz.utc)
    self._create_recording_directory_tree()
    persisted_recording_id = '{0}'.format(uuid.uuid4())
    self._save_manifest_file(None,
                             actual_start_date_time_in_utc.strftime('%Y-%m-%d %H:%M:%S%z'),
                             persisted_recording_id,
                             None,
                             'Started')
    # Retry playlist.m3u8 up to 10 times with a stepwise-growing back-off.
    for number_of_times_attempted_to_download_playlist_m3u8 in range(1, 11):
        try:
            playlist_m3u8_content = SmoothStreamsProxy.download_playlist_m3u8('127.0.0.1',
                                                                              '/live/playlist.m3u8',
                                                                              self._recording.channel_number,
                                                                              self._id,
                                                                              'hls')
            self._save_manifest_file(None,
                                     actual_start_date_time_in_utc.strftime('%Y-%m-%d %H:%M:%S%z'),
                                     persisted_recording_id,
                                     None,
                                     'In Progress')
            playlist_m3u8_object = m3u8.loads(playlist_m3u8_content)
            chunks_url = '/live/{0}'.format(playlist_m3u8_object.data['playlists'][0]['uri'])
            break
        except requests.exceptions.HTTPError:
            time_to_sleep_before_next_attempt = math.ceil(
                number_of_times_attempted_to_download_playlist_m3u8 / 5) * 5
            logger.error('Attempt #{0}\n'
                         'Failed to download playlist.m3u8\n'
                         'Will try again in {1} seconds'.format(number_of_times_attempted_to_download_playlist_m3u8,
                                                                time_to_sleep_before_next_attempt))
            time.sleep(time_to_sleep_before_next_attempt)
    else:
        # for/else: every attempt raised -> cancel the recording.
        logger.error('Exhausted attempts to download playlist.m3u8')
        logger.info('Canceling recording\n'
                    'Channel name => {0}\n'
                    'Channel number => {1}\n'
                    'Program title => {2}\n'
                    'Start date & time => {3}\n'
                    'End date & time => {4}'.format(self._recording.channel_name,
                                                    self._recording.channel_number,
                                                    self._recording.program_title,
                                                    self._recording.start_date_time_in_utc.astimezone(
                                                        get_localzone()).strftime('%Y-%m-%d %H:%M:%S'),
                                                    self._recording.end_date_time_in_utc.astimezone(
                                                        get_localzone()).strftime('%Y-%m-%d %H:%M:%S')))
        self._save_manifest_file(datetime.now(pytz.utc).strftime('%Y-%m-%d %H:%M:%S%z'),
                                 actual_start_date_time_in_utc.strftime('%Y-%m-%d %H:%M:%S%z'),
                                 persisted_recording_id,
                                 None,
                                 'Canceled')
        return
    vod_playlist_m3u8_object = None
    downloaded_segment_file_names = []
    while not self._stop_recording_event.is_set():
        try:
            # <editor-fold desc="Download chunks.m3u8">
            chunks_url_components = urllib.parse.urlparse(chunks_url)
            chunks_query_string_parameters = dict(urllib.parse.parse_qsl(chunks_url_components.query))
            channel_number_parameter_value = chunks_query_string_parameters.get('channel_number', None)
            client_uuid_parameter_value = chunks_query_string_parameters.get('client_uuid', None)
            nimble_session_id_parameter_value = chunks_query_string_parameters.get('nimblesessionid', None)
            smooth_streams_hash_parameter_value = chunks_query_string_parameters.get('wmsAuthSign', None)
            nimble_session_id_parameter_value = SmoothStreamsProxy.map_nimble_session_id(
                '127.0.0.1',
                chunks_url_components.path,
                channel_number_parameter_value,
                client_uuid_parameter_value,
                nimble_session_id_parameter_value,
                smooth_streams_hash_parameter_value)
            chunks_m3u8_content = SmoothStreamsProxy.download_chunks_m3u8('127.0.0.1',
                                                                          chunks_url_components.path,
                                                                          channel_number_parameter_value,
                                                                          client_uuid_parameter_value,
                                                                          nimble_session_id_parameter_value)
            # </editor-fold>
            chunks_m3u8_download_date_time_in_utc = datetime.now(pytz.utc)
            chunks_m3u8_total_duration = 0
            chunks_m3u8_object = m3u8.loads(chunks_m3u8_content)
            if not vod_playlist_m3u8_object:
                # First pass: the chunks playlist seeds the VOD playlist.
                vod_playlist_m3u8_object = chunks_m3u8_object
            indices_of_skipped_segments = []
            for (segment_index, segment) in enumerate(chunks_m3u8_object.segments):
                segment_url = '/live/{0}'.format(segment.uri)
                segment_url_components = urllib.parse.urlparse(segment_url)
                segment_query_string_parameters = dict(urllib.parse.parse_qsl(segment_url_components.query))
                segment_file_name = re.sub(r'(/.*)?(/)(.*\.ts)', r'\3', segment_url_components.path)
                chunks_m3u8_total_duration += segment.duration
                if segment_file_name not in downloaded_segment_file_names:
                    try:
                        # <editor-fold desc="Download ts file">
                        channel_number_parameter_value = segment_query_string_parameters.get('channel_number', None)
                        client_uuid_parameter_value = segment_query_string_parameters.get('client_uuid', None)
                        nimble_session_id_parameter_value = segment_query_string_parameters.get('nimblesessionid', None)
                        ts_file_content = SmoothStreamsProxy.download_ts_file('127.0.0.1',
                                                                              segment_url_components.path,
                                                                              channel_number_parameter_value,
                                                                              client_uuid_parameter_value,
                                                                              nimble_session_id_parameter_value)
                        # </editor-fold>
                        logger.debug('Downloaded segment\n'
                                     'Segment => {0}'.format(segment_file_name))
                        downloaded_segment_file_names.append(segment_file_name)
                        self._save_segment_file(segment_file_name, ts_file_content)
                        # Rewrite the segment URI to point at the locally
                        # persisted copy before adding it to the VOD playlist.
                        segment.uri = '{0}?client_uuid={1}&program_title={2}'.format(
                            segment_file_name,
                            client_uuid_parameter_value,
                            urllib.parse.quote(self._recording.base_recording_directory))
                        if segment not in vod_playlist_m3u8_object.segments:
                            vod_playlist_m3u8_object.segments.append(segment)
                    except requests.exceptions.HTTPError:
                        logger.error('Failed to download segment\n'
                                     'Segment => {0}'.format(segment_file_name))
                else:
                    logger.debug('Skipped segment since it was already downloaded\n'
                                 'Segment => {0} '.format(segment_file_name))
                    indices_of_skipped_segments.append(segment_index)
            # FIX: delete in reverse so earlier deletions don't shift the
            # later indices collected above.
            for segment_index_to_delete in reversed(indices_of_skipped_segments):
                del chunks_m3u8_object.segments[segment_index_to_delete]
        except requests.exceptions.HTTPError:
            logger.error('Failed to download chunks.m3u8')
            return
        # Sleep out the remainder of the playlist's wall-clock duration
        # before polling chunks.m3u8 again.
        current_date_time_in_utc = datetime.now(pytz.utc)
        wait_duration = chunks_m3u8_total_duration - (
            current_date_time_in_utc - chunks_m3u8_download_date_time_in_utc).total_seconds()
        if wait_duration > 0:
            self._stop_recording_event.wait(wait_duration)
    if vod_playlist_m3u8_object:
        vod_playlist_m3u8_object.playlist_type = 'VOD'
        self._save_playlist_file('playlist.m3u8', '{0}\n'
                                                  '{1}'.format(vod_playlist_m3u8_object.dumps(),
                                                               '#EXT-X-ENDLIST'))
        self._save_manifest_file(datetime.now(pytz.utc).strftime('%Y-%m-%d %H:%M:%S%z'),
                                 actual_start_date_time_in_utc.strftime('%Y-%m-%d %H:%M:%S%z'),
                                 persisted_recording_id,
                                 'playlist.m3u8',
                                 'Completed')
    SmoothStreamsProxy.delete_active_recording(self._recording)
    logger.info('Finished recording\n'
                'Channel name => {0}\n'
                'Channel number => {1}\n'
                'Program title => {2}\n'
                'Start date & time => {3}\n'
                'End date & time => {4}'.format(self._recording.channel_name,
                                                self._recording.channel_number,
                                                self._recording.program_title,
                                                self._recording.start_date_time_in_utc.astimezone(
                                                    get_localzone()).strftime('%Y-%m-%d %H:%M:%S'),
                                                self._recording.end_date_time_in_utc.astimezone(
                                                    get_localzone()).strftime('%Y-%m-%d %H:%M:%S')))
def _copy_media_playlist(src_playlist, dst_playlist, dst_playlist_base_uri, copy_segments, inject_ads):
    """Continuously mirror a live media playlist to a destination bucket,
    optionally copying segments and splicing cached ads into them.

    Runs forever (while True); the temp ad file is cleaned up in `finally`.
    """
    temp_ad_filepath = None
    try:
        curr_media_seq = None
        dst_media_playlist = None
        while True:
            iter_start_time = time.time()
            # URLError: <urlopen error [Errno 101] Network is unreachable>
            src_media_playlist = _load_media_playlist(
                src_playlist.absolute_uri)
            if dst_media_playlist is None:
                # Seed the destination playlist from a copy of the source,
                # rewinding media_sequence by the segment count so the
                # rolling-window arithmetic below starts from zero.
                dst_media_playlist = m3u8.loads(src_media_playlist.dumps())
                dst_media_playlist.media_sequence -= len(
                    dst_media_playlist.segments)
                # AttributeError: 'NoneType' object has no attribute 'media_sequence' x 2
            src_media_seq = src_media_playlist.media_sequence
            if curr_media_seq is None or curr_media_seq < src_media_seq:
                curr_media_seq = src_media_seq
            # Process only segments not yet seen (offset by sequence delta).
            for src_segm in src_media_playlist.segments[curr_media_seq - src_media_seq:]:
                dst_segm = copy.deepcopy(src_segm)
                dst_segm.base_uri = _base_uri(dst_playlist.absolute_uri)
                # memcache holds an ad file path keyed by segment URI, if
                # an ad was scheduled for this segment.
                ad_filepath = memcache.get(dst_segm.absolute_uri)
                ad_start_time = _START_AD_ZERO_MILLIS_IN
                if ad_filepath is not None and inject_ads:
                    temp_ad_filepath = _create_temp_filepath(suffix='.ts')
                    shutil.copyfile(ad_filepath, temp_ad_filepath)
                    ad_start_time = _START_AD_1500_MILLIS_IN
                if temp_ad_filepath is not None or copy_segments:
                    # Download the segment, optionally splice the ad in,
                    # then upload the result to GCS.
                    with tempfile.NamedTemporaryFile(
                            suffix='.ts') as temp_segm_file:
                        _download_media_segment(src_segm.absolute_uri,
                                                temp_segm_file.name)
                        if temp_ad_filepath is not None:
                            _inject_ad(temp_segm_file.name, temp_ad_filepath,
                                       ad_start_time)
                            # _inject_ad may consume the ad file; reset when gone.
                            if not os.path.isfile(temp_ad_filepath):
                                temp_ad_filepath = None
                        try:
                            dst_segm_rel_uri = (
                                dst_segm.absolute_uri[len(dst_playlist_base_uri
                                                          ):])
                            _upload_media_segment(temp_segm_file.name,
                                                  dst_segm_rel_uri)
                        except gcs.ServerError:
                            logging.error(
                                'Cannot upload media segment to GCS: ' +
                                dst_segm_rel_uri)
                else:
                    # Not copying: point the destination at the source URI.
                    dst_segm.uri = src_segm.absolute_uri
                # Rolling window: drop the oldest segment, append the new one.
                dst_media_playlist.segments.pop(0)
                dst_media_playlist.add_segment(dst_segm)
                dst_media_playlist.media_sequence += 1
                curr_media_seq += 1
            try:
                dst_playlist_rel_uri = (
                    dst_playlist.absolute_uri[len(dst_playlist_base_uri):])
                _upload_media_playlist(dst_media_playlist,
                                       dst_playlist_rel_uri)
            except gcs.ServerError:
                logging.error('Cannot upload media playlist to GCS: ' +
                              dst_playlist_rel_uri)
            # Pace the loop to the playlist's target duration.
            iter_time = time.time() - iter_start_time
            logging.debug('Iteration time is %s seconds' % iter_time)
            if iter_time < src_media_playlist.target_duration:
                sleep_time = src_media_playlist.target_duration - iter_time
                logging.debug('Sleeping for %d seconds...' % sleep_time)
                time.sleep(sleep_time)
    finally:
        if temp_ad_filepath is not None:
            os.remove(temp_ad_filepath)
# NOTE(review): fragment — this `except` closes a try block that starts
# before this chunk; the enclosing function/scope is not visible here.
except IOError as e:
    print e
logger.debug(cookie_dict)
logger.debug(header_dict)
# Get Main Playlist
mpl_res = control.get(playlist_url, cookies=cookie_dict, headers=header_dict)
content = mpl_res.content
# Follow redirects: keep the final URL for resolving relative variant URIs.
playlist_url = mpl_res.url
logger.info("Main Playlist %s ST %d" % (playlist_url, mpl_res.status_code))
# Detect Resolution
variant_m3u8 = m3u8.loads(content)
streams_uri = dict()
for playlist in variant_m3u8.playlists:
    if playlist.stream_info.resolution:
        # Key by vertical resolution when available...
        resolution = int(playlist.stream_info.resolution[1])
        logger.info("Stream at %dp detected!" % resolution)
    else:
        # ...otherwise fall back to bandwidth as the key.
        resolution = int(playlist.stream_info.bandwidth)
        logger.info("Stream with bandwidth %d detected!" % resolution)
    streams_uri[resolution] = urljoin(playlist_url, playlist.uri)  # playlist.uri
auto_highest = True
    def test_get(
        self,
        api_client_no_auth,
        video_factory,
        transcode_job_factory,
        mocker,
        simple_uploaded_file_factory,
        rendition_playlist_file,
    ):
        """GET /api/v1/video/<id>/playlist.m3u8 without auth returns a variant
        playlist with exactly one stream entry per stored rendition (here a
        single 144p/webm rendition).
        """
        video = video_factory(
            video_path=VIDEO_PATH,
            description='description',
            tags=['tag1', 'tag2'],
            visibility='draft',
            title='title',
        )
        file_ = simple_uploaded_file_factory(video_path=VIDEO_PATH)
        # One stored rendition -> one entry under 'playlists' below.
        models.VideoRendition.objects.create(
            video=video,
            file=file_,
            playlist_file=rendition_playlist_file,
            file_size=1000,
            width=256,
            height=144,
            duration=10,
            ext='webm',
            container='webm',
            audio_codec='opus',
            video_codec='vp9',
            name='144p',
            framerate=30,
            metadata=services.get_metadata(VIDEO_PATH),
        )
        response = api_client_no_auth.get(
            f'/api/v1/video/{video.id}/playlist.m3u8')
        assert response.status_code == OK
        resp_text = response.content.decode()
        assert resp_text.startswith('#EXTM3U')
        # Compare the fully parsed playlist structure; the rendition URI is
        # environment-dependent, hence mocker.ANY.
        assert m3u8.loads(resp_text).data == {
            'iframe_playlists': [],
            'is_endlist': False,
            'is_i_frames_only': False,
            'is_independent_segments': False,
            'is_variant': True,
            'keys': [],
            'media': [],
            'media_sequence': None,
            'part_inf': {},
            'playlist_type': None,
            'playlists': [{
                'stream_info': {
                    'bandwidth': 182464,
                    'closed_captions': 'NONE',
                    'program_id': 1,
                    'resolution': '256x144',
                },
                'uri': mocker.ANY,
            }],
            'rendition_reports': [],
            'segments': [],
            'session_data': [],
            'session_keys': [],
            'skip': {},
        }
"""Download every segment of an HLS (m3u8) playlist into a single file.

Usage: script.py <m3u8 url> <target filename>

TLS verification is deliberately disabled (self-signed/lab endpoints).
"""
import m3u8
import requests, ssl, urllib3
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('url', help='m3u8 file URL')
parser.add_argument('filename', help='target file')
args = parser.parse_args()

# Every request below uses verify=False; silence the per-request
# InsecureRequestWarning spam.
urllib3.disable_warnings()

# BUGFIX: the original built an ssl.SSLContext (check_hostname/verify_mode)
# that was never passed to anything — requests does not accept it and
# verify=False already disables verification — so the dead context is removed.

# Base URI of the playlist: everything up to and including the last '/'.
# Relative segment URIs are resolved against it.
base_uri = args.url[:args.url.rfind("/") + 1]

m3u8_response = requests.get(args.url, verify=False)
m3u8_response.raise_for_status()  # BUGFIX: fail early on a bad playlist URL
m3u8_obj = m3u8.loads(m3u8_response.text)

with open(args.filename, 'wb') as f:
    for segment in m3u8_obj.segments:
        print('Segment', segment.uri)
        # Absolute segment URIs (http/https) are used as-is; relative ones
        # are joined onto the playlist's base URI.
        seg_url = segment.uri if segment.uri.startswith('http') else base_uri + segment.uri
        seg_response = requests.get(seg_url, verify=False)
        # BUGFIX: previously an HTTP error page would be silently written
        # into the output file, corrupting it.
        seg_response.raise_for_status()
        f.write(seg_response.content)
def download_video(video_id, quality="worst", workers=20, video_format="mkv", path=None, filename=None):
    """Download a Twitch VOD.

    Resolves *video_id*, picks the playlist matching *quality*, downloads all
    VOD segments concurrently into a temp dir, then joins them into a single
    output file and removes the temp dir.

    Raises:
        ValueError: if *video_id* matches none of VIDEO_PATTERNS.
    """
    # Matching video_id
    match = None
    for pattern in VIDEO_PATTERNS:
        match = re.match(pattern, video_id)
        if match:
            break
    if not match:
        raise ValueError(f"Invalid video: {video_id}")
    video_id = match.group('id')

    # Looking up video
    video = twitch.get_video(video_id)

    # Fetching access token
    access_token = twitch.get_access_token(video_id)

    # Fetching playlists
    playlists_m3u8 = twitch.get_playlists(video_id, access_token)
    playlists = list(_parse_playlists(playlists_m3u8))
    playlist_uri = (_get_playlist_by_name(playlists, quality))

    # Fetching playlist
    response = requests.get(playlist_uri)
    response.raise_for_status()
    playlist = m3u8.loads(response.text)

    # Base URI = playlist URI with its file-name component stripped.
    base_uri = re.sub("/[^/]+$", "/", playlist_uri)
    target_dir = _crete_temp_dir(base_uri)
    vod_paths = _get_vod_paths(playlist)

    # Save playlists for debugging purposes
    with open(os.path.join(target_dir, "playlists.m3u8"), "w") as f:
        f.write(playlists_m3u8)
    with open(os.path.join(target_dir, "playlist.m3u8"), "w") as f:
        f.write(response.text)

    # Downloading VODs to target_dir
    path_map = download_files(base_uri, target_dir, vod_paths, workers)

    # Make a modified playlist which references downloaded VODs
    # Keep only the downloaded segments and skip the rest
    org_segments = playlist.segments.copy()
    playlist.segments.clear()
    for segment in org_segments:
        if segment.uri in path_map:
            segment.uri = path_map[segment.uri]
            playlist.segments.append(segment)

    playlist_path = os.path.join(target_dir, "playlist_downloaded.m3u8")
    playlist.dump(playlist_path)

    # Joining files
    target = _video_target_filename(video, video_format, path=path, filename=filename)
    _join_vods(playlist_path, target)

    # Deleting temporary files
    shutil.rmtree(target_dir)
def parse(self, resolution=None, check_only=False): """ Function to parse gyao url """ if self.verbose: print('[DEBUG] Requesting data to GYAO/Brightcove API') res_list = [ '240p-0', '360p-0', '480p-0', '720p-0', '1080p-0', '240p-1', '360p-1', '480p-1', '720p-1', '1080p-1', 'best', 'worst' ] if resolution not in res_list: if not check_only: return None, 'Resolution {} are non-existant. (Check it with `-R`)'.format(resolution) if resolution == 'best': resolution = '1080p-0' if resolution == 'worst': resolution = '240p-1' v_id = re.findall(r'(?isx)http(?:|s)://gyao.yahoo.co.jp/(?:player|title[\w])/(?P<p1>[\w]*.*)', self.url) if not v_id: return None, 'Video URL are not valid' headers = {'X-User-Agent': 'Unknown Pc GYAO!/2.0.0 Web'} r_vid = self.session.get('https://gyao.yahoo.co.jp/dam/v1/videos/' + v_id[0].replace('/', ':').rstrip(':') + '?fields=title%2Cid%2CvideoId%2CshortTitle', headers=headers).json() title = r_vid['title'] ep_title = r_vid['shortTitle'] output_name = title.replace(ep_title, '').replace('\u3000', ' ') + ' - ' + ep_title headers_pk = { 'Accept': 'application/json;pk=' + self.policy_key, } error_bc = { 'CLIENT_GEO': 'Video are geo-locked to Japanese only.' 
} req_bc = self.session.get('https://edge.api.brightcove.com/playback/v1/accounts/{}/videos/{}'.format(self.account, r_vid['videoId']), headers=headers_pk) if req_bc.status_code == 403: error_reason = req_bc[0]['error_subcode'] return None, error_bc[error_reason] if self.verbose and req_bc.status_code == 200: print('[DEBUG] Data requested') print('[DEBUG] Parsing json API') jsdata = req_bc.json() hls_list = jsdata['sources'][2]['src'] # Use EXT-V4 http version as the base hls_list2 = jsdata['sources'][0]['src'] # Use EXT-V3 http version as the one that will be sended over if self.verbose: print('[DEBUG] M3U8 Link: {}'.format(hls_list)) print('[DEBUG] Title: {}'.format(output_name)) self.m3u8_url_list = hls_list if self.verbose: print('[DEBUG] Requesting m3u8 list') r = self.session.get(hls_list) r2 = self.session.get(hls_list2) if self.verbose and r.status_code == 200: if r.status_code == 200: print('[DEBUG] m3u8 requested') print('[DEBUG] Parsing m3u8') if r.status_code == 403: return None, 'Video are geo-locked to Japanese only.' r_all = m3u8.loads(r.text) r2_all = m3u8.loads(r2.text) band_list_v4 = [] for v4 in r_all.playlists: s_info = v4.stream_info audio_inf = s_info.audio.strip('audio') if resolution[-2:] == audio_inf: band_list_v4.append(s_info.bandwidth) for v3 in r2_all.playlists: bw = v3.stream_info.bandwidth for bwv4 in band_list_v4: if bw == bwv4: self.m3u8_url = v3.uri self.resolution = resolution self.est_filesize = round(bw / 1024 / 5, 2) if not self.m3u8_url: return None, 'Resolution {} are not exist in this video.'.format(self.resolution) return output_name, None
def test_variant_playlist_with_multiple_media():
    """Round-trip: parsing then dumping a multi-media variant playlist
    reproduces the original text exactly."""
    source = playlists.MULTI_MEDIA_PLAYLIST
    parsed = m3u8.loads(source)
    assert parsed.dumps() == source
self.percentageMutex.release() # _______________________ MODIFY HERE ____________________________________ fileName = 'VideoName' threadCount = 5 # 32 can be written if your system is good threadCount -= 1 # 1 for display urlInfo = '' urlCompletion = 'index.m3u8' # ________________________________________________________________________ masterUrl = urlInfo + urlCompletion r_1 = requests.get(masterUrl) m3u8_master = m3u8.loads(r_1.text) # print(m3u8_master.data['segments'][0]['uri']) fileNumber = 0 for segment in m3u8_master.data['segments']: fileNumber += 1 urlParser = UrlParser(threadCount, m3u8_master, urlInfo, fileNumber) displayMutex = threading.Lock() displayMutex.acquire() displayPercentage = DisplayPercentage(threadCount) displayMutex.release() partDownloaderList = [] partDownloaderThreadsList = [] fileNameList = [] for i in range(0, threadCount):
def get_chunks(url):
    """Fetch the playlist at *url* (relative to the module-level `prefix`)
    and return the absolute URL of every segment it lists."""
    playlist_text = se.get("{}/{}".format(prefix, url)).text
    segments = m3u8.loads(playlist_text).segments
    client = url.split('/')[0]
    return ["{}/{}/{}".format(prefix, client, seg.uri) for seg in segments]
def test_loads_should_create_object_from_string():
    """m3u8.loads() on raw playlist text yields a populated M3U8 object."""
    parsed = m3u8.loads(playlists.SIMPLE_PLAYLIST)
    assert isinstance(parsed, m3u8.M3U8)
    assert parsed.target_duration == 5220
    assert parsed.segments[0].uri == 'http://media.example.com/entire.ts'
# NOTE(review): fragment — the `ap = argparse.ArgumentParser(...)` and the
# opening of this add_argument call are above the visible chunk.
    '--destination-file', dest='destination_filename', help='destination file path')
ap.add_argument('m3u8_hls_url', help='source hls url')
args = ap.parse_args()

# Timestamped working name; the .ts intermediate always uses it, the final
# .mp3 only when no destination was given.
temporary_ts_filename = arrow.now().format('YYYYMMDDHHmmss')
if args.destination_filename:
    destination_filename = args.destination_filename
else:
    destination_filename = temporary_ts_filename + '.mp3'
temporary_ts_filename += '.ts'

r = download_object(args.m3u8_hls_url)
if not r:
    sys.exit()
print(args.m3u8_hls_url)

try:
    # Passing uri= lets the library resolve relative segment URIs later
    # (s.absolute_uri below).
    m3u8_object = m3u8.loads(r.text, uri=args.m3u8_hls_url)
except:
    print('ERROR: Invalid M3U8 data!')
    sys.exit()

if m3u8_object.playlist_type == 'event' or m3u8_object.playlist_type == 'vod':
    try:
        f = open(temporary_ts_filename, 'wb')
    except IOError as e:
        print('ERROR: Unable to create file! Code:', e.errno)
        sys.exit()
    for s in m3u8_object.segments:
        print(s.absolute_uri, end=' ')
        # AES-128-encrypted stream: fetch the key before the segment data.
        # NOTE(review): loop body continues beyond the visible chunk.
        if m3u8_object.keys != [None] and s.key.method == 'AES-128':
            r = download_object(s.key.uri)
            if not r:
                f.close()
def get_condensed_nfl_game(game_id):
    """Build and display an XBMC item list of stream URLs for an NFL Game
    Pass 'condensed' game.  Python 2 code (urllib.urlencode, implicit str).

    Returns "" when the publishpoint response cannot be parsed, or the XML
    for a single stream when the manifest is not a variant playlist.
    """
    import xmltodict
    import m3u8
    import urllib
    import xbmc
    streams = {}
    # NOTE(review): placeholder credentials hard-coded in source.
    username = '******'
    password = '******'
    base_url = 'https://gamepass.nfl.com/nflgp'
    servlets_url = base_url + '/servlets'
    simlple_console_url = servlets_url + '/simpleconsole'
    login_url = base_url + '/secure/nfllogin'
    session = requests.Session()
    session.post(login_url, data={
        'username': username,
        'password': password
    })  # login
    simlple_console_data = session.post(simlple_console_url, data={
        'isflex': 'true'
    }).content
    simlple_console_dict = xmltodict.parse(simlple_console_data)
    current_season = simlple_console_dict['result']['currentSeason']
    current_week = simlple_console_dict['result']['currentWeek']
    thumbnail = "http://www.officialpsds.com/images/thumbs/NFL-Logo-psd95853.png"
    fanart = "http://wallpapercave.com/wp/8iHFIg1.png"
    url = servlets_url + '/publishpoint'
    headers = {'User-Agent': 'iPad'}
    post_data = {'id': game_id, 'type': 'game', 'nt': '1', 'gt': 'condensed'}
    m3u8_data = session.post(url, data=post_data, headers=headers).content
    try:
        m3u8_dict = xmltodict.parse(m3u8_data)['result']
    except:
        return ""
    # Strip the iPad-specific path component to get the generic stream URL.
    m3u8_url = m3u8_dict['path'].replace('_ipad', '')
    m3u8_param = m3u8_url.split('?', 1)[-1]
    m3u8_header = {
        'Cookie': 'nlqptid=' + m3u8_param,
        'User-Agent': 'Safari/537.36 Mozilla/5.0 AppleWebKit/537.36 Chrome/31.0.1650.57',
        'Accept-encoding': 'identity, gzip, deflate',
        'Connection': 'keep-alive'
    }
    try:
        m3u8_manifest = session.get(m3u8_url).content
    except:
        m3u8_manifest = False
    if m3u8_manifest:
        m3u8_obj = m3u8.loads(m3u8_manifest)
        if m3u8_obj.is_variant:  # if this m3u8 contains links to other m3u8s
            for playlist in m3u8_obj.playlists:
                bitrate = int(playlist.stream_info.bandwidth) / 1000
                # Stream URL = manifest dir + variant uri + original query
                # string + piped request headers (XBMC convention).
                streams[str(
                    bitrate
                )] = m3u8_url[:m3u8_url.rfind('/') + 1] + playlist.uri + '?' + m3u8_url.split(
                    '?')[1] + '|' + urllib.urlencode(m3u8_header)
        else:
            # NOTE(review): format() receives 3 args (indices 0-2) but the
            # template references {3} — this branch raises IndexError as
            # written; confirm intended placeholders.
            game_xml = "<item>\n" \
                       "\t<title>stream</title>\n" \
                       "\t<link>{1}</link>\n" \
                       "\t<thumbnail>{2}</thumbnail>\n" \
                       "\t<fanart>{3}</fanart>\n" \
                       "</item>\n".format(m3u8_url, thumbnail, fanart)
            return game_xml
    xml = ''
    # Sort numerically (not lexically) by bitrate.
    keys = sorted(streams.keys(), key=lambda key: int(key))
    for key in keys:
        game_xml = "<item>\n" \
                   "\t<title>{0} kbps</title>\n" \
                   "\t<link>{1}</link>\n" \
                   "\t<thumbnail>{2}</thumbnail>\n" \
                   "\t<fanart>{3}</fanart>\n" \
                   "</item>\n".format(key, streams[key], thumbnail, fanart)
        xml += game_xml
    boblist = BobList(xml)
    display_list(boblist.get_list(), boblist.get_content_type())
def test_endswith_newline():
    """A dumped playlist terminates with the ENDLIST tag plus a newline."""
    dumped = m3u8.loads(playlists.SIMPLE_PLAYLIST).dumps()
    assert dumped.endswith('#EXT-X-ENDLIST\n')
# Request headers mimicking a desktop Chrome browser hitting r18.com.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Origin': 'http://www.r18.com',
    'Accept-Encoding': 'gzip, deflate',
    'Accept': '*/*'
}


def decrypt(data, key):
    """AES-CBC-decrypt *data* with *key*.

    NOTE(review): IV=None — PyCryptodome requires an explicit 16-byte IV for
    CBC; presumably this was written against legacy PyCrypto's zero-IV
    default.  Confirm before reuse.
    """
    decryptor = AES.new(key, AES.MODE_CBC, IV=None)
    return decryptor.decrypt(data)


playlist = requests.get(m3u8_url, headers=headers).text
m3u8_obj = m3u8.loads(playlist)
# m3u8_obj = m3u8.load(m3u8_url)
# set ur cookie here
headers['Cookie'] = ''
# The last EXT-X-KEY entry carries the AES-128 key URI.
key = requests.get(m3u8_obj.keys[-1].uri, headers=headers).text
# set the m3u8 base url
base_url = ''
urls = []
for iframe in m3u8_obj.segments:
    # Keep only real media segments (.ts), skipping any other entries.
    if ".ts" in iframe.uri:
        urls.append(base_url + iframe.uri)
def test_version_settable_as_string():
    """Assigning a string to `version` still serializes a valid VERSION tag."""
    playlist = m3u8.loads(playlists.VERSION_PLAYLIST)
    playlist.version = '9'
    dumped = playlist.dumps().strip()
    assert "#EXT-X-VERSION:9" in dumped
# If we haven't forced the download, then we will display an error and quit if not force_download and db_q: print("You have already downloaded this video.") print("If you want to download this file anyway, please use --force or -f") print("Quitting.") exit() # Get the base url of our video base_url = stream_url[0].split(".m3u8")[0].rsplit("/", 1) # Initialise the downloader download = download_util.download() index_m3u8 = download.get_index_m3u8(stream_url[0]) index_m3u8_obj = m3u8.loads(index_m3u8.data.decode('utf-8')) # Get our audio playlist audio_qualities = [] for i in index_m3u8_obj.media: # We want English audio, so any files with eng as its language is added to our list if "eng" in i.language: audio_qualities.append( (int(i.group_id.split('audio-')[1]), base_url[0] + "/" + i.uri)) # Sort the audio quality from high to low audio_qualities.sort(reverse=True) # Choose the playlist we want audio_playlist = download.get_playlist_object(audio_qualities[0][1]) # The kwargs we will pass to the downloader
    def run(self):
        """Record a live HLS channel.

        Polls the chunks playlist in a loop until the stop event is set,
        downloading each new .ts segment (via a shared cache when possible),
        writing it to the recording directory, and persisting a Segment row
        per file.  On normal or stop-triggered termination the recording is
        marked PERSISTED; live-recording state is always cleaned up.
        """
        logger.info(
            'Starting recording\n'
            'Provider => {0}\n'
            'Channel number => {1}\n'
            'Channel name => {2}\n'
            'Program title => {3}\n'
            'Start date & time => {4}\n'
            'End date & time => {5}'.format(
                self._recording.provider, self._recording.channel_number,
                self._recording.channel_name, self._recording.program_title,
                self._recording.start_date_time_in_utc.astimezone(
                    tzlocal.get_localzone()).strftime('%Y-%m-%d %H:%M:%S'),
                self._recording.end_date_time_in_utc.astimezone(
                    tzlocal.get_localzone()).strftime('%Y-%m-%d %H:%M:%S')))
        self._create_recording_directory_tree()
        try:
            hls_client = HLSClient(self._id, self._recording.provider.lower(),
                                   self._recording.channel_number)
            playlist_m3u8_object = m3u8.loads(
                hls_client.download_playlist_m3u8())
            chunks_m3u8_object = None
            try:
                # Variant playlist: follow its first entry to the chunks URL.
                chunks_url = '/live/{0}/{1}'.format(
                    self._recording.provider.lower(),
                    playlist_m3u8_object.data['playlists'][0]['uri'])
            except IndexError:
                # Flat playlist: it already lists the chunks itself.
                chunks_m3u8_object = playlist_m3u8_object
            downloaded_segment_file_names = []
            while not self._stop_recording_event.is_set():
                try:
                    chunks_m3u8_object = m3u8.loads(
                        hls_client.download_chunks_m3u8(chunks_url))
                except NameError:
                    # chunks_url was never bound (flat-playlist path above);
                    # refresh the flat playlist instead when needed.
                    if chunks_m3u8_object is None:
                        chunks_m3u8_object = m3u8.loads(
                            hls_client.download_playlist_m3u8())
                chunks_m3u8_download_date_time_in_utc = datetime.now(pytz.utc)
                chunks_m3u8_total_duration = 0
                for (segment_index,
                     segment) in enumerate(chunks_m3u8_object.segments):
                    segment_url = '/live/{0}'.format(segment.uri)
                    segment_url_components = urllib.parse.urlparse(segment_url)
                    # Keep only the "<name>.ts" basename of the segment path.
                    segment_file_name = re.sub(r'(/.*)?(/)(.*\.ts)', r'\3',
                                               segment_url_components.path)
                    if segment_file_name not in downloaded_segment_file_names:
                        try:
                            # Prefer the shared cache over re-downloading.
                            ts_file_content = CacheManager.query_cache(
                                self._recording.provider.lower(),
                                self._recording.channel_number,
                                segment_file_name.lower())
                            if ts_file_content is None:
                                ts_file_content = hls_client.download_ts_file(
                                    segment_url)
                                CacheManager.update_cache(
                                    self._recording.provider.lower(),
                                    self._recording.channel_number,
                                    segment_file_name.lower(),
                                    ts_file_content)
                                logger.debug(
                                    'Downloaded segment\n'
                                    'Segment => {0}'.format(segment_file_name))
                            # Rewrite the URI so local playback can map the
                            # segment back to this recording.
                            segment.uri = '{0}?recording_id={1}'.format(
                                segment_file_name,
                                urllib.parse.quote(self._recording.id))
                            downloaded_segment_file_names.append(
                                segment_file_name)
                            Utility.write_file(
                                ts_file_content,
                                os.path.join(self._recording_directory_path,
                                             segment_file_name),
                                in_binary=True)
                            with Database.get_write_lock():
                                db_session = Database.create_session()
                                try:
                                    db_session.add(
                                        Segment(
                                            segment_file_name,
                                            self._recording.id,
                                            pickle.dumps(
                                                segment,
                                                protocol=pickle.
                                                HIGHEST_PROTOCOL),
                                            self._recording_directory_path))
                                    db_session.commit()
                                except Exception:
                                    (type_, value_, traceback_) = sys.exc_info()
                                    logger.error('\n'.join(
                                        traceback.format_exception(
                                            type_, value_, traceback_)))
                                    db_session.rollback()
                                finally:
                                    db_session.close()
                        except requests.exceptions.HTTPError:
                            logger.error(
                                'Failed to download segment\n'
                                'Segment => {0}'.format(segment_file_name))
                    else:
                        logger.debug(
                            'Skipped segment since it was already downloaded\n'
                            'Segment => {0} '.format(segment_file_name))
                    chunks_m3u8_total_duration += segment.duration
                current_date_time_in_utc = datetime.now(pytz.utc)
                # Wait out the remainder of the playlist window before the
                # next poll so we do not hammer the provider.
                wait_duration = chunks_m3u8_total_duration - (
                    current_date_time_in_utc -
                    chunks_m3u8_download_date_time_in_utc).total_seconds()
                if wait_duration > 0:
                    self._stop_recording_event.wait(wait_duration)
                chunks_m3u8_object = None
            # NOTE(review): db_session was bound (and closed) inside the
            # segment loop above — if no segment was ever persisted this
            # raises NameError, and the session is closed either way.
            # Confirm whether a fresh session was intended here.
            self._recording.status = RecordingStatus.PERSISTED.value
            db_session.merge(self._recording)
            db_session.commit()
            logger.info(
                'Finished recording\n'
                'Provider => {0}\n'
                'Channel number => {1}\n'
                'Channel name => {2}\n'
                'Program title => {3}\n'
                'Start date & time => {4}\n'
                'End date & time => {5}'.format(
                    self._recording.provider, self._recording.channel_number,
                    self._recording.channel_name,
                    self._recording.program_title,
                    self._recording.start_date_time_in_utc.astimezone(
                        tzlocal.get_localzone()).strftime('%Y-%m-%d %H:%M:%S'),
                    self._recording.end_date_time_in_utc.astimezone(
                        tzlocal.get_localzone()).strftime(
                            '%Y-%m-%d %H:%M:%S')))
        except (HLSPlaylistDownloadError, ProviderNotFoundError):
            # Download failures are expected when recording is stopped; only
            # then is the recording still marked PERSISTED.
            if self._stop_recording_event.is_set():
                self._recording.status = RecordingStatus.PERSISTED.value
                db_session.merge(self._recording)
                db_session.commit()
                logger.info(
                    'Finished recording\n'
                    'Provider => {0}\n'
                    'Channel number => {1}\n'
                    'Channel name => {2}\n'
                    'Program title => {3}\n'
                    'Start date & time => {4}\n'
                    'End date & time => {5}'.format(
                        self._recording.provider,
                        self._recording.channel_number,
                        self._recording.channel_name,
                        self._recording.program_title,
                        self._recording.start_date_time_in_utc.astimezone(
                            tzlocal.get_localzone()).strftime(
                                '%Y-%m-%d %H:%M:%S'),
                        self._recording.end_date_time_in_utc.astimezone(
                            tzlocal.get_localzone()).strftime(
                                '%Y-%m-%d %H:%M:%S')))
            else:
                logger.info(
                    'Canceling recording\n'
                    'Provider => {0}\n'
                    'Channel number => {1}\n'
                    'Channel name => {2}\n'
                    'Program title => {3}\n'
                    'Start date & time => {4}\n'
                    'End date & time => {5}'.format(
                        self._recording.provider,
                        self._recording.channel_number,
                        self._recording.channel_name,
                        self._recording.program_title,
                        self._recording.start_date_time_in_utc.astimezone(
                            tzlocal.get_localzone()).strftime(
                                '%Y-%m-%d %H:%M:%S'),
                        self._recording.end_date_time_in_utc.astimezone(
                            tzlocal.get_localzone()).strftime(
                                '%Y-%m-%d %H:%M:%S')))
        finally:
            # Always tear down live-recording bookkeeping.
            PVR.cleanup_live_recording(self._recording)
def _new_playlist(source_playlist, media_sequence):
    """Clone *source_playlist* with the given media sequence and an empty
    segment list."""
    clone = m3u8.loads(source_playlist.dumps())
    clone.media_sequence = media_sequence
    del clone.segments[:]
    return clone
def _save_if_is_master_playlist(self, res_body: str, req_full_url: str) -> None: playlist = m3u8.loads(res_body) if playlist.is_variant: playlist.base_uri = urljoin(req_full_url, ".") self._current_master_playlist = playlist
    def download_course(self, course_id):
        """Walk the course's chapter list and persist playback metadata
        (per-quality m3u8 playlists and their AES keys) for every published
        video lesson to the database; lessons already recorded (by md5) are
        skipped.  Returns early on permission failure.
        """
        self.logger.info('start download %s ....' % self.course_details[course_id]['course_name'])
        res = self._make_chapter_list(course_id)
        if not res:
            return
        chapter_id = 0
        chapter_name = '无名章'
        for lesson in self.course_details[course_id]['course_data']:
            if lesson['type'] == 'chapter':
                chapter_id = lesson['number']
                chapter_name = lesson['title']
            elif lesson['status'] == 'published':
                title = lesson['title']
                section_id = lesson['number']
                if lesson['type'] == 'video':  # lesson is a video
                    section_md5 = md5(str(course_id) + chapter_name + str(section_id) + title)
                    # Skip if this record already exists in the database.
                    if self.sql_session.query(Course.section_md5).filter(Course.section_md5 == section_md5).count() > 0:
                        self.logger.debug('当前课程信息已存在')
                        continue
                    # Fetch the playback-link information.
                    self.logger.debug('start log %s...' % title)
                    url = urljoin(self.host, 'course/%s/task/%s/activity_show' % (course_id, lesson['taskId']))
                    resp = self.session.get(url=url)
                    if '不能访问教学计划' in resp.text:
                        self.logger.error('无权限访问该课程!')
                        return
                    playlist_url = re.search('data-url="(.*?)"', resp.text).group(1)
                    # Fetch the master playlist.
                    resp = self.session.get(playlist_url)
                    if resp.status_code != self.HTTP_OK:
                        continue
                    playlist_text = resp.text
                    # Parse the master playlist: for each quality level,
                    # fetch the corresponding m3u8 and its AES key(s).
                    m3u8_obj = m3u8.loads(playlist_text)
                    for playlist in m3u8_obj.playlists:
                        uri = playlist.absolute_uri
                        bandwidth = playlist.stream_info.bandwidth
                        # Parse the m3u8 for this quality level.
                        resp = self.session.get(uri)
                        m3u8_full = resp.text
                        # Keep only the data that is actually needed.
                        m3u8_text = filter_header(m3u8_full)
                        m3u8_obj = m3u8.loads(m3u8_full)
                        # Download the decryption keys (deduplicated by
                        # trimmed URI).
                        keys = {}
                        for key in m3u8_obj.keys:
                            u = TRIM_KEY_URI.sub(r'\1', key.absolute_uri)
                            if u not in keys:
                                resp = self.session.get(key.absolute_uri)
                                keys[u] = decode_key(resp.text)
                        # Store the per-quality detail record.
                        course_detail = CourseDetail(section_md5=section_md5,
                                                     type='video',
                                                     width=bandwidth,
                                                     m3u8=m3u8_text,
                                                     m3u8_full=None,
                                                     aes_key=json.dumps(keys),
                                                     has_download=False)
                        self.sql_session.add(course_detail)
                    # Add the course-section record.
                    course = Course(course_id=course_id,
                                    course_name=self.course_details[course_id]['course_name'],
                                    chapter_id=chapter_id,
                                    chapter_name=chapter_name,
                                    section_id=section_id,
                                    section_name=title,
                                    section_md5=section_md5)
                    self.sql_session.add(course)
                    try:
                        self.sql_session.commit()
                    except Exception as e:
                        self.logger.error('提交数据失败:' + str(e))
                        self.sql_session.rollback()
                elif lesson['type'] == 'download':
                    # Check whether the record already exists.
                    section_md5 = md5(str(course_id) + str(chapter_name) + str(section_id) + str(title))
                    if self.sql_session.query(Course.section_md5).filter(Course.section_md5 == section_md5).count() > 0:
                        self.logger.debug('当前课程信息已存在')
                        continue
                    # NOTE(review): the string below is unterminated here —
                    # it continues beyond the visible chunk.
                    """ 暂时不处理该类文件
def download(self): self.__debug('Tweet URL', self.tweet_data['tweet_url']) # Get the bearer token token = self.__get_bearer_token() # Get the M3u8 file - this is where rate limiting has been happening video_host, playlist = self.__get_playlist(token) if playlist.is_variant: print('[+] Multiple resolutions found. Slurping all resolutions.') plist = playlist.playlists[len(playlist.playlists) - 1] resolution = str(plist.stream_info.resolution[0]) + 'x' + str( plist.stream_info.resolution[1]) print('[+] Downloading ' + resolution) playlist_url = video_host + plist.uri ts_m3u8_response = self.requests.get( playlist_url, headers={'Authorization': None}) ts_m3u8_parse = m3u8.loads(ts_m3u8_response.text) ts_list = [] ts_full_file_list = [] for ts_uri in ts_m3u8_parse.segments.uri: # ts_list.append(video_host + ts_uri) ts_file = requests.get(video_host + ts_uri) fname = ts_uri.split('/')[-1] ts_path = Path(self.storage) / Path(fname) ts_list.append(ts_path) ts_path.write_bytes(ts_file.content) ts_full_file = Path(self.storage) / Path(resolution + '.ts') ts_full_file = str(ts_full_file) #ts_full_file_list.append(ts_full_file) # Shamelessly taken from https://stackoverflow.com/questions/13613336/python-concatenate-text-files/27077437#27077437 with open(str(ts_full_file), 'wb') as wfd: for f in ts_list: with open(f, 'rb') as fd: shutil.copyfileobj(fd, wfd, 1024 * 1024 * 10) print('[+] Doing cleanup') for ts in ts_list: p = Path(ts) p.unlink() for ts in ts_full_file_list: p = Path(ts) p.unlink() return ts_full_file else: print( '[-] Sorry, single resolution video download is not yet implemented. Please submit a bug report with the link to the tweet.' ) return ''
def get_playlist(m3u8_url, live, loop, results, status, success, duration, playlists, timeout, cookies, lock, pid):
    # Extract HLS segments from M3U8 file for VOD or Live content
    # Python 2 code (dict.has_key).  For live content, polls the playlist and
    # requests up to `loop` fresh segments; for VOD, walks every listed
    # segment (or every nested playlist) `loop` times.
    playlist = get_playlist_details(m3u8_url, timeout, success)
    base_url = m3u8_url
    if playlist:
        loop_iterator, loop_limit = 1, 1000
        seconds_since_new_file = 0
        no_file_timeout = 120
        # Shared bookkeeping: total segments requested + set of seen URLs.
        segments = {}
        segments['count'] = 0
        segments['played'] = {}
        # For live content
        if live.lower() == 'true':
            # If playlist contains nested playlists, use the first
            if len(playlist.playlists) > 0:
                base_url = construct_url(m3u8_url, playlist.playlists[0].uri)
            while segments['count'] < int(loop):
                # In case no files are found, break loop after 1000 iterations
                loop_iterator += 1
                if loop_iterator >= loop_limit:
                    return
                # If playlists are continually requested with the same list of segments, timeout after no_file_timeout
                if seconds_since_new_file > no_file_timeout:
                    return
                playlist = get_playlist_details(base_url, timeout, success)
                if not playlist:
                    continue
                for idx, file in enumerate(playlist.files):
                    # Break when enough segments (user set) have been requested
                    if segments['count'] >= int(loop):
                        return
                    # Only request segments from [n - 3, n]
                    if idx < (len(playlist.files) - 3):
                        continue
                    segment_url = construct_url(base_url, file)
                    # If segement has not yet been requested (some playlists will overlap TS files if files if requested too fast)
                    if not segments['played'].has_key(segment_url):
                        seconds_since_new_file = 0
                        # Guard the shared counter across workers.
                        lock.acquire()
                        segments['count'] += 1
                        lock.release()
                        segments['played'][segment_url] = True
                        time.sleep(timeout['sleep'])
                        get_segment(segment_url, status, results, duration, timeout, lock)
                # Sleep before getting next playlists (in case there are no new segments, this loops too quickly)
                time.sleep(timeout['sleep'])
                seconds_since_new_file += int(timeout['sleep'])
        else:
            # VOD
            for loop_number in range(0, int(loop)):
                # If playlist contains all TS files directly
                if len(playlist.files) > 0:
                    for idx, file in enumerate(playlist.files):
                        time.sleep(timeout['sleep'])
                        segment_url = construct_url(base_url, file)
                        get_segment(segment_url, status, results, duration, timeout, lock)
                # If playlist contains nested playlists
                else:
                    for sub_playlist in playlist.playlists:
                        sub_playlist_url = construct_url(base_url, sub_playlist.uri)
                        nested_playlist = requests.get(url=sub_playlist_url, verify=False, allow_redirects=True, timeout=(timeout['connect'], timeout['read']))
                        for idx, file in enumerate(m3u8.loads(nested_playlist.text).files):
                            time.sleep(timeout['sleep'])
                            # Resolve against the nested playlist's final URL
                            # (after redirects), not the master's.
                            segment_url = construct_url(nested_playlist.url, file)
                            get_segment(segment_url, status, results, duration, timeout, lock)
# Python 2 script: capture a time window of the VTV3 HD live stream by
# filtering segment names (epoch-millisecond timestamps) against a window.
import time
from urllib import urlopen  # Python 2: urlopen lives in urllib
import m3u8
import os
import shlex
from subprocess import Popen, PIPE
from datetime import date, timedelta

data = urlopen("http://118.69.252.4/tv/vtv3HD/index.m3u8").read()
obj = m3u8.loads(data)

# Yesterday's date, used to build the capture window below.
now = (date.today() - timedelta(1)).strftime('%Y%m%d')
#time is 11:45 AM to 12:00
start_time = int(now + '230000')
#start_time = int(now + '015000')
end_time = int(now + '232500')
#end_time = int(now + '015500')
#file = '/home/hadn/ffmpeg/'
file = '/opt/hadn/'
fileList = []
for val in obj.segments:
    # Segment names are "<epoch-millis>.ts"; convert to YYYYMMDDHHMMSS for
    # comparison against the window bounds.
    uri = int(
        (time.strftime('%Y%m%d%H%M%S',
                       time.gmtime(float(val.uri.split('.ts')[0]) / 1000))))
    if uri > start_time and uri < end_time:
        url = "http://118.69.252.4/tv/vtv3HD/%s" % val.uri
        print url
        #: save all file which mat condition
        file_name = file + val.uri
        #: put all file ts into a list
        fileList.append(file_name)
        # NOTE(review): the with-block's body continues beyond the visible
        # chunk.
        with open(file_name, 'w') as f:
    def download(self):
        """Download the best-bandwidth variant of the tweet's video, join its
        .ts segments, remux to <tweet id>.mp4 via ffmpeg, and clean up the
        intermediate files.  Skips tweets whose .mp4 already exists."""
        resolution_file = Path(self.storage) / Path(self.tweet_data['id'] + '.mp4')
        # abs_path = os.path.abspath(__file__)
        # base_dir = os.path.dirname(os.path.dirname(abs_path))
        # file_path = os.path.join(base_dir, resolution_file)
        if os.path.exists(resolution_file):
            print('已下载:%s' % resolution_file)
            return
        self.__debug('Tweet URL', self.tweet_data['tweet_url'])
        # Get the bearer token
        token = self.__get_bearer_token()
        # Get the M3u8 file - this is where rate limiting has been happening
        video_host, playlist = self.__get_playlist(token)
        if playlist.is_variant:
            print('[+] Multiple resolutions found. Slurping all resolutions.')
            # [-1:] keeps only the last (highest-bandwidth) variant.
            for plist in playlist.playlists[-1:]:
                resolution = str(plist.stream_info.resolution[0]) + 'x' + str(plist.stream_info.resolution[1])
                print('[+] Downloading ' + self.tweet_data['id'])
                playlist_url = video_host + plist.uri
                ts_m3u8_response = self.requests.get(playlist_url, headers={'Authorization': None})
                ts_m3u8_parse = m3u8.loads(ts_m3u8_response.text)
                ts_list = []
                ts_full_file_list = []
                for ts_uri in ts_m3u8_parse.segments.uri:
                    # ts_list.append(video_host + ts_uri)
                    ts_file = requests.get(video_host + ts_uri, proxies=proxies)
                    fname = ts_uri.split('/')[-1]
                    ts_path = Path(self.storage) / Path(fname)
                    ts_list.append(ts_path)
                    ts_path.write_bytes(ts_file.content)
                ts_full_file = Path(self.storage) / Path(resolution + '.ts')
                ts_full_file = str(ts_full_file)
                ts_full_file_list.append(ts_full_file)
                # Shamelessly taken from https://stackoverflow.com/questions/13613336/python-concatenate-text-files/27077437#27077437
                with open(str(ts_full_file), 'wb') as wfd:
                    for f in ts_list:
                        with open(f, 'rb') as fd:
                            shutil.copyfileobj(fd, wfd, 1024 * 1024 * 10)
                # Remux the joined .ts into the final .mp4.
                for ts in ts_full_file_list:
                    print('\t[*] Doing the magic:%s' % self.tweet_data['tweet_url'])
                    ffmpeg \
                        .input(ts) \
                        .output(str(resolution_file), acodec='copy', vcodec='libx264', format='mp4', loglevel='error') \
                        .overwrite_output() \
                        .run()
                print('\t[+] Doing cleanup:%s' % self.tweet_data['tweet_url'])
                print(resolution_file)
                # Remove the per-segment files and the joined .ts.
                for ts in ts_list:
                    p = Path(ts)
                    p.unlink()
                for ts in ts_full_file_list:
                    p = Path(ts)
                    p.unlink()
        else:
            print(
                '[-] Sorry,single resolution video download is not yet implemented. Please submit a bug report with the link to the tweet:%s' % self.tweet_data['tweet_url'])
data = r.json() # Fetch vod index url = _index_api_url.format(video_id) payload = { 'nauth': data['token'], 'nauthsig': data['sig'], 'allow_source': True, 'allow_spectre': False, "player": "twitchweb", "p": int(random() * 999999), "allow_audio_only": True, "type": "any" } r = requests.get(url, params=payload, headers=common_headers) m = m3u8.loads(r.content) index_url = m.playlists[0].uri index = m3u8.load(index_url) # Get the piece we need position = 0 chunks = [] for seg in index.segments: # Add duration of current segment position += seg.duration # Check if we have gotten to the start of the clip if position < int(app.pargs.start): continue
# Python 2 script: capture a time window of the HTV7 HD live stream and
# prepare an ffmpeg concat command to join the matching .ts segments.
import time
from urllib import urlopen  # Python 2: urlopen lives in urllib
import m3u8
import os
import shlex
from subprocess import Popen, PIPE

data = urlopen("http://118.69.252.4/tv/htv7HD/index.m3u8").read()
obj = m3u8.loads(data)

# Today's date, used to build the capture window below.
now = time.strftime('%Y%m%d', time.localtime())
#time is 11:45 AM to 12:00
start_time = int(now + '044800')
end_time = int(now + '050500')
#end_time = int(now + '015500')
#file = '/home/hadn/ffmpeg/'
file = '/opt/hadn/monngon/'
fileList = []
for val in obj.segments:
    # Segment names are "<epoch-millis>.ts"; convert to YYYYMMDDHHMMSS for
    # comparison against the window bounds.
    uri = int((time.strftime('%Y%m%d%H%M%S',
                             time.gmtime(float(val.uri.split('.ts')[0]) / 1000))))
    if uri > start_time and uri < end_time:
        url = "http://118.69.252.4/tv/htv7HD/%s" % val.uri
        print url
        #: save all file which mat condition
        file_name = file + val.uri
        #: put all file ts into a list
        fileList.append(file_name)
        with open(file_name, 'w') as f:
            url = "http://118.69.252.4/tv/htv7HD/%s" % val.uri
            f.write(urlopen(url).read())
# Join all captured segments into a single mp4 (concat demuxer syntax).
cmd = 'ffmpeg -i "concat:%s" -y -c copy -bsf:a aac_adtstoasc /opt/hadn/monngon/mon_ngon_moi_ngay_%s.mp4' % ('|'.join(fileList), now)
def download(video_url):
    """Download every resolution of a Twitter video tweet as an .mp4.

    Given a tweet URL, scrapes the embedded video player for a bearer token,
    asks the video-config API for the master M3U8 playlist, and for each
    variant streams its TS segments straight into ffmpeg's concat filter,
    writing ``./output/<user>/<tweet id>/<WxH>.mp4``.
    """
    video_player_url_prefix = 'https://twitter.com/i/videos/tweet/'
    video_host = ''
    output_dir = './output'

    # Drop any query string, then pull user and tweet id out of the path.
    clean_url = video_url.split('?', 1)[0]
    path_parts = clean_url.split('/')
    tweet_user = path_parts[3]
    tweet_id = path_parts[5]
    tweet_dir = Path('/'.join([output_dir, tweet_user, tweet_id]))
    tweet_dir.mkdir(parents=True, exist_ok=True)

    # Fetch the video player page, then the JS bundle it references --
    # that bundle contains the Bearer token needed for the API.
    player_page = requests.get(video_player_url_prefix + tweet_id)
    page_soup = BeautifulSoup(player_page.text, 'html.parser')
    bundle_url = page_soup.find('script')['src']
    bundle = requests.get(bundle_url)

    # Extract the full "Bearer ..." string from the JS source.
    token_match = re.compile('Bearer ([a-zA-Z0-9%-])+').search(bundle.text)
    bearer_token = token_match.group(0)

    # Ask the config API for the playback (master M3U8) URL.
    config_response = requests.get(
        'https://api.twitter.com/1.1/videos/tweet/config/' + tweet_id + '.json',
        headers={'Authorization': bearer_token})
    playback_url = json.loads(config_response.text)['track']['playbackUrl']

    # Download the master playlist and derive the CDN host from its URL.
    master_response = requests.get(playback_url, headers={'Authorization': bearer_token})
    parsed = urllib.parse.urlparse(playback_url)
    video_host = parsed.scheme + '://' + parsed.hostname
    master = m3u8.loads(master_response.text)

    if master.is_variant:
        print('Multiple resolutions found. Slurping all resolutions.')
        for variant in master.playlists:
            width, height = variant.stream_info.resolution
            resolution = str(width) + 'x' + str(height)
            resolution_file = Path(tweet_dir) / Path(resolution + '.mp4')
            print('[+] Downloading ' + resolution)

            # Fetch this variant's media playlist and expand segment URLs.
            media_response = requests.get(video_host + variant.uri)
            media = m3u8.loads(media_response.text)
            segment_urls = [video_host + seg_uri for seg_uri in media.segments.uri]

            # Hand the segment URLs to ffmpeg and concat them into one mp4.
            streams = [ffmpeg.input(str(seg)) for seg in segment_urls]
            (ffmpeg.concat(*streams)
                .output(str(resolution_file), strict=-2, loglevel='error')
                .overwrite_output()
                .run())
def download(video_url):
    """Download every resolution of a Twitter video tweet as raw TS files.

    Given a tweet URL, scrapes the embedded video player for a bearer token,
    resolves the master M3U8 playlist via the video-config API, then for each
    variant downloads its TS segments into
    ``./output/<user>/<tweet id>/<WxH>/`` and concatenates them into a single
    ``<WxH>.ts`` file in that directory.
    """
    video_player_url_prefix = 'https://twitter.com/i/videos/tweet/'
    video_host = ''
    output_dir = './output'

    # Strip the query string; user and tweet id live at fixed path positions.
    clean_url = video_url.split('?', 1)[0]
    pieces = clean_url.split('/')
    tweet_user = pieces[3]
    tweet_id = pieces[5]
    tweet_dir = Path('/'.join([output_dir, tweet_user, tweet_id]))
    tweet_dir.mkdir(parents=True, exist_ok=True)

    # The player page links a JS bundle that embeds the API Bearer token.
    player_page = requests.get(video_player_url_prefix + tweet_id)
    soup = BeautifulSoup(player_page.text, 'html.parser')
    bundle_response = requests.get(soup.find('script')['src'])
    bearer_token = re.compile('Bearer ([a-zA-Z0-9%-])+').search(bundle_response.text).group(0)

    # Config API returns the playback (master M3U8) URL for this tweet.
    config_response = requests.get(
        'https://api.twitter.com/1.1/videos/tweet/config/' + tweet_id + '.json',
        headers={'Authorization': bearer_token})
    playback_url = json.loads(config_response.text)['track']['playbackUrl']

    master_response = requests.get(playback_url, headers={'Authorization': bearer_token})
    split_url = urllib.parse.urlparse(playback_url)
    video_host = split_url.scheme + '://' + split_url.hostname
    master = m3u8.loads(master_response.text)

    if not master.is_variant:
        return

    print('Multiple resolutions found. Slurping all resolutions.')
    for variant in master.playlists:
        width, height = variant.stream_info.resolution
        resolution = str(width) + 'x' + str(height)
        resolution_dir = Path(tweet_dir) / Path(resolution)
        resolution_dir.mkdir(parents=True, exist_ok=True)

        # Fetch this variant's media playlist, then every TS segment in it.
        media_response = requests.get(video_host + variant.uri)
        media = m3u8.loads(media_response.text)
        downloaded = []
        for seg_uri in media.segments.uri:
            print('[+] Downloading ' + resolution)
            seg_response = requests.get(video_host + seg_uri)
            seg_path = resolution_dir / Path(seg_uri.split('/')[-1])
            downloaded.append(seg_path)
            seg_path.write_bytes(seg_response.content)

        # Concatenate all segments into one .ts for this resolution.
        combined = Path(resolution_dir) / Path(resolution + '.ts')
        # Shamelessly taken from https://stackoverflow.com/questions/13613336/python-concatenate-text-files/27077437#27077437
        with open(str(combined), 'wb') as out_fh:
            for part in downloaded:
                with open(str(part), 'rb') as in_fh:
                    shutil.copyfileobj(in_fh, out_fh, 1024 * 1024 * 10)
driver.switch_to_default_content() data = urllib2.urlopen(m3u8_url).read() m3u8_data = json.loads(data) #取m3u8数组中最后一个数据,一般对应超清的m3u8链接, 这个URL不需要cookie,直接用urllib2读取 url_src = m3u8_data[-1].get('src') #driver.get(url_src) #m3u8_file_data = driver.page_source; m3u8_file_data = urllib2.urlopen(url_src).read() m3u8_file_name='output\\'+ title +'.m3u8' with open(m3u8_file_name, 'w') as file_: file_.write(m3u8_file_data) #解析m3u8文件,获取key的url,请求key必须带上cookie m3u8_obj = m3u8.loads(m3u8_file_data); keyuri = m3u8_obj.segments[0].key.uri; print 'keyuri:'+keyuri; #Key读取有限制,最多不能超过2次,否则后面的数字都是错误的,48个字节 driver.get(keyuri) m3u8_key_data = driver.find_element_by_css_selector('body').text; print 'key len:' +str(len(m3u8_key_data)) + ', key data:' + m3u8_key_data key_file_name='output\\'+title+'.key' with open(key_file_name, 'w') as file_: file_.write(m3u8_key_data) except: print 'failed to fetch '+ title +' ' +url; print traceback.print_exc()
import os tv_channel = [ "https://www.dailymotion.com/embed/video/kxm1wihUkjNiINrAqlg", # NTV7 "https://www.dailymotion.com/embed/video/kdFzSYy1bHxrForBrar", # TV8 ] ch_addr = {} for index, channel in enumerate(tv_channel): html = requests.get(channel) match_obj = re.search( r'\"url\":\"(https:\\/\\/www\.dailymotion\.com\\/cdn.*/(.*)\.m3u8\?sec=.*\"\}\]\},)', html.text) m3u8_link = match_obj.group(1).rstrip("\"}]},") m3u8_link = re.sub(r'\\/', '/', m3u8_link) print(m3u8_link) html = requests.get(m3u8_link) m3u8_obj = m3u8.loads(html.text) print(m3u8_obj.data) for url in m3u8_obj.data['playlists']: if url['stream_info']['name'] == '"720"': # print(url['uri']) if match_obj.group(2) in url['uri']: print(url['uri'].split('#cell')[0]) if index == 0: ch_addr['NTV7'] = url['uri'].split('#cell')[0] else: ch_addr['TV8'] = url['uri'].split('#cell')[0] # os.chdir("helloworld") # os.system("git pull") git_change = False with open("testing_1.m3u", 'w', encoding="utf-8") as output_testing: