def get_link():
    """
    Resolve the download link for the video described by the encoded
    ``url`` query argument.

    The argument is decoded with decode_data and must carry ``id`` and
    ``title``. The direct link comes from get_download_link_youtube
    (format 'm4a/bestaudio'). Unless LOCAL is set, the direct link is
    re-encoded together with id, title and length and replaced by a link
    to the 'download_file' endpoint.

    Returns a JSON response with ``status``, ``requestLocation`` and
    ``url``; on any exception the traceback is logged and the result of
    make_error_response is returned instead.
    """
    try:
        payload = decode_data(get_key(), request.args.get('url'))
        video_id, video_title = payload['id'], payload['title']

        link = get_download_link_youtube(video_id, 'm4a/bestaudio')
        if not LOCAL:
            # Hide the direct link behind our own download endpoint
            token = encode_data(
                get_key(),
                id=video_id,
                title=video_title,
                url=link,
                length=payload['length'],
            )
            link = url_for('download_file', url=token)

        return jsonify({
            'status': 200,
            'requestLocation': '/api/v1/g',
            'url': link,
        })
    except Exception as e:
        logger.info(traceback.format_exc())
        return make_error_response(msg=str(e), endpoint='/api/v1/g')
def suggest_songs():
    """
    Return suggestions for the video described by the encoded ``url``
    query argument.

    The argument is decoded with decode_data and its ``id`` is handed to
    get_suggestions. The JSON response carries the results plus their
    count; on any exception the traceback is logged and the result of
    make_error_response is returned instead.
    """
    try:
        payload = decode_data(get_key(), request.args.get('url'))
        suggestions = get_suggestions(payload["id"])
        body = {
            "status": 200,
            "request_location": "/api/v1/suggest",
            "metadate": {
                "count": len(suggestions)
            },
            "results": suggestions
        }
        return jsonify(body)
    except Exception as e:
        logger.info(traceback.format_exc())
        return make_error_response(msg=str(e), endpoint='/api/v1/suggest')
def stream():
    """
    Build a streaming link for the video described by the encoded ``url``
    query argument.

    The decoded data must carry ``id`` and may carry ``quality``
    ('lo', 'md' or 'hi', default 'md'). The quality picks a format
    selector which is passed, with the id, to
    get_or_create_video_download_link. The resulting link is re-encoded
    and returned as a link to the 'stream_handler' endpoint.

    Any exception while resolving the link (including an unknown quality)
    is logged and answered with make_error_response.
    """
    encoded = request.args.get('url')

    # Format selectors per quality level
    formats_by_quality = {
        'lo': 'webm[abr<=64]/webm[abr<=80]/m4a[abr<=64]/[abr<=96]/m4a',
        'md': 'webm[abr>=64][abr<=96]/[abr>=64][abr<=96]/webm[abr>=96][abr<=128]/webm/m4a',
        'hi': 'webm/m4a'
    }

    try:
        payload = decode_data(get_key(), encoded)
        video_id = payload['id']
        selector = formats_by_quality[payload.get('quality', 'md')]
        direct_url = get_or_create_video_download_link(
            video_id,
            selector,
            get_download_link_youtube
        )
    except Exception as e:
        logger.info(traceback.format_exc())
        return make_error_response(msg=str(e), endpoint='api/v1/stream')

    token = encode_data(get_key(), url=direct_url)
    return jsonify(status=200, url=url_for('stream_handler', url=token))
def generate_data():
    """
    Generator that fetches ``url`` with a streaming request and yields
    its body chunk by chunk (chunk_size=2048).

    NOTE(review): ``url`` is not defined in this function; it is expected
    to come from the enclosing scope - confirm.
    """
    upstream = requests.get(url, stream=True)

    content_type = upstream.headers.get('content-type')
    content_length = upstream.headers.get('content-length')
    logger.info('Streaming.. %s (%s bytes)' % (content_type, content_length))

    chunks = upstream.iter_content(chunk_size=2048)
    for chunk in chunks:
        yield chunk
def get_download_link_youtube(vid_id, frmat):
    """
    Get the download link of a youtube video.

    Runs ``youtube-dl <watch url> -f <frmat> -g`` and returns what it
    prints.

    vid_id: id of the video, appended to the youtube watch url
    frmat: format selector passed to youtube-dl's ``-f`` option

    Returns the command output, stripped and decoded as utf-8.
    Errors raised by check_output are not handled here.
    """
    # Build the argument list directly instead of formatting a string and
    # splitting it on whitespace: a space inside vid_id or frmat would
    # otherwise turn into extra youtube-dl arguments.
    argv = [
        'youtube-dl',
        'https://www.youtube.com/watch?v=%s' % vid_id,
        '-f',
        '%s' % frmat,
        '-g',
    ]
    logger.info(' '.join(argv))
    retval = check_output(argv)
    return retval.strip().decode('utf-8')
def download_file():
    """
    Download the file to the server and send it back as an attachment.

    The audio is first downloaded on the server using curl into
    ``static/<id>.m4a`` and, when mp3 is requested, converted using
    ffmpeg.

    Query arguments:
        url: encoded data (see decode_data) carrying id, url, title
            and length
        format: 'mp3' to convert; anything else sends the m4a
            (default 'm4a')
        bitrate: mp3 bitrate in kbit/s; a non-integer value or a value
            below 64 falls back to 128
        cover: 'true' to call add_cover on the downloaded file

    Returns the file as a response, or ('Bad things have happened', 500)
    when anything fails.
    """
    try:
        url = request.args.get('url')
        download_format = request.args.get('format', 'm4a')
        try:
            abr = int(request.args.get('bitrate', '128'))
        except ValueError:
            abr = 128
        if abr < 64:
            # Bitrates below 64 are not accepted; use the default instead
            abr = 128
        want_cover = request.args.get('cover', 'false').lower() == 'true'

        # decode info from url
        info = decode_data(get_key(), url)
        vid_id = info['id']
        source_url = info['url']
        filename = get_filename_from_title(info['title'], ext='')
        m4a_path = 'static/%s.m4a' % vid_id
        mp3_path = 'static/%s.mp3' % vid_id
        # ^^ vid_id regex is filename friendly [a-zA-Z0-9_-]{11}
        # NOTE(review): nothing here enforces that regex - confirm that
        # decode_data only yields trusted ids, since the paths are also
        # interpolated into a shell command below.

        try:
            # download and convert
            check_output(['curl', '-o', m4a_path, source_url])
            if want_cover:
                add_cover(m4a_path, vid_id)

            if download_format == 'mp3':
                if extends_length(info['length'], 20 * 60):  # sound more than 20 mins
                    raise Exception('mp3 conversion is limited to 20 minutes of audio')
                command = get_ffmpeg_path()
                command += ' -i %s -acodec libmp3lame -ab %sk %s -y' % (m4a_path, abr, mp3_path)
                call(command, shell=True)  # shell=True only works, return ret_code
                audio_path = mp3_path
                content_type = 'audio/mpeg'  # or audio/mpeg3'
                filename += '.mp3'
            else:
                audio_path = m4a_path
                content_type = 'audio/mp4'
                filename += '.m4a'

            # Audio is binary: read it as bytes so it is never decoded as
            # text and Content-Length below is a real byte count.
            with open(audio_path, 'rb') as f:
                payload = f.read()
        finally:
            # remove files, also when download or conversion failed
            delete_file(m4a_path)
            delete_file(mp3_path)

        response = make_response(payload)
        # set headers
        # http://stackoverflow.com/questions/93551/how-to-encode-the-filename-
        response.headers['Content-Disposition'] = 'attachment; filename="%s"' % filename
        response.headers['Content-Type'] = content_type
        response.headers['Content-Length'] = str(len(payload))
        return response
    except Exception as e:
        logger.info('Error %s' % str(e))
        logger.info(traceback.format_exc())
        return 'Bad things have happened', 500
def get_or_create_video_download_link(video_id, format, callback):
    """
    Return the download link for a video/format pair, using redis as a
    cache.

    video_id, format: identify the link; both are part of the cache key
    callback: called as ``callback(video_id, format)`` on a cache miss to
        produce the link

    A cached value is returned decoded as utf-8. A freshly produced link
    is stored with a 6 hour expiry and returned as the callback gave it.
    """
    cache_key = 'video:download:%s:%s' % (video_id, format)
    cached = redis_client.get(cache_key)

    if cached:
        logger.info('[Redis] cache hit for %s' % cache_key)
        return cached.decode('utf-8')

    logger.info('[Redis] cache miss for %s' % cache_key)
    fresh_link = callback(video_id, format)
    # Expires in 6 hours
    redis_client.set(cache_key, fresh_link, ex=60 * 60 * 6)
    return fresh_link
def download_file():
    """
    Stream the m4a file from its source url to the client as an
    attachment, without storing it on the server.

    The ``url`` query argument is decoded with decode_data and must carry
    ``url`` and ``title``. When the request has a ``Range`` header of the
    form ``bytes=<from>-<until>``, that range is requested from the
    source and answered with status 206; an open ended range is limited
    to 1MB past ``<from>``.

    Returns the streaming response, or ('Bad things have happened', 500)
    when anything fails (including a Range header that cannot be parsed).
    """
    try:
        url = request.args.get('url')

        # decode info from url
        data = decode_data(get_key(), url)
        url = data['url']
        # The extension is added up front so partial responses advertise
        # the same file name as full ones.
        filename = get_filename_from_title(data['title'], ext='') + '.m4a'
        # http://stackoverflow.com/questions/93551/how-to-encode-the-filename-
        disposition = 'attachment; filename="%s"' % filename

        # Handle partial request
        range_header = request.headers.get('Range', None)
        if range_header:
            from_bytes, until_bytes = range_header.replace('bytes=', '').split('-')
            if not until_bytes:
                until_bytes = int(from_bytes) + int(
                    1024 * 1024 * 1)  # 1MB * 1 = 1MB
            headers = {'Range': 'bytes=%s-%s' % (from_bytes, until_bytes)}
            resp = requests.get(url, headers=headers, stream=True)
            rv = Response(generate_data(resp), 206, mimetype='audio/mp4',
                          direct_passthrough=True)
            rv.headers.add('Content-Disposition', disposition)
            # Only forward what the source actually sent; a missing header
            # must not become a literal "None" or abort the download.
            for header_name in ('Content-Range', 'Content-Length'):
                if header_name in resp.headers:
                    rv.headers.add(header_name, resp.headers[header_name])
            return rv

        resp = requests.get(url, stream=True)
        response = Response(generate_data(resp), mimetype='audio/mp4')
        # set headers
        response.headers.add('Content-Disposition', disposition)
        if 'Content-Length' in resp.headers:
            response.headers.add('Content-Length', resp.headers['Content-Length'])
        return response
    except Exception as e:
        logger.info('Error %s' % str(e))
        logger.info(traceback.format_exc())
        return 'Bad things have happened', 500
def _worker(self, pl):
    """
    Crawl a single playlist and replace its stored trending songs.

    pl: sequence whose first item is the playlist name and whose second
        item is the playlist url
    """
    playlist_name = pl[0]
    logger.info('Crawling playlist "%s"' % playlist_name)

    playlist_url = pl[1]
    page = open_page(
        url=playlist_url,
        sleep_upper_limit=self.connection_delay,
    )
    songs = get_trending_videos(page)

    # Drop the old entries before saving the fresh ones
    clear_trending(playlist_name)
    save_trending_songs(playlist_name, songs)

    logger.info('Saved playlist "%s"' % playlist_name)
def run(self):
    """
    Run the trending crawler.

    Every entry of ``self.playlist`` (name first, url second) is fetched,
    parsed with get_trending_videos and stored in place of the songs
    previously saved for that playlist.
    """
    for entry in self.playlist:
        playlist_name = entry[0]
        logger.info('Crawling playlist "%s"' % playlist_name)

        playlist_url = entry[1]
        page = open_page(
            url=playlist_url,
            sleep_upper_limit=self.connection_delay,
        )
        songs = get_trending_videos(page)

        # Drop the old entries before saving the fresh ones
        clear_trending(playlist_name)
        save_trending_songs(playlist_name, songs)
def get_trending_videos(html):
    """
    Get trending youtube videos from html.

    Rows are extracted from ``html`` with a regex, limited to the first
    PLAYLIST_VIDEOS_LIMIT matches (environment variable, default 100).
    For each row the video's watch page is opened to read its views and
    description.

    Returns a list of dicts with the keys id, thumb, title, uploader,
    length, views, get_url and description. A video that raises while
    being processed is logged and left out.
    """
    regex = '<tr.*?data-video-id="(.*?)".*?src="(.*?)".*?<a cl.*?>(.*?)</a>.*?by.*?>(.*?)</a>.*?<span .*?>(.*?)</'

    matches = re.findall(regex, html, re.DOTALL)
    limit = int(environ.get('PLAYLIST_VIDEOS_LIMIT', 100))

    vids = []
    # Groups: video id, image src (unused), title, uploader, length
    for video_id, _image_src, raw_title, raw_uploader, length in matches[:limit]:
        try:
            watch_page = open_page('https://www.youtube.com/watch?v=' + video_id)
            title = raw_title.strip()
            entry = {
                'id': video_id,
                'thumb': 'https://img.youtube.com/vi/{0}/0.jpg'.format(video_id),
                'title': html_unescape(title.decode('utf-8')),
                'uploader': raw_uploader.decode('utf8'),
                'length': length,
                'views': get_views(watch_page),
                'get_url': encode_data(get_key(), id=video_id,
                                       title=title, length=length),
                'description': html_unescape(get_description(watch_page))
            }
            vids.append(entry)
        except Exception as e:
            logger.info(
                'Getting trending video failed. Message: %s, Video: %s' % (str(e), video_id))

    return vids
def search():
    """
    Search youtube and return results.

    The search term is read from the ``q`` query argument. Every video
    for which get_video_attrs returns something gets its ``get_url`` and
    ``stream_url`` prefixed with '/api/v1' and a ``suggest_url`` derived
    from the prefixed ``get_url``.

    Returns the JSON form of make_search_api_response; on any exception
    the traceback is logged and the result of make_error_response is
    returned instead.
    """
    try:
        search_term = request.args.get('q')
        raw_html = get_search_results_html(search_term)

        ret_vids = []
        for raw_video in get_videos(raw_html):
            attrs = get_video_attrs(raw_video)
            if not attrs:
                continue
            get_url = '/api/v1' + attrs['get_url']
            attrs['get_url'] = get_url
            attrs['stream_url'] = '/api/v1' + attrs['stream_url']
            # Same query string as get_url, pointed at the suggest endpoint
            attrs['suggest_url'] = get_url.replace('/g?', '/suggest?', 1)
            ret_vids.append(attrs)

        ret_dict = make_search_api_response(search_term, ret_vids, '/api/v1/search')
    except Exception as e:
        logger.info(traceback.format_exc())
        return make_error_response(msg=str(e), endpoint='/api/v1/search')

    return jsonify(ret_dict)
def find_stream(streams, prefs):
    """
    Find a stream by priority and return its url.

    streams = streams in descending order of bitrate; each has
        ``extension``, ``bitrate`` (like '128k') and ``url``
    prefs = [[format, bitrate]] tried in order; a stream fits when its
        extension equals format and its bitrate is less than or equal to
        bitrate. The format '*' is the fallback and takes the first
        stream.

    When no preference fits, the ``.url`` lookup on the empty placeholder
    raises AttributeError.
    """
    selected = ''
    for pref in prefs:
        wanted_extension = pref[0]

        # fallback case
        if wanted_extension == '*':
            selected = streams[0]
            break

        # general preferences: first stream that fits, else keep what we had
        fitting = (
            candidate for candidate in streams
            if candidate.extension == wanted_extension
            and int(candidate.bitrate.replace('k', '')) <= pref[1]
        )
        selected = next(fitting, selected)
        if selected:
            break

    logger.info(selected)
    return selected.url
def run(self):
    """
    Upgrade youtube-dl with pip.

    The command is logged together with ``self.name`` and then run
    through the shell; its return code is not checked.
    """
    upgrade_command = 'pip install --upgrade youtube-dl'
    message = '{0} Running : {1}'.format(self.name, upgrade_command)
    logger.info(message)
    subprocess.call(upgrade_command, shell=True)