Example #1
def get_subscriptions_page():
    page = int(request.args.get('page', 1))
    with open_database() as connection:
        with connection as cursor:
            tag = request.args.get('tag', None)
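            # 60 videos per page; the database offset is derived from the
            # requested page number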
            videos, number_of_videos_in_db = _get_videos(cursor, 60, (page - 1)*60, tag)
            for video in videos:
                video['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + video['id'] + '.jpg'
                video['type'] = 'video'
                video['item_size'] = 'small'
                util.add_extra_html_info(video)

            tags = _get_all_tags(cursor)


            subscription_list = []
            for channel_name, channel_id, muted in _get_subscribed_channels(cursor):
                subscription_list.append({
                    'channel_url': util.URL_ORIGIN + '/channel/' + channel_id,
                    'channel_name': channel_name,
                    'channel_id': channel_id,
                    'muted': muted,
                })

    return flask.render_template('subscriptions.html',
        header_playlist_names=local_playlist.get_playlist_names(),
        videos=videos,
        num_pages=math.ceil(number_of_videos_in_db / 60),
        parameters_dictionary=request.args,
        tags=tags,
        current_tag=tag,
        subscription_list=subscription_list,
    )
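Example #2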
def get_local_playlist_videos(name, offset=0, amount=50):
    try:
        thumbnails = set(os.listdir(os.path.join(thumbnails_directory, name)))
    except FileNotFoundError:
        thumbnails = set()
    missing_thumbnails = []

    videos = []
    with open(os.path.join(playlists_directory, name + ".txt"),
              'r',
              encoding='utf-8') as file:
        data = file.read()
    videos_json = data.splitlines()
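    # Each line of the playlist file is expected to be one JSON object
    # describing a video; blank lines are skipped and corrupt lines reported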
    for video_json in videos_json:
        try:
            info = json.loads(video_json)
            if info['id'] + ".jpg" in thumbnails:
                info['thumbnail'] = ("/youtube.com/data/playlist_thumbnails/"
                                     + name + "/" + info['id'] + ".jpg")
            else:
                info['thumbnail'] = util.get_thumbnail_url(info['id'])
                missing_thumbnails.append(info['id'])
            info['type'] = 'video'
            util.add_extra_html_info(info)
            videos.append(info)
        except json.decoder.JSONDecodeError:
            if video_json.strip():
                print('Corrupt playlist video entry: ' + video_json)
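    # Fetch any thumbnails not found locally in the background so the page
    # isn't blocked on the downloads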
    gevent.spawn(util.download_thumbnails,
                 os.path.join(thumbnails_directory, name), missing_thumbnails)
    return videos[offset:offset + amount], len(videos)
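Example #3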
def add_extra_info_to_videos(videos, playlist_name):
    '''Adds extra information necessary for rendering the video item HTML

    Downloads missing thumbnails'''
    try:
        thumbnails = set(
            os.listdir(os.path.join(thumbnails_directory, playlist_name)))
    except FileNotFoundError:
        thumbnails = set()
    missing_thumbnails = []

    for video in videos:
        video['type'] = 'video'
        util.add_extra_html_info(video)
        if video['id'] + '.jpg' in thumbnails:
            video['thumbnail'] = (
                '/https://youtube.com/data/playlist_thumbnails/' +
                playlist_name + '/' + video['id'] + '.jpg')
        else:
            video['thumbnail'] = util.get_thumbnail_url(video['id'])
            missing_thumbnails.append(video['id'])

    gevent.spawn(util.download_thumbnails,
                 os.path.join(thumbnails_directory, playlist_name),
                 missing_thumbnails)
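
# Usage sketch for the example above (hypothetical playlist name and video
# data; assumes each dict has an 'id' plus whatever other keys
# util.add_extra_html_info expects):
#
#     videos = [{'id': 'dQw4w9WgXcQ', 'title': 'Some video'}]
#     add_extra_info_to_videos(videos, 'my_playlist')
#     # each dict now also has 'type' and 'thumbnail' set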
Example #4
def post_process_channel_info(info):
    info['avatar'] = util.prefix_url(info['avatar'])
    info['channel_url'] = util.prefix_url(info['channel_url'])
    for item in info['items']:
        util.prefix_urls(item)
        util.add_extra_html_info(item)
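    # On the about tab, links matching YOUTUBE_URL_RE get prefixed too
    # (presumably so YouTube links are routed back through this frontend)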
    if info['current_tab'] == 'about':
        for i, (text, url) in enumerate(info['links']):
            if util.YOUTUBE_URL_RE.fullmatch(url):
                info['links'][i] = (text, util.prefix_url(url))
Example #5
def get_playlist_page():
    if 'list' not in request.args:
        abort(400)

    playlist_id = request.args.get('list')
    page = request.args.get('page', '1')

    if page == '1':
        first_page_json = playlist_first_page(playlist_id)
        this_page_json = first_page_json
    else:
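        # For later pages, fetch the playlist metadata (first page) and the
        # requested page of videos concurrently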
        tasks = (gevent.spawn(playlist_first_page,
                              playlist_id,
                              report_text="Retrieved playlist info"),
                 gevent.spawn(get_videos, playlist_id, page))
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        first_page_json, this_page_json = tasks[0].value, tasks[1].value

    info = yt_data_extract.extract_playlist_info(this_page_json)
    if info['error']:
        return flask.render_template('error.html', error_message=info['error'])

    if page != '1':
        info['metadata'] = yt_data_extract.extract_playlist_metadata(
            first_page_json)

    util.prefix_urls(info['metadata'])
    for item in info.get('items', ()):
        util.prefix_urls(item)
        util.add_extra_html_info(item)
        if 'id' in item:
            item['thumbnail'] = (settings.img_prefix
                                 + 'https://i.ytimg.com/vi/' + item['id']
                                 + '/default.jpg')

        item['url'] += '&list=' + playlist_id
        if item['index']:
            item['url'] += '&index=' + str(item['index'])

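    # When the metadata has no video count, fall back to 40 (two pages at
    # 20 videos per page)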
    video_count = yt_data_extract.deep_get(info, 'metadata', 'video_count')
    if video_count is None:
        video_count = 40

    return flask.render_template(
        'playlist.html',
        header_playlist_names=local_playlist.get_playlist_names(),
        video_list=info.get('items', []),
        num_pages=math.ceil(video_count / 20),
        parameters_dictionary=request.args,
        **info['metadata']).encode('utf-8')
Example #6
def get_search_page():
    if len(request.args) == 0:
        return flask.render_template('base.html', title="Search")

    if 'query' not in request.args:
        abort(400)

    query = request.args.get("query")
    page = request.args.get("page", "1")
    autocorrect = int(request.args.get("autocorrect", "1"))
    sort = int(request.args.get("sort", "0"))
    filters = {}
    filters['time'] = int(request.args.get("time", "0"))
    filters['type'] = int(request.args.get("type", "0"))
    filters['duration'] = int(request.args.get("duration", "0"))
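    # sort, time, type and duration are numeric codes passed through to
    # get_search_json, which presumably encodes them into YouTube's search
    # filter parameters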
    polymer_json = get_search_json(query, page, autocorrect, sort, filters)

    search_info = yt_data_extract.extract_search_info(polymer_json)
    if search_info['error']:
        return flask.render_template('error.html',
                                     error_message=search_info['error'])

    for item in search_info['items']:
        util.prefix_urls(item)
        util.add_extra_html_info(item)

    corrections = search_info['corrections']
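    # 'did_you_mean' offers a link to the corrected query, while
    # 'showing_results_for' offers a link back to the original query with
    # autocorrect turned off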
    if corrections['type'] == 'did_you_mean':
        corrected_query_string = request.args.to_dict(flat=False)
        corrected_query_string['query'] = [corrections['corrected_query']]
        corrections['corrected_query_url'] = (
            util.URL_ORIGIN + '/search?'
            + urllib.parse.urlencode(corrected_query_string, doseq=True))
    elif corrections['type'] == 'showing_results_for':
        no_autocorrect_query_string = request.args.to_dict(flat=False)
        no_autocorrect_query_string['autocorrect'] = ['0']
        no_autocorrect_query_url = (
            util.URL_ORIGIN + '/search?'
            + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True))
        corrections['original_query_url'] = no_autocorrect_query_url

    return flask.render_template(
        'search.html',
        header_playlist_names=local_playlist.get_playlist_names(),
        query=query,
        estimated_results=search_info['estimated_results'],
        estimated_pages=search_info['estimated_pages'],
        corrections=search_info['corrections'],
        results=search_info['items'],
        parameters_dictionary=request.args,
    )
Example #7
def post_process_channel_info(info):
    info['avatar'] = util.prefix_url(info['avatar'])
    info['channel_url'] = util.prefix_url(info['channel_url'])
    for item in info['items']:
        util.prefix_urls(item)
        util.add_extra_html_info(item)
Example #8
def get_watch_page(video_id=None):
    video_id = request.args.get('v') or video_id
    if not video_id:
        return flask.render_template('error.html',
                                     error_message='Missing video id'), 404
    if len(video_id) < 11:
        return flask.render_template(
            'error.html',
            error_message='Incomplete video id (too short): ' + video_id), 404

    lc = request.args.get('lc', '')
    playlist_id = request.args.get('list')
    index = request.args.get('index')
    use_invidious = bool(int(request.args.get('use_invidious', '1')))
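    # Fetch the comments and the video info concurrently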
    tasks = (gevent.spawn(comments.video_comments,
                          video_id,
                          int(settings.default_comment_sorting),
                          lc=lc),
             gevent.spawn(extract_info,
                          video_id,
                          use_invidious,
                          playlist_id=playlist_id,
                          index=index))
    gevent.joinall(tasks)
    util.check_gevent_exceptions(tasks[1])
    comments_info, info = tasks[0].value, tasks[1].value

    if info['error']:
        return flask.render_template('error.html', error_message=info['error'])

    video_info = {
        "duration": util.seconds_to_timestamp(info["duration"] or 0),
        "id": info['id'],
        "title": info['title'],
        "author": info['author'],
    }

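    # prefix urls, and other post-processing not handled by yt_data_extract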
    for item in info['related_videos']:
        util.prefix_urls(item)
        util.add_extra_html_info(item)

    if info['playlist']:
        playlist_id = info['playlist']['id']
        for item in info['playlist']['items']:
            util.prefix_urls(item)
            util.add_extra_html_info(item)
            if playlist_id:
                item['url'] += '&list=' + playlist_id
            if item['index']:
                item['url'] += '&index=' + str(item['index'])
        info['playlist']['author_url'] = util.prefix_url(
            info['playlist']['author_url'])

    if settings.gather_googlevideo_domains:
        with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'),
                  'a+',
                  encoding='utf-8') as f:
            url = info['formats'][0]['url']
            subdomain = url[0:url.find(".googlevideo.com")]
            f.write(subdomain + "\n")

    download_formats = []

    for fmt in (info['formats'] + info['hls_formats']):
        if fmt['acodec'] and fmt['vcodec']:
            codecs_string = fmt['acodec'] + ', ' + fmt['vcodec']
        else:
            codecs_string = fmt['acodec'] or fmt['vcodec'] or '?'
        download_formats.append({
            'url': fmt['url'],
            'ext': fmt['ext'] or '?',
            'audio_quality': audio_quality_string(fmt),
            'video_quality': video_quality_string(fmt),
            'file_size': format_bytes(fmt['file_size']),
            'codecs': codecs_string,
        })

    video_sources = get_video_sources(info)
    video_height = yt_data_extract.deep_get(video_sources,
                                            0,
                                            'height',
                                            default=360)
    video_width = yt_data_extract.deep_get(video_sources,
                                           0,
                                           'width',
                                           default=640)
    # 1 second per pixel, or the actual video width
    theater_video_target_width = max(640, info['duration'] or 0, video_width)

    # Check for false determination of disabled comments, which comes from
    # the watch page. But if we got comments in the separate request for those,
    # then the determination is wrong.
    if info['comments_disabled'] and len(comments_info['comments']) != 0:
        info['comments_disabled'] = False
        print('Warning: False determination that comments are disabled')
        print('Comment count:', info['comment_count'])
        info['comment_count'] = None  # hack to make it obvious there's a bug

    return flask.render_template(
        'watch.html',
        header_playlist_names=local_playlist.get_playlist_names(),
        uploader_channel_url=('/' + info['author_url'])
        if info['author_url'] else '',
        time_published=info['time_published'],
        view_count=('{:,}'.format(info['view_count'])
                    if info.get('view_count') is not None else ''),
        like_count=('{:,}'.format(info['like_count'])
                    if info.get('like_count') is not None else ''),
        dislike_count=('{:,}'.format(info['dislike_count'])
                       if info.get('dislike_count') is not None else ''),
        download_formats=download_formats,
        video_info=json.dumps(video_info),
        video_sources=video_sources,
        hls_formats=info['hls_formats'],
        subtitle_sources=get_subtitle_sources(info),
        related=info['related_videos'],
        playlist=info['playlist'],
        music_list=info['music_list'],
        music_attributes=get_ordered_music_list_attributes(info['music_list']),
        comments_info=comments_info,
        comment_count=info['comment_count'],
        comments_disabled=info['comments_disabled'],
        theater_mode=settings.theater_mode,
        related_videos_mode=settings.related_videos_mode,
        comments_mode=settings.comments_mode,
        video_height=video_height,
        theater_video_target_width=theater_video_target_width,
        title=info['title'],
        uploader=info['author'],
        description=info['description'],
        unlisted=info['unlisted'],
        limited_state=info['limited_state'],
        age_restricted=info['age_restricted'],
        live=info['live'],
        playability_error=info['playability_error'],
        allowed_countries=info['allowed_countries'],
        ip_address=info['ip_address'] if settings.route_tor else None,
        invidious_used=info['invidious_used'],
        invidious_reload_button=info['invidious_reload_button'],
        video_url=util.URL_ORIGIN + '/watch?v=' + video_id,
    )
Example #9
def get_watch_page(video_id=None):
    video_id = request.args.get('v') or video_id
    if not video_id:
        return flask.render_template('error.html',
                                     error_message='Missing video id'), 404
    if len(video_id) < 11:
        return flask.render_template(
            'error.html',
            error_message='Incomplete video id (too short): ' + video_id), 404

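    # Parse the 't' start-time parameter: either '#h#m#s' components
    # (converted to seconds via time_table, presumably
    # {'h': 3600, 'm': 60, 's': 1}) or a plain number of seconds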
    time_start_str = request.args.get('t', '0s')
    time_start = 0
    if re.fullmatch(r'(\d+(h|m|s))+', time_start_str):
        for match in re.finditer(r'(\d+)(h|m|s)', time_start_str):
            time_start += int(match.group(1)) * time_table[match.group(2)]
    elif re.fullmatch(r'\d+', time_start_str):
        time_start = int(time_start_str)

    lc = request.args.get('lc', '')
    playlist_id = request.args.get('list')
    index = request.args.get('index')
    use_invidious = bool(int(request.args.get('use_invidious', '1')))
    tasks = (gevent.spawn(comments.video_comments,
                          video_id,
                          int(settings.default_comment_sorting),
                          lc=lc),
             gevent.spawn(extract_info,
                          video_id,
                          use_invidious,
                          playlist_id=playlist_id,
                          index=index))
    gevent.joinall(tasks)
    util.check_gevent_exceptions(tasks[1])
    comments_info, info = tasks[0].value, tasks[1].value

    if info['error']:
        return flask.render_template('error.html', error_message=info['error'])

    video_info = {
        "duration": util.seconds_to_timestamp(info["duration"] or 0),
        "id": info['id'],
        "title": info['title'],
        "author": info['author'],
    }

    # prefix urls, and other post-processing not handled by yt_data_extract
    for item in info['related_videos']:
        util.prefix_urls(item)
        util.add_extra_html_info(item)
    if info['playlist']:
        playlist_id = info['playlist']['id']
        for item in info['playlist']['items']:
            util.prefix_urls(item)
            util.add_extra_html_info(item)
            if playlist_id:
                item['url'] += '&list=' + playlist_id
            if item['index']:
                item['url'] += '&index=' + str(item['index'])
        info['playlist']['author_url'] = util.prefix_url(
            info['playlist']['author_url'])
    # Don't prefix hls_formats for now because the urls inside the manifest
    # would need to be prefixed as well.
    for fmt in info['formats']:
        fmt['url'] = util.prefix_url(fmt['url'])

    # Add video title to end of url path so it has a filename other than just
    # "videoplayback" when downloaded
    title = urllib.parse.quote(util.to_valid_filename(info['title'] or ''))
    for fmt in info['formats']:
        filename = title
        ext = fmt.get('ext')
        if ext:
            filename += '.' + ext
        fmt['url'] = fmt['url'].replace('/videoplayback',
                                        '/videoplayback/name/' + filename)

    if settings.gather_googlevideo_domains:
        with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'),
                  'a+',
                  encoding='utf-8') as f:
            url = info['formats'][0]['url']
            subdomain = url[0:url.find(".googlevideo.com")]
            f.write(subdomain + "\n")

    download_formats = []

    for fmt in (info['formats'] + info['hls_formats']):
        if fmt['acodec'] and fmt['vcodec']:
            codecs_string = fmt['acodec'] + ', ' + fmt['vcodec']
        else:
            codecs_string = fmt['acodec'] or fmt['vcodec'] or '?'
        download_formats.append({
            'url': fmt['url'],
            'ext': fmt['ext'] or '?',
            'audio_quality': audio_quality_string(fmt),
            'video_quality': video_quality_string(fmt),
            'file_size': format_bytes(fmt['file_size']),
            'codecs': codecs_string,
        })

    video_sources = get_video_sources(info, tor_bypass=info['tor_bypass_used'])
    video_height = yt_data_extract.deep_get(video_sources,
                                            0,
                                            'height',
                                            default=360)
    video_width = yt_data_extract.deep_get(video_sources,
                                           0,
                                           'width',
                                           default=640)
    # 1 second per pixel, or the actual video width
    theater_video_target_width = max(640, info['duration'] or 0, video_width)

    # Check for false determination of disabled comments, which comes from
    # the watch page. But if we got comments in the separate request for those,
    # then the determination is wrong.
    if info['comments_disabled'] and len(comments_info['comments']) != 0:
        info['comments_disabled'] = False
        print('Warning: False determination that comments are disabled')
        print('Comment count:', info['comment_count'])
        info['comment_count'] = None  # hack to make it obvious there's a bug

    # captions and transcript
    subtitle_sources = get_subtitle_sources(info)
    other_downloads = []
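    # Offer a plain-text transcript download for each caption track by routing
    # the caption URL through the local /watch/transcript endpoint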
    for source in subtitle_sources:
        best_caption_parse = urllib.parse.urlparse(source['url'].lstrip('/'))
        transcript_url = (util.URL_ORIGIN + '/watch/transcript' +
                          best_caption_parse.path + '?' +
                          best_caption_parse.query)
        other_downloads.append({
            'label': 'Video Transcript: ' + source['label'],
            'ext': 'txt',
            'url': transcript_url
        })

    return flask.render_template(
        'watch.html',
        header_playlist_names=local_playlist.get_playlist_names(),
        uploader_channel_url=('/' + info['author_url'])
        if info['author_url'] else '',
        time_published=info['time_published'],
        view_count=('{:,}'.format(info['view_count'])
                    if info.get('view_count') is not None else ''),
        like_count=('{:,}'.format(info['like_count'])
                    if info.get('like_count') is not None else ''),
        dislike_count=('{:,}'.format(info['dislike_count'])
                       if info.get('dislike_count') is not None else ''),
        download_formats=download_formats,
        other_downloads=other_downloads,
        video_info=json.dumps(video_info),
        video_sources=video_sources,
        hls_formats=info['hls_formats'],
        subtitle_sources=subtitle_sources,
        related=info['related_videos'],
        playlist=info['playlist'],
        music_list=info['music_list'],
        music_attributes=get_ordered_music_list_attributes(info['music_list']),
        comments_info=comments_info,
        comment_count=info['comment_count'],
        comments_disabled=info['comments_disabled'],
        video_height=video_height,
        video_width=video_width,
        theater_video_target_width=theater_video_target_width,
        title=info['title'],
        uploader=info['author'],
        description=info['description'],
        unlisted=info['unlisted'],
        limited_state=info['limited_state'],
        age_restricted=info['age_restricted'],
        live=info['live'],
        playability_error=info['playability_error'],
        allowed_countries=info['allowed_countries'],
        ip_address=info['ip_address'] if settings.route_tor else None,
        invidious_used=info['invidious_used'],
        invidious_reload_button=info['invidious_reload_button'],
        video_url=util.URL_ORIGIN + '/watch?v=' + video_id,
        time_start=time_start,
        js_data={
            'video_id': video_info['id'],
        })