Example No. 1
def request_comments(ctoken, replies=False):
    if replies:  # let's make it use different urls for no reason despite all the data being encoded in the ctoken
        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
    else:
        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
    url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

    for i in range(0, 8):  # don't retry more than 8 times
        content = util.fetch_url(url,
                                 headers=mobile_headers,
                                 report_text="Retrieved comments",
                                 debug_name='request_comments')
        if content[0:4] == b")]}'":  # strip the anti-JSON-hijacking prefix YouTube prepends to the response
            content = content[4:]
        elif content[0:10] == b'\n<!DOCTYPE':  # occasionally returns html instead of json for no reason
            content = b''
            print("got <!DOCTYPE>, retrying")
            continue
        break

    polymer_json = json.loads(util.uppercase_escape(content.decode('utf-8')))
    return polymer_json
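A minimal usage sketch (the token value here is hypothetical; real continuation tokens are extracted from a watch-page response, and mobile_headers is a module-level constant assumed from the surrounding project):

# 'CTOKEN' stands in for a real base64-encoded continuation token.
polymer_json = request_comments('CTOKEN')                 # top-level comments
replies_json = request_comments('CTOKEN', replies=True)   # a reply thread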
Example No. 2
def playlist_first_page(playlist_id, report_text="Retrieved playlist"):
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
    content = util.fetch_url(url,
                             util.mobile_ua + headers_1,
                             report_text=report_text,
                             debug_name='playlist_first_page')
    content = json.loads(util.uppercase_escape(content.decode('utf-8')))

    return content
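Each example decodes the response with util.uppercase_escape before json.loads. That helper is not shown here; a plausible sketch, matching the helper of the same name in youtube-dl, converts \UXXXXXXXX escapes (which Python's json module rejects) into the characters they encode:

import re

def uppercase_escape(s):
    # json.loads only accepts \uXXXX escapes; YouTube responses can contain
    # \UXXXXXXXX escapes for astral-plane characters such as emoji, so
    # convert those to literal characters before parsing.
    return re.sub(
        r'\\U([0-9a-fA-F]{8})',
        lambda m: chr(int(m.group(1), 16)),
        s)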
Example No. 3
def playlist_first_page(playlist_id, report_text="Retrieved playlist"):
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
    content = util.fetch_url(url,
                             util.mobile_ua + headers_1,
                             report_text=report_text)
    # with open('debug/playlist_debug', 'wb') as f:
    #     f.write(content)
    content = json.loads(util.uppercase_escape(content.decode('utf-8')))

    return content
Example No. 4
def get_videos(playlist_id, page):

    url = "https://m.youtube.com/playlist?ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '2.20180508',
    }

    content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos')

    info = json.loads(util.uppercase_escape(content.decode('utf-8')))
    return info
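The page parameter is 1-based and maps to a 20-video offset through playlist_ctoken (defined elsewhere in the project). A hypothetical call for the second page:

# Page 2 => offset (2-1)*20 = 20, i.e. videos 21-40 of the playlist.
# The playlist id below is a placeholder, not a real playlist.
info = get_videos('PLxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 2)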
Example No. 5
def parse_comments_ajax(content, replies=False):
    try:
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        #print(content)
        comments_raw = content['content']['continuation_contents']['contents']
        ctoken = util.default_multi_get(content,
                                        'content',
                                        'continuation_contents',
                                        'continuations',
                                        0,
                                        'continuation',
                                        default='')

        comments = []
        for comment_raw in comments_raw:
            replies_url = ''
            if not replies:
                if comment_raw['replies'] is not None:
                    reply_ctoken = comment_raw['replies']['continuations'][0]['continuation']
                    comment_id, video_id = get_ids(reply_ctoken)
                    replies_url = util.URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id
                comment_raw = comment_raw['comment']
            comment = {
                'author': comment_raw['author']['runs'][0]['text'],
                'author_url': comment_raw['author_endpoint']['url'],
                'author_channel_id': '',
                'author_id': '',
                'author_avatar': comment_raw['author_thumbnail']['url'],
                'likes': comment_raw['like_count'],
                'published': comment_raw['published_time']['runs'][0]['text'],
                'text': comment_raw['content']['runs'],
                'reply_count': '',
                'replies_url': replies_url,
            }
            comments.append(comment)
    except Exception as e:
        print('Error parsing comments: ' + str(e))
        comments = ()
        ctoken = ''

    return {'ctoken': ctoken, 'comments': comments}
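util.default_multi_get is used throughout these examples but defined elsewhere. A minimal sketch consistent with how it is called (follow a chain of dict keys and list indices, returning default if any step fails):

def default_multi_get(obj, *keys, default):
    # Walk obj one key/index at a time; any missing step yields the
    # default instead of raising.
    try:
        for key in keys:
            obj = obj[key]
        return obj
    except (KeyError, IndexError, TypeError):
        return default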
Example No. 6
def parse_comments_polymer(content, replies=False):
    try:
        video_title = ''
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        url = content[1]['url']
        ctoken = urllib.parse.parse_qs(url[url.find('?') + 1:])['ctoken'][0]
        video_id = ctoken_metadata(ctoken)['video_id']
        #print(content)
        try:
            comments_raw = content[1]['response']['continuationContents']['commentSectionContinuation']['items']
        except KeyError:
            comments_raw = content[1]['response']['continuationContents']['commentRepliesContinuation']['contents']
            replies = True

        ctoken = util.default_multi_get(content,
                                        1,
                                        'response',
                                        'continuationContents',
                                        'commentSectionContinuation',
                                        'continuations',
                                        0,
                                        'nextContinuationData',
                                        'continuation',
                                        default='')

        comments = []
        for comment_raw in comments_raw:
            replies_url = ''
            view_replies_text = ''
            try:
                comment_raw = comment_raw['commentThreadRenderer']
            except KeyError:
                pass
            else:
                if 'commentTargetTitle' in comment_raw:
                    video_title = comment_raw['commentTargetTitle']['runs'][0]['text']

                parent_id = comment_raw['comment']['commentRenderer']['commentId']
                # TODO: move this stuff into the comments_html function
                if 'replies' in comment_raw:
                    #reply_ctoken = comment_raw['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData']['continuation']
                    #comment_id, video_id = get_ids(reply_ctoken)
                    replies_url = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id
                    view_replies_text = yt_data_extract.get_plain_text(comment_raw['replies']['commentRepliesRenderer']['moreText'])
                    match = reply_count_regex.search(view_replies_text)
                    if match is None:
                        view_replies_text = '1 reply'
                    else:
                        view_replies_text = match.group(1) + " replies"
                elif not replies:
                    view_replies_text = "Reply"
                    replies_url = util.URL_ORIGIN + '/post_comment?parent_id=' + parent_id + "&video_id=" + video_id
                comment_raw = comment_raw['comment']

            comment_raw = comment_raw['commentRenderer']
            comment = {
                'author_id': comment_raw.get('authorId', ''),
                'author_avatar': comment_raw['authorThumbnail']['thumbnails'][0]['url'],
                'likes': comment_raw['likeCount'],
                'published': yt_data_extract.get_plain_text(comment_raw['publishedTimeText']),
                'text': comment_raw['contentText'].get('runs', ''),
                'view_replies_text': view_replies_text,
                'replies_url': replies_url,
                'video_id': video_id,
                'comment_id': comment_raw['commentId'],
            }

            if 'authorText' in comment_raw:  # deleted channels have no name or channel link
                comment['author'] = yt_data_extract.get_plain_text(comment_raw['authorText'])
                comment['author_url'] = comment_raw['authorEndpoint']['commandMetadata']['webCommandMetadata']['url']
                comment['author_channel_id'] = comment_raw['authorEndpoint']['browseEndpoint']['browseId']
            else:
                comment['author'] = ''
                comment['author_url'] = ''
                comment['author_channel_id'] = ''

            comments.append(comment)
    except Exception as e:
        print('Error parsing comments: ' + str(e))
        comments = ()
        ctoken = ''

    return {'ctoken': ctoken, 'comments': comments, 'video_title': video_title}
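reply_count_regex is referenced above but defined elsewhere in the module. Given that group(1) is taken as the reply count, and that Example No. 7 falls back to re.search(r'(\d+)', ...) for the same job, a plausible definition is simply:

import re

# An assumption based on usage, not the module's actual definition:
# capture the number in strings like "View 15 replies".
reply_count_regex = re.compile(r'(\d+)')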
Example No. 7
def parse_comments_polymer(content):
    try:
        video_title = ''
        content = json.loads(util.uppercase_escape(content.decode('utf-8')))
        url = content[1]['url']
        ctoken = urllib.parse.parse_qs(url[url.find('?')+1:])['ctoken'][0]
        metadata = ctoken_metadata(ctoken)

        try:
            comments_raw = content[1]['response']['continuationContents']['commentSectionContinuation']['items']
        except KeyError:
            comments_raw = content[1]['response']['continuationContents']['commentRepliesContinuation']['contents']

        ctoken = util.default_multi_get(content, 1, 'response', 'continuationContents', 'commentSectionContinuation', 'continuations', 0, 'nextContinuationData', 'continuation', default='')

        comments = []
        for comment_json in comments_raw:
            number_of_replies = 0
            try:
                comment_thread = comment_json['commentThreadRenderer']
            except KeyError:
                comment_renderer = comment_json['commentRenderer']
            else:
                if 'commentTargetTitle' in comment_thread:
                    video_title = comment_thread['commentTargetTitle']['runs'][0]['text']

                if 'replies' in comment_thread:
                    view_replies_text = yt_data_extract.get_plain_text(comment_thread['replies']['commentRepliesRenderer']['moreText'])
                    view_replies_text = view_replies_text.replace(',', '')
                    match = re.search(r'(\d+)', view_replies_text)
                    if match is None:
                        number_of_replies = 1
                    else:
                        number_of_replies = int(match.group(1))
                comment_renderer = comment_thread['comment']['commentRenderer']

            comment = {
                'author_id': comment_renderer.get('authorId', ''),
                'author_avatar': comment_renderer['authorThumbnail']['thumbnails'][0]['url'],
                'likes': comment_renderer['likeCount'],
                'published': yt_data_extract.get_plain_text(comment_renderer['publishedTimeText']),
                'text': comment_renderer['contentText'].get('runs', ''),
                'number_of_replies': number_of_replies,
                'comment_id': comment_renderer['commentId'],
            }

            if 'authorText' in comment_renderer:     # deleted channels have no name or channel link
                comment['author'] = yt_data_extract.get_plain_text(comment_renderer['authorText'])
                comment['author_url'] = comment_renderer['authorEndpoint']['commandMetadata']['webCommandMetadata']['url']
                comment['author_channel_id'] = comment_renderer['authorEndpoint']['browseEndpoint']['browseId']
            else:
                comment['author'] = ''
                comment['author_url'] = ''
                comment['author_channel_id'] = ''

            comments.append(comment)
    except Exception as e:
        print('Error parsing comments: ' + str(e))
        comments = ()
        ctoken = ''

    return {
        'ctoken': ctoken,
        'comments': comments,
        'video_title': video_title,
        'video_id': metadata['video_id'],
        'offset': metadata['offset'],
        'is_replies': metadata['is_replies'],
        'sort': metadata['sort'],
    }
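ctoken_metadata (defined elsewhere) decodes the continuation token itself. Its internals are not shown, but from the usage in Examples No. 6 and No. 7 it returns a dict with at least these keys:

# Shape inferred from usage above, not from the function's source.
metadata = ctoken_metadata(ctoken)
metadata['video_id']    # id of the video the comments belong to
metadata['offset']      # pagination offset within the comment listing
metadata['is_replies']  # whether the token targets a reply thread
metadata['sort']        # requested comment sort order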