Example #1
def scribe(video_id: str, youtube_api_key: str):

    if not youtube_api_key:
        log.error(
            'You need to provide an API key either by --youtube-api-key or by setting YOUTUBE_API_KEY'
        )
        sys.exit(1)

    api = Api(api_key=youtube_api_key)
    video_metadata = api.get_video_by_id(video_id=video_id).items[0]

    #log.debug(json.dumps(video_metadata.to_dict(), sort_keys=True, indent=2))

    title = video_metadata.snippet.title
    preview_image_path = get_preview_image(
        img_url=video_metadata.snippet.thumbnails.default.url,
        video_id=video_id)
    description = video_metadata.snippet.description
    date = datetime.datetime.strptime(video_metadata.snippet.publishedAt,
                                      "%Y-%m-%dT%H:%M:%S%z")
    captions = YouTubeTranscriptApi.get_transcript(video_id)

    print(
        gen_markdown_page(video_id=video_id,
                          title=title,
                          image_path=preview_image_path,
                          description=description,
                          date=date,
                          captions=captions))
Example #2
def get_all_comments(YOUTUBE_API_KEY,
                     query,
                     count_video=10,
                     limit=30,
                     maxResults=10,
                     nextPageToken=''):
    """
    Fetch maxResults comments per video.
    """
    api = Api(api_key=YOUTUBE_API_KEY)
    video_by_keywords = api.search_by_keywords(q=query,
                                               search_type=["video"],
                                               count=count_video,
                                               limit=limit)
    videoId = [x.id.videoId for x in video_by_keywords.items]

    comments_all = []
    for id_video in videoId:
        try:
            data = get_data(YOUTUBE_API_KEY,
                            id_video,
                            maxResults=maxResults,
                            nextPageToken=nextPageToken)
            comment = list(get_text_of_comment(data))
            comments_all.append(comment)
        except Exception:
            continue
    comments = sum(comments_all, [])
    return comments
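A quick usage sketch for get_all_comments above; the API key and query are placeholders, and get_data / get_text_of_comment are assumed to be helper functions defined elsewhere in the same project.

# Hypothetical call: search for videos matching the query and collect
# up to maxResults comments from each of them.
comments = get_all_comments(YOUTUBE_API_KEY="YOUR_API_KEY",
                            query="python tutorial",
                            count_video=5,
                            limit=30,
                            maxResults=10)
print(len(comments), "comments collected")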
Example #3
def get_channel_info(username):
    api = Api(api_key='redacted')
    channel_by_name = api.get_channel_info(channel_name=username)
    try:
        response = channel_by_name.items[0].to_dict()
    except Exception:
        return "User doesn't exist"
    channel_name = response["snippet"]["localized"]["title"]
    created_at = response["snippet"]["publishedAt"]
    pfp_url = response["snippet"]["thumbnails"]["default"]["url"]
    view_count = response["statistics"]["viewCount"]
    subscribers = response["statistics"]["subscriberCount"]
    video_count = response["statistics"]["videoCount"]
    userid = response["id"]
    description = response["brandingSettings"]["channel"]["description"]
    match = re.findall(r'[\w\.-]+@[\w\.-]+', str(description))
    if len(match) > 0:
        emails = ','.join(match)
    else:
        emails = "None found"
    data = {
        "name": channel_name,
        "created_at": parser.parse(created_at).timestamp(),
        "pfp_url": pfp_url,
        "total_views": view_count,
        "subscribers": subscribers,
        "video_count": video_count,
        "userid": userid,
        "emails": emails,
        "url": f'https://www.youtube.com/channel/{userid}'
    }
    return data
Example #4
def get_amount_of_videos_uploaded():
    """
    :return: Returns the amount of videos vakantie vincent has uploaded.
    """
    api = Api(api_key=get_youtube_api_key())
    channel_by_id = api.get_channel_info(channel_id=channel_id)
    return channel_by_id.items[0].to_dict()["statistics"]["videoCount"]
Example #5
 def get_latest_video(self, youtubeChannelName):
     res = {}
     apiKey = 'AIzaSyBOCLFDDz4wHFmatH-fPxsjjRnBfPzcOFQ'
     try:
         api = YoutubeApi(api_key=apiKey)
         r = api.search_by_keywords(q=youtubeChannelName,
                                    search_type=["channel"],
                                    count=2,
                                    limit=2)
         idChannel = r.items[0].snippet.channelId
         link = 'https://www.googleapis.com/youtube/v3/search?key=' + apiKey + '&channelId=' + idChannel + '&part=snippet,id&order=date&maxResults=2'
         resp = requests.get(url=link)
         data = resp.json()
         if (data):
             res['result'] = data['items'][0]
         else:
             res['result'] = {
                 'error': 'Youtube url not updated',
                 'msg': 'Channel not found.'
             }
         return jsonify(res)
     except Exception as e:
         print("error : get_latest_video\n", str(e), flush=True)
         res['error'] = "internal error"
         res['message'] = 'Youtube Data API quota exceeded'
         return res
Example #6
    def __init__(self, API_KEY, JSON_PATH):
        # Declare an API object using our API_KEY
        self.api = Api(api_key=API_KEY)
        self.data_path = JSON_PATH

        with open(JSON_PATH) as f:
            self.database = json.load(f)
Example #7
 def get_number_subscribers_youtube_channel(self, youtubeChannelName):
     res = {}
     apiKey = 'AIzaSyBOCLFDDz4wHFmatH-fPxsjjRnBfPzcOFQ'
     try:
         api = YoutubeApi(api_key=apiKey)
         r = api.search_by_keywords(q=youtubeChannelName,
                                    search_type=["channel"],
                                    count=2,
                                    limit=2)
         idChannel = r.items[0].snippet.channelId
         channel = api.get_channel_info(channel_id=idChannel)
         if (channel.items):
             res['result'] = {
                 'subscriberCount':
                 channel.items[0].statistics.subscriberCount,
                 'channel': channel.items[0].snippet
             }
         else:
             res['result'] = {
                 'error': 'Youtube url not updated',
                 'msg': 'Channel not found.'
             }
         return jsonify(res)
     except Exception as e:
         print("error : get_number_subscribers_youtube_channel\n",
               str(e),
               flush=True)
         res['error'] = "internal error"
         res['message'] = 'Youtube Data API quota exceeded'
         return res
Example #8
def youtube(request):
    api = Api(api_key="AIzaSyDHAS3sDLVtUqM1vx-kxykrBHMVSi0BLJI")
    query = request.session['user-input']
    res = api.search_by_keywords(q=query,
                                 search_type=["channel"],
                                 count=25,
                                 limit=8)
    res = res.to_dict()
    res_items = res["items"]
    result = []

    for data in res_items:
        temp = {
            "channel_name": data["snippet"]["title"],
            "channel_url": "https://www.youtube.com/channel/" +
                           str(data["snippet"]["channelId"]),
            "channel_logo": data["snippet"]["thumbnails"]["default"]["url"]
        }

        result.append(temp)

    context = {"result": result, "text": query}

    return render(request, 'youtube.html', context)
Example #9
class YTSearch():
    def __init__(self, api_key=API_KEY):
        self.key = api_key
        self.api = Api(api_key=self.key)

    def check_video_eligibility(self, ids, expected_type):

        like_max = {"yt_id": "", "youtube_rating": 0.0}
        for id in ids:
            video = list(
                self.api.get_video_by_id(video_id=id).to_dict().items())
            try:
                vid = video[5][1][0]
                duration = isodate.parse_duration(
                    vid['contentDetails']['duration']).total_seconds()
                definition = vid['contentDetails']['definition']
                like_count = float(vid["statistics"]["likeCount"])
                dislike_count = float(vid["statistics"]["dislikeCount"])
                duration_min = Duration[expected_type][0]
                duration_max = Duration[expected_type][1]
                like_percentage = like_count / (like_count + dislike_count)
                channel_title = vid["snippet"]["channelTitle"]
                if like_percentage > like_max["youtube_rating"] and (duration_min < duration <= duration_max) and \
                        channel_title != 'YouTube Movies':
                    like_max["yt_id"] = id
                    like_max["yt_duration"] = duration
                    like_max["yt_likes"] = like_count
                    like_max["youtube_rating"] = like_percentage
                    like_max["yt_definition"] = definition
            except Exception as e:
                print("ignoring the error:", e)
                continue
        return like_max

    def obsolete_get_stats(self,
                           key_word,
                           expected_type="movie",
                           max_results=10):
        videos = list(
            self.api.search_by_keywords(q=key_word,
                                        search_type=["video"]).items)
        vids = [vid.to_dict()["id"]['videoId'] for vid in videos]
        ret = self.check_video_eligibility(vids, expected_type)
        print(ret)

    def get_youtube_stats(self,
                          key_word,
                          expected_type="movie",
                          max_results=10):
        get_mov_stats = YoutubeSearch(key_word,
                                      max_results=max_results).to_dict()
        ids = [d['id'] for d in get_mov_stats]
        ret = self.check_video_eligibility(ids, expected_type)
        for mov in get_mov_stats:
            if mov["id"] == ret["yt_id"]:
                ret["youtube_link"] = "https://youtube.com" + mov["link"]
        return ret
Example #10
def process_play_list_step(message):
    try:
        chat_id = message.chat.id
        text = message.text
        if ":" not in text:
            bot.reply_to(message, 'Wrong message format')
            return

        data = str(text).split(':')
        plaid = data[0]
        title = data[1]

        if len(plaid) <= 5 or len(title) <= 5:
            bot.reply_to(message, 'Wrong message format')
            return

        api = Api(api_key=youtube_api_key)
        playlist_item_by_playlist = api.get_playlist_items(playlist_id=plaid,
                                                           count=1000)
        videos = playlist_item_by_playlist.items
        if len(videos) <= 0:
        bot.send_message(message.chat.id,
                         "No videos found for playlist = " + str(title))
            return

        bot.send_message(message.chat.id,
                         "Found " + str(len(videos)) + " videos from youtube")
        real_video_count = len(videos)
        count = 0
        lessons = []
        for video in videos:
            video_by_id = api.get_video_by_id(
                video_id=video.snippet.resourceId.videoId,
                parts=('snippet', 'contentDetails', 'statistics'))
            if len(video_by_id.items) <= 0:
                real_video_count = real_video_count - 1
                continue
            count = count + 1
            item = video_by_id.items[0]
            lesson_title = title + " " + formatLessonName(
                count) + " " + item.snippet.title
            time_val = isodate.parse_duration(item.contentDetails.duration)
            code = getYoutubeEmbedCode(video.snippet.resourceId.videoId)
            lesson = Lesson(lesson_title, code, time_val)
            lessons.append(lesson)

        bot.send_message(
            message.chat.id,
            "Total available video count = " + str(real_video_count))
        if len(lessons) <= 0:
            bot.send_message(message.chat.id, "No lesson found!")
        loginAndUpdate(lessons, message, plaid)

    except Exception as e:
        bot.reply_to(message, str(e))
Example #11
def fetchYoutubeData():
    processedChannelDataID = None

    try:
        successMessage('- Gathering youtube channel & video data...')

        api = Api(api_key=os.getenv('YOUTUBE_DATA_API_KEY'))
        channelById = api.get_channel_info(
            channel_id=os.getenv('YOUTUBE_CHANNEL_ID'))

        successMessage('- Fetched youtube channel & video data...')

        uploadsPlaylistId = channelById.items[
            0].contentDetails.relatedPlaylists.uploads
        allChannelVideos = api.get_playlist_items(
            playlist_id=uploadsPlaylistId, count=30, limit=30)
        successMessage('- Constructing youtube channel & video data...')

        processedData = []
        for video in allChannelVideos.items:
            processedData.append({
                "videoUrl": video.contentDetails.videoId,
                "videoTitle": video.snippet.channelTitle,
                "videoDescription": video.snippet.description,
            })

        successMessage('- Storing youtube video & channel data...')
        processedChannelDataID = saveDataToMongoDB(
            {
                "thumbnail":
                channelById.items[0].snippet.thumbnails.high.url,
                "channelName":
                channelById.items[0].snippet.title,
                "channelDescription":
                channelById.items[0].snippet.description,
                "keywords":
                channelById.items[0].brandingSettings.channel.keywords.split(),
                "resetAt":
                round(time.time())
            }, "youtubeChannelData")
        saveDataToMongoDB(
            {
                "_id": processedChannelDataID,
                "channelName": channelById.items[0].snippet.title,
                "videos": processedData,
                "resetAt": round(time.time()),
                "hasBeenProcessed": False
            }, "youtubeVideoData")
        successMessage('- Completed storing youtube video & channel data...')
    except Exception:
        errorMessage('- An exception occurred')
    else:
        successMessage('- Completed youtube data step... ')

    return processedChannelDataID
Example #12
 def get_youtube_statistics(video_ids):
     api_key = frappe.db.get_single_value("Video Settings", "api_key")
     api = Api(api_key=api_key)
     try:
         video = api.get_video_by_id(video_id=video_ids)
         video_stats = video.items
         return video_stats
     except Exception:
         title = "Failed to Update YouTube Statistics"
         frappe.log_error(title + "\n\n" + frappe.get_traceback(),
                          title=title)
Example #13
def random_video(mood):
    api = Api(api_key=apikey)

    videos = []
    playlist_item_by_playlist = api.get_playlist_items(
        playlist_id=playlists_by_mood[mood], count=None).items
    for item in iter(playlist_item_by_playlist):
        resource = item.snippet.resourceId
        if resource.kind == 'youtube#video':
            videos.append(resource.videoId)

    video = videos[random.randint(0, len(videos) - 1)]
    return 'https://www.youtube.com/watch?v=' + video
Example #14
async def find_first_youtube_match(keyword: str):
    youtube = Api(api_key=os.environ['YOUTUBE_TOKEN'])
    results = youtube.search_by_keywords(
        q=keyword,
        search_type=('video', ),
        count=1,
    ).items

    if len(results):
        msg = f'https://www.youtube.com/watch?v={results[0].id.videoId}'
    else:
        msg = 'I have found nothing'

    return msg
Example #15
def get_music_titles(playlistID):
    # Returns the music titles from the YouTube playlist

    api = Api(api_key=YOUTUBE_KEY)

    titles = api.get_playlist_items(playlist_id=playlistID, count=None)
    list_music = []
    for titulo in titles.items:
        musica = titulo.snippet.title.upper()

        list_music.append(musica)

    print(list_music)
    return list_music
Example #16
    def set_youtube_statistics(self):
        api_key = frappe.db.get_single_value("Video Settings", "api_key")
        api = Api(api_key=api_key)

        try:
            video = api.get_video_by_id(video_id=self.youtube_video_id)
            video_stats = video.items[0].to_dict().get('statistics')

            self.like_count = video_stats.get('likeCount')
            self.view_count = video_stats.get('viewCount')
            self.dislike_count = video_stats.get('dislikeCount')
            self.comment_count = video_stats.get('commentCount')

        except Exception:
            title = "Failed to Update YouTube Statistics for Video: {0}".format(
                self.name)
            frappe.log_error(title + "\n\n" + frappe.get_traceback(),
                             title=title)
Example #17
def GetFridayFeature(ctx):
    api = Api(api_key=API)
    playlist_item_by_playlist = api.get_playlist_items(
        playlist_id="PLLUkxbIkknLuK6BLdOs-QDAJiDQTM7Xei", count=None)

    totalVideos = len(playlist_item_by_playlist.items)
    number = randrange(totalVideos)
    print(number)
    print(playlist_item_by_playlist.items[number].snippet.title)
    video = playlist_item_by_playlist.items[number]
    embedVar = discord.Embed(title=video.snippet.title,
                             url="https://www.youtube.com/watch?v=" +
                             video.contentDetails.videoId,
                             color=0x0ed0f1)
    embedVar.add_field(name="Published",
                       value=video.contentDetails.videoPublishedAt)
    embedVar.set_image(url=video.snippet.thumbnails.standard.url)

    return embedVar
Example #18
 def get_channel_ID(self, youtubeChannelName):
     res = {}
     apiKey = 'AIzaSyBOCLFDDz4wHFmatH-fPxsjjRnBfPzcOFQ'
     try:
         api = YoutubeApi(api_key=apiKey)
         r = api.search_by_keywords(q=youtubeChannelName,
                                    search_type=["channel"],
                                    count=2,
                                    limit=2)
         idChannel = r.items[0].snippet.channelId
         if (idChannel):
             res['result'] = idChannel
         else:
             res['result'] = {
                 'error': 'Youtube url not updated',
                 'msg': 'Channel not found.'
             }
         return jsonify(res)
     except Exception as e:
         print("error : get_channel_ID\n", str(e), flush=True)
         res['error'] = "internal error"
         res['message'] = 'Youtube Data API quota exceeded'
         return res
Example #19
def main():
    api = Api(
        api_key=API_KEY,
        client_secret=CLIENT_SECRET,
        client_id=CLIENT_ID,
    )

    comments = api.get_comment_threads(all_to_channel_id=CHANNEL_ID,
                                       count=SEARCH_AMOUNT).items
    comments_with_replies = [
        comment for comment in comments if comment.replies
    ]

    for comment in comments_with_replies:
        is_faker = False
        has_already_been_informed = False

        # Check if there is a fake account in a reply
        for reply in comment.replies.comments:
            reply_name = reply.snippet.authorDisplayName
            reply_id = reply.snippet.authorChannelId

            if reply_name.lower() in FAKE_NAMES:
                # Check if it's a faker
                if reply_id.value != CHANNEL_ID:
                    # Faker found!
                    is_faker = True

                    if reply.snippet.textOriginal == NOTIFY_MESSAGE:
                        has_already_been_informed = True

        if is_faker and not has_already_been_informed:
            video_id = comment.snippet.videoId
            comment_id = comment.id
            url = build_comment_link(video_id, comment_id)

            print(f"Faker found on video {url}")
Example #20
def get_audio(url=None,
              file_name=None,
              index_file=None,
              since=None,
              limit=None,
              prefix_name=None,
              prefix_num=None):
    api = Api(api_key=conf['api_key'])
    if index_file and os.path.exists(index_file):
        return get_audios_from_indexed_list(api, store_dir, index_file, since,
                                            limit, prefix_name, prefix_num)
    vid = get_video_id(url)
    if not vid:
        return 'Please provide video ID or full URL'
    return get_audio_by_id(api, store_dir, vid, file_name)
Example #21
def youtube_data(group):
    """Runs all the YouTube related tasks

    It scrapes data from YouTube for the whole group and the single artists

    Args:
      group: dictionary with the data of the group to scrape

    Returns:
      the same group dictionary with updated data
    """

    print("[{}] Starting tasks...".format(module))
    api = Api(api_key=youtube_api_key)

    # Getting channel data and stats
    channel_data = youtube_get_channel(api, group["youtube"]["url"])
    group["youtube"] = youtube_check_channel_change(group["youtube"],
                                                    channel_data,
                                                    group["hashtags"])

    # Getting video data and stats
    videos = youtube_get_videos(api, group["youtube"]["playlist"],
                                group["youtube"]["name"])
    group["youtube"]["videos"] = youtube_check_videos_change(
        group["name"], group["youtube"]["videos"], videos, group["hashtags"])

    # Getting Youtube data for each member
    for member in group["members"]:
        if "youtube" in member:
            channel_data = youtube_get_channel(api, member["youtube"]["url"])
            member["youtube"] = youtube_check_channel_change(
                member["youtube"], channel_data, member["hashtags"])

            videos = youtube_get_videos(api, member["youtube"]["playlist"],
                                        member["youtube"]["name"])
            member["youtube"]["videos"] = youtube_check_videos_change(
                member["name"], member["youtube"]["videos"], videos,
                member["hashtags"])

    print()
    return group
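For orientation, a minimal sketch of the group dictionary that youtube_data appears to expect, inferred only from the keys accessed above; every value is a placeholder, and a member's "youtube" block is optional per the "youtube" in member check.

# Hypothetical input; youtube_get_channel, youtube_get_videos and the
# youtube_check_* helpers are assumed to be defined elsewhere in the project.
group = {
    "name": "Example Group",
    "hashtags": ["#examplegroup"],
    "youtube": {
        "url": "https://www.youtube.com/channel/UCxxxxxxxxxxxxxxxxxxxxxx",
        "playlist": "UUxxxxxxxxxxxxxxxxxxxxxx",  # uploads playlist id
        "name": "Example Group",
        "videos": [],  # previously scraped videos, updated in place
    },
    "members": [
        {
            "name": "Member One",
            "hashtags": ["#memberone"],
            "youtube": {
                "url": "https://www.youtube.com/channel/UCyyyyyyyyyyyyyyyyyyyyyy",
                "playlist": "UUyyyyyyyyyyyyyyyyyyyyyy",
                "name": "Member One",
                "videos": [],
            },
        },
    ],
}

group = youtube_data(group)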
Example #22
 def __init__(self, api_key=API_KEY):
     self.key = api_key
     self.api = Api(api_key=self.key)
Example #23
 def __init__(self, bot):
     self.bot: commands.Bot = bot
     self.api: Api = Api(api_key=os.environ.get("YOUTUBEKEY"))
     self.pagetoken = None
     self.randompool = defaultdict(list)
Example #24
from urllib.parse import urlparse

import isodate
from django.conf import settings
from pyyoutube import Api

YOUTUBE_KEY = settings.YOUTUBE_KEY

api = Api(api_key=YOUTUBE_KEY)


def get_youtube_data(url):
    if "youtube.com/" in url:
        new_data = {}
        query_data = urlparse(url).query
        video_id = "".join(query_data.split("v=")).split("&")[0]
        video = api.get_video_by_id(video_id=video_id)
        video_data = video.items[0].to_dict()
        title = video_data["snippet"]["title"]
        description = video_data["snippet"]["description"]
        image_thumbnails = video_data["snippet"]["thumbnails"]
        if image_thumbnails["maxres"] is None:
            preview = image_thumbnails["high"]["url"]
        else:
            preview = image_thumbnails["maxres"]["url"]
        duration = isodate.parse_duration(
            video_data["contentDetails"]["duration"]
        )
        author = video_data["snippet"]["channelTitle"]
        date = video_data["snippet"]["publishedAt"]
        new_data.update(
Example #25
def main(channel_name="YaleCourses", load_from_file=False):
    # find all the videos

    api = Api(api_key="AIzaSyCw0j0aCe0y_T42q3RLBoVtGXlTOMGGaSM")
    # AIzaSyCw0j0aCe0y_T42q3RLBoVtGXlTOMGGaSM

    print("Setup dir to save the transcripts of %s channel" % (channel_name))
    channel_dir = os.path.join(raw_dir, "transcripts", channel_name)
    channel_id_file = os.path.join(raw_dir, "video_ids", channel_name + ".txt")

    if not os.path.exists(channel_dir):
        os.mkdir(channel_dir)
    else:
        print("\tThe folder of the channel %s already exists\n"
              "\tdelete it before executing this script - "
              "we don't want to overwrite your data" % (channel_name))
        return
    '''
        Since Google blocks retrieval of the IDs after a while,
        we write the IDs to a file as a buffer for safety.
    '''
    if load_from_file is False:
        print("Retrieving %s channel information" % (channel_name))
        channel_by_name = api.get_channel_info(channel_name=channel_name)
        print("\tFetch all the playlists")
        playlists_by_channel = api.get_playlists(
            channel_id=channel_by_name.items[0].id, count=None)
        print("\tFetch all the videos of the playlist")
        playlists_videos = []
        for playlist in playlists_by_channel.items:
            print("\t\tFetching videos IDs of playlist %s" % (playlist.id))
            playlists_videos.append(
                api.get_playlist_items(playlist_id=playlist.id, count=None))

        videos_ids = []
        for playlist in playlists_videos:
            for video in playlist.items:
                videos_ids.append(video.snippet.resourceId.videoId)
        print("We gathered %s videos, saving them to a file" %
              (len(videos_ids)))
        with open(channel_id_file, 'w') as f:
            json.dump(videos_ids, f)
    else:
        with open(channel_id_file, 'r') as f:
            videos_ids = json.load(f)

    print("Saving %s channel video transcripts" % (channel_name))
    #map(save_transcript,videos_ids)
    #[save_transcript(vd) for vd in videos_ids]

    for video_id in videos_ids:
        print("The video ID is %s" % (video_id))
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(
                video_id)  #,languages=['en']
            #transcript_list = [transcript for transcript in transcript_list\
            #                   if bool(re.match(transcript.language,"[en]*"))]
            video_transcripts = None
            for transcript in transcript_list:
                # the Transcript object provides metadata properties
                print("Video id : ", transcript.video_id)
                print("\tlanguage : %s , language code : %s" %
                      (transcript.language, transcript.language_code))
                print("\tis_generated: %s, is_translatable: %s" %
                      (transcript.is_generated, transcript.is_translatable))
                if transcript.language_code == 'en' and transcript.is_generated is False:
                    actual_transcript = transcript.fetch()
                    video_transcripts = actual_transcript

            if video_transcripts is not None:
                #print( "Current length json of trancsript is " ,len(transcript))
                video_path = os.path.join(raw_dir, "transcripts", channel_name,
                                          video_id + ".json")
                with open(video_path, 'w') as outfile:
                    json.dump(video_transcripts, outfile)
        except Exception as e:
            print(e)

    print("Finish main")
Example #26
from pyyoutube import Api
import sys
sys.path.append("./chat-replay-downloader")
from chat_replay_downloader import get_chat_replay, get_youtube_messages
import vtuber_list
import csv
import dateutil.parser
import datetime
import pytz
import re
import emoji
import pandas as pd
from datetime import timedelta
from collections import Counter

api = Api(api_key='GOOGLE_API_KEY_HERE')

NAMES=['AZki', 'Miko', 'Roboco', 'Sora', 'Suisei', 'Mel', 'Haato', 'Fubuki', 'Matsuri', 'Aki', 'Shion', 'Aqua',
    'Ayame', 'Choco', 'ChocoSub', 'Subaru', 'Korone', 'Mio', 'Okayu', 'Noel', 'Rushia', 'Pekora', 'Flare', 'Marine',
    'Luna', 'Coco', 'Watame', 'Kanata', 'Towa', 'Lamy', 'Nene', 'Botan', 'Polka', 'Calli', 'Kiara', 'Ina', 'Gura',
    'Amelia']

PLAYLIST_IDS=['UU0TXe_LYZ4scaW2XMyi5_kw', 'UU-hM6YJuNYVAmUWxeIr9FeA', 'UUDqI2jOz0weumE8s7paEk6g', 'UUp6993wxpyDPHUpavwDFqgg',
    'UU5CwaMl1eIgY8h02uZw7u8A', 'UUD8HOxPs4Xvsm8H0ZxXGiBw', 'UU1CfXB_kRs3C-zaeTG3oGyg', 'UUdn5BQ06XqgXoAxIhbqw5Rg',
    'UUQ0UDLQCjY0rmuxCDE38FGg', 'UUFTLzh12_nrtzqBPsTCqenA', 'UUXTpFs_3PqI41qX2d9tL2Rw', 'UU1opHUrw8rvnsadT-iGp7Cg',
    'UU7fk0CB07ly8oSl0aqKkqFg', 'UU1suqwovbL1kzsoaZgFZLKg', 'UUp3tgHXw_HI0QMk1K8qh3gQ', 'UUvzGlP9oQwU--Y0r9id_jnA',
    'UUhAnqc_AY5_I3Px5dig3X1Q', 'UUp-5t9SrOQwXMU7iIjQfARg', 'UUvaTdHTWBGv3MKj3KVqJVCw', 'UUdyqAaZDKHXg4Ahi7VENThQ',
    'UUl_gCybOJRIgOXw6Qb4qJzQ', 'UU1DCedRgGHBdm81E1llLhOQ', 'UUvInZx9h3jC2JzsIzoOebWg', 'UUCzUftO8KOVkV4wQG1vkUvg',
    'UUa9Y57gfeY0Zro_noHRVrnw', 'UUS9uQI-jC3DE0L4IpXyvr6w', 'UUqm3BQLlJfvkTsX_hvm0UmA', 'UUZlDXzGoo7d44bwdNObFacg',
    'UU1uv2Oq6kNxgATlCiez59hw', 'UUFKOVgVbGmX65RxO3EtH3iw', 'UUAWSyEs_Io8MtpY3m-zqILA', 'UUUKD-uaobj9jiqB-VXt71mA',
    'UUK9V2B22uJYu3N7eR_BT9QA', 'UUL_qhgtOy0dy1Agp8vkySQg', 'UUHsx4Hqa-1ORjQTh9TYDhww', 'UUMwGHR0BTZuLsmjY_NT5Pwg',
Example #27
apikey = 'AIzaSyC053n6_uqpUiOd1X4YfD0Vkx1QcTL-0R8'
playlist = 'PL_MH8gOS_ETiNT1NF8B46JYHZe6fXWfVW'

from pyyoutube import Api
import random
api = Api(api_key=apikey)

videos = []
playlist_item_by_playlist = api.get_playlist_items(playlist_id=playlist, count=None).items
for item in iter(playlist_item_by_playlist):
    resource = item.snippet.resourceId
    if resource.kind == 'youtube#video':
        videos.append(resource.videoId)
random_video = videos[random.randint(0, len(videos) - 1)]
print('https://www.youtube.com/watch?v='+ random_video)
Example #28
#!/usr/bin/env python3

from pyyoutube import Api
import pytube
import urllib.request
from googleapiclient.discovery import build

api = Api(api_key='AIzaSyBagf-_AQk4bISDaXIkyD0cGCPZCLHYHuU')
playListId = "PLt0cfLFa-ZYzZB54dKA6EV2McLY26hNGJ"
downLoadCount = 70
playlist_item = api.get_playlist_items(playlist_id=playListId, count = downLoadCount) #222

# Get item id and title
itemList = []
for item in playlist_item.items:
    itemList.append([item.snippet.resourceId.videoId, item.snippet.title])

# Download videos
api_key='AIzaSyBagf-_AQk4bISDaXIkyD0cGCPZCLHYHuU'
youtube = build('youtube', 'v3', developerKey=api_key)

itemCounts = len(itemList)
print("==================================")
print(itemCounts," videos found.")
print("==================================")
count = 0
videoSubFolder = './Video'
for vid, vtitle in itemList:
    count = count + 1
    itemurl = 'https://www.youtube.com/watch?v=' + vid
    print(itemurl)
Example #29
import os

import pandas as pd
from pyyoutube import Api

PLAYLIST_ID = "PLvHyFbz_PpaZ7833xPxXXPgSi50phCH-P"

api = Api(api_key=os.environ["GOOGLE_API_KEY"])


def fetch_youtube_data():
    video_list = []
    playlist = api.get_playlist_items(playlist_id=PLAYLIST_ID)
    next_page = playlist.nextPageToken
    video_list.extend(playlist.items)
    while next_page:
        playlist = api.get_playlist_items(playlist_id=PLAYLIST_ID,
                                          page_token=next_page)
        next_page = playlist.nextPageToken
        video_list.extend(playlist.items)

    dates = []
    titles = []
    views = []
    for video in video_list:
        videos = api.get_video_by_id(
            video_id=video.contentDetails.videoId).items
        if videos:
            video = videos[0]
            dates.append(video.snippet.publishedAt)
            titles.append(video.snippet.title)
Example #30
class Video_Search_Json:
    """Class to retrieve videos

    This class helps query and search our video database
    using a YouTube API key if needed to pull from YouTube.
    Automatically updates the json file.
    """
    
    def __init__(self, API_KEY, JSON_PATH):
        # Declare an API object using our API_KEY
        self.api = Api(api_key=API_KEY)
        self.data_path = JSON_PATH

        with open(JSON_PATH) as f:
            self.database = json.load(f)


    def search_by_keywords(self, subtopic, channels=[Channel['MIT']], num_videos=10, force_query=False, include_transcripts=True):
        """Searches for videos by subtopic

        Takes in a subtopic string to search for on YouTube.
        Returns a list of video dictionaries.

        Parameters:
        subtopic -- the subtopic to search for in the query
        channels -- a list of Channel enums specifying which channels must be included (default MIT OpenCourseWare)
        num_videos -- minimum number of videos to include (default 10)
        force_query -- whether or not to query regardless of inclusion in json (default False)
        include_transcripts -- whether or not to include transcripts (default True)
        """
        
        if subtopic not in self.database or force_query:
            # YouTube retrieve
            self.query_youtube(subtopic, channels, num_videos=num_videos, include_transcripts=include_transcripts)

            self.write_to_json(self.database)
        return self.database[subtopic]
    
    
    def query_youtube(self, subtopic, channels=[], num_videos=5, include_transcripts=True, search_count=50):
        """Query Youtube for a subtopic
        
        Queries the YouTube database for 5 videos pertaining to a certain subtopic. 
        Automatically skips videos that don't include a transcript when transcripts
        are required.
        
        Parameters:
        subtopic -- the topic to search for
        channels -- specifically which Channel enums to also include from the Channels class (default empty)
        num_videos -- the number of videos to return. Could be more if a channels argument is non-empty (default 5)
        include_transcripts -- requires videos to have transcripts (default True)
        search_count -- the number of videos to query for each time (default 50)
        """
        assert num_videos >= 0 # Number of videos cannot be negative

        # Query Youtube and add videos pertaining to the subtopic
        r = self.api.search_by_keywords(q=subtopic, search_type=["video"], count=search_count, limit=search_count, video_caption="any", video_duration=["any"])
        
        # Add list of videos to database
        videos = []

        # Creates deep copy of Channel(s) to keep track of which ones are included
        includes_channels = []
        for channel_enum in channels:
            includes_channels.append(channel_enum.name)

        # Videos still needed to reach num_videos (plus one from each required channel)
        video_counter = num_videos

        for vid in r.items:
            should_append = False

            # Check to see if max number of videos has been reached
            if video_counter <= 0 and len(includes_channels) == 0:
                break

            # Filter the video from the YouTube API
            filtered_video = self.filter_video_information(vid.to_dict())

            # Conditions for when to add a video
            if filtered_video["channelId"] in includes_channels:
                # Remove this channel from the must-include list once one of its videos is found
                includes_channels.remove(filtered_video["channelId"])
                should_append = True
            elif video_counter > 0:
                # Add video if we still need to add videos to reach minimum number of videos
                should_append = True
            
            if not should_append:
                continue
            
            # Include transcripts if specified
            if include_transcripts and should_append:
                filtered_video["transcript"] = self.get_youtube_transcript(filtered_video["videoId"])
                if filtered_video["transcript"] is None:
                    continue
            
            if should_append:
                # Add in other fields
                filtered_video["url"] = "www.youtube.com/watch?v=" + filtered_video["videoId"]
                filtered_video["source"] = "Youtube"
                filtered_video["difficulty"] = 3 # Default difficulty level

                # Add video to list
                videos.append(filtered_video)
                video_counter -= 1 # Decrement video_counter for minimum number of videos to include

        # Add filtered videos into the database (mutates)
        self.database[subtopic] = videos
    

    def filter_video_information(self, video, keys=["publishedAt", "channelId", "title", "description", "channelTitle", "videoId"]):
        """Filters video dict for certain keys

        Filters a YouTube Video entry to only include a certain number of keys
        specified by a keys list taken from the YouTube API.

        Parameters:
        video -- the video information as a dictionary to filter through
        keys -- the keys to include (default ["publishedAt", "channelId", "title", "description", "channelTitle", "videoId"])
        """

        new_video = {}
        self.recur_dict(video, new_video, keys) # Recursively loop through nested dictionaries and put everything on first layer
        return new_video


    def get_youtube_transcript(self, video_id):
        """Returns video's transcript from YouTube

        Returns the video's transcripts given the video_id on YouTube. Returns
        None if no transcript was found. This functionality is included in order
        to check whether or not a video holds a transcript.

        Parameters:
        video_id -- the id of the video on YouTube. Can be found after the "v=" part in the link.
        """
        
        # Try grabbing the raw translation using the YouTubeTranscriptApi
        raw_trans = []
        try:
            raw_trans = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
        except (Exception):
            print(video_id, "excluded")
            return None
        
        # Use only the text portion of the transcript and discard the timing info
        transcript = ""
        for i in raw_trans:
            transcript += i["text"] + " "
        return transcript


    def recur_dict(self, data, output, keys_to_include):
        """Recursively loop through nested dict

        Recursive function for reading nested dictionaries and 
        retrieving the keys to a one-layer dictionary.
        """

        # Loop through key value pair in dictionary
        for key, value in data.items():
            if isinstance(value, dict):
                # If the value is a dict, recursively loop
                self.recur_dict(value, output, keys_to_include)
            elif key in keys_to_include:
                # If value is not a dictionary, add to new dict
                output[key] = value

    
    def write_to_json(self, data_dict):
        """Write dictionary to JSON file

        Writes the dictionary to a JSON file, with exception handling and
        protection against an empty dictionary.
        
        Parameters:
        data_dict -- the dictionary to write to json
        """

        try:
            # Only write data to JSON if it is non Null
            if data_dict:
                with open(self.data_path, 'w') as json_file:
                    json.dump(data_dict, json_file)
        except (json.decoder.JSONDecodeError):
            print("Error Writing to Json File, Dictionary improperly formatted.")
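A minimal usage sketch for the Video_Search_Json class above; the API key and JSON path are placeholders, and the Channel enum, json module, pyyoutube Api and YouTubeTranscriptApi are assumed to be imported in the surrounding module.

# Hypothetical driver code.
searcher = Video_Search_Json(API_KEY="YOUR_API_KEY", JSON_PATH="videos.json")

# Returns (and caches in videos.json) a list of filtered video dicts for the subtopic.
results = searcher.search_by_keywords("linear algebra",
                                      channels=[Channel['MIT']],
                                      num_videos=10,
                                      include_transcripts=True)
for video in results:
    print(video["title"], video["url"])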