def get_amount_of_videos_uploaded():
    """
    Look up how many videos the configured channel has uploaded.

    The channel is identified by the ``channel_id`` name defined outside this
    function, and the API key is obtained from ``get_youtube_api_key()``.

    :return: The ``videoCount`` entry of the ``statistics`` section of the
        first channel item returned by the API.
    """
    client = Api(api_key=get_youtube_api_key())
    lookup = client.get_channel_info(channel_id=channel_id)
    # Only the first returned item is inspected.
    first_channel = lookup.items[0].to_dict()
    statistics = first_channel["statistics"]
    return statistics["videoCount"]
Beispiel #2
0
def get_channel_info(username):
    """
    Collect public profile data for a channel looked up by user name.

    :param username: Value passed to the API client as ``channel_name``.
    :return: A dict with the keys ``name``, ``created_at`` (timestamp derived
        from the channel's ``publishedAt`` value), ``pfp_url``,
        ``total_views``, ``subsribers``, ``video_count``, ``userid``,
        ``emails`` and ``url``. When the first item of the response cannot be
        read, the string ``'User doesnt exist'`` is returned instead.
    :raises KeyError: If the channel data lacks one of the expected sections.
    """
    api = Api(api_key='redacted')
    channel_by_name = api.get_channel_info(channel_name=username)
    try:
        response = channel_by_name.items[0].to_dict()
    except Exception:
        # An empty or missing ``items`` list is treated as "no such user".
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return 'User doesnt exist'

    snippet = response["snippet"]
    statistics = response["statistics"]
    userid = response["id"]
    description = response["brandingSettings"]["channel"]["description"]

    # Pull anything that looks like an e-mail address out of the channel
    # description; ``str()`` copes with a missing (None) description.
    match = re.findall(r'[\w\.-]+@[\w\.-]+', str(description))
    emails = ','.join(match) if match else "None found"

    return {
        "name": snippet["localized"]["title"],
        "created_at": parser.parse(snippet["publishedAt"]).timestamp(),
        "pfp_url": snippet["thumbnails"]["default"]["url"],
        "total_views": statistics["viewCount"],
        # NOTE: the misspelt key is kept on purpose; callers may rely on it.
        "subsribers": statistics["subscriberCount"],
        "video_count": statistics["videoCount"],
        "userid": userid,
        "emails": emails,
        "url": f'https://www.youtube.com/channel/{userid}'
    }
Beispiel #3
0
 def get_number_subscribers_youtube_channel(self, youtubeChannelName):
     """
     Search for a channel by keyword and report its subscriber count.

     The first search hit is taken as the channel. On success the payload is
     passed through ``jsonify``; if anything raises, the problem is printed
     and a plain dict carrying ``error`` and ``message`` is returned.

     :param youtubeChannelName: Keyword used for the channel search.
     :return: ``jsonify`` output wrapping ``{'result': ...}``, or the plain
         error dict described above.
     """
     payload = {}
     # NOTE(review): the API key is hard-coded in the source.
     key = 'AIzaSyBOCLFDDz4wHFmatH-fPxsjjRnBfPzcOFQ'
     try:
         client = YoutubeApi(api_key=key)
         search = client.search_by_keywords(
             q=youtubeChannelName,
             search_type=["channel"],
             count=2,
             limit=2,
         )
         found_id = search.items[0].snippet.channelId
         info = client.get_channel_info(channel_id=found_id)
         if not info.items:
             payload['result'] = {
                 'error': 'Youtube url not updated',
                 'msg': 'Channel not found.'
             }
         else:
             first = info.items[0]
             payload['result'] = {
                 'subscriberCount': first.statistics.subscriberCount,
                 'channel': first.snippet
             }
         return jsonify(payload)
     except Exception as exc:
         # Every failure is reported with the same generic payload.
         print("error : get_number_subscribers_youtube_channel\n",
               str(exc),
               flush=True)
         payload['error'] = "internal error"
         payload['message'] = 'Youtube Data API exceded'
         return payload
Beispiel #4
0
def fetchYoutubeData():
    """
    Fetch channel and recent-upload data from YouTube and store it in MongoDB.

    The API key and channel id are read from the ``YOUTUBE_DATA_API_KEY`` and
    ``YOUTUBE_CHANNEL_ID`` environment variables. Up to 30 items of the
    channel's uploads playlist are collected. Two documents are saved via
    ``saveDataToMongoDB``: channel metadata in ``youtubeChannelData`` and the
    video list in ``youtubeVideoData``, the latter using the id returned by
    the first save as its ``_id``.

    Progress and failures are reported through ``successMessage`` and
    ``errorMessage``; exceptions are not propagated to the caller.

    :return: The value returned by the first ``saveDataToMongoDB`` call, or
        ``None`` if a failure happened before that call completed.
    """
    processedChannelDataID = None

    try:
        successMessage('- Gathering youtube channel & video data...')

        api = Api(api_key=os.getenv('YOUTUBE_DATA_API_KEY'))
        channelById = api.get_channel_info(
            channel_id=os.getenv('YOUTUBE_CHANNEL_ID'))

        successMessage('- Fetched youtube channel & video data...')

        channel = channelById.items[0]
        uploadsPlaylistId = channel.contentDetails.relatedPlaylists.uploads
        allChannelVideos = api.get_playlist_items(
            playlist_id=uploadsPlaylistId, count=30, limit=30)
        successMessage('- Constructing youtube channel & video data...')

        processedData = [
            {
                "videoUrl": video.contentDetails.videoId,
                # The video's own title; ``channelTitle`` would repeat the
                # channel name for every entry.
                "videoTitle": video.snippet.title,
                "videoDescription": video.snippet.description,
            }
            for video in allChannelVideos.items
        ]

        # Guard against an unset (None) keywords value so that it does not
        # abort the whole import.
        keywords = channel.brandingSettings.channel.keywords

        successMessage('- Storing youtube video & channel data...')
        processedChannelDataID = saveDataToMongoDB(
            {
                "thumbnail": channel.snippet.thumbnails.high.url,
                "channelName": channel.snippet.title,
                "channelDescription": channel.snippet.description,
                "keywords": keywords.split() if keywords else [],
                "resetAt": round(time.time())
            }, "youtubeChannelData")
        saveDataToMongoDB(
            {
                "_id": processedChannelDataID,
                "channelName": channel.snippet.title,
                "videos": processedData,
                "resetAt": round(time.time()),
                "hasBeenProcessed": False
            }, "youtubeVideoData")
        successMessage('- Completed storing youtube video & channel data...')
    except Exception as exc:
        # Still best-effort, but surface the cause instead of discarding it,
        # and let KeyboardInterrupt/SystemExit propagate.
        errorMessage(f'- An exception occurred: {exc}')
    else:
        successMessage('- Completed youtube data step... ')

    return processedChannelDataID
def main(channel_name="YaleCourses", load_from_file=False):
    """
    Save the manually created English transcripts of a channel's videos.

    A directory ``<raw_dir>/transcripts/<channel_name>`` is created for the
    output; if it already exists the function prints a notice and returns
    without doing anything. Video ids are either collected from every
    playlist of the channel and written to
    ``<raw_dir>/video_ids/<channel_name>.txt``, or read back from that file.
    For each id, the last transcript whose language code is ``'en'`` and that
    is not auto-generated is written to ``<video_id>.json``.

    :param channel_name: Channel user name; also used for directory and file
        names.
    :param load_from_file: When exactly ``False`` the ids are fetched from the
        API and written to the id file; any other value loads them from it.
    """
    # NOTE(review): the API key is hard-coded in the source.
    api = Api(api_key="AIzaSyCw0j0aCe0y_T42q3RLBoVtGXlTOMGGaSM")

    print("Setup dir to save the transcripts of {} channel".format(
        channel_name))
    channel_dir = os.path.join(raw_dir, "transcripts", channel_name)
    channel_id_file = os.path.join(raw_dir, "video_ids", channel_name + ".txt")

    # Refuse to touch an existing output directory.
    if os.path.exists(channel_dir):
        print("\tThe folder of the channel {} is already exist\n"
              "\tdelete it before executing this script -"
              "we don't want to override your data".format(channel_name))
        return
    os.mkdir(channel_dir)

    # The id file acts as a buffer: id retrieval may get blocked after a
    # while, so the ids are persisted once they have been gathered.
    if load_from_file is not False:
        with open(channel_id_file, 'r') as f:
            videos_ids = json.load(f)
    else:
        print("Retriving {} channel information".format(channel_name))
        channel_info = api.get_channel_info(channel_name=channel_name)
        print("\tFetch all the playlists")
        playlists = api.get_playlists(channel_id=channel_info.items[0].id,
                                      count=None)
        print("\tFetch all the videos of the playlist")
        playlist_contents = []
        for playlist in playlists.items:
            print("\t\tFetching videos IDs of playlist {}".format(playlist.id))
            playlist_contents.append(
                api.get_playlist_items(playlist_id=playlist.id, count=None))

        videos_ids = [
            entry.snippet.resourceId.videoId
            for contents in playlist_contents
            for entry in contents.items
        ]
        print("We gathered now {} videos, saving save to file".format(
            len(videos_ids)))
        with open(channel_id_file, 'w') as f:
            json.dump(videos_ids, f)

    print("Save {} channel videos transcripts".format(channel_name))

    for video_id in videos_ids:
        print("The video ID is {}".format(video_id))
        try:
            available = YouTubeTranscriptApi.list_transcripts(video_id)
            chosen = None
            for transcript in available:
                # Log the metadata of every transcript offered for the video.
                print("Video id : ", transcript.video_id)
                print("\tlanguage : {} , language code : {}".format(
                    transcript.language, transcript.language_code))
                print("\tis_generated: {}, is_translatable: {}".format(
                    transcript.is_generated, transcript.is_translatable))
                if (transcript.language_code != 'en'
                        or transcript.is_generated is not False):
                    continue
                # Later matches overwrite earlier ones.
                chosen = transcript.fetch()

            if chosen is None:
                continue
            video_path = os.path.join(channel_dir, video_id + ".json")
            with open(video_path, 'w') as outfile:
                json.dump(chosen, outfile)
        except Exception as e:
            # A failure for one video must not stop the remaining ones.
            print(e)

    print("Finish main")
Beispiel #6
0
class YTParser:
    """Fetch information from YouTube and transcribe downloaded audio.

    Channel data comes from the ``Api`` client; audio is downloaded with
    ``youtube_dl``, resampled with ``ffmpeg`` and fed to a
    ``KaldiRecognizer``.
    """
    def __init__(self, api, audio_path, kaldi_path):
        """Store the configuration and create the API client.

        :param api: API key handed to ``Api``.
        :param audio_path: Directory that downloads are written to and read
            from.
        :param kaldi_path: Path handed to ``Model`` when recognising speech.
        """
        self.api = Api(api_key=api)
        # ``False`` until a download reports the ``finished`` status.
        self.filename = False
        # Set by ``_downloaded_data`` once a download is available.
        self.description_file = None
        self.path = audio_path
        self.kaldi_path = kaldi_path

    def url2id(self, url):
        """Return the part of *url* that follows ``watch?v=``."""
        return url.split('watch?v=')[1]

    def id2url(self, id):
        """Return the watch URL for the video id *id*."""
        return 'https://www.youtube.com/watch?v=' + id

    def get_latest_videos_by_channel_link(self, url):
        """Get links to the latest videos from a named channel link.

        :param url: Channel link containing ``/user/<name>``.
        :return: List of ``{'id': ..., 'url': ...}`` dicts for the video
            entries among the items requested (``count=100``) from the
            channel's uploads playlist.
        """
        channel_name = url.split('/user/')[1]
        channel = self.api.get_channel_info(channel_name=channel_name)
        channel_info = channel.items[0].to_dict()
        uploads_plst = channel_info['contentDetails']['relatedPlaylists'][
            'uploads']
        items = self.api.get_playlist_items(playlist_id=uploads_plst,
                                            count=100)
        return [
            {
                'id': item.snippet.resourceId.videoId,
                'url': self.id2url(item.snippet.resourceId.videoId),
            }
            for item in items.items
            if item.snippet.resourceId.kind == 'youtube#video'
        ]

    def _catch_filename(self, d):
        """Progress hook: remember the ``.mp3`` name of a finished download."""
        if d['status'] == 'finished':
            self.filename = os.path.splitext(d['filename'])[0] + '.mp3'

    def _downloaded_data(self):
        """Return the metadata of the downloaded video.

        Reads the ``.info.json`` file that belongs to ``self.filename``.

        :return: Dict of selected metadata fields, with ``upload_date`` parsed
            into a ``datetime``; ``False`` when no download has finished.
        """
        if self.filename is False:
            return False
        self.description_file = os.path.splitext(
            self.filename)[0] + '.info.json'
        with open(os.path.join(self.path, self.description_file)) as fp:
            description = json.load(fp)
        return {
            'id': description['id'],
            'uploader_url': description['uploader_url'],
            'channel_id': description['channel_id'],
            'channel_url': description['channel_url'],
            'upload_date': datetime.datetime.strptime(
                description['upload_date'], "%Y%m%d"),
            'title': description['title'],
            'description': description['description'],
            'webpage_url': description['webpage_url'],
            'view_count': description['view_count'],
            'like_count': description['like_count'],
            'dislike_count': description['dislike_count'],
            'average_rating': description['average_rating'],
        }

    def video2data(self, url):
        """Get the recognised text of a video from its url.

        Downloads the audio into ``self.path``, runs speech recognition over
        it, then removes the downloaded audio and metadata files.

        :param url: Video URL handed to ``youtube_dl``.
        :return: Tuple ``(full_text, video_description)`` where
            ``video_description`` is the result of ``_downloaded_data``.
        """
        current_dir = os.getcwd()
        os.chdir(self.path)
        try:
            ydl_opts = {
                'format': 'bestaudio/best',
                'writeinfojson': 'info',
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }],
                'progress_hooks': [self._catch_filename],
            }
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            # NOTE(review): fixed delay kept from the original; presumably it
            # waits for post-processing to finish - confirm it is needed.
            time.sleep(20)
            video_description = self._downloaded_data()

            model = Model(self.kaldi_path)
            rec = KaldiRecognizer(model, 16000)

            audio_file = os.path.join(self.path, self.filename)
            command = [
                'ffmpeg', '-loglevel', 'quiet', '-i', audio_file, '-ar',
                str(16_000), '-ac', '1', '-f', 's16le', '-'
            ]

            pieces = []
            # The context manager closes the pipe and reaps ffmpeg even when
            # recognition raises.
            with subprocess.Popen(command,
                                  stdout=subprocess.PIPE) as process:
                for data in iter(lambda: process.stdout.read(4000), b''):
                    if rec.AcceptWaveform(data):
                        pieces.append(json.loads(rec.Result())['text'])
            pieces.append(json.loads(rec.FinalResult())['text'])
            # Every piece is preceded by one space, as before.
            full_text = ''.join(' ' + piece for piece in pieces)

            os.remove(os.path.join(self.path, self.description_file))
            os.remove(audio_file)

            return full_text, video_description
        finally:
            # Always restore the caller's working directory.
            os.chdir(current_dir)