def process_play_list_step(message):
    """Build lessons from a YouTube playlist named in a bot message.

    The message text must have the form ``<playlist id>:<course title>``;
    both parts must be longer than five characters. Every video of the
    playlist that can still be fetched becomes a ``Lesson`` and the list is
    handed to ``loginAndUpdate``. Progress and errors are reported back to
    the chat through ``bot``.

    Args:
        message: the incoming bot message (``.text`` and ``.chat.id`` are read).
    """
    try:
        text = message.text
        if ":" not in text:
            bot.reply_to(message, 'Wrong message format')
            return

        # Split only on the first colon so a title may itself contain ':'.
        plaid, title = str(text).split(':', 1)
        if len(plaid) <= 5 or len(title) <= 5:
            bot.reply_to(message, 'Wrong message format')
            return

        api = Api(api_key=youtube_api_key)
        videos = api.get_playlist_items(playlist_id=plaid, count=1000).items
        if len(videos) <= 0:
            bot.send_message(message.chat.id,
                             "No Vidoes found for playlist = " + str(title))
            return

        bot.send_message(
            message.chat.id,
            "Found " + str(len(videos)) + " videos from youtube")

        real_video_count = len(videos)
        count = 0
        lessons = []
        for video in videos:
            video_id = video.snippet.resourceId.videoId
            video_by_id = api.get_video_by_id(
                video_id=video_id,
                parts=('snippet', 'contentDetails', 'statistics'))
            if len(video_by_id.items) <= 0:
                # No details returned for this entry: skip it.
                real_video_count = real_video_count - 1
                continue

            count = count + 1
            item = video_by_id.items[0]
            # Use a separate name: reassigning ``title`` here made every
            # lesson title contain all the previous lesson titles.
            lesson_title = (title + " " + formatLessonName(count) + " " +
                            item.snippet.title)
            time_val = isodate.parse_duration(item.contentDetails.duration)
            code = getYoutubeEmbedCode(video_id)
            lessons.append(Lesson(lesson_title, code, time_val))

        bot.send_message(
            message.chat.id,
            "Total avalibale vidoe count = " + str(real_video_count))

        if len(lessons) <= 0:
            bot.send_message(message.chat.id, "No lesson found!")
            # Nothing to upload, so do not start the login/update step.
            return

        loginAndUpdate(lessons, message, plaid)
    except Exception as e:
        # Top-level handler boundary: report any failure back to the user.
        bot.reply_to(message, str(e))
def fetchYoutubeData():
    """Fetch channel and recent-upload data from YouTube and store it.

    Reads ``YOUTUBE_DATA_API_KEY`` and ``YOUTUBE_CHANNEL_ID`` from the
    environment, requests the channel info and up to 30 items of its
    uploads playlist, and saves one document to ``youtubeChannelData`` and
    one to ``youtubeVideoData`` via ``saveDataToMongoDB``.

    Returns:
        The value returned by the first ``saveDataToMongoDB`` call, or
        ``None`` if an error occurred before that call completed.
    """
    processedChannelDataID = None
    try:
        successMessage('- Gathering youtube channel & video data...')
        api = Api(api_key=os.getenv('YOUTUBE_DATA_API_KEY'))
        channelById = api.get_channel_info(
            channel_id=os.getenv('YOUTUBE_CHANNEL_ID'))
        successMessage('- Fetched youtube channel & video data...')

        channel = channelById.items[0]
        uploadsPlaylistId = channel.contentDetails.relatedPlaylists.uploads
        allChannelVideos = api.get_playlist_items(
            playlist_id=uploadsPlaylistId, count=30, limit=30)

        successMessage('- Constructing youtube channel & video data...')
        processedData = [
            {
                "videoUrl": video.contentDetails.videoId,
                # The video's own title; the previous code stored the
                # channel title under this key.
                "videoTitle": video.snippet.title,
                "videoDescription": video.snippet.description,
            }
            for video in allChannelVideos.items
        ]

        successMessage('- Storing youtube video & channel data...')
        # A channel without keywords must not abort the whole step.
        keywords = channel.brandingSettings.channel.keywords or ""
        processedChannelDataID = saveDataToMongoDB(
            {
                "thumbnail": channel.snippet.thumbnails.high.url,
                "channelName": channel.snippet.title,
                "channelDescription": channel.snippet.description,
                "keywords": keywords.split(),
                "resetAt": round(time.time())
            }, "youtubeChannelData")
        saveDataToMongoDB(
            {
                "_id": processedChannelDataID,
                "channelName": channel.snippet.title,
                "videos": processedData,
                "resetAt": round(time.time()),
                "hasBeenProcessed": False
            }, "youtubeVideoData")
        successMessage('- Completed storing youtube video & channel data...')
    except Exception as exc:
        # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit still
        # propagate; include the cause so failures can be diagnosed.
        errorMessage('- An exception occurred: ' + str(exc))
    else:
        successMessage('- Completed youtube data step... ')
    return processedChannelDataID
def random_video(mood):
    """Return the watch URL of a randomly chosen video for ``mood``.

    The playlist id is looked up in ``playlists_by_mood``; only playlist
    entries whose resource kind is ``youtube#video`` are considered.
    """
    client = Api(api_key=apikey)
    entries = client.get_playlist_items(
        playlist_id=playlists_by_mood[mood], count=None).items
    video_ids = [
        entry.snippet.resourceId.videoId
        for entry in entries
        if entry.snippet.resourceId.kind == 'youtube#video'
    ]
    picked = video_ids[random.randint(0, len(video_ids) - 1)]
    return 'https://www.youtube.com/watch?v=' + picked
def get_music_titles(playlistID):
    """Return the upper-cased titles of all items in a YouTube playlist.

    The resulting list is also printed before it is returned.
    """
    client = Api(api_key=YOUTUBE_KEY)
    response = client.get_playlist_items(playlist_id=playlistID, count=None)
    list_music = [entry.snippet.title.upper() for entry in response.items]
    print(list_music)
    return list_music
def GetFridayFeature(ctx):
    """Build a Discord embed for a randomly picked video of a fixed playlist.

    ``ctx`` is accepted but not used by this function. The chosen index and
    the video title are printed for debugging.
    """
    client = Api(api_key=API)
    entries = client.get_playlist_items(
        playlist_id="PLLUkxbIkknLuK6BLdOs-QDAJiDQTM7Xei", count=None).items

    number = randrange(len(entries))
    print(number)
    video = entries[number]
    print(video.snippet.title)

    watch_url = "https://www.youtube.com/watch?v=" + video.contentDetails.videoId
    embedVar = discord.Embed(title=video.snippet.title,
                             url=watch_url,
                             color=0x0ed0f1)
    embedVar.add_field(name="Published",
                       value=video.contentDetails.videoPublishedAt)
    embedVar.set_image(url=video.snippet.thumbnails.standard.url)
    return embedVar
# pytz zones must be attached with ``localize()``. Passing a pytz zone through
# the ``tzinfo=`` constructor argument silently picks the zone's first
# historical offset (LMT, +09:19 for Asia/Tokyo), shifting the range by
# 19 minutes relative to the ``astimezone`` result it is compared with below.
tokyo_tz = pytz.timezone("Asia/Tokyo")
start_range = tokyo_tz.localize(
    datetime.datetime(start_date.year, start_date.month, start_date.day,
                      0, 0, 0))
end_range = tokyo_tz.localize(
    datetime.datetime(end_date.year, end_date.month, end_date.day,
                      23, 59, 59))

date_str = start_date.strftime("%Y-%m-%d")
# newline='' is what the csv module requires of files it writes to; without
# it, extra blank rows appear on platforms that translate '\n'.
csv_file = open('./stream_stats_' + date_str + '.csv', 'w', newline='')
w = csv.writer(csv_file, delimiter=',')
w.writerow(['streamer', 'title', 'thumbnail', 'chat_users',
            'eng_tl_msg_per_min', 'jp_tl_msg_per_min', 'not_jp_user_%',
            'adj_kusa_per_min', 'humor_score', 'tete_per_usr', 'faq_count',
            'marry_per_usr', 'most_kusa_tstamp', 'most_humor_tstamp',
            'most_faq_tstamp', 'most_tete_tstamp'])
csv_file.flush()


def is_emoji(char):
    """Return True if ``char`` is a key of ``emoji.UNICODE_EMOJI``."""
    return char in emoji.UNICODE_EMOJI


# Get video playlists from the YouTube API: NAMES and PLAYLIST_IDS are walked
# in parallel, with ``pl_idx`` continuing from its current value.
for name in NAMES:
    playlists[name] = api.get_playlist_items(
        playlist_id=PLAYLIST_IDS[pl_idx], count=50)
    pl_idx += 1

for key, val in playlists.items():
    for vid in val.items:
        pub_date = vid.contentDetails.videoPublishedAt
        pub_dt = dateutil.parser.isoparse(pub_date).astimezone(tokyo_tz)
        if start_range <= pub_dt <= end_range:
            # Per-video counters, reset for every video inside the range.
            kusa_count = 0
            tete_count = 0
            faq_count = 0
            marry_count = 0
            humor_count = 0
            all_users = []
            not_jp_user = []
import os
import random

from pyyoutube import Api

# NOTE(review): an API key is committed in this file. Set YOUTUBE_API_KEY to
# override it; the literal is kept only as a backward-compatible fallback and
# should be rotated and removed.
apikey = os.environ.get('YOUTUBE_API_KEY',
                        'AIzaSyC053n6_uqpUiOd1X4YfD0Vkx1QcTL-0R8')
playlist = 'PL_MH8gOS_ETiNT1NF8B46JYHZe6fXWfVW'

api = Api(api_key=apikey)
playlist_item_by_playlist = api.get_playlist_items(playlist_id=playlist,
                                                   count=None).items

# Keep only real videos; a playlist entry may reference another resource kind.
videos = [
    item.snippet.resourceId.videoId
    for item in playlist_item_by_playlist
    if item.snippet.resourceId.kind == 'youtube#video'
]

if not videos:
    # Previously this surfaced as an opaque ValueError from randint(0, -1).
    raise SystemExit('No videos found in playlist ' + playlist)

random_video = random.choice(videos)
print('https://www.youtube.com/watch?v=' + random_video)
#!/usr/bin/env python3
import os
import urllib.request

import pytube
from googleapiclient.discovery import build
from pyyoutube import Api

# NOTE(review): an API key is committed in this file. Set YOUTUBE_API_KEY to
# override it; the literal is kept only as a backward-compatible fallback and
# should be rotated and removed. It is now defined once and shared by both
# clients instead of being repeated.
api_key = os.environ.get('YOUTUBE_API_KEY',
                         'AIzaSyBagf-_AQk4bISDaXIkyD0cGCPZCLHYHuU')
playListId = "PLt0cfLFa-ZYzZB54dKA6EV2McLY26hNGJ"
downLoadCount = 70

api = Api(api_key=api_key)
playlist_item = api.get_playlist_items(playlist_id=playListId,
                                       count=downLoadCount)  # 222

# Get item id and title
itemList = [[item.snippet.resourceId.videoId, item.snippet.title]
            for item in playlist_item.items]

# Download videos
youtube = build('youtube', 'v3', developerKey=api_key)
itemCounts = len(itemList)
print("==================================")
print(itemCounts, " videos found.")
print("==================================")

count = 0
videoSubFolder = './Video'
for vid, vtitle in itemList:
    count = count + 1
    itemurl = 'https://www.youtube.com/watch?v=' + vid
    print(itemurl)
def main(channel_name="YaleCourses", load_from_file=False):
    """Save the manually created English transcripts of a channel's videos.

    Video ids are gathered from every playlist of the channel (or re-read
    from the id file written by an earlier run) and, for each video, the
    last non-generated transcript with language code ``en`` is written as
    JSON below ``<raw_dir>/transcripts/<channel_name>/``.

    Args:
        channel_name: YouTube channel (user) name to process.
        load_from_file: when True, read the video ids from
            ``<raw_dir>/video_ids/<channel_name>.txt`` instead of querying
            the YouTube API.
    """
    # NOTE(review): an API key is committed in this file. Set YOUTUBE_API_KEY
    # to override it; the literal is kept only as a backward-compatible
    # fallback and should be rotated and removed.
    api = Api(api_key=os.environ.get(
        "YOUTUBE_API_KEY", "AIzaSyCw0j0aCe0y_T42q3RLBoVtGXlTOMGGaSM"))

    print("Setup dir to save the transcripts of %s channel" % (channel_name))
    channel_dir = os.path.join(raw_dir, "transcripts", channel_name)
    channel_id_file = os.path.join(raw_dir, "video_ids",
                                   channel_name + ".txt")
    if os.path.exists(channel_dir):
        print("\tThe folder of the channel %s is already exist\n"
              "\tdelete it before executing this script -"
              "we don't want to override your data" % (channel_name))
        return

    # Since Google blocks the retrieval of the ids after a while, the ids are
    # written to a file as a buffer for safety.
    if load_from_file is False:
        print("Retriving %s channel information" % (channel_name))
        channel_by_name = api.get_channel_info(channel_name=channel_name)
        if not channel_by_name.items:
            # Fail with a clear message instead of an opaque error on
            # ``items[0]``.
            print("\tNo channel found for the name %s" % (channel_name))
            return

        print("\tFetch all the playlists")
        playlists_by_channel = api.get_playlists(
            channel_id=channel_by_name.items[0].id, count=None)

        print("\tFetch all the videos of the playlist")
        videos_ids = []
        for playlist in playlists_by_channel.items:
            print("\t\tFetching videos IDs of playlist %s" % (playlist.id))
            playlist_videos = api.get_playlist_items(playlist_id=playlist.id,
                                                     count=None)
            videos_ids.extend(video.snippet.resourceId.videoId
                              for video in playlist_videos.items)

        print("We gathered now %s videos, saving save to file" %
              (len(videos_ids)))
        # The id buffer directory may not exist on a first run.
        os.makedirs(os.path.dirname(channel_id_file), exist_ok=True)
        with open(channel_id_file, 'w') as f:
            json.dump(videos_ids, f)
    else:
        with open(channel_id_file, 'r') as f:
            videos_ids = json.load(f)

    # Create the output folder only once the ids are known, so an early
    # failure does not leave an empty folder that blocks the next run.
    # makedirs also creates a missing "transcripts" parent directory.
    os.makedirs(channel_dir)

    print("Save %s channel videos transcripts" % (channel_name))
    for video_id in videos_ids:
        print("The video ID is %s" % (video_id))
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            video_transcripts = None
            for transcript in transcript_list:
                # The Transcript object provides metadata properties.
                print("Video id : ", transcript.video_id)
                print("\tlanguage : %s , language code : %s" %
                      (transcript.language, transcript.language_code))
                print("\tis_generated: %s, is_translatable: %s" %
                      (transcript.is_generated, transcript.is_translatable))
                if (transcript.language_code == 'en'
                        and transcript.is_generated is False):
                    video_transcripts = transcript.fetch()

            if video_transcripts is not None:
                video_path = os.path.join(channel_dir, video_id + ".json")
                with open(video_path, 'w') as outfile:
                    json.dump(video_transcripts, outfile)
        except Exception as e:
            # Best effort: a failure on one video must not stop the others.
            print(e)

    print("Finish main")
from pyyoutube import Api
import pytube
import urllib.request
import os
import time
from googleapiclient.discovery import build

# ---- Environment variable settings ----
APIKey = 'you api key'
PlayListId = 'The playlist id'
PlayListContentNums = 200  # The number of videos in this playlist
# ---- End of environment variable settings ----

api = Api(api_key=APIKey)
playlist_item = api.get_playlist_items(playlist_id=PlayListId,
                                       count=PlayListContentNums)

# Collect a [video id, title] pair for every playlist entry.
itemList = [[entry.snippet.resourceId.videoId, entry.snippet.title]
            for entry in playlist_item.items]

# Download videos and transfer to MP3
from moviepy.editor import *

itemCounts = len(itemList)
print("==================================")
print(itemCounts, " videos found.")
print("==================================")

count = 0
class YTParser:
    """Fetch information from YouTube and transcribe downloaded audio."""

    def __init__(self, api, audio_path, kaldi_path):
        """Store the settings and create the YouTube API client.

        Args:
            api: YouTube Data API key.
            audio_path: directory the audio and metadata files are
                downloaded into.
                NOTE(review): ``video2data`` changes into this directory and
                then joins it with file names again, so it presumably has to
                be an absolute path - confirm with callers.
            kaldi_path: path handed to ``Model`` for speech recognition.
        """
        self.api = Api(api_key=api)
        # False until a download has finished; then the name of the mp3 file.
        self.filename = False
        self.path = audio_path
        self.kaldi_path = kaldi_path

    def url2id(self, url):
        """Return the video id of a ``...watch?v=<id>`` URL.

        Additional query parameters after the id (``&t=...``, ``&list=...``)
        are dropped.
        """
        return url.split('watch?v=')[1].split('&')[0]

    def id2url(self, id):
        """Return the watch URL for a video id."""
        return 'https://www.youtube.com/watch?v=' + id

    def get_latest_videos_by_channel_link(self, url):
        """Return the latest videos of a channel given its ``/user/`` link.

        Up to 100 items of the channel's uploads playlist are requested.

        Returns:
            A list of ``{'id': ..., 'url': ...}`` dicts, one per entry whose
            resource kind is ``youtube#video``.
        """
        channel_name = url.split('/user/')[1]
        channel_by_id = self.api.get_channel_info(channel_name=channel_name)
        channel_info = channel_by_id.items[0].to_dict()
        uploads_plst = channel_info['contentDetails']['relatedPlaylists'][
            'uploads']
        items = self.api.get_playlist_items(playlist_id=uploads_plst,
                                            count=100)
        return [
            {
                'id': item.snippet.resourceId.videoId,
                'url': self.id2url(item.snippet.resourceId.videoId)
            }
            for item in items.items
            if item.snippet.resourceId.kind == 'youtube#video'
        ]

    def _catch_filename(self, d):
        """Progress hook: remember the mp3 file name of a finished download."""
        if d['status'] == 'finished':
            self.filename = os.path.splitext(d['filename'])[0] + '.mp3'

    def _downloaded_data(self):
        """Return metadata of the downloaded video, or False if none exists.

        Reads the ``.info.json`` file that belongs to ``self.filename`` and
        remembers its name in ``self.description_file``.
        """
        if not self.filename:
            return False
        self.description_file = os.path.splitext(
            self.filename)[0] + '.info.json'
        with open(os.path.join(self.path, self.description_file)) as fp:
            description = json.load(fp)
        return {
            'id': description['id'],
            'uploader_url': description['uploader_url'],
            'channel_id': description['channel_id'],
            'channel_url': description['channel_url'],
            'upload_date': datetime.datetime.strptime(
                description['upload_date'], "%Y%m%d"),
            'title': description['title'],
            'description': description['description'],
            'webpage_url': description['webpage_url'],
            'view_count': description['view_count'],
            'like_count': description['like_count'],
            'dislike_count': description['dislike_count'],
            'average_rating': description['average_rating'],
        }

    def video2data(self, url):
        """Download a video's audio and return its recognized text.

        Args:
            url: URL of the video to download.

        Returns:
            A ``(full_text, video_description)`` tuple: the recognized text
            and the metadata dict produced by ``_downloaded_data``.

        Raises:
            RuntimeError: if the download did not report a finished file.
        """
        current_dir = os.getcwd()
        os.chdir(self.path)
        try:
            # Forget the file of a previous call so that a failed download
            # cannot silently reuse it.
            self.filename = False
            ydl_opts = {
                'format': 'bestaudio/best',
                'writeinfojson': 'info',
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }],
                'progress_hooks': [self._catch_filename],
            }
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
            # NOTE(review): presumably gives post-processing time to finish;
            # confirm whether this wait is still needed.
            time.sleep(20)

            video_description = self._downloaded_data()
            if video_description is False:
                raise RuntimeError('No audio file was downloaded for ' + url)

            model = Model(self.kaldi_path)
            rec = KaldiRecognizer(model, 16000)
            audio_file = os.path.join(self.path, self.filename)

            text_parts = []
            # The context manager closes the pipe and waits for ffmpeg, so
            # no child process or file descriptor is left behind.
            with subprocess.Popen([
                    'ffmpeg', '-loglevel', 'quiet', '-i', audio_file, '-ar',
                    str(16_000), '-ac', '1', '-f', 's16le', '-'
            ], stdout=subprocess.PIPE) as process:
                while True:
                    data = process.stdout.read(4000)
                    if len(data) == 0:
                        break
                    if rec.AcceptWaveform(data):
                        text_parts.append(json.loads(rec.Result())['text'])
            text_parts.append(json.loads(rec.FinalResult())['text'])
            # Every recognized chunk is preceded by a single space.
            full_text = ''.join(' ' + part for part in text_parts)

            os.remove(os.path.join(self.path, self.description_file))
            os.remove(audio_file)
        finally:
            # Always restore the caller's working directory, even on errors.
            os.chdir(current_dir)
        return full_text, video_description
countryCode = MyConfigurations.COUNTRY_CODE playlistIDs = MyConfigurations.PLAYLIST_IDS displayUploader = MyConfigurations.DISPLAY_UPLOADER # Create this folder from the project root if not isdir('output'): mkdir('output') # Then create another nested folder. Example: output/output-2020-09-14T173551 outputFolderPath = join( 'output', 'output-' + strftime('%Y-%m-%dT%H%M%S', localtime())) mkdir(outputFolderPath) # Remember to comment out the individual playlist keys from MyConfigurations if you don't want to loop through them all for key in playlistIDs: playlistVideoItems = api.get_playlist_items( playlist_id=playlistIDs[key], count=None).items currentTimestamp = strftime(' %Y-%m-%dT%H%M%S', localtime()) outputFileName = key + currentTimestamp + '.out' outputFilePath = join(outputFolderPath, outputFileName) with open(outputFilePath, 'w', encoding='utf8') as outfile: for index, playlistVideo in enumerate(playlistVideoItems): try: video = api.get_video_by_id( video_id=playlistVideo.contentDetails.videoId).items[0] if not isVideoBlocked(countryCode, video): outfile.write(video.snippet.title + '\n') if displayUploader:
class EdumaBot():
    """Create lessons in a web admin UI from YouTube playlists.

    Playlists are read from the local file ``playlist``; for every video of
    a playlist a lesson form is filled in and published through the
    Selenium-driven browser.
    """

    def __init__(self):
        """Start the browser and create the YouTube and database helpers."""
        self.driver = webdriver.Chrome()
        self.api = Api(api_key=apikey)
        self.db = DBHelper()

    def login(self):
        """Log in at ``url`` and navigate to the "add new lesson" form."""
        self.driver.get(url)
        time.sleep(1)
        login_btn = self.driver.find_element_by_xpath('//*[@id="wp-submit"]')
        email_field = self.driver.find_element_by_xpath(
            '//*[@id="user_login"]')
        pass_field = self.driver.find_element_by_xpath('//*[@id="user_pass"]')
        email_field.send_keys(username)
        time.sleep(1)
        pass_field.send_keys(password)
        login_btn.click()
        self.driver.find_element_by_xpath(
            '//*[@id="toplevel_page_learn_press"]').click()
        self.driver.find_element_by_xpath(
            '//*[@id="toplevel_page_learn_press"]/ul/li[3]/a').click()
        self.driver.find_element_by_xpath(
            '//*[@id="wpbody-content"]/div[3]/a').click()

    def start(self):
        """Log in and publish the lessons of every playlist in ``playlist``.

        Each line of the file has the form ``<playlist id>[|]<title>``;
        lines of any other shape are ignored.
        """
        self.login()

        playlist_file = "playlist"
        # Read-only access is enough, and ``with`` closes the handle.
        with open(playlist_file, "r") as f:
            entries = self._parse_playlist_lines(f)

        courses = [
            Course(title=str(title), playlistId=str(playlist_id), start=0)
            for playlist_id, title in entries
        ]
        if len(courses) == 0:
            print("no lesson found on file = " + playlist_file)
            return

        print("course found on file " + str(len(courses)))
        for course in courses:
            self._process_course(course)

    def _parse_playlist_lines(self, lines):
        """Return ``(playlist_id, title)`` pairs parsed from playlist lines."""
        parsed = []
        for line in lines:
            vals = line.rstrip('\n').split("[|]")
            if len(vals) == 2:
                parsed.append((vals[0], vals[1]))
        return parsed

    def _process_course(self, course):
        """Publish one lesson per retrievable video of ``course``'s playlist."""
        finsihed = self.db.getFinishedPlayLists(playlist_id=course.playlistId)
        print(finsihed)
        # NOTE(review): a rowcount other than -1 is treated as "already
        # added" - confirm this matches the cursor semantics of DBHelper.
        if finsihed.rowcount != -1:
            print("course already exist titile = " + course.title)
            return

        playlist_item_by_playlist = self.api.get_playlist_items(
            playlist_id=course.playlistId, count=1000)
        videos = playlist_item_by_playlist.items
        print("number of vidoes in playlist = " + str(len(videos)))
        if len(videos) <= 0:
            print("no videos found for playlist = " +
                  str(course.playlistId) + "\n")
            return

        # The lesson number is the video's position in the playlist, so a
        # skipped video leaves a gap in the numbering (unchanged behaviour).
        for position, video in enumerate(videos, start=1):
            video_id = video.snippet.resourceId.videoId
            video_by_id = self.api.get_video_by_id(
                video_id=video_id,
                parts=('snippet', 'contentDetails', 'statistics'))
            if len(video_by_id.items) <= 0:
                print("missed or private vidoe for = " +
                      str(course.playlistId) + "\n")
                continue

            item = video_by_id.items[0]
            title = course.title + " " + self.formatLessonName(
                position) + " " + item.snippet.title
            time_val = isodate.parse_duration(item.contentDetails.duration)
            code = self.getYoutubeEmbedCode(video_id)
            # Pass the parsed duration; the ``time`` module itself used to be
            # handed to Lesson here by mistake.
            lesson = Lesson(title, code, time_val)
            self._publish_lesson(lesson, time_val)

    def _duration_for_form(self, time_val):
        """Return the ``(unit, value)`` pair for the lesson duration fields.

        ``str(time_val)`` is split on ``:``; a non-zero first part selects
        ``'hour'``, otherwise a non-zero second part selects ``'minute'``,
        otherwise one minute is used.
        """
        time_array = str(time_val).split(":")
        print(time_array[0] + " = hour")
        print(time_array[1] + " = minties")
        if time_array[0] not in ("0", "00"):
            return 'hour', time_array[0]
        if time_array[1] not in ("0", "00"):
            return 'minute', time_array[1]
        return 'minute', 1

    def _publish_lesson(self, lesson, time_val):
        """Fill in and publish the lesson form, then open a new empty form."""
        title_field = self.driver.find_element_by_xpath('//*[@id="title"]')
        media_field = self.driver.find_element_by_xpath(
            '//*[@id="_lp_lesson_video_intro"]')
        date_field = self.driver.find_element_by_xpath(
            '//*[@id="_lp_duration"]')
        publish_button = self.driver.find_element_by_xpath(
            '//*[@id="publish"]')
        select = Select(
            self.driver.find_element_by_id('_lp_duration_select'))

        unit, final_time = self._duration_for_form(time_val)
        select.select_by_value(unit)

        title_field.clear()
        title_field.send_keys(lesson.title)
        time.sleep(1)
        media_field.clear()
        media_field.send_keys(lesson.code)
        time.sleep(1)
        date_field.clear()
        date_field.send_keys(final_time)
        time.sleep(2)
        # Click through JavaScript rather than a native click.
        self.driver.execute_script("arguments[0].click();", publish_button)
        self.driver.implicitly_wait(10)
        time.sleep(2)
        # Same link that login() clicks last: opens the next empty form.
        self.driver.find_element_by_xpath(
            '//*[@id="wpbody-content"]/div[3]/a').click()
        self.driver.implicitly_wait(10)

    def formatLessonName(self, count):
        """Return ``count`` as a string left-padded with zeros to 3 digits."""
        if count <= 9:
            return "00" + str(count)
        if count <= 99:
            return "0" + str(count)
        return str(count)

    def getYoutubeEmbedCode(self, videoId):
        """Return the embed code for a video id, produced by ``Embedder``."""
        embedder = Embedder()
        code = embedder("https://www.youtube.com/watch?v=" + str(videoId),
                        width=video_width,
                        height=video_height)
        return code