def get_song_url(title, artist): BASE_URL = "https://www.youtube.com/results?search_query=" query_string = artist + ' ' + title query_string = query_string.replace(' ', '+') BASE_URL += query_string response = requests.get(BASE_URL, headers=HEADER) soup = BeautifulSoup(response.text, "lxml") highest_views = 0 highest_href = "" for data in soup.find_all('div', class_='yt-lockup-content'): #iterate through all videos on the page, selecting the most relevant view wise if data.find('ol', class_='yt-lockup-playlist-items') is not None: #particular attribute of playlist items, which we dont want to look at pass elif title not in data.find('a', href=True).get('title').lower(): #extra check to make sure the correct video is selected pass else: #get href and view count, select if video is more 'relevant' than previous href = data.find('a', href=True).get('href') count = data.find('ul', class_='yt-lockup-meta-info').contents[1].text.strip() count = count.split(None, 1)[0] count = int(re.sub('[^\d\.]', '', count)) if count > highest_views: highest_views = count highest_href = href url = "https://www.youtube.com" + highest_href try: Downloader().audio_from_youtube(url, download=True, output_file_path=u'data/audio/'+title+'.mp3' preferred_format=u'mp3' ) except Exception as e: print('Error downloading song audio')
def audio_from_youtube(self, source_url, download=True, output_file_path=None, download_format=None, largest_audio=True): return Downloader().audio_from_youtube( source_url, download=download, output_file_path=output_file_path, download_format=download_format, largest_audio=largest_audio)
def audio_from_youtube(self, source_url, download=True, output_file_path=None, preferred_index=None, largest_audio=True, preferred_format=None): return Downloader().audio_from_youtube( source_url, download=download, output_file_path=output_file_path, preferred_index=preferred_index, largest_audio=largest_audio, preferred_format=preferred_format)
def perform_command(self): """ Perform command and return the appropriate exit code. :rtype: int """ if len(self.actual_arguments) < 2: return self.print_help() source_url = self.actual_arguments[0] output_file_path = self.actual_arguments[1] download = not self.has_option("--list") # largest_audio = True by default or if explicitly given if self.has_option("--largest-audio"): largest_audio = True else: largest_audio = not self.has_option("--smallest-audio") download_format = self.has_option_with_value("--format") try: if download: self.print_info(u"Downloading audio stream from '%s' ..." % source_url) downloader = Downloader(logger=self.logger) result = downloader.audio_from_youtube( source_url, download=True, output_file_path=output_file_path, download_format=download_format, largest_audio=largest_audio, ) self.print_info(u"Downloading audio stream from '%s' ... done" % source_url) self.print_success(u"Downloaded file '%s'" % result) else: self.print_info(u"Downloading stream info from '%s' ..." % source_url) downloader = Downloader(logger=self.logger) result = downloader.audio_from_youtube( source_url, download=False ) self.print_info(u"Downloading stream info from '%s' ... done" % source_url) msg = [] msg.append(u"%s\t%s\t%s\t%s" % ("Format", "Extension", "Bitrate", "Size")) for r in result: filesize = gf.human_readable_number(r["filesize"]) msg.append(u"%s\t%s\t%s\t%s" % (r["format"], r["ext"], r["abr"], filesize)) self.print_generic(u"Available audio streams:") self.print_generic(u"\n".join(msg)) return self.NO_ERROR_EXIT_CODE except ImportError: self.print_no_dependency_error() except Exception as exc: self.print_error(u"An unexpected error occurred while downloading audio from YouTube:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE
def perform_command(self): """ Perform command and return the appropriate exit code. :rtype: int """ if len(self.actual_arguments) < 2: return self.print_help() source_url = self.actual_arguments[0] output_file_path = self.actual_arguments[1] download = not self.has_option("--list") # largest_audio = True by default or if explicitly given if self.has_option("--largest-audio"): largest_audio = True else: largest_audio = not self.has_option("--smallest-audio") preferred_format = self.has_option_with_value("--format") preferred_index = gf.safe_int(self.has_option_with_value("--index"), None) try: if download: self.print_info(u"Downloading audio stream from '%s' ..." % source_url) downloader = Downloader(logger=self.logger) result = downloader.audio_from_youtube( source_url, download=download, output_file_path=output_file_path, preferred_index=preferred_index, largest_audio=largest_audio, preferred_format=preferred_format ) self.print_info(u"Downloading audio stream from '%s' ... done" % source_url) self.print_success(u"Downloaded file '%s'" % result) else: self.print_info(u"Downloading stream info from '%s' ..." % source_url) downloader = Downloader(logger=self.logger) result = downloader.audio_from_youtube( source_url, download=False ) self.print_info(u"Downloading stream info from '%s' ... done" % source_url) msg = [] msg.append(u"%s\t%s\t%s\t%s" % ("Index", "Format", "Bitrate", "Size")) i = 0 for audio in result: ext = audio.extension bitrate = audio.bitrate size = gf.human_readable_number(audio.get_filesize()) msg.append(u"%d\t%s\t%s\t%s" % (i, ext, bitrate, size)) i += 1 self.print_generic(u"Available audio streams:") self.print_generic(u"\n".join(msg)) return self.NO_ERROR_EXIT_CODE except ImportError: self.print_no_pafy_error() except Exception as exc: self.print_error(u"An unexpected error occurred while downloading audio from YouTube:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE
def perform_command(self): """ Perform command and return the appropriate exit code. :rtype: int """ if len(self.actual_arguments) < 1: return self.print_help() if self.has_option([u"-e", u"--examples"]): return self.print_examples(False) if self.has_option(u"--examples-all"): return self.print_examples(True) if self.has_option([u"--list-parameters"]): return self.print_parameters() parameter = self.has_option_with_value(u"--list-values") if parameter is not None: return self.print_values(parameter) elif self.has_option(u"--list-values"): return self.print_values(u"?") # NOTE list() is needed for Python3, where keys() is not a list! demo = self.has_option(list(self.DEMOS.keys())) demo_parameters = u"" download_from_youtube = self.has_option([u"-y", u"--youtube"]) largest_audio = self.has_option(u"--largest-audio") keep_audio = self.has_option(u"--keep-audio") output_html = self.has_option(u"--output-html") validate = not self.has_option(u"--skip-validator") print_faster_rate = self.has_option(u"--faster-rate") print_rates = self.has_option(u"--rates") print_zero = self.has_option(u"--zero") if demo: validate = False for key in self.DEMOS: if self.has_option(key): demo_parameters = self.DEMOS[key] audio_file_path = demo_parameters[u"audio"] text_file_path = demo_parameters[u"text"] config_string = demo_parameters[u"config"] sync_map_file_path = demo_parameters[u"syncmap"] # TODO allow injecting rconf options directly from DEMOS options field if key == u"--example-cewsubprocess": self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True elif key == u"--example-ctw-espeak": self.rconf[RuntimeConfiguration.TTS] = "custom" self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK elif key == u"--example-ctw-speect": self.rconf[RuntimeConfiguration.TTS] = "custom" self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT elif key == u"--example-festival": self.rconf[RuntimeConfiguration.TTS] = "festival" self.rconf[RuntimeConfiguration.TTS_PATH] = "text2wave" elif key == u"--example-mws": self.rconf[RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500" self.rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500" elif key == u"--example-faster-rate": print_faster_rate = True elif key == u"--example-no-zero": print_zero = True elif key == u"--example-py": self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False elif key == u"--example-rates": print_rates = True elif key == u"--example-youtube": download_from_youtube = True break else: if len(self.actual_arguments) < 4: return self.print_help() audio_file_path = self.actual_arguments[0] text_file_path = self.actual_arguments[1] config_string = self.actual_arguments[2] sync_map_file_path = self.actual_arguments[3] html_file_path = None if output_html: keep_audio = True html_file_path = sync_map_file_path + u".html" if download_from_youtube: youtube_url = audio_file_path if (not download_from_youtube) and (not self.check_input_file(audio_file_path)): return self.ERROR_EXIT_CODE if not self.check_input_file(text_file_path): return self.ERROR_EXIT_CODE if not self.check_output_file(sync_map_file_path): return self.ERROR_EXIT_CODE if (html_file_path is not None) and (not self.check_output_file(html_file_path)): return self.ERROR_EXIT_CODE self.check_c_extensions() if demo: msg = [] msg.append(u"Running example task with arguments:") if download_from_youtube: msg.append(u" YouTube URL: %s" % youtube_url) else: msg.append(u" Audio file: %s" % audio_file_path) msg.append(u" Text file: %s" % text_file_path) msg.append(u" Config string: %s" % config_string) msg.append(u" Sync map file: %s" % sync_map_file_path) if len(demo_parameters[u"options"]) > 0: msg.append(u" Options: %s" % demo_parameters[u"options"]) self.print_info(u"\n".join(msg)) if validate: self.print_info(u"Validating config string (specify --skip-validator to bypass)...") validator = Validator(logger=self.logger) result = validator.check_configuration_string(config_string, is_job=False, external_name=True) if not result.passed: self.print_error(u"The given config string is not valid:") self.print_generic(result.pretty_print()) return self.ERROR_EXIT_CODE self.print_info(u"Validating config string... done") if download_from_youtube: try: self.print_info(u"Downloading audio from '%s' ..." % youtube_url) downloader = Downloader(logger=self.logger) audio_file_path = downloader.audio_from_youtube( youtube_url, download=True, output_file_path=None, largest_audio=largest_audio ) self.print_info(u"Downloading audio from '%s' ... done" % youtube_url) except ImportError: self.print_no_pafy_error() return self.ERROR_EXIT_CODE except Exception as exc: self.print_error(u"An unexpected error occurred while downloading audio from YouTube:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE try: self.print_info(u"Creating task...") task = Task(config_string, logger=self.logger) task.audio_file_path_absolute = audio_file_path task.text_file_path_absolute = text_file_path task.sync_map_file_path_absolute = sync_map_file_path self.print_info(u"Creating task... done") except Exception as exc: self.print_error(u"An unexpected error occurred while creating the task:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE try: self.print_info(u"Executing task...") executor = ExecuteTask(task=task, rconf=self.rconf, logger=self.logger) executor.execute() self.print_info(u"Executing task... done") except Exception as exc: self.print_error(u"An unexpected error occurred while executing the task:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE try: self.print_info(u"Creating output sync map file...") path = task.output_sync_map_file() self.print_info(u"Creating output sync map file... done") self.print_success(u"Created file '%s'" % path) except Exception as exc: self.print_error(u"An unexpected error occurred while writing the sync map file:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE if output_html: try: parameters = {} parameters[gc.PPN_TASK_OS_FILE_FORMAT] = task.configuration["o_format"] parameters[gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF] = task.configuration["o_smil_audio_ref"] parameters[gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF] = task.configuration["o_smil_page_ref"] self.print_info(u"Creating output HTML file...") task.sync_map.output_html_for_tuning(audio_file_path, html_file_path, parameters) self.print_info(u"Creating output HTML file... done") self.print_success(u"Created file '%s'" % html_file_path) except Exception as exc: self.print_error(u"An unexpected error occurred while writing the HTML file:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE if download_from_youtube: if keep_audio: self.print_info(u"Option --keep-audio set: keeping downloaded file '%s'" % audio_file_path) else: gf.delete_file(None, audio_file_path) if print_zero: zero_duration = [l for l in task.sync_map.fragments_tree.vleaves_not_empty if l.begin == l.end] if len(zero_duration) > 0: self.print_warning(u"Fragments with zero duration:") for fragment in zero_duration: self.print_generic(u" %s" % fragment) if print_rates: self.print_info(u"Fragments with rates:") for fragment in task.sync_map.fragments_tree.vleaves_not_empty: self.print_generic(u" %s (rate: %.3f chars/s)" % (fragment, fragment.rate)) if print_faster_rate: max_rate = task.configuration["aba_rate_value"] if max_rate is not None: faster = [l for l in task.sync_map.fragments_tree.vleaves_not_empty if l.rate >= max_rate + Decimal("0.001")] if len(faster) > 0: self.print_warning(u"Fragments with rate greater than %.3f:" % max_rate) for fragment in faster: self.print_generic(u" %s (rate: %.3f chars/s)" % (fragment, fragment.rate)) return self.NO_ERROR_EXIT_CODE
def perform_command(self): """ Perform command and return the appropriate exit code. :rtype: int """ if len(self.actual_arguments) < 1: return self.print_help() if self.has_option([u"-e", u"--examples"]): return self.print_examples(False) if self.has_option(u"--examples-all"): return self.print_examples(True) if self.has_option([u"--list-parameters"]): return self.print_parameters() parameter = self.has_option_with_value(u"--list-values") if parameter is not None: return self.print_values(parameter) elif self.has_option(u"--list-values"): return self.print_values(u"?") # NOTE list() is needed for Python3, where keys() is not a list! demo = self.has_option(list(self.DEMOS.keys())) demo_parameters = u"" download_from_youtube = self.has_option([u"-y", u"--youtube"]) largest_audio = self.has_option(u"--largest-audio") keep_audio = self.has_option(u"--keep-audio") output_html = self.has_option(u"--output-html") validate = not self.has_option(u"--skip-validator") print_faster_rate = self.has_option(u"--faster-rate") print_rates = self.has_option(u"--rates") print_zero = self.has_option(u"--zero") if demo: validate = False for key in self.DEMOS: if self.has_option(key): demo_parameters = self.DEMOS[key] audio_file_path = demo_parameters[u"audio"] text_file_path = demo_parameters[u"text"] config_string = demo_parameters[u"config"] sync_map_file_path = demo_parameters[u"syncmap"] # TODO allow injecting rconf options directly from DEMOS options field if key == u"--example-cewsubprocess": self.rconf[ RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True elif key == u"--example-ctw-espeak": self.rconf[RuntimeConfiguration.TTS] = "custom" self.rconf[ RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK elif key == u"--example-ctw-speect": self.rconf[RuntimeConfiguration.TTS] = "custom" self.rconf[ RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT elif key == u"--example-festival": self.rconf[RuntimeConfiguration.TTS] = "festival" elif key == u"--example-mws": self.rconf[ RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500" self.rconf[ RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500" elif key == u"--example-multilevel-tts": self.rconf[RuntimeConfiguration.TTS_L1] = "festival" self.rconf[RuntimeConfiguration.TTS_L2] = "festival" self.rconf[RuntimeConfiguration.TTS_L3] = "espeak" elif key == u"--example-words-festival-cache": self.rconf[RuntimeConfiguration.TTS] = "festival" self.rconf[RuntimeConfiguration.TTS_CACHE] = True elif key == u"--example-faster-rate": print_faster_rate = True elif key == u"--example-no-zero": print_zero = True elif key == u"--example-py": self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False elif key == u"--example-rates": print_rates = True elif key == u"--example-youtube": download_from_youtube = True break else: if len(self.actual_arguments) < 4: return self.print_help() audio_file_path = self.actual_arguments[0] text_file_path = self.actual_arguments[1] config_string = self.actual_arguments[2] sync_map_file_path = self.actual_arguments[3] html_file_path = None if output_html: keep_audio = True html_file_path = sync_map_file_path + u".html" if download_from_youtube: youtube_url = audio_file_path if (not download_from_youtube) and ( not self.check_input_file(audio_file_path)): return self.ERROR_EXIT_CODE if not self.check_input_file(text_file_path): return self.ERROR_EXIT_CODE if not self.check_output_file(sync_map_file_path): return self.ERROR_EXIT_CODE if (html_file_path is not None) and (not self.check_output_file(html_file_path)): return self.ERROR_EXIT_CODE self.check_c_extensions() if demo: msg = [] msg.append(u"Running example task with arguments:") if download_from_youtube: msg.append(u" YouTube URL: %s" % youtube_url) else: msg.append(u" Audio file: %s" % audio_file_path) msg.append(u" Text file: %s" % text_file_path) msg.append(u" Config string: %s" % config_string) msg.append(u" Sync map file: %s" % sync_map_file_path) if len(demo_parameters[u"options"]) > 0: msg.append(u" Options: %s" % demo_parameters[u"options"]) self.print_info(u"\n".join(msg)) if validate: self.print_info( u"Validating config string (specify --skip-validator to bypass)..." ) validator = Validator(logger=self.logger) result = validator.check_configuration_string(config_string, is_job=False, external_name=True) if not result.passed: self.print_error(u"The given config string is not valid:") self.print_generic(result.pretty_print()) return self.ERROR_EXIT_CODE self.print_info(u"Validating config string... done") if download_from_youtube: try: self.print_info(u"Downloading audio from '%s' ..." % youtube_url) downloader = Downloader(logger=self.logger) audio_file_path = downloader.audio_from_youtube( youtube_url, download=True, output_file_path=None, largest_audio=largest_audio) self.print_info(u"Downloading audio from '%s' ... done" % youtube_url) except ImportError: self.print_no_pafy_error() return self.ERROR_EXIT_CODE except Exception as exc: self.print_error( u"An unexpected error occurred while downloading audio from YouTube:" ) self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE else: audio_extension = gf.file_extension(audio_file_path) if audio_extension.lower() not in AudioFile.FILE_EXTENSIONS: self.print_warning( u"Your audio file path has extension '%s', which is uncommon for an audio file." % audio_extension) self.print_warning( u"Attempting at executing your Task anyway.") self.print_warning( u"If it fails, you might have swapped the first two arguments." ) self.print_warning( u"The audio file path should be the first argument, the text file path the second." ) try: self.print_info(u"Creating task...") task = Task(config_string, logger=self.logger) task.audio_file_path_absolute = audio_file_path task.text_file_path_absolute = text_file_path task.sync_map_file_path_absolute = sync_map_file_path self.print_info(u"Creating task... done") except Exception as exc: self.print_error( u"An unexpected error occurred while creating the task:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE try: self.print_info(u"Executing task...") executor = ExecuteTask(task=task, rconf=self.rconf, logger=self.logger) executor.execute() self.print_info(u"Executing task... done") except Exception as exc: self.print_error( u"An unexpected error occurred while executing the task:") self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE try: self.print_info(u"Creating output sync map file...") path = task.output_sync_map_file() self.print_info(u"Creating output sync map file... done") self.print_success(u"Created file '%s'" % path) except Exception as exc: self.print_error( u"An unexpected error occurred while writing the sync map file:" ) self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE if output_html: try: parameters = {} parameters[gc.PPN_TASK_OS_FILE_FORMAT] = task.configuration[ "o_format"] parameters[ gc.PPN_TASK_OS_FILE_EAF_AUDIO_REF] = task.configuration[ "o_eaf_audio_ref"] parameters[ gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF] = task.configuration[ "o_smil_audio_ref"] parameters[ gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF] = task.configuration[ "o_smil_page_ref"] self.print_info(u"Creating output HTML file...") task.sync_map.output_html_for_tuning(audio_file_path, html_file_path, parameters) self.print_info(u"Creating output HTML file... done") self.print_success(u"Created file '%s'" % html_file_path) except Exception as exc: self.print_error( u"An unexpected error occurred while writing the HTML file:" ) self.print_error(u"%s" % exc) return self.ERROR_EXIT_CODE if download_from_youtube: if keep_audio: self.print_info( u"Option --keep-audio set: keeping downloaded file '%s'" % audio_file_path) else: gf.delete_file(None, audio_file_path) if print_zero: zero_duration = [ l for l in task.sync_map.fragments_tree.vleaves_not_empty if l.begin == l.end ] if len(zero_duration) > 0: self.print_warning(u"Fragments with zero duration:") for fragment in zero_duration: self.print_generic(u" %s" % fragment) if print_rates: self.print_info(u"Fragments with rates:") for fragment in task.sync_map.fragments_tree.vleaves_not_empty: self.print_generic(u" %s (rate: %.3f chars/s)" % (fragment, fragment.rate)) if print_faster_rate: max_rate = task.configuration["aba_rate_value"] if max_rate is not None: faster = [ l for l in task.sync_map.fragments_tree.vleaves_not_empty if l.rate >= max_rate + Decimal("0.001") ] if len(faster) > 0: self.print_warning( u"Fragments with rate greater than %.3f:" % max_rate) for fragment in faster: self.print_generic(u" %s (rate: %.3f chars/s)" % (fragment, fragment.rate)) return self.NO_ERROR_EXIT_CODE