def execute(self):
    """
    Execute the job, that is, execute all of its tasks.

    Each produced sync map will be stored
    inside the corresponding task object.

    :raises: :class:`~aeneas.executejob.ExecuteJobExecutionError`: if there is a problem during the job execution
    """
    self.log(u"Executing job")

    # NOTE(review): the checks below rely on log_exc raising the given
    # exception class; presumably that is what its third argument requests.
    if self.job is None:
        self.log_exc(u"The job object is None", None, True, ExecuteJobExecutionError)
    if len(self.job) == 0:
        self.log_exc(u"The job has no tasks", None, True, ExecuteJobExecutionError)
    job_max_tasks = self.rconf[RuntimeConfiguration.JOB_MAX_TASKS]
    # a non-positive limit means "no limit"
    if (job_max_tasks > 0) and (len(self.job) > job_max_tasks):
        self.log_exc(
            u"The Job has %d Tasks, more than the maximum allowed (%d)." % (len(self.job), job_max_tasks),
            None,
            True,
            ExecuteJobExecutionError
        )
    self.log([u"Number of tasks: '%d'", len(self.job)])

    for task in self.job.tasks:
        # Bind the name before entering the try block: if reading the
        # custom id itself fails, the handler below must still be able to
        # format its message instead of raising UnboundLocalError (or
        # silently reporting the id of the previous task).
        custom_id = None
        try:
            custom_id = task.configuration["custom_id"]
            self.log([u"Executing task '%s'...", custom_id])
            executor = ExecuteTask(task, rconf=self.rconf, logger=self.logger)
            executor.execute()
            self.log([u"Executing task '%s'... done", custom_id])
        except Exception as exc:
            self.log_exc(u"Error while executing task '%s'" % (custom_id), exc, True, ExecuteJobExecutionError)
        self.log(u"Executing task: succeeded")

    self.log(u"Executing job: succeeded")
def main():
    """
    Command line entry point: align one audio file with one text file.

    Expects four positional arguments in ``sys.argv``: the audio file path,
    the text file path, the task configuration string and the path of the
    sync map file to write. Prints the usage and returns when fewer
    arguments are given.

    Progress and errors are reported on standard output; nothing is returned.
    """
    if len(sys.argv) < 5:
        usage()
        return
    audio_file_path = sys.argv[1]
    text_file_path = sys.argv[2]
    config_string = sys.argv[3]
    sync_map_file_path = sys.argv[4]

    # Single-argument print(...) calls behave identically under Python 2
    # and Python 3, unlike the print statement which only parses on 2.
    print("[INFO] Creating task...")
    task = Task(config_string)
    task.audio_file_path_absolute = audio_file_path
    task.text_file_path_absolute = text_file_path
    task.sync_map_file_path_absolute = sync_map_file_path
    print("[INFO] Creating task... done")

    print("[INFO] Executing task...")
    logger = Logger(tee=False)
    executor = ExecuteTask(task=task, logger=logger)
    result = executor.execute()
    print("[INFO] Executing task... done")

    # a falsy result is treated as a failed execution
    if not result:
        print("[ERRO] An error occurred while executing the task")
        return

    print("[INFO] Creating output container...")
    path = task.output_sync_map_file()
    print("[INFO] Creating output container... done")

    if path is not None:
        print("[INFO] Created %s" % path)
    else:
        print("[ERRO] An error occurred while writing the output sync map file")
def test_execute(self):
    """Align the p001 audio/XHTML fixture and check that a SMIL sync map can be written."""
    task = Task("task_language=en|os_task_file_format=smil|os_task_file_name=p001.smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml|is_text_type=unparsed|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric")
    task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
    task.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"

    # the executor is expected to return a truthy value
    outcome = ExecuteTask(task, logger=Logger(tee=True)).execute()
    self.assertTrue(outcome)

    # the output path is assigned only after the alignment has been computed
    task.sync_map_file_path_absolute = "/tmp/p001.smil"
    written_path = task.output_sync_map_file()
    self.assertNotEqual(written_path, None)
def test_execute(self):
    """Align a plain text fixture using an audio head length and a process length, then write a TXT sync map."""
    task = Task("task_language=en|os_task_file_format=txt|os_task_file_name=output_head.txt|is_text_type=plain|is_audio_file_head_length=11.960|is_audio_file_process_length=31.640")
    task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
    task.text_file_path_absolute = "../aeneas/tests/res/inputtext/sonnet_plain_head_length.txt"

    # the executor is expected to return a truthy value
    outcome = ExecuteTask(task, logger=Logger(tee=True)).execute()
    self.assertTrue(outcome)

    # the output path is assigned only after the alignment has been computed
    task.sync_map_file_path_absolute = "/tmp/output_head_length.txt"
    written_path = task.output_sync_map_file()
    self.assertNotEqual(written_path, None)
def execute(self, config_string, audio_path, text_path):
    """
    Run a task built from ``config_string`` and check its sync map output.

    The audio and text paths are resolved with ``gf.absolute_path``
    relative to this file. The sync map is written to a temporary file,
    which must exist, be returned by ``output_sync_map_file`` and be
    non-empty.

    :param config_string: the task configuration string
    :param audio_path: path of the audio file, relative to this file
    :param text_path: path of the text file, relative to this file
    """
    handler, tmp_path = gf.tmp_file()
    try:
        task = Task(config_string)
        task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
        task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
        executor = ExecuteTask(task)
        executor.execute()
        task.sync_map_file_path_absolute = tmp_path
        result_path = task.output_sync_map_file()
        self.assertIsNotNone(result_path)
        self.assertEqual(result_path, tmp_path)
        self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
    finally:
        # Clean up even when the execution raises or an assertion fails,
        # otherwise every failing test leaves a temporary file behind.
        gf.delete_file(handler, tmp_path)
def createSyncedLyricsFile(lyrics, file):
    """
    Align ``lyrics`` with the audio in ``file`` and write an ``.lrc`` file.

    The lyrics are first dumped to ``tempSync.txt`` in the current working
    directory, then aligned against the audio as French plain text. One
    ``[timestamp]text`` line is written per sync map leaf, next to the
    audio file (same path with its extension replaced by ``.lrc``).

    Updates the module-level counters ``lyricsSynced`` (on success) and
    ``errors`` (on any failure during alignment or writing).

    :param lyrics: the lyrics text to synchronise
    :param file: path of the audio file
    """
    global lyricsSynced, errors
    # local import so this function does not depend on the module header
    import os

    with open("tempSync.txt", "w+") as lyrics_file:
        lyrics_file.write(lyrics)

    config = TaskConfiguration()
    config[gc.PPN_TASK_LANGUAGE] = Language.FRA
    config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
    config[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.AUDH
    task = Task()
    task.configuration = config

    try:
        task.audio_file_path_absolute = file
        task.text_file_path_absolute = "tempSync.txt"
        ExecuteTask(task).execute()

        # splitext copes with extensions of any length (".flac", ".m4a", ...)
        # whereas slicing off the last four characters only fits ".xyz".
        lrc_path = os.path.splitext(file)[0] + ".lrc"
        # the context manager closes the file even if a write fails
        with open(lrc_path, "w+") as synced_lyrics_file:
            for fragment in task.sync_map_leaves():
                # drop the leading "hh:" and the last digit of the timestamp
                timestamp = gf.time_to_hhmmssmmm(fragment.interval.begin, '.')[3:-1]
                synced_lyrics_file.write('[' + timestamp + ']' + fragment.text + '\n')

        print("   Sync Added", sep=' ', end='', flush=True)
        lyricsSynced += 1
    except Exception:
        # deliberate best effort: count the failure and carry on
        errors += 1
        print("   Sync error", sep=' ', end='', flush=True)
def align(text_path, audio_path, align_out_path, word_align=True):
    """
    Align a text file with an audio file and save the sync map as JSON.

    :param text_path: path of the input text file
    :param audio_path: path of the input audio file
    :param align_out_path: path of the JSON sync map to write
    :param word_align: if true, request a three-level ``mplain`` alignment
                       with non-speech masking enabled in the runtime
                       configuration; otherwise a ``plain`` alignment
    """
    runtime_conf = None
    settings = u"task_language=hi" + "|os_task_file_format=json"
    if not word_align:
        settings += "|is_text_type=plain"
    else:
        settings += "|os_task_file_levels=3" + "|is_text_type=mplain"
        runtime_conf = RuntimeConfiguration()
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH_L3] = True

    task = Task(config_string=settings)
    task.text_file_path_absolute = text_path
    task.audio_file_path_absolute = audio_path
    task.sync_map_file_path_absolute = align_out_path

    # compute the alignment, then have the task write it out
    ExecuteTask(task, rconf=runtime_conf).execute()
    task.output_sync_map_file()

    # Re-serialise the file without ASCII escaping so that non-ASCII
    # characters appear literally instead of as \uXXXX sequences.
    with open(align_out_path, 'r', encoding='utf8') as source:
        sync_map = json.load(source)
    with open(align_out_path, 'w', encoding='utf8') as target:
        json.dump(sync_map, target, ensure_ascii=False, indent=2)
def get_align(audio_file, text_file, lang):
    """
    Align ``text_file`` with ``audio_file`` and return the non-empty fragments.

    Uses the ``config`` and ``task`` objects defined outside this function.

    :param audio_file: path of the audio file
    :param text_file: path of the text file
    :param lang: one of ``'FRA'``, ``'ARA'``, ``'DEU'``, ``'CMN'``;
                 any other value selects English
    :returns: a dict with an ``'alignment'`` list of
              ``{'sentence': text, 'time': [begin, end]}`` entries and an
              empty ``'uri'`` string
    """
    # pick the attribute of Language to use, English being the fallback
    language_name = 'ENG'
    for code in ('FRA', 'ARA', 'DEU', 'CMN'):
        if lang == code:
            language_name = code
            break
    config[gc.PPN_TASK_LANGUAGE] = getattr(Language, language_name)

    task.configuration = config
    task.audio_file_path_absolute = audio_file
    task.text_file_path_absolute = text_file

    ExecuteTask(task).execute()

    result = {'alignment': [], 'uri': ''}
    for fragment in task.sync_map_leaves():
        sentence = fragment.text
        if sentence == '':
            continue
        # pretty_print is tab separated; fields 1 and 2 are parsed as the
        # begin and end times
        columns = fragment.pretty_print.split('\t')
        result['alignment'].append({
            'sentence': sentence,
            'time': [float(columns[1]), float(columns[2])]
        })
    return result
def total_FA(soundfile, mylines, myhead, mytail, config=None):
    """Runs Aeneas as a library.

    This function isn't in use, currently, as we haven't managed to get
    reliable results in this way.

    :param soundfile: path of the audio file
    :param mylines: iterable of ``(identifier, text)`` pairs, one per fragment
    :param myhead: value for ``is_audio_file_head_length``
    :param mytail: value for ``is_audio_file_tail_length``
    :param config: optional extra configuration, appended after a ``|``
    :returns: the ``fragments`` of the task's sync map
    """
    config_string = (
        u"task_language=nor|is_text_type=plain|os_task_file_format=json|is_audio_file_head_length=%s|is_audio_file_tail_length=%s"
        % (myhead, mytail))
    if config is not None:
        # one-element tuple so that a tuple-valued config is formatted as a
        # single value
        config_string += u"|%s" % (config,)
    print(config_string)

    task = Task(config_string=config_string)
    print(task)
    task.audio_file_path_absolute = soundfile

    # build the text in memory instead of pointing the task at a file
    fragments_text = TextFile()
    print(fragments_text)
    for identifier, frag_text in mylines:
        fragment = TextFragment(identifier, Language.NOR, frag_text, frag_text)
        fragments_text.add_fragment(fragment)
    task.text_file = fragments_text
    print(len(task.text_file))

    ExecuteTask(task).execute()
    return task.sync_map.fragments
def force_align(audio_path,
                text_path,
                output_path,
                min_length=1.0,
                max_length=10.0,
                logging=logging):
    """
    Align text with audio and dump the fragments of acceptable length as JSON.

    :param audio_path: path of the audio file
    :param text_path: path of the plain text file
    :param output_path: path of the JSON file to write
    :param min_length: fragments must be strictly longer than this
    :param max_length: fragments must be strictly shorter than this
    :param logging: object providing ``info()``; defaults to the
                    ``logging`` module
    """
    task = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = audio_path
    task.text_file_path_absolute = text_path
    task.sync_map_file_path_absolute = output_path

    logging.info("Aligning audio and text...")
    ExecuteTask(task).execute()
    logging.info("Aligned audio and text")

    # keep only fragments with text whose length lies strictly in between
    sentences = [
        {
            "start": float(leaf.begin),
            "end": float(leaf.end),
            "length": float(leaf.length),
            "text": leaf.text,
        }
        for leaf in task.sync_map_leaves()
        if leaf.length > min_length and leaf.length < max_length and leaf.text
    ]

    with open(output_path, "w") as out_file:
        json.dump(sentences, out_file, indent=4)
def align_aeneas():
    """
    Align every narration of every participant with its transcript.

    For each entry of ``config.PIDs`` the ``*.m4a`` files of the narrations
    subfolder are used, falling back to ``*.mp3`` when there are none. The
    first six characters of the audio file name select the transcript
    ``<id>.txt`` in ``config.aeneas_transcript_dir``; the result is written
    as ``<id>.sbv`` in ``config.aeneas_output_dir``. Audio files without a
    transcript are reported and skipped.
    """
    for PID in config.PIDs:
        audio_wc = os.path.join(PID, config.narrations_subfolder, "*.m4a")
        audio_file_list = glob(audio_wc)
        if not audio_file_list:
            audio_wc = os.path.join(PID, config.narrations_subfolder, "*.mp3")
            audio_file_list = glob(audio_wc)
        audio_file_list.sort()

        for a in audio_file_list:
            file_id = os.path.split(a)[1][0:6]
            t = os.path.join(config.aeneas_transcript_dir, file_id + ".txt")
            out_file = os.path.join(config.aeneas_output_dir, file_id + ".sbv")
            # Single-argument print(...) produces the same output under
            # Python 2 and Python 3; the print statement only parses on 2.
            print("aligning " + a + " " + t)
            if not os.path.isfile(t):
                print(t + " not available so will not be processed. Was it a missed ground truth file?")
                continue

            # create Task object
            config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=sbv"
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = a
            task.text_file_path_absolute = t
            task.sync_map_file_path_absolute = out_file

            # process Task
            ExecuteTask(task).execute()

            # output sync map to file
            task.output_sync_map_file()
def align_audio(
    language: LanguageEnum = Form(...),
    text_file_format: TextFileFormatEnum = Form(...),
    transcript: UploadFile = File(...),
    audio: UploadFile = File(...),
):
    """
    Align an uploaded transcript with an uploaded audio file.

    Both uploads are turned into named temporary files with
    ``convert_to_tempfile`` and aligned using the given language and text
    file format.

    :returns: a list of ``(begin, end, text)`` tuples, with the times
              converted to strings, one per regular sync map leaf
    :raises HTTPException: with status code 500 if anything fails
    """
    # local import so this function does not depend on the module header
    from contextlib import closing

    try:
        # prepare config
        aeneas_config = TaskConfiguration()
        aeneas_config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = text_file_format
        aeneas_config[gc.PPN_TASK_LANGUAGE] = language

        # closing() guarantees that both temporary files are closed even
        # when the alignment raises; they used to be closed on success only.
        with closing(convert_to_tempfile(audio)) as tmp_audio, \
                closing(convert_to_tempfile(transcript)) as tmp_transcript:
            # create task
            task = Task()
            task.configuration = aeneas_config
            task.audio_file_path_absolute = Path(tmp_audio.name)
            task.text_file_path_absolute = Path(tmp_transcript.name)

            # process Task
            ExecuteTask(task).execute()

            return [(str(fragment.begin), str(fragment.end), fragment.text)
                    for fragment in task.sync_map_leaves()
                    if fragment.is_regular]
    except Exception as e:
        raise HTTPException(status_code=500,
                            detail="Error during processing: " + str(e)) from e
def align_files_in_place(data: InputDataFiles):
    """
    Align files that already exist on disk and store the result as JSON.

    Reads ``data.audio_filename`` and ``data.transcript_filename`` and
    writes a list of ``(begin, end, text)`` entries, one per regular sync
    map leaf, to ``data.alignment_filename``.

    :raises HTTPException: with status code 500 if anything fails
    """
    try:
        settings = TaskConfiguration()
        settings[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = data.text_file_format
        settings[gc.PPN_TASK_LANGUAGE] = data.language

        task = Task()
        task.configuration = settings
        task.audio_file_path_absolute = Path(data.audio_filename)
        task.text_file_path_absolute = Path(data.transcript_filename)

        ExecuteTask(task).execute()

        with open(data.alignment_filename, "w") as out_file:
            rows = []
            for leaf in task.sync_map_leaves():
                if leaf.is_regular:
                    rows.append((str(leaf.begin), str(leaf.end), leaf.text))
            # orjson returns bytes, hence the decode before the text write
            out_file.write(orjson.dumps(rows).decode())
    except Exception as e:
        message = "Error during processing: " + str(e)
        raise HTTPException(status_code=500, detail=message) from e
def force_align():
    """
    Align ``data/transcript.txt`` with ``data/audio.wav`` as Russian plain text.

    :returns: the ``"fragments"`` entry of the sync map's JSON representation
    """
    WORK_DIR = os.path.abspath("data")

    task = Task(config_string="task_language=rus|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = f"{WORK_DIR}/audio.wav"
    task.text_file_path_absolute = f"{WORK_DIR}/transcript.txt"
    ExecuteTask(task).execute()

    # parse the JSON string of the sync map and return its fragment list
    sync_map = json.loads(task.sync_map.json_string)
    return sync_map["fragments"]
def transcribe(self, audio, transcript, output):
    """
    Align ``transcript`` with ``audio`` and write a JSON sync map to ``output``.

    :param audio: path of the audio file
    :param transcript: path of the plain text transcript
    :param output: path of the JSON sync map to write
    """
    task = Task(config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = audio
    task.text_file_path_absolute = transcript
    task.sync_map_file_path_absolute = output

    print("Processing task...\n")
    aligner = ExecuteTask(task)
    aligner.execute()

    print(f"Taks processed. Writing output to {output}")
    task.output_sync_map_file()
def execute(self):
    """
    Execute the job, that is, execute all of its tasks.

    Each produced sync map will be stored
    inside the corresponding task object.

    :raises: :class:`~aeneas.executejob.ExecuteJobExecutionError`: if there is a problem during the job execution
    """
    self.log(u"Executing job")

    # NOTE(review): the checks below rely on log_exc raising the given
    # exception class; presumably that is what its third argument requests.
    if self.job is None:
        self.log_exc(u"The job object is None", None, True, ExecuteJobExecutionError)
    if len(self.job) == 0:
        self.log_exc(u"The job has no tasks", None, True, ExecuteJobExecutionError)
    job_max_tasks = self.rconf[RuntimeConfiguration.JOB_MAX_TASKS]
    # a non-positive limit means "no limit"
    if (job_max_tasks > 0) and (len(self.job) > job_max_tasks):
        self.log_exc(
            u"The Job has %d Tasks, more than the maximum allowed (%d)." % (len(self.job), job_max_tasks),
            None,
            True,
            ExecuteJobExecutionError
        )
    self.log([u"Number of tasks: '%d'", len(self.job)])

    for task in self.job.tasks:
        # Bind the name before entering the try block: if reading the
        # custom id itself fails, the handler below must still be able to
        # format its message instead of raising UnboundLocalError (or
        # silently reporting the id of the previous task).
        custom_id = None
        try:
            custom_id = task.configuration["custom_id"]
            self.log([u"Executing task '%s'...", custom_id])
            executor = ExecuteTask(task, rconf=self.rconf, logger=self.logger)
            executor.execute()
            self.log([u"Executing task '%s'... done", custom_id])
        except Exception as exc:
            self.log_exc(u"Error while executing task '%s'" % (custom_id), exc, True, ExecuteJobExecutionError)
        self.log(u"Executing task: succeeded")

    self.log(u"Executing job: succeeded")
def align_audio_and_text(self, file_path):
    """
    Align ``self.txt_file_path`` with ``self.audio_file_path`` and write the result.

    The text is treated as plain text in the ``Language.PAN`` language and
    the sync map is written in JSON format.

    :param file_path: path of the sync map file to write
    """
    settings = TaskConfiguration()
    for key, value in (
        (gc.PPN_TASK_LANGUAGE, Language.PAN),
        (gc.PPN_TASK_IS_TEXT_FILE_FORMAT, TextFileFormat.PLAIN),
        (gc.PPN_TASK_OS_FILE_FORMAT, SyncMapFormat.JSON),
    ):
        settings[key] = value

    task = Task()
    task.configuration = settings
    task.audio_file_path_absolute = self.audio_file_path
    task.text_file_path_absolute = self.txt_file_path
    task.sync_map_file_path_absolute = file_path

    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()
def process_aeneas_map(filepath_, format_):
    """
    Align ``<filepath_>.txt`` with ``<filepath_>.<format_>`` and write ``<filepath_>.json``.

    :param filepath_: common path prefix of the audio, text and output files
    :param format_: extension of the audio file, without the leading dot
    """
    audio_path = filepath_ + '.' + format_
    text_path = filepath_ + ".txt"
    sync_map_path = filepath_ + ".json"

    task = Task(config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = audio_path
    task.text_file_path_absolute = text_path
    task.sync_map_file_path_absolute = sync_map_path

    # compute the alignment, then have the task write it out
    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()
def process_aeneas(txt_filename, wav_filename, csv_filename):
    """
    Align a plain text file with an audio file and write a CSV sync map.

    :param txt_filename: path of the text file
    :param wav_filename: path of the audio file
    :param csv_filename: path of the CSV sync map to write
    """
    task = Task(config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=csv")
    task.audio_file_path_absolute = wav_filename
    task.text_file_path_absolute = txt_filename
    task.sync_map_file_path_absolute = csv_filename

    # compute the alignment, then have the task write it out
    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()
def execute(self):
    """
    Execute the job, that is, execute all of its tasks.

    Each produced sync map will be stored
    inside the corresponding task object.

    Return ``True`` if the execution succeeded,
    ``False`` otherwise.

    :rtype: bool
    """
    self._log("Executing job")

    # a job must exist and contain at least one task
    job = self.job
    if job is None:
        self._log("job is None")
        return False
    if len(job) == 0:
        self._log("The job has no tasks")
        return False
    self._log(["Number of tasks: '%d'", len(job)])

    # run the tasks in order, stopping at the first failure
    for task in job.tasks:
        custom_id = task.configuration.custom_id
        self._log(["Executing task '%s'...", custom_id])
        succeeded = ExecuteTask(task, logger=self.logger).execute()
        self._log(["Executing task '%s'... done", custom_id])
        if not succeeded:
            self._log("Executing task: failed")
            return False
        self._log("Executing task: succeeded")

    self._log("Executing job: succeeded")
    return True
def execute(self):
    """
    Execute the job, that is, execute all of its tasks.

    Each produced sync map will be stored
    inside the corresponding task object.

    Return ``True`` if the execution succeeded,
    ``False`` otherwise.

    :rtype: bool
    """
    self._log("Executing job")

    # check if the job has tasks
    # (identity test: a job object defining __eq__ must not be able to
    # compare equal to None and be reported as missing)
    if self.job is None:
        self._log("job is None")
        return False
    if len(self.job) == 0:
        self._log("The job has no tasks")
        return False
    self._log("Number of tasks: '%s'" % len(self.job))

    # execute tasks, stopping at the first one that reports a failure
    for task in self.job.tasks:
        custom_id = task.configuration.custom_id
        self._log("Executing task '%s'..." % custom_id)
        executor = ExecuteTask(task, logger=self.logger)
        result = executor.execute()
        self._log("Executing task '%s'... done" % custom_id)
        if not result:
            self._log("Executing task: failed")
            return False
        self._log("Executing task: succeeded")

    # return
    self._log("Executing job: succeeded")
    return True
def align(audio_file_path, text_file_path, syncmap_file_path):
    """
    Align a plain text file with an audio file and write a TXT sync map.

    :param audio_file_path: path of the audio file
    :param text_file_path: path of the text file
    :param syncmap_file_path: path of the sync map file to write
    :returns: always ``True``
    """
    task = Task(config_string="task_language=zh|is_text_type=plain|os_task_file_format=txt")
    task.audio_file_path_absolute = audio_file_path
    task.text_file_path_absolute = text_file_path
    task.sync_map_file_path_absolute = syncmap_file_path

    # compute the alignment, then have the task write it out
    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()

    return True
def run_aeneas_task(audio_filepath, lyrics_filepath, output_filepath):
    """
    Align a plain text lyrics file with an audio file and write a JSON sync map.

    :param audio_filepath: path of the audio file
    :param lyrics_filepath: path of the lyrics text file
    :param output_filepath: path of the JSON sync map to write
    """
    task = Task(config_string="task_language=eng|os_task_file_format=json|is_text_type=plain")
    task.audio_file_path_absolute = audio_filepath
    task.text_file_path_absolute = lyrics_filepath
    task.sync_map_file_path_absolute = output_filepath

    # compute the alignment, then have the task write it out
    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()
def get_alignment(path_to_audio_file: str,
                  transcript: List[str],
                  force=False,
                  language='fr_FR') -> List[dict]:
    """
    Return the alignment of ``transcript`` with an audio file, using a cache.

    The transcript is written, one entry per line, to a file in
    ``CACHE_DIR`` named after its SHA-1 hash. The alignment is stored in a
    JSON file named after both the transcript hash and the audio file hash,
    and is only recomputed when that file is missing or ``force`` is true.

    :param path_to_audio_file: path of the audio file
    :param transcript: the text fragments to align, in order
    :param force: recompute the alignment even if a cached file exists
    :param language: ``'fr_FR'`` or ``'en_US'``
    :returns: the ``fragments`` of the JSON sync map, each passed through
              ``cleanup_fragment``
    :raises KeyError: if ``language`` is not one of the supported codes
    """
    # see https://github.com/readbeyond/aeneas/blob/9d95535ad63eef4a98530cfdff033b8c35315ee1/aeneas/ttswrappers/espeakngttswrapper.py#L45  # noqa
    language = {
        'fr_FR': 'fra',
        'en_US': 'eng',
    }[language]

    full_transcript = '\t'.join(transcript)
    full_transcript_hash = sha1(full_transcript.encode()).hexdigest()
    path_to_transcript = os.path.join(CACHE_DIR, f'{full_transcript_hash}.txt')

    with open(path_to_audio_file, 'rb') as f:
        audio_file_hash = hash_file(f)

    # Explicit UTF-8: the hash above is computed on the UTF-8 bytes, and the
    # platform default encoding may not be able to represent accented text.
    # NOTE(review): aeneas is documented to expect UTF-8 input - confirm.
    with open(path_to_transcript, 'w', encoding='utf-8') as f:
        # write(), not writelines(): the argument is a single string, which
        # writelines() would iterate character by character
        f.write('\n'.join(transcript))

    path_to_alignment_tmp = os.path.join(
        CACHE_DIR, f'{full_transcript_hash}_{audio_file_hash}.json')

    if force or not os.path.isfile(path_to_alignment_tmp):
        # build alignment
        task = Task(
            f'task_language={language}|os_task_file_format=json|is_text_type=plain'
        )
        task.audio_file_path_absolute = os.path.abspath(path_to_audio_file)
        task.text_file_path_absolute = path_to_transcript
        task.sync_map_file_path_absolute = path_to_alignment_tmp
        executor = ExecuteTask(task=task)
        executor.execute()
        task.output_sync_map_file()

    with open(path_to_alignment_tmp, encoding='utf-8') as source:
        return [cleanup_fragment(f) for f in json.load(source)['fragments']]
def sync_map_generator(song_name):
    """
    Align the lyrics of ``song_name`` with its audio and write ``output/<song_name>.json``.

    The audio (``.wav``) and lyrics (``.txt``) are read from fixed
    directories.

    :param song_name: base name shared by the audio, lyrics and output files
    """
    print('Creating sync map file...')

    audio_path = "C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Songs/" + song_name + ".wav"
    lyrics_path = "C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Lyrics/" + song_name + ".txt"
    sync_map_path = u"output/" + song_name + ".json"

    task = Task(config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = audio_path
    task.text_file_path_absolute = lyrics_path
    task.sync_map_file_path_absolute = sync_map_path

    # compute the alignment, then have the task write it out
    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()

    print('Created sync map file')
def executeAeneas(text_path, audio_path):
    """
    Align a plain text file with an audio file and write a JSON sync map.

    The sync map is written next to the audio file, as
    ``<audio path without extension>_syncmap.json``.

    :param text_path: path of the text file
    :param audio_path: path of the audio file
    """
    sync_map_path = os.path.splitext(audio_path)[0] + "_syncmap.json"

    task = Task(config_string=u'task_language=tur|is_text_type=plain|os_task_file_format=json')
    task.audio_file_path_absolute = audio_path
    task.text_file_path_absolute = text_path
    task.sync_map_file_path_absolute = sync_map_path

    # compute the alignment, then have the task write it out
    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()
def generate_epub(pub_id):
    """
    Align the bundled test sonnet with the test audio and return the sync map text.

    :param pub_id: not used by this function
    :returns: the content of the written JSON sync map file, as a string
    """
    task = Task(config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = u"./test/audio.mp3"
    task.text_file_path_absolute = u"./test/sonnet_plain.txt"
    task.sync_map_file_path_absolute = u"./test/output.json"

    # compute the alignment, then have the task write it out
    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()

    # read the file back from the path stored on the task
    with open(task.sync_map_file_path_absolute, 'r') as sync_map_file:
        return sync_map_file.read()
def force_align(book_name, chapter_index):
    """
    Align one chapter of a book and write its sync map.

    The audio, text and sync map paths are all obtained from the ``fu``
    helpers; the task configuration comes from
    ``properties.aeneas_configuration_string``.

    :param book_name: name of the book
    :param chapter_index: index of the chapter (formatted as an integer)
    """
    print("Aligning chapter {:d}".format(chapter_index))

    chapter = (book_name, chapter_index)
    task = Task(config_string=properties.aeneas_configuration_string)
    task.audio_file_path_absolute = fu.build_audio_path(*chapter)
    task.text_file_path_absolute = fu.build_valid_text_path(*chapter)
    task.sync_map_file_path_absolute = fu.build_syncmap_path(*chapter)

    # compute the alignment, then have the task write it out
    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()
def chopsounds():
    """
    Cut the input WAV file into one WAV file per aligned text fragment.

    Reads the ``inputsound``, ``inputtext`` and ``outputdir`` names defined
    outside this function. Each fragment with a positive length is exported
    as ``<outputdir>/<fragment identifier>.wav``.
    """
    task = Task(config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = inputsound
    task.text_file_path_absolute = inputtext

    ExecuteTask(task).execute()

    sound = AudioSegment.from_wav(inputsound)
    for leaf in task.sync_map_leaves():
        if not (leaf.length > 0.0):
            continue
        # fragment times are multiplied by 1000 before slicing the segment
        start_ms = float(leaf.begin) * 1000
        end_ms = float(leaf.end) * 1000
        clip = sound[start_ms:end_ms]
        clip.export(outputdir + "/" + leaf.identifier + ".wav", format="wav")
def make_subs(wav_path, txt_path, srt_path, start):
    """Gets the subtitles with the correct timing based on the wav file

    :param wav_path: path of the audio file
    :param txt_path: path of the plain text file
    :param srt_path: path of the SRT file to write and read back
    :param start: number of seconds by which the subtitles are shifted
    :returns: the shifted subtitles, as loaded by ``pysrt.open``
    """
    task = Task(config_string="task_language=eng|is_text_type=plain|os_task_file_format=srt")
    task.audio_file_path_absolute = wav_path
    task.text_file_path_absolute = txt_path
    task.sync_map_file_path_absolute = srt_path

    aligner = ExecuteTask(task)
    aligner.execute()
    task.output_sync_map_file()

    # reload the written file and move every subtitle by the offset
    subtitles = pysrt.open(srt_path)
    subtitles.shift(seconds=start)
    return subtitles
def create_aeneas_json_file(audio_path, text_path, output_path):
    """
    Use the api aeneas to synchronize audio and text.

    Parameters:
        audio_path (str): audio filepath.
        text_path (str): text filepath.
        output_path (str): output json filepath.

    Returns:
        Boolean: True if the sync map was written, False if an error
        occurred (the exception type, file name and line number are
        printed).

    A KeyboardInterrupt terminates the program after printing a notice.
    """
    try:
        # create Task object
        config_string = u"task_language=por|is_text_type=plain|os_task_file_format=json|task_adjust_boundary_percent_value=50|mfcc_mask_nonspeech_l2=True"
        task = Task(config_string=config_string)
        task.audio_file_path_absolute = u"{}".format(audio_path)
        task.text_file_path_absolute = u"{}".format(text_path)
        task.sync_map_file_path_absolute = u"{}".format(output_path)
        # process Task
        ExecuteTask(task).execute()
        # output sync map to file
        task.output_sync_map_file()
    except KeyboardInterrupt:
        print("KeyboardInterrupt Detected!")
        # sys.exit() instead of the interactive-only exit() helper, which
        # is installed by the site module and may be absent
        sys.exit()
    except Exception:
        # "except Exception" rather than a bare except, so that SystemExit
        # and other non-error exits are no longer swallowed and reported
        # as a failed alignment
        exc_type, _, exc_tb = sys.exc_info()
        exc_file = split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, exc_file, exc_tb.tb_lineno)
        return False

    return True
def main():
    """Main entry point

    Reads the book YAML file given on the command line and, for every
    entry of its ``audio`` section, writes the selected lines of the book
    text (one tokenized sentence per line) to ``<mp3>.txt`` and
    ``<mp3>.raw.txt``, then aligns that text with the MP3 file and writes
    the sync map to ``<mp3>.json``.
    """
    arg_parser = argparse.ArgumentParser(prog="librivox_align.py")
    arg_parser.add_argument("book_yml", help="YAML file with book details")
    args = arg_parser.parse_args()

    logging.basicConfig(level=logging.DEBUG)
    _LOGGER.debug(args)

    args.book_yml = Path(args.book_yml)
    input_dir = args.book_yml.parent

    with open(args.book_yml, "r") as book_file:
        book = yaml.safe_load(book_file)

    # Load gruut language
    gruut_lang = gruut.Language.load(book["gruut"]["language"])
    assert gruut_lang, "Unsupported language"

    language = book["aeneas"]["language"]

    # Load book text
    text_path = Path(input_dir / book["text"]["file"])
    _LOGGER.debug("Loading book text from %s", text_path)
    with open(text_path, "r") as text_file:
        text = text_file.readlines()

    # Process MP3 files
    for mp3_name, mp3_info in book["audio"].items():
        mp3_path = input_dir / mp3_name
        sync_path = mp3_path.with_suffix(".json")

        # collect the "|"-separated options of the task configuration
        options = [
            f"task_language={language}|is_text_type=plain|os_task_file_format=json|task_adjust_boundary_no_zero=True"
        ]

        start_time = float(mp3_info.get("start_time", 0))
        if start_time > 0:
            # Skip seconds at the beginning
            options.append(f"is_audio_file_head_length={start_time}")

        end_time = float(mp3_info.get("end_time", 0))
        if end_time < 0:
            # Skip seconds at the end
            end_time = abs(end_time)
            options.append(f"is_audio_file_tail_length={end_time}")
        elif end_time > 0:
            # Set length of audio
            options.append(f"is_audio_file_process_length={end_time}")

        task = Task(config_string="|".join(options))
        task.audio_file_path_absolute = mp3_path.absolute()
        task.sync_map_file_path_absolute = sync_path.absolute()

        mp3_text_path = mp3_path.with_suffix(".txt")
        with open(mp3_text_path, mode="w+") as mp3_text_file:
            start_line, end_line = mp3_info["start_line"], mp3_info["end_line"]

            # Clean up newlines in text (line numbers are 1-based, inclusive)
            mp3_text = "".join(
                text[line_index].strip() + "\n"
                for line_index in range(start_line - 1, end_line)
            )

            # Run through gruut tokenizer to expand abbreviations, numbers, etc.
            raw_text_path = mp3_path.with_suffix(".raw.txt")
            with open(raw_text_path, "w") as raw_text_file:
                for sentence in gruut_lang.tokenizer.tokenize(mp3_text):
                    # Each sentence is on a line now
                    print(" ".join(sentence.clean_words), file=mp3_text_file)
                    print(sentence.raw_text, file=raw_text_file)

            mp3_text_file.seek(0)
            task.text_file_path_absolute = mp3_text_file.name

            # Generate sync map JSON file
            _LOGGER.debug("Generating %s (%s)", sync_path, mp3_path)
            ExecuteTask(task).execute()
            task.output_sync_map_file()
def retrieve_keyword_audio(vid, keyword):
    """
    Extract audio clips of ``keyword`` from a captioned YouTube video.

    The video is downloaded, converted with the command built from
    ``FFMPEG_TEMPLATE`` and loaded with librosa. Every caption of the 'ko'
    caption track containing the keyword (or the keyword followed by "s"
    or "es") is force-aligned against its slice of the audio, and each
    matching word fragment is written as a WAV file under
    ``DATA_DIR/<keyword>/``.

    :param vid: video identifier, substituted into ``URL_TEMPLATE``
    :param keyword: the word to look for in the captions
    :returns: the number of keyword audio files written (0 if the video is
              too long, has no 'ko' captions or could not be downloaded)
    :raises Exception: if the time line of a caption cannot be parsed
    """
    audio_index = 0
    v_url = URL_TEMPLATE.format(vid)
    youtube = YouTube(v_url)
    y_len = youtube.player_config_args['player_response']['videoDetails']['lengthSeconds']
    print("Length :",y_len)
    print("Views :",youtube.views)
    if int(y_len) > 2700:  # only consider video < 45 mins
        return audio_index
    print("="*40)
    caption = youtube.captions.get_by_language_code('ko')
    if caption:
        print("caption==ko")
        # retrieve audio from video
        youtube.streams.first().download(output_path=TEMP_DIR, filename=vid)
        temp_file_name = TEMP_DIR+vid
        # give up if the download did not produce the expected file
        if not os.path.isfile(temp_file_name + ".mp4"):
            return audio_index
        time.sleep(1)  # need to wait before ffmpeg takes in as input file
        cmd = FFMPEG_TEMPLATE.format(temp_file_name).split()
        subprocess.check_output(cmd)
        # load() returns a tuple; only its first element is kept
        # NOTE(review): 16000 is presumably the sampling rate in Hz - confirm
        audio = librosa.core.load(temp_file_name+".wav", 16000)[0]
        # the whole audio is in memory now, the files are no longer needed
        os.remove(temp_file_name + ".mp4")
        os.remove(temp_file_name + ".wav")
        # NOTE(review): formatted_vid is not used within this function
        formatted_vid = vid.replace('_', '-')
        # captions are separated by a blank line in the generated SRT text
        cc_arr = caption.generate_srt_captions().split('\n\n')
        for captions in cc_arr:
            cc_split = captions.split('\n')
            # expect (index, time, text); tolerate one leading empty line
            if len(cc_split) == 4 and cc_split[0] == '':
                cc_split = (cc_split[1], cc_split[2], cc_split[3])
            elif len(cc_split) != 3:
                continue
            _, cc_time, cc_text = cc_split
            cc_text = TAG_CLEANER.sub('', cc_text)
            # clean up punctuation
            cc_text = cc_text.translate(TRANSPLATOR)
            cc_text = cc_text.lower()
            words = cc_text.strip().split()
            # stemming words: also accept the keyword followed by "s" or "es"
            if keyword not in words and keyword + "s" not in words and keyword + "es" not in words:
                continue
            aligner_task = Task(config_string=ALIGNER_CONFIG_STRING)
            # prepare label file for forced aligner (one word per line)
            label_file = temp_file_name + "_" + keyword + ".txt"
            with open(label_file, "w+") as file:
                for word in words:
                    file.write(word+"\n")
            # prepare audio file for forced aligner
            match_result = SRT_TIME_PARSER.match(cc_time)
            if match_result:
                # groups 1-4 are passed as the start time, groups 5-8 as
                # the stop time
                start_time_ms = srt_time_to_ms(
                    match_result.group(1),
                    match_result.group(2),
                    match_result.group(3),
                    match_result.group(4))
                stop_time_ms = srt_time_to_ms(
                    match_result.group(5),
                    match_result.group(6),
                    match_result.group(7),
                    match_result.group(8))
                start_pos = start_time_ms * 16
                stop_pos = stop_time_ms * 16
                block = audio[start_pos:stop_pos]  # *16 since 16 samples are captured per each ms
                # temporary audio file for forced aligner
                audio_file = temp_file_name + "_" + keyword + ".wav"
                librosa.output.write_wav(audio_file, block, 16000)
                time.sleep(1)  # buffer for writing wav file
            else:
                print(TEXT_COLOUR['FAIL'] + "failed pasing srt time : " + cc_time + TEXT_COLOUR['ENDC'])
                raise Exception('srt time fail error')
            aligner_task.text_file_path_absolute = label_file
            aligner_task.audio_file_path_absolute = audio_file
            # process aligning task
            ExecuteTask(aligner_task).execute()
            for fragment in aligner_task.sync_map_leaves():
                # keep only short fragments whose text contains the keyword
                # NOTE(review): 0.9 is presumably a duration in seconds - confirm
                if fragment.is_regular and keyword in fragment.text and fragment.length < 0.9:
                    # fragment times are scaled by 16000 to index into block
                    begin = int(fragment.begin * 16000)
                    end = int(fragment.end * 16000)
                    keyword_audio = pad_and_center_align(block[begin:end], 16000)
                    file_name = keyword+"_"+str(audio_index)+".wav"
                    librosa.output.write_wav(
                        DATA_DIR + "/" + keyword + "/" + file_name,
                        keyword_audio,
                        16000)
                    audio_index += 1
    return audio_index
def perform_command(self):
    """
    Run the tool and report the outcome as an exit code.

    Informational switches (examples, parameter and value listings) are
    answered first. Otherwise one task is assembled -- from a built-in
    example or from the four positional arguments -- then validated,
    executed and written out, optionally with an HTML file and some
    fragment statistics.

    :rtype: int
    """
    if len(self.actual_arguments) < 1:
        return self.print_help()

    # informational switches: answer and leave
    if self.has_option([u"-e", u"--examples"]):
        return self.print_examples(False)
    if self.has_option(u"--examples-all"):
        return self.print_examples(True)
    if self.has_option([u"--list-parameters"]):
        return self.print_parameters()
    values_parameter = self.has_option_with_value(u"--list-values")
    if values_parameter is not None:
        return self.print_values(values_parameter)
    if self.has_option(u"--list-values"):
        return self.print_values(u"?")

    # NOTE list() is needed for Python3, where keys() is not a list!
    demo = self.has_option(list(self.DEMOS.keys()))
    demo_parameters = u""
    download_from_youtube = self.has_option([u"-y", u"--youtube"])
    largest_audio = self.has_option(u"--largest-audio")
    keep_audio = self.has_option(u"--keep-audio")
    output_html = self.has_option(u"--output-html")
    validate = not self.has_option(u"--skip-validator")
    print_faster_rate = self.has_option(u"--faster-rate")
    print_rates = self.has_option(u"--rates")
    print_zero = self.has_option(u"--zero")

    if not demo:
        # regular invocation: the four positional arguments are mandatory
        if len(self.actual_arguments) < 4:
            return self.print_help()
        (
            audio_file_path,
            text_file_path,
            config_string,
            sync_map_file_path,
        ) = self.actual_arguments[:4]
    else:
        # built-in example: the first example switch found supplies the arguments
        validate = False
        for key in self.DEMOS:
            if not self.has_option(key):
                continue
            demo_parameters = self.DEMOS[key]
            audio_file_path = demo_parameters[u"audio"]
            text_file_path = demo_parameters[u"text"]
            config_string = demo_parameters[u"config"]
            sync_map_file_path = demo_parameters[u"syncmap"]

            # TODO allow injecting rconf options directly from DEMOS options field
            if key == u"--example-cewsubprocess":
                self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True
            elif key == u"--example-ctw-espeak":
                self.rconf[RuntimeConfiguration.TTS] = "custom"
                self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK
            elif key == u"--example-ctw-speect":
                self.rconf[RuntimeConfiguration.TTS] = "custom"
                self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT
            elif key == u"--example-festival":
                self.rconf[RuntimeConfiguration.TTS] = "festival"
                self.rconf[RuntimeConfiguration.TTS_PATH] = "text2wave"
            elif key == u"--example-mws":
                self.rconf[RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500"
                self.rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500"
            elif key == u"--example-faster-rate":
                print_faster_rate = True
            elif key == u"--example-no-zero":
                print_zero = True
            elif key == u"--example-py":
                self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False
            elif key == u"--example-rates":
                print_rates = True
            elif key == u"--example-youtube":
                download_from_youtube = True
            break

    html_file_path = None
    if output_html:
        # the HTML file is written next to the sync map; this also forces keep_audio
        keep_audio = True
        html_file_path = sync_map_file_path + u".html"

    if download_from_youtube:
        # in YouTube mode the "audio file" argument is really the URL
        youtube_url = audio_file_path

    # input/output sanity checks, in the same order as the arguments
    if not (download_from_youtube or self.check_input_file(audio_file_path)):
        return self.ERROR_EXIT_CODE
    if not self.check_input_file(text_file_path):
        return self.ERROR_EXIT_CODE
    if not self.check_output_file(sync_map_file_path):
        return self.ERROR_EXIT_CODE
    if not (html_file_path is None or self.check_output_file(html_file_path)):
        return self.ERROR_EXIT_CODE

    self.check_c_extensions()

    if demo:
        lines = [u"Running example task with arguments:"]
        if download_from_youtube:
            lines.append(u" YouTube URL: %s" % youtube_url)
        else:
            lines.append(u" Audio file: %s" % audio_file_path)
        lines.extend([
            u" Text file: %s" % text_file_path,
            u" Config string: %s" % config_string,
            u" Sync map file: %s" % sync_map_file_path,
        ])
        if len(demo_parameters[u"options"]) > 0:
            lines.append(u" Options: %s" % demo_parameters[u"options"])
        self.print_info(u"\n".join(lines))

    if validate:
        self.print_info(u"Validating config string (specify --skip-validator to bypass)...")
        validation = Validator(logger=self.logger).check_configuration_string(
            config_string,
            is_job=False,
            external_name=True
        )
        if not validation.passed:
            self.print_error(u"The given config string is not valid:")
            self.print_generic(validation.pretty_print())
            return self.ERROR_EXIT_CODE
        self.print_info(u"Validating config string... done")

    if download_from_youtube:
        try:
            self.print_info(u"Downloading audio from '%s' ..." % youtube_url)
            downloader = Downloader(logger=self.logger)
            # from here on audio_file_path is the downloaded file
            audio_file_path = downloader.audio_from_youtube(
                youtube_url,
                download=True,
                output_file_path=None,
                largest_audio=largest_audio
            )
            self.print_info(u"Downloading audio from '%s' ... done" % youtube_url)
        except ImportError:
            self.print_no_pafy_error()
            return self.ERROR_EXIT_CODE
        except Exception as err:
            self.print_error(u"An unexpected error occurred while downloading audio from YouTube:")
            self.print_error(u"%s" % err)
            return self.ERROR_EXIT_CODE

    try:
        self.print_info(u"Creating task...")
        task = Task(config_string, logger=self.logger)
        task.audio_file_path_absolute = audio_file_path
        task.text_file_path_absolute = text_file_path
        task.sync_map_file_path_absolute = sync_map_file_path
        self.print_info(u"Creating task... done")
    except Exception as err:
        self.print_error(u"An unexpected error occurred while creating the task:")
        self.print_error(u"%s" % err)
        return self.ERROR_EXIT_CODE

    try:
        self.print_info(u"Executing task...")
        ExecuteTask(task=task, rconf=self.rconf, logger=self.logger).execute()
        self.print_info(u"Executing task... done")
    except Exception as err:
        self.print_error(u"An unexpected error occurred while executing the task:")
        self.print_error(u"%s" % err)
        return self.ERROR_EXIT_CODE

    try:
        self.print_info(u"Creating output sync map file...")
        written_path = task.output_sync_map_file()
        self.print_info(u"Creating output sync map file... done")
        self.print_success(u"Created file '%s'" % written_path)
    except Exception as err:
        self.print_error(u"An unexpected error occurred while writing the sync map file:")
        self.print_error(u"%s" % err)
        return self.ERROR_EXIT_CODE

    if output_html:
        try:
            html_parameters = {
                gc.PPN_TASK_OS_FILE_FORMAT: task.configuration["o_format"],
                gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF: task.configuration["o_smil_audio_ref"],
                gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF: task.configuration["o_smil_page_ref"],
            }
            self.print_info(u"Creating output HTML file...")
            task.sync_map.output_html_for_tuning(audio_file_path, html_file_path, html_parameters)
            self.print_info(u"Creating output HTML file... done")
            self.print_success(u"Created file '%s'" % html_file_path)
        except Exception as err:
            self.print_error(u"An unexpected error occurred while writing the HTML file:")
            self.print_error(u"%s" % err)
            return self.ERROR_EXIT_CODE

    if download_from_youtube:
        if not keep_audio:
            gf.delete_file(None, audio_file_path)
        else:
            self.print_info(u"Option --keep-audio set: keeping downloaded file '%s'" % audio_file_path)

    # optional reports on the computed fragments
    if print_zero:
        zero_length_leaves = [
            leaf for leaf in task.sync_map.fragments_tree.vleaves_not_empty
            if leaf.begin == leaf.end
        ]
        if zero_length_leaves:
            self.print_warning(u"Fragments with zero duration:")
            for fragment in zero_length_leaves:
                self.print_generic(u" %s" % fragment)

    if print_rates:
        self.print_info(u"Fragments with rates:")
        for fragment in task.sync_map.fragments_tree.vleaves_not_empty:
            self.print_generic(u" %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

    if print_faster_rate:
        max_rate = task.configuration["aba_rate_value"]
        if max_rate is not None:
            too_fast = [
                leaf for leaf in task.sync_map.fragments_tree.vleaves_not_empty
                if leaf.rate >= max_rate + Decimal("0.001")
            ]
            if too_fast:
                self.print_warning(u"Fragments with rate greater than %.3f:" % max_rate)
                for fragment in too_fast:
                    self.print_generic(u" %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

    return self.NO_ERROR_EXIT_CODE