def align(text_path, audio_path, align_out_path, word_align=True):
    """Align ``text_path`` to ``audio_path`` and store the sync map as JSON.

    The task is configured with ``task_language=hi`` and JSON output.
    When ``word_align`` is true the text type is ``mplain`` with three
    output levels, and a ``RuntimeConfiguration`` with both nonspeech-mask
    options enabled is handed to the executor. Otherwise the text type is
    ``plain`` and no runtime configuration is passed.

    Once the sync map has been written to ``align_out_path`` it is loaded
    and dumped again with ``ensure_ascii=False`` so that non-ASCII text is
    stored literally rather than as ``\\uXXXX`` escapes.
    """
    settings = [u"task_language=hi", "os_task_file_format=json"]
    if word_align:
        settings += ["os_task_file_levels=3", "is_text_type=mplain"]
        runtime = RuntimeConfiguration()
        for option in (
            RuntimeConfiguration.MFCC_MASK_NONSPEECH,
            RuntimeConfiguration.MFCC_MASK_NONSPEECH_L3,
        ):
            runtime[option] = True
    else:
        settings.append("is_text_type=plain")
        runtime = None

    job = Task(config_string="|".join(settings))
    job.text_file_path_absolute = text_path
    job.audio_file_path_absolute = audio_path
    job.sync_map_file_path_absolute = align_out_path

    # Run the alignment, then let the task write its sync map.
    ExecuteTask(job, rconf=runtime).execute()
    job.output_sync_map_file()

    # Rewrite the JSON so unicode characters are not escaped.
    with open(align_out_path, 'r', encoding='utf8') as source:
        sync_map = json.load(source)
    with open(align_out_path, 'w', encoding='utf8') as target:
        json.dump(sync_map, target, ensure_ascii=False, indent=2)
def align_files_in_place(data: InputDataFiles):
    """Align the files named in ``data`` and write the result as JSON.

    A task is configured from ``data.text_file_format`` and
    ``data.language``, run on ``data.audio_filename`` and
    ``data.transcript_filename``, and the regular leaf fragments are
    written to ``data.alignment_filename`` as a JSON list of
    ``(begin, end, text)`` entries, with both times rendered as strings.

    Raises:
        HTTPException: status 500, wrapping any exception raised while
            configuring, aligning or writing.
    """
    try:
        # prepare config
        aeneas_config = TaskConfiguration()
        aeneas_config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = data.text_file_format
        aeneas_config[gc.PPN_TASK_LANGUAGE] = data.language

        # create task
        task = Task()
        task.configuration = aeneas_config
        task.audio_file_path_absolute = Path(data.audio_filename)
        task.text_file_path_absolute = Path(data.transcript_filename)

        # process Task
        ExecuteTask(task).execute()

        fragments = [
            (str(fragment.begin), str(fragment.end), fragment.text)
            for fragment in task.sync_map_leaves()
            if fragment.is_regular
        ]
        # orjson.dumps() already returns UTF-8 encoded bytes. Writing them in
        # binary mode keeps the file valid UTF-8 JSON on every platform,
        # instead of re-encoding through the locale's default text encoding.
        with open(data.alignment_filename, "wb") as f:
            f.write(orjson.dumps(fragments))
    except Exception as e:
        raise HTTPException(status_code=500, detail="Error during processing: " + str(e)) from e
def align_audio(
    language: LanguageEnum = Form(...),
    text_file_format: TextFileFormatEnum = Form(...),
    transcript: UploadFile = File(...),
    audio: UploadFile = File(...),
):
    """Align an uploaded transcript with an uploaded audio file.

    Both uploads are turned into named temporary files through
    ``convert_to_tempfile``, a task configured with ``text_file_format``
    and ``language`` is executed on them, and the regular leaf fragments
    are returned.

    Returns:
        A list of ``(begin, end, text)`` tuples, with both times rendered
        as strings.

    Raises:
        HTTPException: status 500, wrapping any exception raised during
            processing.
    """
    try:
        # prepare config
        aeneas_config = TaskConfiguration()
        aeneas_config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = text_file_format
        aeneas_config[gc.PPN_TASK_LANGUAGE] = language

        # get named temporary files; the try/finally pairs make sure each
        # one is closed even when a later step raises
        tmp_audio = convert_to_tempfile(audio)
        try:
            tmp_transcript = convert_to_tempfile(transcript)
            try:
                # create task
                task = Task()
                task.configuration = aeneas_config
                task.audio_file_path_absolute = Path(tmp_audio.name)
                task.text_file_path_absolute = Path(tmp_transcript.name)

                # process Task
                ExecuteTask(task).execute()
            finally:
                tmp_transcript.close()
        finally:
            tmp_audio.close()

        return [
            (str(fragment.begin), str(fragment.end), fragment.text)
            for fragment in task.sync_map_leaves()
            if fragment.is_regular
        ]
    except Exception as e:
        raise HTTPException(status_code=500, detail="Error during processing: " + str(e)) from e
def align_aeneas():
    """Align every narration audio file with its transcript.

    For each entry of ``config.PIDs`` the ``*.m4a`` files in the narrations
    subfolder are collected, falling back to ``*.mp3`` when no ``.m4a``
    file exists. The first six characters of each audio file name are used
    as the file id to locate ``<id>.txt`` in ``config.aeneas_transcript_dir``
    and to name ``<id>.sbv`` in ``config.aeneas_output_dir``. Audio files
    without a transcript are reported and skipped.
    """
    for PID in config.PIDs:
        audio_file_list = glob(os.path.join(PID, config.narrations_subfolder, "*.m4a"))
        if not audio_file_list:
            audio_file_list = glob(os.path.join(PID, config.narrations_subfolder, "*.mp3"))
        audio_file_list.sort()

        for a in audio_file_list:
            file_id = os.path.basename(a)[:6]
            t = os.path.join(config.aeneas_transcript_dir, file_id + ".txt")
            out_file = os.path.join(config.aeneas_output_dir, file_id + ".sbv")

            # Single-argument print calls give the same output on
            # Python 2 and Python 3.
            print("aligning " + a + " " + t)
            if not os.path.isfile(t):
                print(t + " not available so will not be processed. Was it a missed ground truth file?")
                continue

            # create Task object
            config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=sbv"
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = a
            task.text_file_path_absolute = t
            task.sync_map_file_path_absolute = out_file

            # process Task
            ExecuteTask(task).execute()

            # output sync map to file
            task.output_sync_map_file()
def createSyncedLyricsFile(lyrics, file):
    """Align ``lyrics`` with the audio in ``file`` and write an ``.lrc`` file.

    The lyrics are written to ``tempSync.txt`` in the working directory and
    aligned against ``file`` with a French, plain-text task. One
    ``[timestamp]text`` line per sync-map leaf is written to a file named
    after ``file`` with its last four characters replaced by ``.lrc``.

    On success the global ``lyricsSynced`` counter is incremented. Any
    exception during alignment or writing is deliberately swallowed:
    the global ``errors`` counter is incremented and " Sync error" is
    printed instead.
    """
    global lyricsSynced, errors

    # The aligner is expected to read its text input as UTF-8, so the
    # temporary file is written with an explicit encoding instead of the
    # platform default.
    with open("tempSync.txt", "w+", encoding="utf-8") as lyrics_file:
        lyrics_file.write(lyrics)

    config = TaskConfiguration()
    config[gc.PPN_TASK_LANGUAGE] = Language.FRA
    config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
    config[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.AUDH
    task = Task()
    task.configuration = config

    try:
        task.audio_file_path_absolute = file
        task.text_file_path_absolute = "tempSync.txt"
        ExecuteTask(task).execute()

        # NOTE(review): file[:-4] assumes a three-character extension such
        # as ".mp3"; kept as-is so the output name stays what callers expect.
        # The context manager closes the file even if a fragment fails.
        with open(file[:-4] + ".lrc", "w+") as syncedLyricsFile:
            for fragment in task.sync_map_leaves():
                timestamp = gf.time_to_hhmmssmmm(fragment.interval.begin, '.')[3:-1]
                syncedLyricsFile.write('[' + timestamp + ']' + fragment.text + '\n')

        print(" Sync Added", sep=' ', end='', flush=True)
        lyricsSynced += 1
    except Exception:
        # Best effort: count the failure and move on to the next track.
        errors += 1
        print(" Sync error", sep=' ', end='', flush=True)
def main():
    """Run one alignment task from four command-line arguments.

    Expects ``sys.argv[1:5]`` to be the audio file path, the text file
    path, the task config string and the sync map output path. With fewer
    arguments ``usage()`` is called and the function returns.

    Progress and errors are reported on standard output; nothing is
    returned.
    """
    if len(sys.argv) < 5:
        usage()
        return
    audio_file_path, text_file_path, config_string, sync_map_file_path = sys.argv[1:5]

    # Single-argument print calls work on both Python 2 and Python 3.
    print("[INFO] Creating task...")
    task = Task(config_string)
    task.audio_file_path_absolute = audio_file_path
    task.text_file_path_absolute = text_file_path
    task.sync_map_file_path_absolute = sync_map_file_path
    print("[INFO] Creating task... done")

    print("[INFO] Executing task...")
    logger = Logger(tee=False)
    executor = ExecuteTask(task=task, logger=logger)
    result = executor.execute()
    print("[INFO] Executing task... done")

    if not result:
        print("[ERRO] An error occurred while executing the task")
        return

    print("[INFO] Creating output container...")
    path = task.output_sync_map_file()
    print("[INFO] Creating output container... done")

    if path is not None:
        print("[INFO] Created %s" % path)
    else:
        print("[ERRO] An error occurred while writing the output sync map file")
def force_align(audio_path, text_path, output_path, min_length=1.0, max_length=10.0, logging=logging):
    """Align text to audio and save the fragments of acceptable length.

    An English, plain-text task is executed on ``audio_path`` and
    ``text_path``. Every sync-map leaf whose length lies strictly between
    ``min_length`` and ``max_length`` and whose text is non-empty is stored
    as a dict with ``start``, ``end``, ``length`` (floats) and ``text``.
    The resulting list is written to ``output_path`` as indented JSON.

    ``logging`` is the object used for the two progress messages.
    """
    aligner = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    )
    aligner.audio_file_path_absolute = audio_path
    aligner.text_file_path_absolute = text_path
    aligner.sync_map_file_path_absolute = output_path

    logging.info("Aligning audio and text...")
    ExecuteTask(aligner).execute()
    logging.info("Aligned audio and text")

    kept = [
        {
            "start": float(leaf.begin),
            "end": float(leaf.end),
            "length": float(leaf.length),
            "text": leaf.text,
        }
        for leaf in aligner.sync_map_leaves()
        if min_length < leaf.length < max_length and leaf.text
    ]

    with open(output_path, "w") as out:
        json.dump(kept, out, indent=4)
def test_set_text_file_path_absolute_05(self):
    """Setting the text path with the PARSED format loads a text file of length 15."""
    cfg = TaskConfiguration()
    cfg.language = Language.EN
    cfg.is_text_file_format = TextFileFormat.PARSED

    task = Task()
    task.configuration = cfg
    task.text_file_path_absolute = get_abs_path("res/inputtext/sonnet_parsed.txt")

    loaded = task.text_file
    self.assertNotEqual(loaded, None)
    self.assertEqual(len(loaded), 15)
def test_set_text_file_path_absolute_03(self):
    """Setting the text path with the UNPARSED format loads a text file of length 15.

    The configuration uses the class regex ``"ra"`` and numeric id sorting.
    """
    cfg = TaskConfiguration()
    cfg.language = Language.EN
    cfg.is_text_file_format = TextFileFormat.UNPARSED
    cfg.is_text_unparsed_class_regex = "ra"
    cfg.is_text_unparsed_id_sort = IDSortingAlgorithm.NUMERIC

    task = Task()
    task.configuration = cfg
    task.text_file_path_absolute = get_abs_path("res/inputtext/sonnet_unparsed_class_id.xhtml")

    loaded = task.text_file
    self.assertNotEqual(loaded, None)
    self.assertEqual(len(loaded), 15)
def force_align():
    """Align ``data/transcript.txt`` with ``data/audio.wav`` and return the fragments.

    The task is configured for ``task_language=rus``, plain text and JSON
    output. The ``"fragments"`` entry of the parsed sync-map JSON string
    is returned.
    """
    data_dir = os.path.abspath("data")

    aligner = Task(
        config_string="task_language=rus|is_text_type=plain|os_task_file_format=json"
    )
    aligner.audio_file_path_absolute = data_dir + "/audio.wav"
    aligner.text_file_path_absolute = data_dir + "/transcript.txt"
    ExecuteTask(aligner).execute()

    sync_map = json.loads(aligner.sync_map.json_string)
    return sync_map["fragments"]
def test_execute(self):
    """Execute a plain-text task with head/process length set and write a TXT sync map."""
    settings = "|".join([
        "task_language=en",
        "os_task_file_format=txt",
        "os_task_file_name=output_head.txt",
        "is_text_type=plain",
        "is_audio_file_head_length=11.960",
        "is_audio_file_process_length=31.640",
    ])
    job = Task(settings)
    job.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
    job.text_file_path_absolute = "../aeneas/tests/res/inputtext/sonnet_plain_head_length.txt"

    # The execution itself must report success ...
    outcome = ExecuteTask(job, logger=Logger(tee=True)).execute()
    self.assertTrue(outcome)

    # ... and writing the sync map must yield a path.
    job.sync_map_file_path_absolute = "/tmp/output_head_length.txt"
    written = job.output_sync_map_file()
    self.assertNotEqual(written, None)
def test_execute(self):
    """Execute an unparsed-XHTML task and write a SMIL sync map."""
    settings = "|".join([
        "task_language=en",
        "os_task_file_format=smil",
        "os_task_file_name=p001.smil",
        "os_task_file_smil_audio_ref=p001.mp3",
        "os_task_file_smil_page_ref=p001.xhtml",
        "is_text_type=unparsed",
        "is_text_unparsed_id_regex=f[0-9]+",
        "is_text_unparsed_id_sort=numeric",
    ])
    job = Task(settings)
    job.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
    job.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"

    # The execution itself must report success ...
    outcome = ExecuteTask(job, logger=Logger(tee=True)).execute()
    self.assertTrue(outcome)

    # ... and writing the sync map must yield a path.
    job.sync_map_file_path_absolute = "/tmp/p001.smil"
    written = job.output_sync_map_file()
    self.assertNotEqual(written, None)
def transcribe(self, audio, transcript, output):
    """Align ``transcript`` with ``audio`` and write a JSON sync map to ``output``.

    The task uses ``task_language=eng`` with plain text input. Progress is
    printed before and after the task is executed.
    """
    job = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    )
    for attribute, path in (
        ("audio_file_path_absolute", audio),
        ("text_file_path_absolute", transcript),
        ("sync_map_file_path_absolute", output),
    ):
        setattr(job, attribute, path)

    print("Processing task...\n")
    ExecuteTask(job).execute()

    print(f"Taks processed. Writing output to {output}")
    job.output_sync_map_file()
def align_audio_and_text(self, file_path):
    """Align ``self.txt_file_path`` with ``self.audio_file_path``.

    The task is configured with ``Language.PAN``, plain text input and
    JSON output; the sync map is written to ``file_path``.
    """
    settings = TaskConfiguration()
    for key, value in (
        (gc.PPN_TASK_LANGUAGE, Language.PAN),
        (gc.PPN_TASK_IS_TEXT_FILE_FORMAT, TextFileFormat.PLAIN),
        (gc.PPN_TASK_OS_FILE_FORMAT, SyncMapFormat.JSON),
    ):
        settings[key] = value

    job = Task()
    job.configuration = settings
    job.audio_file_path_absolute = self.audio_file_path
    job.text_file_path_absolute = self.txt_file_path
    job.sync_map_file_path_absolute = file_path

    ExecuteTask(job).execute()
    job.output_sync_map_file()
def process_aeneas(txt_filename, wav_filename, csv_filename):
    """Align ``txt_filename`` with ``wav_filename`` and write a CSV sync map.

    The task uses ``task_language=eng`` with plain text input; the sync
    map is written to ``csv_filename``.
    """
    job = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=csv"
    )
    for attribute, path in (
        ("audio_file_path_absolute", wav_filename),
        ("text_file_path_absolute", txt_filename),
        ("sync_map_file_path_absolute", csv_filename),
    ):
        setattr(job, attribute, path)

    # Run the alignment, then write the sync map to disk.
    ExecuteTask(job).execute()
    job.output_sync_map_file()
def process_aeneas_map(filepath_, format_):
    """Align ``<filepath_>.txt`` with ``<filepath_>.<format_>``.

    The task uses ``task_language=eng`` with plain text input and writes
    its JSON sync map to ``<filepath_>.json``.
    """
    audio_file = filepath_ + '.' + format_
    job = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    )
    job.audio_file_path_absolute = audio_file
    job.text_file_path_absolute = filepath_ + ".txt"
    job.sync_map_file_path_absolute = filepath_ + ".json"

    # Run the alignment, then write the sync map to disk.
    ExecuteTask(job).execute()
    job.output_sync_map_file()
def execute(self, config_string, audio_path, text_path):
    """Run a task end to end and check that a non-empty sync map is written.

    ``audio_path`` and ``text_path`` are resolved relative to this test
    file. The sync map is written to a temporary file, which is deleted
    afterwards even when the task or one of the assertions fails.
    """
    handler, tmp_path = gf.tmp_file()
    try:
        task = Task(config_string)
        task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
        task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
        executor = ExecuteTask(task)
        executor.execute()
        task.sync_map_file_path_absolute = tmp_path
        result_path = task.output_sync_map_file()
        self.assertIsNotNone(result_path)
        self.assertEqual(result_path, tmp_path)
        self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
    finally:
        # Without the finally, a failing assertion would leave the
        # temporary file behind.
        gf.delete_file(handler, tmp_path)
def set_text_file(self, path, fmt, expected, id_regex=None, class_regex=None, id_sort=None):
    """Load ``path`` with format ``fmt`` and check the resulting text file length.

    ``id_regex``, ``class_regex`` and ``id_sort`` are applied to the task
    configuration only when they are not ``None``. The loaded text file
    must not be ``None`` and must have length ``expected``.
    """
    cfg = TaskConfiguration()
    cfg.language = Language.EN
    cfg.is_text_file_format = fmt

    optional_settings = (
        ("is_text_unparsed_id_regex", id_regex),
        ("is_text_unparsed_class_regex", class_regex),
        ("is_text_unparsed_id_sort", id_sort),
    )
    for attribute, value in optional_settings:
        if value is not None:
            setattr(cfg, attribute, value)

    task = Task()
    task.configuration = cfg
    task.text_file_path_absolute = get_abs_path(path)

    loaded = task.text_file
    self.assertNotEqual(loaded, None)
    self.assertEqual(len(loaded), expected)
def set_text_file(self, path, fmt, expected, id_regex=None, class_regex=None, id_sort=None):
    """Load ``path`` with format ``fmt`` and check the resulting text file length.

    ``class_regex``, ``id_regex`` and ``id_sort`` are stored in the task
    configuration only when they are not ``None``. ``path`` is resolved
    relative to this test file. The loaded text file must not be ``None``
    and must have length ``expected``.
    """
    cfg = TaskConfiguration()
    cfg["language"] = Language.ENG
    cfg["i_t_format"] = fmt

    optional_settings = (
        ("i_t_unparsed_class_regex", class_regex),
        ("i_t_unparsed_id_regex", id_regex),
        ("i_t_unparsed_id_sort", id_sort),
    )
    for key, value in optional_settings:
        if value is not None:
            cfg[key] = value

    task = Task()
    task.configuration = cfg
    task.text_file_path_absolute = gf.absolute_path(path, __file__)

    loaded = task.text_file
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), expected)
def run_aeneas_task(audio_filepath, lyrics_filepath, output_filepath):
    """Align ``lyrics_filepath`` with ``audio_filepath`` and write a JSON sync map.

    The task uses ``task_language=eng`` with plain text input; the sync
    map is written to ``output_filepath``.
    """
    job = Task(
        config_string="task_language=eng|os_task_file_format=json|is_text_type=plain"
    )
    job.audio_file_path_absolute = audio_filepath
    job.text_file_path_absolute = lyrics_filepath
    job.sync_map_file_path_absolute = output_filepath

    # Run the alignment, then write the sync map to disk.
    ExecuteTask(job).execute()
    job.output_sync_map_file()
def align(audio_file_path, text_file_path, syncmap_file_path):
    """Align ``text_file_path`` with ``audio_file_path`` and write a TXT sync map.

    The task uses ``task_language=zh`` with plain text input; the sync map
    is written to ``syncmap_file_path``.

    Returns:
        Always ``True`` once the sync map has been written.
    """
    job = Task(
        config_string="task_language=zh|is_text_type=plain|os_task_file_format=txt"
    )
    for attribute, path in (
        ("audio_file_path_absolute", audio_file_path),
        ("text_file_path_absolute", text_file_path),
        ("sync_map_file_path_absolute", syncmap_file_path),
    ):
        setattr(job, attribute, path)

    # Run the alignment, then write the sync map to disk.
    ExecuteTask(job).execute()
    job.output_sync_map_file()
    return True
def sync_map_generator(
    song_name,
    songs_dir="C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Songs/",
    lyrics_dir="C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Lyrics/",
    output_dir=u"output/",
):
    """Create a JSON sync map aligning a song's lyrics with its audio.

    Args:
        song_name: Base name shared by ``<song_name>.wav``,
            ``<song_name>.txt`` and the resulting ``<song_name>.json``.
        songs_dir: Directory containing the ``.wav`` file. Defaults to the
            location that was previously hard-coded.
        lyrics_dir: Directory containing the ``.txt`` lyrics file. Defaults
            to the location that was previously hard-coded.
        output_dir: Directory the sync map is written to.

    The task uses ``task_language=eng`` with plain text input.
    """
    # Local import so the block does not depend on the module's import list.
    import os

    print('Creating sync map file...')

    # create Task object
    config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    task = Task(config_string=config_string)
    # With the default directories (which end in "/") these joins produce
    # exactly the strings the former "+" concatenation produced.
    task.audio_file_path_absolute = os.path.join(songs_dir, song_name + ".wav")
    task.text_file_path_absolute = os.path.join(lyrics_dir, song_name + ".txt")
    task.sync_map_file_path_absolute = os.path.join(output_dir, song_name + ".json")

    # process Task
    ExecuteTask(task).execute()

    # output sync map to file
    task.output_sync_map_file()
    print('Created sync map file')
def executeAeneas(text_path, audio_path):
    """Align ``text_path`` with ``audio_path`` and write a JSON sync map.

    The sync map is written next to the audio file, as the audio path
    without its extension followed by ``_syncmap.json``. The task uses
    ``task_language=tur`` with plain text input.
    """
    audio_stem = os.path.splitext(audio_path)[0]

    job = Task(
        config_string=u'task_language=tur|is_text_type=plain|os_task_file_format=json'
    )
    job.audio_file_path_absolute = audio_path
    job.text_file_path_absolute = text_path
    job.sync_map_file_path_absolute = audio_stem + "_syncmap.json"

    # Run the alignment, then write the sync map to disk.
    ExecuteTask(job).execute()
    job.output_sync_map_file()
def generate_epub(pub_id):
    """Align the bundled test sonnet with its audio and return the sync map text.

    ``pub_id`` is not used by the current body. The task reads
    ``./test/audio.mp3`` and ``./test/sonnet_plain.txt``, writes
    ``./test/output.json``, and the content of that file is returned as a
    string.
    """
    job = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    )
    job.audio_file_path_absolute = u"./test/audio.mp3"
    job.text_file_path_absolute = u"./test/sonnet_plain.txt"
    job.sync_map_file_path_absolute = u"./test/output.json"

    # Run the alignment, then write the sync map to disk.
    ExecuteTask(job).execute()
    job.output_sync_map_file()

    # Hand back the file content rather than its path.
    with open(job.sync_map_file_path_absolute, 'r') as sync_map_file:
        return sync_map_file.read()
def force_align(book_name, chapter_index):
    """Align one chapter of a book and write its sync map.

    The audio, text and sync-map paths come from the ``fu.build_*_path``
    helpers; the task configuration comes from
    ``properties.aeneas_configuration_string``.
    """
    print("Aligning chapter {:d}".format(chapter_index))

    job = Task(config_string=properties.aeneas_configuration_string)
    for attribute, build_path in (
        ("audio_file_path_absolute", fu.build_audio_path),
        ("text_file_path_absolute", fu.build_valid_text_path),
        ("sync_map_file_path_absolute", fu.build_syncmap_path),
    ):
        setattr(job, attribute, build_path(book_name, chapter_index))

    # Run the alignment, then write the sync map to disk.
    ExecuteTask(job).execute()
    job.output_sync_map_file()
def make_subs(wav_path, txt_path, srt_path, start):
    """Build subtitles timed against ``wav_path`` and shift them by ``start`` seconds.

    The text in ``txt_path`` is aligned with the audio, the result is
    written to ``srt_path`` in SRT format, and that file is then opened
    with ``pysrt``. The shifted subtitles object is returned.
    """
    job = Task(
        config_string="task_language=eng|is_text_type=plain|os_task_file_format=srt"
    )
    job.audio_file_path_absolute = wav_path
    job.text_file_path_absolute = txt_path
    job.sync_map_file_path_absolute = srt_path

    ExecuteTask(job).execute()
    job.output_sync_map_file()

    # Offset every subtitle by the position of this clip.
    shifted = pysrt.open(srt_path)
    shifted.shift(seconds=start)
    return shifted
def chopsounds():
    """Split the input WAV file into one WAV file per aligned fragment.

    The module-level ``inputsound`` and ``inputtext`` are aligned with an
    English, plain-text task. Every sync-map leaf with a length greater
    than zero is cut out of the audio and exported to
    ``<outputdir>/<fragment identifier>.wav``.
    """
    aligner = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    )
    aligner.audio_file_path_absolute = inputsound
    aligner.text_file_path_absolute = inputtext
    ExecuteTask(aligner).execute()

    recording = AudioSegment.from_wav(inputsound)
    for leaf in aligner.sync_map_leaves():
        if not leaf.length > 0.0:
            continue
        # Fragment times are multiplied by 1000 before slicing the audio.
        start_ms = float(leaf.begin) * 1000
        stop_ms = float(leaf.end) * 1000
        clip = recording[start_ms:stop_ms]
        clip.export(outputdir + "/" + leaf.identifier + ".wav", format="wav")
def create_aeneas_json_file(audio_path, text_path, output_path):
    """
    Use the aeneas API to synchronize audio and text.

    Parameters:
        audio_path (str): audio filepath.
        text_path (str): text filepath.
        output_path (str): output json filepath.

    Returns:
        Boolean: True when the sync map was written, False when any
        exception occurred (its type, file and line number are printed).

    A KeyboardInterrupt terminates the program instead of being reported.
    """
    try:
        # create Task object
        config_string = u"task_language=por|is_text_type=plain|os_task_file_format=json|task_adjust_boundary_percent_value=50|mfcc_mask_nonspeech_l2=True"
        task = Task(config_string=config_string)
        task.audio_file_path_absolute = u"{}".format(audio_path)
        task.text_file_path_absolute = u"{}".format(text_path)
        task.sync_map_file_path_absolute = u"{}".format(output_path)

        # process Task
        ExecuteTask(task).execute()

        # output sync map to file
        task.output_sync_map_file()
    except KeyboardInterrupt:
        print("KeyboardInterrupt Detected!")
        # sys.exit() instead of the site-provided exit(), which is not
        # guaranteed to exist in every interpreter setup.
        sys.exit()
    except Exception:
        # "except Exception" rather than a bare except, so SystemExit and
        # other BaseException subclasses are no longer swallowed.
        exc_type, _, exc_tb = sys.exc_info()
        exc_file = split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, exc_file, exc_tb.tb_lineno)
        return False

    return True
def get_alignments():
    """Download and align the audio for every entry of ``text_audio_pairs``.

    For each ``(address, youtube_id)`` pair the audio is requested through
    ``D.audio_from_youtube`` into ``alignment/audio/<address>.webm`` and
    the returned value is printed. The text ``alignment/text/<address>.txt``
    is then aligned with that audio and the JSON sync map is written to
    ``alignment/syncmaps/<address>.json``.
    """
    config = u'task_language=eng|is_text_type=plain|os_task_file_format=json'

    # .items() and the call form of print work on Python 2 and Python 3;
    # the former iteritems() / print statement were Python 2 only.
    for address, youtube_id in text_audio_pairs.items():
        # download youtube video
        print(D.audio_from_youtube(
            'https://youtu.be/' + youtube_id,
            output_file_path=u'alignment/audio/' + address + '.webm',
            preferred_format=u'webm'
        ))

        # designate text, audio, and syncmap files
        text = os.path.abspath(u'alignment/text/' + address + '.txt')
        audio = os.path.abspath(u'alignment/audio/' + address + '.webm')
        syncmap = os.path.abspath(u'alignment/syncmaps/' + address + '.json')

        # align text to audio
        task = Task(config_string=config)
        task.text_file_path_absolute = text
        task.audio_file_path_absolute = audio
        task.sync_map_file_path_absolute = syncmap
        ExecuteTask(task).execute()
        task.output_sync_map_file()
def get_alignment(path_to_audio_file: str, transcript: List[str], force=False, language='fr_FR') -> List[dict]:
    """Return the alignment of ``transcript`` against an audio file, with caching.

    The transcript lines are written to a file in ``CACHE_DIR`` named after
    the SHA-1 of the tab-joined transcript. The alignment itself is cached
    in ``CACHE_DIR`` under a name built from that hash and the hash of the
    audio file; it is recomputed only when ``force`` is true or the cached
    file does not exist.

    Args:
        path_to_audio_file: Path of the audio file to align against.
        transcript: Text fragments, one per output fragment line.
        force: Recompute the alignment even when a cached result exists.
        language: ``'fr_FR'`` or ``'en_US'``.

    Returns:
        The sync map's ``fragments``, each passed through
        ``cleanup_fragment``.

    Raises:
        KeyError: if ``language`` is not one of the supported locales.
    """
    # see https://github.com/readbeyond/aeneas/blob/9d95535ad63eef4a98530cfdff033b8c35315ee1/aeneas/ttswrappers/espeakngttswrapper.py#L45  # noqa
    aeneas_language = {
        'fr_FR': 'fra',
        'en_US': 'eng',
    }[language]

    full_transcript = '\t'.join(transcript)
    full_transcript_hash = sha1(full_transcript.encode()).hexdigest()
    path_to_transcript = os.path.join(CACHE_DIR, f'{full_transcript_hash}.txt')

    with open(path_to_audio_file, 'rb') as f:
        audio_file_hash = hash_file(f)

    # write() instead of writelines(): the payload is one string, not a
    # list of lines. UTF-8 is explicit so the file matches the encoding
    # used for the hash above regardless of the platform default.
    with open(path_to_transcript, 'w', encoding='utf-8') as f:
        f.write('\n'.join(transcript))

    # NOTE(review): the cache key does not include the language, so the
    # same transcript/audio pair aligned with another language reuses the
    # cached result unless force=True. Confirm this is intended.
    path_to_alignment_tmp = os.path.join(
        CACHE_DIR, f'{full_transcript_hash}_{audio_file_hash}.json')

    if force or not os.path.isfile(path_to_alignment_tmp):
        # build alignment
        task = Task(
            f'task_language={aeneas_language}|os_task_file_format=json|is_text_type=plain'
        )
        task.audio_file_path_absolute = os.path.abspath(path_to_audio_file)
        task.text_file_path_absolute = path_to_transcript
        task.sync_map_file_path_absolute = path_to_alignment_tmp
        executor = ExecuteTask(task=task)
        executor.execute()
        task.output_sync_map_file()

    with open(path_to_alignment_tmp, encoding='utf-8') as source:
        return [cleanup_fragment(f) for f in json.load(source)['fragments']]
def main():
    """Main entry point.

    Reads the book description YAML given on the command line, loads the
    book text, and for every entry under ``audio``:

    * builds a task config string, adding head/tail/process length options
      from the entry's ``start_time`` / ``end_time``,
    * writes the selected text lines, cleaned by the gruut tokenizer, to
      ``<mp3>.txt`` (one sentence per line) and the raw sentences to
      ``<mp3>.raw.txt``,
    * runs the alignment and writes the sync map to ``<mp3>.json``.
    """
    parser = argparse.ArgumentParser(prog="librivox_align.py")
    parser.add_argument("book_yml", help="YAML file with book details")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG)
    _LOGGER.debug(args)

    args.book_yml = Path(args.book_yml)
    input_dir = args.book_yml.parent

    with open(args.book_yml, "r") as book_file:
        book = yaml.safe_load(book_file)

    # Load gruut language
    gruut_lang = gruut.Language.load(book["gruut"]["language"])
    assert gruut_lang, "Unsupported language"

    language = book["aeneas"]["language"]

    # Load book text
    text_path = Path(input_dir / book["text"]["file"])
    _LOGGER.debug("Loading book text from %s", text_path)
    with open(text_path, "r") as text_file:
        text = text_file.readlines()

    # Process MP3 files
    for mp3_name, mp3_info in book["audio"].items():
        mp3_path = input_dir / mp3_name
        sync_path = mp3_path.with_suffix(".json")

        config_string = f"task_language={language}|is_text_type=plain|os_task_file_format=json|task_adjust_boundary_no_zero=True"

        start_time = float(mp3_info.get("start_time", 0))
        if start_time > 0:
            # Skip seconds at the beginning
            config_string += f"|is_audio_file_head_length={start_time}"

        end_time = float(mp3_info.get("end_time", 0))
        if end_time < 0:
            # Skip seconds at the end
            end_time = abs(end_time)
            config_string += f"|is_audio_file_tail_length={end_time}"
        elif end_time > 0:
            # Set length of audio
            config_string += f"|is_audio_file_process_length={end_time}"

        task = Task(config_string=config_string)
        task.audio_file_path_absolute = mp3_path.absolute()
        task.sync_map_file_path_absolute = sync_path.absolute()

        # Clean up newlines in text (start_line/end_line are 1-based and
        # inclusive). join() avoids repeated string concatenation.
        start_line, end_line = mp3_info["start_line"], mp3_info["end_line"]
        mp3_text = "".join(
            text[line_index].strip() + "\n"
            for line_index in range(start_line - 1, end_line)
        )

        # Run through gruut tokenizer to expand abbreviations, numbers, etc.
        # The cleaned text is the aligner's input and is written as UTF-8
        # explicitly rather than in the platform default encoding.
        mp3_text_path = mp3_path.with_suffix(".txt")
        raw_text_path = mp3_path.with_suffix(".raw.txt")
        with open(mp3_text_path, mode="w", encoding="utf-8") as mp3_text_file:
            with open(raw_text_path, "w") as raw_text_file:
                for sentence in gruut_lang.tokenizer.tokenize(mp3_text):
                    clean_text = " ".join(sentence.clean_words)

                    # Each sentence is on a line now
                    print(clean_text, file=mp3_text_file)
                    print(sentence.raw_text, file=raw_text_file)

        # Both files are closed (and therefore flushed) before the task
        # reads the text, instead of relying on seek(0) to flush a file
        # that is still open for writing.
        task.text_file_path_absolute = str(mp3_text_path)

        # Generate sync map JSON file
        _LOGGER.debug("Generating %s (%s)", sync_path, mp3_path)
        ExecuteTask(task).execute()
        task.output_sync_map_file()
def retrieve_keyword_audio(vid, keyword):
    """Extract audio clips of ``keyword`` from a captioned YouTube video.

    The video ``vid`` is skipped when it is longer than 2700 seconds or has
    no caption track for language code ``'ko'``. Otherwise its audio is
    loaded at 16000 Hz and, for every caption whose words contain the
    keyword (or the keyword followed by "s"/"es"), the caption's audio span
    is force-aligned against the caption words. Each regular fragment
    containing the keyword and shorter than 0.9 is written to
    ``DATA_DIR/<keyword>/<keyword>_<index>.wav``.

    Returns:
        The number of keyword clips written (``audio_index``).

    Raises:
        Exception: if a caption's time line does not match
            ``SRT_TIME_PARSER``.
    """
    audio_index = 0
    v_url = URL_TEMPLATE.format(vid)
    youtube = YouTube(v_url)
    y_len = youtube.player_config_args['player_response']['videoDetails']['lengthSeconds']
    print("Length :",y_len)
    print("Views :",youtube.views)
    if int(y_len) > 2700: # only consider video < 45 mins
        return audio_index
    print("="*40)
    caption = youtube.captions.get_by_language_code('ko')
    if caption:
        print("caption==ko")
        # retrieve audio from video
        youtube.streams.first().download(output_path=TEMP_DIR, filename=vid)
        temp_file_name = TEMP_DIR+vid
        # nothing to process when the expected .mp4 file is missing
        if not os.path.isfile(temp_file_name + ".mp4"):
            return audio_index
        time.sleep(1) # need to wait before ffmpeg takes in as input file
        cmd = FFMPEG_TEMPLATE.format(temp_file_name).split()
        subprocess.check_output(cmd)
        # keep only the sample array; index [0] drops the second return value
        audio = librosa.core.load(temp_file_name+".wav", 16000)[0]
        # the samples are in memory now, so both intermediate files can go
        os.remove(temp_file_name + ".mp4")
        os.remove(temp_file_name + ".wav")
        # NOTE(review): formatted_vid is not used anywhere in this function
        formatted_vid = vid.replace('_', '-')
        cc_arr = caption.generate_srt_captions().split('\n\n')
        for captions in cc_arr:
            cc_split = captions.split('\n')
            # accept a 4-line entry with an empty first line, or a 3-line entry
            if len(cc_split) == 4 and cc_split[0] == '':
                cc_split = (cc_split[1], cc_split[2], cc_split[3])
            elif len(cc_split) != 3:
                continue
            _, cc_time, cc_text = cc_split
            cc_text = TAG_CLEANER.sub('', cc_text)
            # clean up punctuation
            cc_text = cc_text.translate(TRANSPLATOR)
            cc_text = cc_text.lower()
            words = cc_text.strip().split()
            # crude stemming: accept the keyword itself or with "s"/"es" appended
            if keyword not in words and keyword + "s" not in words and keyword + "es" not in words:
                continue
            aligner_task = Task(config_string=ALIGNER_CONFIG_STRING)
            # prepare label file for forced aligner (one caption word per line)
            label_file = temp_file_name + "_" + keyword + ".txt"
            with open(label_file, "w+") as file:
                for word in words:
                    file.write(word+"\n")
            # prepare audio file for forced aligner
            match_result = SRT_TIME_PARSER.match(cc_time)
            if match_result:
                # groups 1-4 are passed as the start time, groups 5-8 as the stop time
                start_time_ms = srt_time_to_ms(
                    match_result.group(1),
                    match_result.group(2),
                    match_result.group(3),
                    match_result.group(4))
                stop_time_ms = srt_time_to_ms(
                    match_result.group(5),
                    match_result.group(6),
                    match_result.group(7),
                    match_result.group(8))
                start_pos = start_time_ms * 16
                stop_pos = stop_time_ms * 16
                block = audio[start_pos:stop_pos]
                # *16 since 16 samples are captured per each ms
                # temporary audio file for forced aligner
                audio_file = temp_file_name + "_" + keyword + ".wav"
                librosa.output.write_wav(audio_file, block, 16000)
                time.sleep(1) # buffer for writing wav file
            else:
                print(TEXT_COLOUR['FAIL'] + "failed pasing srt time : " + cc_time + TEXT_COLOUR['ENDC'])
                raise Exception('srt time fail error')
            aligner_task.text_file_path_absolute = label_file
            aligner_task.audio_file_path_absolute = audio_file
            # process aligning task
            ExecuteTask(aligner_task).execute()
            for fragment in aligner_task.sync_map_leaves():
                if fragment.is_regular and keyword in fragment.text and fragment.length < 0.9:
                    # fragment times are scaled by 16000 to index into the caption block
                    begin = int(fragment.begin * 16000)
                    end = int(fragment.end * 16000)
                    keyword_audio = pad_and_center_align(block[begin:end], 16000)
                    file_name = keyword+"_"+str(audio_index)+".wav"
                    librosa.output.write_wav(
                        DATA_DIR + "/" + keyword + "/" + file_name,
                        keyword_audio,
                        16000)
                    audio_index += 1
    return audio_index
def perform_command(self):
    """
    Perform command and return the appropriate exit code.

    Informational options (examples, parameter and value listings) are
    handled first and return immediately. Otherwise the audio path, text
    path, config string and sync map path are taken either from the
    selected built-in example or from the first four actual arguments,
    the inputs are checked, the config string is optionally validated,
    the audio is optionally downloaded from YouTube, and the task is
    created, executed and written out. Optional reports (zero-duration
    fragments, rates, faster-than-allowed rates) are printed at the end.

    :rtype: int
    """
    if len(self.actual_arguments) < 1:
        return self.print_help()

    # informational options: print and return without running a task
    if self.has_option([u"-e", u"--examples"]):
        return self.print_examples(False)
    if self.has_option(u"--examples-all"):
        return self.print_examples(True)
    if self.has_option([u"--list-parameters"]):
        return self.print_parameters()
    parameter = self.has_option_with_value(u"--list-values")
    if parameter is not None:
        return self.print_values(parameter)
    elif self.has_option(u"--list-values"):
        return self.print_values(u"?")

    # NOTE list() is needed for Python3, where keys() is not a list!
    demo = self.has_option(list(self.DEMOS.keys()))
    demo_parameters = u""
    download_from_youtube = self.has_option([u"-y", u"--youtube"])
    largest_audio = self.has_option(u"--largest-audio")
    keep_audio = self.has_option(u"--keep-audio")
    output_html = self.has_option(u"--output-html")
    validate = not self.has_option(u"--skip-validator")
    print_faster_rate = self.has_option(u"--faster-rate")
    print_rates = self.has_option(u"--rates")
    print_zero = self.has_option(u"--zero")

    if demo:
        # built-in examples skip validation and take all four task
        # arguments from the DEMOS entry of the first matching option
        validate = False
        for key in self.DEMOS:
            if self.has_option(key):
                demo_parameters = self.DEMOS[key]
                audio_file_path = demo_parameters[u"audio"]
                text_file_path = demo_parameters[u"text"]
                config_string = demo_parameters[u"config"]
                sync_map_file_path = demo_parameters[u"syncmap"]
                # TODO allow injecting rconf options directly from DEMOS options field
                if key == u"--example-cewsubprocess":
                    self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True
                elif key == u"--example-ctw-espeak":
                    self.rconf[RuntimeConfiguration.TTS] = "custom"
                    self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK
                elif key == u"--example-ctw-speect":
                    self.rconf[RuntimeConfiguration.TTS] = "custom"
                    self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT
                elif key == u"--example-festival":
                    self.rconf[RuntimeConfiguration.TTS] = "festival"
                    self.rconf[RuntimeConfiguration.TTS_PATH] = "text2wave"
                elif key == u"--example-mws":
                    self.rconf[RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500"
                    self.rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500"
                elif key == u"--example-faster-rate":
                    print_faster_rate = True
                elif key == u"--example-no-zero":
                    print_zero = True
                elif key == u"--example-py":
                    self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False
                elif key == u"--example-rates":
                    print_rates = True
                elif key == u"--example-youtube":
                    download_from_youtube = True
                # only the first matching example option is used
                break
    else:
        # regular invocation: four positional arguments are required
        if len(self.actual_arguments) < 4:
            return self.print_help()
        audio_file_path = self.actual_arguments[0]
        text_file_path = self.actual_arguments[1]
        config_string = self.actual_arguments[2]
        sync_map_file_path = self.actual_arguments[3]

    html_file_path = None
    if output_html:
        # the HTML output implies keeping the audio file
        keep_audio = True
        html_file_path = sync_map_file_path + u".html"

    if download_from_youtube:
        # in YouTube mode the "audio file" argument holds the URL
        youtube_url = audio_file_path

    # check inputs and outputs before doing any work
    if (not download_from_youtube) and (not self.check_input_file(audio_file_path)):
        return self.ERROR_EXIT_CODE
    if not self.check_input_file(text_file_path):
        return self.ERROR_EXIT_CODE
    if not self.check_output_file(sync_map_file_path):
        return self.ERROR_EXIT_CODE
    if (html_file_path is not None) and (not self.check_output_file(html_file_path)):
        return self.ERROR_EXIT_CODE

    self.check_c_extensions()

    if demo:
        # tell the user which arguments the example stands for
        msg = []
        msg.append(u"Running example task with arguments:")
        if download_from_youtube:
            msg.append(u" YouTube URL: %s" % youtube_url)
        else:
            msg.append(u" Audio file: %s" % audio_file_path)
        msg.append(u" Text file: %s" % text_file_path)
        msg.append(u" Config string: %s" % config_string)
        msg.append(u" Sync map file: %s" % sync_map_file_path)
        if len(demo_parameters[u"options"]) > 0:
            msg.append(u" Options: %s" % demo_parameters[u"options"])
        self.print_info(u"\n".join(msg))

    if validate:
        self.print_info(u"Validating config string (specify --skip-validator to bypass)...")
        validator = Validator(logger=self.logger)
        result = validator.check_configuration_string(config_string, is_job=False, external_name=True)
        if not result.passed:
            self.print_error(u"The given config string is not valid:")
            self.print_generic(result.pretty_print())
            return self.ERROR_EXIT_CODE
        self.print_info(u"Validating config string... done")

    if download_from_youtube:
        try:
            self.print_info(u"Downloading audio from '%s' ..." % youtube_url)
            downloader = Downloader(logger=self.logger)
            # from here on audio_file_path is the value returned by the downloader
            audio_file_path = downloader.audio_from_youtube(
                youtube_url,
                download=True,
                output_file_path=None,
                largest_audio=largest_audio
            )
            self.print_info(u"Downloading audio from '%s' ... done" % youtube_url)
        except ImportError:
            self.print_no_pafy_error()
            return self.ERROR_EXIT_CODE
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while downloading audio from YouTube:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

    # each stage below has its own try block so the error message names
    # the stage that failed
    try:
        self.print_info(u"Creating task...")
        task = Task(config_string, logger=self.logger)
        task.audio_file_path_absolute = audio_file_path
        task.text_file_path_absolute = text_file_path
        task.sync_map_file_path_absolute = sync_map_file_path
        self.print_info(u"Creating task... done")
    except Exception as exc:
        self.print_error(u"An unexpected error occurred while creating the task:")
        self.print_error(u"%s" % exc)
        return self.ERROR_EXIT_CODE

    try:
        self.print_info(u"Executing task...")
        executor = ExecuteTask(task=task, rconf=self.rconf, logger=self.logger)
        executor.execute()
        self.print_info(u"Executing task... done")
    except Exception as exc:
        self.print_error(u"An unexpected error occurred while executing the task:")
        self.print_error(u"%s" % exc)
        return self.ERROR_EXIT_CODE

    try:
        self.print_info(u"Creating output sync map file...")
        path = task.output_sync_map_file()
        self.print_info(u"Creating output sync map file... done")
        self.print_success(u"Created file '%s'" % path)
    except Exception as exc:
        self.print_error(u"An unexpected error occurred while writing the sync map file:")
        self.print_error(u"%s" % exc)
        return self.ERROR_EXIT_CODE

    if output_html:
        try:
            # forward the output-related task settings to the HTML writer
            parameters = {}
            parameters[gc.PPN_TASK_OS_FILE_FORMAT] = task.configuration["o_format"]
            parameters[gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF] = task.configuration["o_smil_audio_ref"]
            parameters[gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF] = task.configuration["o_smil_page_ref"]
            self.print_info(u"Creating output HTML file...")
            task.sync_map.output_html_for_tuning(audio_file_path, html_file_path, parameters)
            self.print_info(u"Creating output HTML file... done")
            self.print_success(u"Created file '%s'" % html_file_path)
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while writing the HTML file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

    if download_from_youtube:
        # the downloaded audio is removed unless the user asked to keep it
        if keep_audio:
            self.print_info(u"Option --keep-audio set: keeping downloaded file '%s'" % audio_file_path)
        else:
            gf.delete_file(None, audio_file_path)

    # optional reports on the computed fragments
    if print_zero:
        zero_duration = [l for l in task.sync_map.fragments_tree.vleaves_not_empty if l.begin == l.end]
        if len(zero_duration) > 0:
            self.print_warning(u"Fragments with zero duration:")
            for fragment in zero_duration:
                self.print_generic(u" %s" % fragment)

    if print_rates:
        self.print_info(u"Fragments with rates:")
        for fragment in task.sync_map.fragments_tree.vleaves_not_empty:
            self.print_generic(u" %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

    if print_faster_rate:
        max_rate = task.configuration["aba_rate_value"]
        if max_rate is not None:
            # a fragment is reported only when it exceeds the maximum by at least 0.001
            faster = [l for l in task.sync_map.fragments_tree.vleaves_not_empty if l.rate >= max_rate + Decimal("0.001")]
            if len(faster) > 0:
                self.print_warning(u"Fragments with rate greater than %.3f:" % max_rate)
                for fragment in faster:
                    self.print_generic(u" %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

    return self.NO_ERROR_EXIT_CODE
aligner.py --audio-file=<a> --text-file=<t> --syncmap-file=<s> Options: --audio-file=<a> Audio file path --text-file=<t> Text file path --syncmap-file=<s> Syncmap file path """
from docopt import docopt
from aeneas.executetask import ExecuteTask
from aeneas.task import Task

if __name__ == "__main__":
    # The command-line interface is defined by the module docstring,
    # which docopt parses.
    args = docopt(__doc__)
    audio_file = args["--audio-file"]
    text_file = args["--text-file"]
    syncmap_file = args["--syncmap-file"]

    # Task settings: task_language=kan, plain text input, JSON sync map.
    config_string = u"task_language=kan|is_text_type=plain|os_task_file_format=json"
    task = Task(config_string=config_string)
    task.audio_file_path_absolute = audio_file
    task.text_file_path_absolute = text_file
    task.sync_map_file_path_absolute = syncmap_file

    # process Task
    ExecuteTask(task).execute()

    # write the sync map to syncmap_file
    task.output_sync_map_file()
lang = args.lang
# Only these language codes are accepted; anything else ends the script.
if lang not in ("eng", "hi", "hin"):
    print("only hi and eng allowed for language")
    exit(1)

from aeneas.executetask import ExecuteTask
from aeneas.runtimeconfiguration import RuntimeConfiguration
from aeneas.task import Task

# Subtitles-style text input, SRT sync map output.
config_string = u"task_language={}|is_text_type=subtitles|os_task_file_format=srt".format(lang)

# The sync map goes to a temporary file.
# NOTE(review): the descriptor `tempout` returned by mkstemp() is not closed
# in the code visible here; confirm it is closed further down.
tempout, tempfilename = tempfile.mkstemp()

task = Task(config_string=config_string)
task.audio_file_path_absolute = args.audio
task.text_file_path_absolute = args.txt
task.sync_map_file_path_absolute = tempfilename

rconf = RuntimeConfiguration()
# This option ignores the non-word sounds in the audio
rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
rconf[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD] = 2.5
# To use a different Text-to-Speech engine
#rconf[RuntimeConfiguration.TTS] = "festival"

# Run the alignment with the runtime options, then write the sync map.
ExecuteTask(task, rconf=rconf).execute()
task.output_sync_map_file()