def perform_run(self, audio_file_path, text_file_path, config_string, rconf_string):
    """
    Run ``ExecuteTaskCLI`` once, writing its output to a temporary file,
    and return the logger of the executor.

    The temporary output file is always removed, even if the run raises.

    :param audio_file_path: audio file argument passed to the CLI
    :param text_file_path: text file argument passed to the CLI
    :param config_string: task configuration string passed to the CLI
    :param rconf_string: runtime configuration, passed as ``-r="..."``
    :returns: the ``logger`` attribute of the ``ExecuteTaskCLI`` instance
    """
    output_file_handler, output_file_path = gf.tmp_file()
    executor = ExecuteTaskCLI(use_sys=False)
    # An empty string (not a missing argument) is passed when not verbose.
    verbose = "-v" if self.verbose else ""
    try:
        executor.run(arguments=[
            "dummy placeholder for aeneas.tools.execute_task",
            audio_file_path,
            text_file_path,
            config_string,
            output_file_path,
            "-r=\"%s\"" % rconf_string,
            verbose
        ])
    finally:
        # Clean up the temporary file whether or not the run succeeded.
        gf.delete_file(output_file_handler, output_file_path)
    return executor.logger
def perform_command(self):
    """
    Dispatch the command line to the matching tool and return its exit code.

    Resolution order: no arguments (short help), a recognized tool switch
    from ``self.TOOLS``, then ``-h``, ``--help``, ``--version``, and finally
    the default ``ExecuteTaskCLI``.

    :rtype: int
    """
    given = self.actual_arguments

    # nothing to do without arguments: show the short help
    if len(given) < 1:
        return self.print_help(short=True)

    # a recognized tool switch selects the corresponding tool class
    for tool_class, tool_switches in self.TOOLS:
        if not self.has_option(tool_switches):
            continue
        # the selected tool receives sys.argv without any of its switches
        forwarded = [arg for arg in sys.argv if arg not in tool_switches]
        tool_suffix = u" %s" % tool_switches[0]
        tool = tool_class(invoke=(self.invoke + tool_suffix))
        return tool.run(arguments=forwarded)

    # help and version requests, checked in this exact order
    if u"-h" in given:
        return self.print_help(short=True)
    if u"--help" in given:
        return self.print_help(short=False)
    if u"--version" in given:
        return self.print_name_version()

    # anything else is handed to ExecuteTaskCLI unchanged
    default_tool = ExecuteTaskCLI(invoke=self.invoke)
    return default_tool.run(arguments=sys.argv)
def execute(self, parameters, expected_exit_code):
    """
    Run ``ExecuteTaskCLI`` with the given parameters and assert on its
    exit code.

    The temporary output directory is always removed, even if building the
    arguments or running the tool raises.

    :param parameters: iterable of ``(type, value)`` pairs; ``"in"`` values
                       are made absolute relative to this file, ``"out"``
                       values are placed inside a temporary directory, and
                       any other value is passed through unchanged
    :param expected_exit_code: the exit code the run is expected to return
    """
    output_path = gf.tmp_directory()
    try:
        params = ["placeholder"]
        for p_type, p_value in parameters:
            if p_type == "in":
                params.append(gf.absolute_path(p_value, __file__))
            elif p_type == "out":
                params.append(os.path.join(output_path, p_value))
            else:
                params.append(p_value)
        exit_code = ExecuteTaskCLI(use_sys=False).run(arguments=params)
    finally:
        # Do not leave the temporary directory behind on failure.
        gf.delete_directory(output_path)
    self.assertEqual(exit_code, expected_exit_code)
def run_aeneas_per_file(text_path, audio_path, lang):
    """
    Align one audio file with its text via ``ExecuteTaskCLI``.

    The JSON sync map is written next to the audio file, using the audio
    file name with a ``.json`` extension. On success the sync map is passed
    to ``set_proper_filename_id`` together with the audio base name.

    :param text_path: path of the plain text file
    :param audio_path: path of the audio file
    :param lang: language code inserted into ``task_language``
    :returns: ``True`` when the tool exits with code 0, ``False`` otherwise
    """
    audio_stem = os.path.splitext(audio_path)[0]
    sync_map_path = audio_stem + '.json'  # same name as the audio file
    task_config = (
        'task_language={}|is_text_type=plain|os_task_file_format=json|is_audio_file_detect_head_max=1|task_adjust_boundary_nonspeech_string=REMOVE'
        .format(lang))

    cli_arguments = ["dummy", audio_path, text_path, task_config, sync_map_path]
    status = ExecuteTaskCLI(use_sys=False).run(arguments=cli_arguments)

    if status == 0:
        set_proper_filename_id(sync_map_path, os.path.basename(audio_stem))
        return True

    print('Erro happened on file: {}. Exit code: {}'.format(
        os.path.basename(audio_path), status))
    return False
def arrange(folder_path):
    """
    Run the aeneas alignment for every chapter folder under ``folder_path``.

    Each direct sub-directory is expected to contain a text file named
    ``<folder name>.txt`` and at least one ``.mp3`` or ``.wav`` file; the
    first audio file listed is used. The sync map is written to
    ``<folder name>.json`` inside the folder and, when the tool exits with
    code 0, passed to ``set_proper_filename_id``.

    Entries that are not directories (regular files, dangling symlinks,
    other special files) are skipped instead of being listed as folders.

    :param folder_path: directory whose sub-directories are processed
    """
    task_config = 'task_language=deu|is_text_type=plain|os_task_file_format=json|is_audio_file_detect_head_max=1|task_adjust_boundary_nonspeech_string=REMOVE'

    folders = [
        candidate
        for candidate in (os.path.join(folder_path, item)
                          for item in os.listdir(folder_path))
        if os.path.isdir(candidate)
    ]

    for folder in folders:
        folder_name = os.path.basename(folder)
        book_text_file_path = os.path.join(folder, folder_name + '.txt')

        # The first audio file in listing order wins.
        book_audio_file_path = next(
            (os.path.join(folder, file_item)
             for file_item in os.listdir(folder)
             if file_item.endswith(('.mp3', '.wav'))),
            None)
        if book_audio_file_path is None:
            print('Can\'t find audio for chapter in folder: {}'.format(folder))
            continue

        json_filepath = os.path.join(folder, folder_name + '.json')
        args = [
            "dummy",
            book_audio_file_path,
            book_text_file_path,
            task_config,
            json_filepath
        ]
        exit_code = ExecuteTaskCLI(use_sys=False).run(arguments=args)
        print('Folder {} done with exit code: {}'.format(
            folder_name, exit_code))
        if exit_code == 0:
            set_proper_filename_id(json_filepath, folder_name)

    print('Aeneas finished all the fodlers')
def create_aeneas_csv(df=input_df,
                      book_chapter_list=book_chapter_list,
                      input_audio_dir=input_audio_dir):
    """
    Align every chapter in ``book_chapter_list`` with its audio file and
    write one CSV row per verse, including the verse end time.

    For each chapter this locates the audio file by glob, writes the verse
    texts to ``<audio stem>_aeneas_input.txt`` in ``output_dir``, runs aeneas
    (once through ``ExecuteTaskCLI`` to save the sync map file, once through
    ``ExecuteTask`` to read the boundaries), drops the ``......`` lines from
    the sync map file and optionally runs the silence adjustments selected
    by ``args``.

    Chapters without a matching audio file are recorded in
    ``missing_chapters`` and skipped; they are written, one per line, to
    ``missing_chapters.txt`` at the end.

    NOTE(review): the block nesting was reconstructed from source whose
    indentation was lost -- confirm against the original layout.

    :param df: table with ``book``, ``chapter``, ``verse_content``,
               ``fileset``, ``line_number`` and ``verse_number`` columns,
               indexed by position (``df[col][i]`` for ``i`` in
               ``range(len(df))``)
    :param book_chapter_list: chapter identifiers, either
                              ``SEQ_BOOK_CHAPTER`` or ``BOOK_CHAPTER``
    :param input_audio_dir: directory searched for the chapter audio files
    """
    for each_chapter in book_chapter_list:
        parts = each_chapter.split('_')

        # Find respective audio file
        if len(parts) == 3:
            sequence = re.findall(r'\d+', parts[0])[0]
            if parts[1] == 'THESSALONIANS':
                book = 'Thess'
            else:
                book = parts[1].capitalize()
            chapter = parts[2]
            find_audio_string = chapter + '_' + sequence + book
            search_book = ' '.join(parts[0:2])
        else:
            # NOTE(review): parts[1] is the chapter in this branch, so this
            # comparison looks like a copy-paste leftover -- confirm.
            if parts[1] == 'THESSALONIANS':
                book = 'Thess'
            else:
                book = parts[0].capitalize()
            search_book = parts[0]
            chapter = parts[1]
            find_audio_string = chapter + '_' + book

        if sound_find_string is not None:
            find_audio_string = sound_find_string + '_' + chapter
            if language_code == 'en':
                find_audio_string = chapter + '_' + sound_find_string
        print(find_audio_string)

        # glob returns an empty list when nothing matches; indexing it
        # directly would raise before the chapter could be recorded.
        audio_matches = glob.glob(input_audio_dir + '/*' + find_audio_string +
                                  '*')
        if not audio_matches:
            missing_chapters.append(each_chapter)
            continue
        chapter_audio = audio_matches[0]
        audio_stem = (chapter_audio.split('/')[-1]).split('.')[0]

        # Create aeneas text input
        aeneas_file_name = audio_stem + '_aeneas_input.txt'
        chapter = chapter.lstrip('0')
        with codecs.open(output_dir + '/' + aeneas_file_name, 'w',
                         'utf-8') as aeneas_write:
            # Rows of this chapter; reused below when writing the CSV.
            chapter_rows = [
                i for i in range(0, len(df))
                if str(df['book'][i]).strip().upper() == search_book.upper()
                and int(df['chapter'][i]) == int(chapter)
            ]
            for i in chapter_rows:
                aeneas_write.write(df['verse_content'][i] + '\n')

        # Run aeneas (imported here, only once there is work to do)
        from aeneas.executetask import ExecuteTask
        from aeneas.task import Task
        from aeneas.tools.execute_task import ExecuteTaskCLI

        aeneas_output_file = audio_stem + '_aeneas_out.txt'
        text_path = os.path.join(output_dir, aeneas_file_name)
        sync_map_path = os.path.join(output_dir, aeneas_output_file)
        config_string = u"task_adjust_boundary_percent_value=50|task_adjust_boundary_nonspeech_min=0.4|task_language=epo|is_text_type=plain|os_task_file_format=aud"
        print(config_string)

        # Save .txt file
        ExecuteTaskCLI(use_sys=False).run(arguments=[
            None,  # dummy program name argument
            chapter_audio,
            text_path,
            config_string,
            sync_map_path
        ])

        # Save time boundary
        task = Task(config_string=config_string)
        task.audio_file_path_absolute = chapter_audio
        task.text_file_path_absolute = text_path
        task.sync_map_file_path_absolute = sync_map_path

        # process Task
        ExecuteTask(task).execute()

        # Rewrite the sync map file without the '......' lines.
        raw_path = output_dir + '/' + aeneas_output_file
        filtered_path = output_dir + '/' + 'new' + aeneas_output_file
        with open(raw_path, 'r') as a, open(filtered_path, 'w') as b:
            for line in a:
                if '......' not in line:
                    b.write(line)
        shutil.move(output_dir + '/new' + aeneas_output_file, raw_path)

        # End times of all leaves except the first and the last one.
        leaves = task.sync_map_leaves()
        last = len(leaves)
        index_list = [
            leaf.end for i, leaf in enumerate(leaves) if 0 < i < last - 1
        ]

        verse_list = list()
        for inc, i in enumerate(chapter_rows):
            write_file.writerow(
                (df['fileset'][i], df['book'][i], df['chapter'][i],
                 df['line_number'][i], df['verse_number'][i],
                 df['verse_content'][i], index_list[inc]))
            verse_list.append(df['verse_number'][i])
        print(chapter_audio)

        if args.move_adjustment:
            silence_file = output_dir + '/' + (aeneas_output_file.split(
                '/')[-1]).split('.')[0] + '_silence.txt'
            extract_silence_intervals(chapter_audio, silence_file)
            adjust_update_boundaries_with_silence(
                output_dir + '/' + aeneas_output_file,
                silence_file,
                output_dir + '/' + audio_stem + '_sync_adjusted.txt',
                verse_list,
                input_split_field='\t',
                output_split_field='\t')
        elif args.adjust_silence:
            silence_file = output_dir + '/' + (aeneas_output_file.split(
                '/')[-1]).split('.')[0] + '_silence.txt'
            extract_silence_intervals(chapter_audio, silence_file)
            adjust_boundaries_with_silence(
                output_dir + '/' + aeneas_output_file,
                silence_file,
                output_dir + '/' + audio_stem + '_adjusted.txt',
                verse_list,
                input_split_field='\t',
                output_split_field='\t')

    write_file_handle.close()

    if missing_chapters:
        with open(output_dir + '/missing_chapters.txt', 'w',
                  encoding='utf-8') as missing:
            for each_missing in missing_chapters:
                # One chapter per line so the entries stay distinguishable.
                missing.write(each_missing + '\n')
def main():
    """
    Entry point: run ``ExecuteTaskCLI``, invoked as ``aeneas_execute_task``,
    on the arguments found in ``sys.argv``.
    """
    cli_program = ExecuteTaskCLI(invoke="aeneas_execute_task")
    cli_program.run(arguments=sys.argv)
outputPath = args.outputPath

# Assumes same filename for audio and transcript
audioFiles = [f for f in listdir(audioPath) if isfile(join(audioPath, f))]
transFiles = [f for f in listdir(transPath) if isfile(join(transPath, f))]

print("===================== # Audio Files found: " + str(len(audioFiles)))
print("===================== # Transcript Files found: " + str(len(transFiles)) + "\n")

count = 0
for audioFile in audioFiles:
    # The transcript shares the audio base name; [:-4] drops a
    # three-character extension such as ".wav" or ".mp3".
    # NOTE(review): ``name`` is bound here as in the original; it is not
    # used inside this block -- confirm whether anything else reads it.
    transFile = name = audioFile[:-4] + ".txt"
    if transFile in transFiles:
        print("[START] Force aligning \"" + audioFile + "\"...")
        # Paths are built with join(), like the listings above, so the
        # directories work with or without a trailing separator.
        ExecuteTaskCLI(use_sys=False).run(arguments=[
            None,  # dummy program name argument
            unicode(join(audioPath, audioFile)),
            unicode(join(transPath, transFile)),
            u"task_language=eng|is_text_type=mplain|os_task_file_format=aud|os_task_file_levels=3",
            unicode(join(outputPath, audioFile[:-4] + "_fa.aud"))])
        count += 1
        print("[FINISH] Completed Files: " + str(count) + "/" + str(len(audioFiles)))
    else:
        print("ERROR: Transcript not found for audio file: " + audioFile)
def get_fragments_database(mvp_clips,
                           transcripts,
                           remix,
                           debug=False,
                           force_language=None):
    """
    Generate a dictionary containing segment information for every line
    produced by :func:`fragmenter`.

    Each transcript is split by :func:`fragmenter` into the sources that
    contain the remix lines; every source is then force-aligned against its
    clip with aeneas, and the resulting begin/end times (plus the offsets
    found in ``remix``) are stored per line.

    :parameter mvp_clips: list of input clip filenames
    :parameter transcripts: paths of the transcript files, mapped one-to-one
                            to ``mvp_clips``
    :parameter remix: mapping of remix lines to dictionaries that may carry
                      ``offset_begin`` and ``offset_end``
    :parameter debug: forwarded to :func:`fragmenter`; when true the result
                      is also dumped to a temporary JSON file
    :parameter force_language: language to use instead of autodetection
    :returns: ordered mapping ``line -> {'begin', 'end', 'clip'}``
    :raises ValueError: if some remix lines are not found in any transcript
    """
    sources_by_clip = OrderedDict()
    remix_lines = list(remix.keys())

    for clip, transcript in zip(mvp_clips, transcripts):
        with open(transcript) as transcript_file:
            text = transcript_file.read()
        # Flatten to one line and collapse the doubled spaces this creates.
        text = text.replace('\n', ' ').replace('  ', ' ')
        sources_by_clip[clip], remix_lines = fragmenter(text,
                                                        remix_lines,
                                                        debug=debug)
        if not remix_lines:
            break
    else:
        # Every transcript was consumed without exhausting the remix lines.
        if remix_lines:
            raise ValueError(
                "Remix verse/s not found in transcripts given:\n{}".format(
                    '\n- '.join(remix_lines)))

    fragments = OrderedDict()
    for clip, sources in sources_by_clip.items():
        l_sources = len(sources)
        for i, source in enumerate(sources, 1):
            if force_language:
                language = force_language
            elif i == 1:
                # for first iteration of the clip, autodetect the language
                # from the text up to the first space after offset 100, or
                # from the whole source when it has no such space
                cut = source.find(' ', 100)
                snippet = source if cut == -1 else source[:cut]
                language = langdetect.detect(snippet)
                logging.info("Autodetected language for %s: %s", clip,
                             language)
            config_string = u"task_language={}|is_text_type=plain|os_task_file_format=json".format(
                language)

            with tempfile.NamedTemporaryFile('w', delete=False) as f_in:
                f_in.write(source)
            output_json = '{}.json'.format(f_in.name)
            try:
                logging.info('Forcing aligment for %s (step %s/%s)', clip, i,
                             l_sources)
                ExecuteTaskCLI(use_sys=False).run(arguments=[
                    None,
                    os.path.abspath(clip),
                    f_in.name,
                    config_string,
                    output_json
                ])
                with open(output_json) as f_out:
                    output = json.load(f_out)
            finally:
                # The temporary input and output files are no longer needed.
                for leftover in (f_in.name, output_json):
                    if os.path.exists(leftover):
                        os.remove(leftover)

            for f in output['fragments']:
                line = f['lines'][0]
                try:
                    offset_begin = remix[line]['offset_begin']
                    offset_end = remix[line]['offset_end']
                except KeyError:
                    offset_begin = 0
                    offset_end = 0
                fragments[line] = {
                    'begin': float(f['begin']) + offset_begin,
                    'end': float(f['end']) + offset_end,
                    'clip': clip
                }

    if debug:
        # Reuse the descriptor returned by mkstemp so it is not leaked.
        debug_fd, d = tempfile.mkstemp(suffix='.json')
        with os.fdopen(debug_fd, 'w') as debug_file:
            json.dump(fragments, debug_file, indent=2)
        logging.debug('Segments database written to {}'.format(d))
    return fragments
from aeneas.tools.execute_task import ExecuteTaskCLI
import sys

# Positional arguments, forwarded to ExecuteTaskCLI in this order:
# args[0] and args[1] are the two input paths, args[2] is the output path.
args = sys.argv[1:]
if len(args) < 3:
    # Fail with a usage message instead of a bare IndexError.
    sys.exit("usage: %s INPUT_1 INPUT_2 OUTPUT_FILE" % sys.argv[0])

exit_code = ExecuteTaskCLI(use_sys=False).run(arguments=[
    None,  # dummy program name argument
    args[0],
    args[1],
    ("tts=festival|"
     "task_language=eng|"
     "os_task_file_format=vtt|"
     "is_text_type=plain|"
     "task_adjust_boundary_nonspeech_min=0.0010|"
     "task_adjust_boundary_algorithm=auto|"
     "level=3"),
    args[2]
])

# Propagate a failure of the alignment to the calling process.
if exit_code:
    sys.exit(exit_code)
def create_aeneas_csv(df=input_df,
                      book_chapter_list=book_chapter_list,
                      input_audio_dir=input_audio_dir):
    """
    Align every chapter in ``book_chapter_list`` with its ``.mp3`` file and
    write one CSV row per verse, including the verse end time.

    The audio file of a chapter is found through the book-to-audio map CSV
    (``book_to_audio_map``): the number in its second column, zero-padded
    to two digits, and the chapter are used as a glob pattern inside
    ``input_audio_dir``. Chapters whose ``*_sync_adjusted.txt`` already
    exists in ``output_dir`` are not aligned again.

    Chapters without a matching audio file are recorded in
    ``missing_chapters`` and skipped; they are written, one per line, to
    ``missing_chapters.txt`` at the end.

    Any exception is caught and reported as (type name, file, line number).

    NOTE(review): the block nesting -- in particular how far the
    ``if not os.path.isfile(check_file)`` body extends -- was reconstructed
    from source whose indentation was lost; confirm against the original.

    :param df: table with ``book``, ``chapter``, ``verse_content``,
               ``fileset``, ``line_number`` and ``verse_number`` columns,
               indexed by position (``df[col][i]`` for ``i`` in
               ``range(len(df))``)
    :param book_chapter_list: chapter identifiers of the form
                              ``BOOK_CHAPTER``
    :param input_audio_dir: directory searched for the chapter audio files
    """
    try:
        # Loaded on first use and then shared by all chapters.
        map_book_df = None

        for each_chapter in book_chapter_list:
            parts = each_chapter.split('_')
            search_book = parts[0]
            chapter = parts[1]
            book = search_book
            print(book, chapter)

            if map_book_df is None:
                map_book_df = pd.read_csv(book_to_audio_map)
            audio_book_id = (map_book_df[(
                map_book_df.iloc[:, 0].str).contains(book) == True].iloc[0, 1])
            if int(audio_book_id) < 10:
                audio_book_id = '0' + str(audio_book_id)
            else:
                audio_book_id = str(audio_book_id)

            find_audio_string = audio_book_id + '*' + chapter
            print(find_audio_string)

            # glob returns an empty list when nothing matches; indexing it
            # directly would abort the whole run through the outer except.
            audio_matches = glob.glob(input_audio_dir + '/*' +
                                      find_audio_string + '*.mp3')
            if not audio_matches:
                missing_chapters.append(each_chapter)
                continue
            chapter_audio = audio_matches[0]
            audio_stem = (chapter_audio.split('/')[-1]).split('.')[0]

            # Create aeneas text input
            aeneas_file_name = audio_stem + '_aeneas_input.txt'
            chapter = chapter.lstrip('0')
            with codecs.open(output_dir + '/' + aeneas_file_name, 'w',
                             'utf-8') as aeneas_write:
                # Rows of this chapter; reused below when writing the CSV.
                chapter_rows = [
                    i for i in range(0, len(df))
                    if str(df['book'][i]).strip().upper().replace(' ', '') ==
                    search_book.upper()
                    and int(df['chapter'][i]) == int(chapter)
                ]
                for i in chapter_rows:
                    aeneas_write_string = ''.join(
                        filter(lambda ch: ch not in remove_chars_list,
                               str(df['verse_content'][i])))
                    aeneas_write.write(aeneas_write_string + '\n')

            # Run aeneas (imported here, only once there is work to do)
            from aeneas.executetask import ExecuteTask
            from aeneas.task import Task
            from aeneas.tools.execute_task import ExecuteTaskCLI

            aeneas_output_file = audio_stem + '_aeneas_out.txt'

            # NOTE(review): find_audio_string is built as
            # audio_book_id + '*' + chapter, so it looks like it can never
            # equal '01_*01' -- confirm the intended condition.
            if (args.skip_matthew1_audio_head
                    is not None) and (find_audio_string == '01_*01'):
                config_string = (
                    "is_audio_file_head_length=skip_length|task_adjust_boundary_percent_value=50|task_adjust_boundary_nonspeech_min=0.4|task_language=aeneas_lang|is_text_type=plain|os_task_file_format=aud"
                    .replace('aeneas_lang', language_code)).replace(
                        'skip_length', args.skip_matthew1_audio_head[0])
            else:
                config_string = "task_adjust_boundary_percent_value=50|task_adjust_boundary_nonspeech_min=0.4|task_language=aeneas_lang|is_text_type=plain|os_task_file_format=aud".replace(
                    'aeneas_lang', language_code)

            # Skip chapters whose adjusted sync file already exists.
            check_file = os.path.join(output_dir,
                                      audio_stem + '_sync_adjusted.txt')
            if os.path.isfile(check_file):
                continue

            text_path = os.path.join(output_dir, aeneas_file_name)
            sync_map_path = os.path.join(output_dir, aeneas_output_file)
            print(os.path.isfile(check_file), check_file)
            print(sync_map_path)

            # Save .txt file
            ExecuteTaskCLI(use_sys=False).run(arguments=[
                None,  # dummy program name argument
                chapter_audio,
                text_path,
                config_string,
                sync_map_path
            ])

            # Save time boundary
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = chapter_audio
            print(aeneas_file_name)
            task.text_file_path_absolute = text_path
            task.sync_map_file_path_absolute = sync_map_path

            # process Task
            ExecuteTask(task).execute()

            # Rewrite the sync map file without the '......' lines.
            raw_path = output_dir + '/' + aeneas_output_file
            filtered_path = output_dir + '/' + 'new' + aeneas_output_file
            with open(raw_path, 'r') as a, open(filtered_path, 'w') as b:
                for line in a:
                    if '......' not in line:
                        b.write(line)
            shutil.move(output_dir + '/new' + aeneas_output_file, raw_path)

            # End times of all leaves except the first and the last one.
            leaves = task.sync_map_leaves()
            last = len(leaves)
            index_list = [
                leaf.end for i, leaf in enumerate(leaves) if 0 < i < last - 1
            ]

            verse_list = list()
            for inc, i in enumerate(chapter_rows):
                write_file.writerow(
                    (df['fileset'][i], df['book'][i], df['chapter'][i],
                     df['line_number'][i], df['verse_number'][i],
                     df['verse_content'][i], index_list[inc]))
                verse_list.append(df['verse_number'][i])
            print(chapter_audio)

            if args.no_move_adjustment:
                silence_file = output_dir + '/' + (
                    aeneas_output_file.split('/')[-1]
                ).split('.')[0] + '_silence.txt'
                extract_silence_intervals(chapter_audio, silence_file)
                sound = AudioSegment.from_mp3(chapter_audio)
                framerate = sound.frame_rate
                print(verse_list)
                adjust_update_boundaries_with_silence(
                    output_dir + '/' + aeneas_output_file,
                    silence_file,
                    output_dir + '/' + audio_stem + '_sync_adjusted.txt',
                    verse_list,
                    framerate,
                    input_split_field='\t',
                    output_split_field='\t')
            elif args.adjust_silence:
                silence_file = output_dir + '/' + (
                    aeneas_output_file.split('/')[-1]
                ).split('.')[0] + '_silence.txt'
                extract_silence_intervals(chapter_audio, silence_file)
                adjust_boundaries_with_silence(
                    output_dir + '/' + aeneas_output_file,
                    silence_file,
                    output_dir + '/' + audio_stem + '_adjusted.txt',
                    verse_list,
                    input_split_field='\t',
                    output_split_field='\t')

        write_file_handle.close()

        if missing_chapters:
            with open(output_dir + '/missing_chapters.txt',
                      'w',
                      encoding='utf-8') as missing:
                for each_missing in missing_chapters:
                    # One chapter per line so entries stay distinguishable.
                    missing.write(each_missing + '\n')
    except Exception as err:
        print(
            type(err).__name__,  # TypeError
            __file__,  # /tmp/example.py
            err.__traceback__.tb_lineno  # 2
        )