def perform_run(self, audio_file_path, text_file_path, config_string, rconf_string):
    """
    Run ``ExecuteTaskCLI`` on the given inputs and return the executor's logger.

    The sync map is written to a temporary file that is deleted before
    returning, so only the logger is available to the caller.

    :param audio_file_path: path of the audio file, passed through to the CLI
    :param text_file_path: path of the text file, passed through to the CLI
    :param config_string: task configuration string, passed through to the CLI
    :param rconf_string: runtime configuration string, passed as ``-r="..."``
    :return: the ``logger`` attribute of the executor after the run
    """
    output_file_handler, output_file_path = gf.tmp_file()
    try:
        executor = ExecuteTaskCLI(use_sys=False)
        verbose = "-v" if self.verbose else ""
        executor.run(arguments=[
            "dummy placeholder for aeneas.tools.execute_task",
            audio_file_path,
            text_file_path,
            config_string,
            output_file_path,
            "-r=\"%s\"" % rconf_string,
            verbose
        ])
    finally:
        # Remove the temporary output file even when the run raises, so a
        # failing run does not leave it behind.
        gf.delete_file(output_file_handler, output_file_path)
    return executor.logger
Example #2
0
    def perform_command(self):
        """
        Dispatch the command line and return the resulting exit code.

        With no actual arguments the short help is printed. Otherwise the
        first entry of ``self.TOOLS`` whose switches are present is run with
        those switches removed from ``sys.argv``; failing that, ``-h``,
        ``--help`` and ``--version`` are honoured in that order, and anything
        else is handed to ``ExecuteTaskCLI``.

        :rtype: int
        """
        given = self.actual_arguments

        # nothing to do: show the short help
        if not given:
            return self.print_help(short=True)

        # a recognized tool switch takes precedence over help/version flags
        for tool_class, tool_switches in self.TOOLS:
            if not self.has_option(tool_switches):
                continue
            forwarded = [arg for arg in sys.argv if arg not in tool_switches]
            tool = tool_class(invoke=(self.invoke + u" %s" % tool_switches[0]))
            return tool.run(arguments=forwarded)

        # help and version requests
        if u"-h" in given:
            return self.print_help(short=True)
        if u"--help" in given:
            return self.print_help(short=False)
        if u"--version" in given:
            return self.print_name_version()

        # fall back to ExecuteTaskCLI with the untouched command line
        fallback = ExecuteTaskCLI(invoke=self.invoke)
        return fallback.run(arguments=sys.argv)
Example #3
0
 def execute(self, parameters, expected_exit_code):
     """
     Run ``ExecuteTaskCLI`` with the given parameters and check its exit code.

     :param parameters: iterable of ``(type, value)`` pairs; ``"in"`` values
                        are resolved with ``gf.absolute_path`` relative to
                        this file, ``"out"`` values are placed inside a
                        temporary directory, any other value is passed as is
     :param expected_exit_code: exit code the run is asserted to return
     """
     output_path = gf.tmp_directory()
     try:
         params = ["placeholder"]
         for p_type, p_value in parameters:
             if p_type == "in":
                 params.append(gf.absolute_path(p_value, __file__))
             elif p_type == "out":
                 params.append(os.path.join(output_path, p_value))
             else:
                 params.append(p_value)
         exit_code = ExecuteTaskCLI(use_sys=False).run(arguments=params)
     finally:
         # Delete the temporary directory even when the run raises, so a
         # crashing run does not leave output directories behind.
         gf.delete_directory(output_path)
     self.assertEqual(exit_code, expected_exit_code)
Example #4
0
def run_aeneas_per_file(text_path, audio_path, lang):
    """
    Align one text file with one audio file through ``ExecuteTaskCLI``.

    The JSON sync map is written next to the audio file, under the same name
    with a ``.json`` extension. On success ``set_proper_filename_id`` is
    called with that path and the audio file's base name (without extension).

    :param text_path: path of the plain-text transcript
    :param audio_path: path of the audio file
    :param lang: value substituted for ``task_language`` in the task config
    :return: ``True`` when the CLI exits with 0, ``False`` otherwise
    """
    audio_stem = os.path.splitext(audio_path)[0]
    json_filepath = audio_stem + '.json'  # same name as audiofile
    task_config = (
        'task_language={}|is_text_type=plain|os_task_file_format=json|is_audio_file_detect_head_max=1|task_adjust_boundary_nonspeech_string=REMOVE'
    ).format(lang)

    cli_arguments = ["dummy", audio_path, text_path, task_config, json_filepath]
    exit_code = ExecuteTaskCLI(use_sys=False).run(arguments=cli_arguments)

    if exit_code == 0:
        set_proper_filename_id(json_filepath, os.path.basename(audio_stem))
        return True

    failed_name = os.path.basename(audio_path)
    print('Erro happened on file: {}. Exit code: {}'.format(
        failed_name, exit_code))
    return False
Example #5
0
def arrange(folder_path):
    """
    Run aeneas on every sub-folder of ``folder_path``.

    Each entry of ``folder_path`` that is not a regular file is treated as a
    chapter folder. The transcript is expected at ``<folder>/<folder name>.txt``
    and the audio is the first ``.mp3``/``.wav`` entry returned by
    ``os.listdir``. Folders without audio are reported and skipped. The JSON
    sync map is written to ``<folder>/<folder name>.json`` and, when the CLI
    exits with 0, passed to ``set_proper_filename_id``.

    :param folder_path: directory containing one sub-folder per chapter
    """
    entry_paths = (join(folder_path, entry) for entry in os.listdir(folder_path))
    chapter_dirs = [path for path in entry_paths if not isfile(path)]

    task_config = 'task_language=deu|is_text_type=plain|os_task_file_format=json|is_audio_file_detect_head_max=1|task_adjust_boundary_nonspeech_string=REMOVE'

    for chapter_dir in chapter_dirs:
        audio_names = (name for name in os.listdir(chapter_dir)
                       if name.endswith(('.mp3', '.wav')))
        first_audio = next(audio_names, None)
        if first_audio is None:
            print('Can\'t find audio for chapter in folder: {}'.format(chapter_dir))
            continue

        chapter_name = os.path.basename(chapter_dir)
        book_text_file_path = chapter_dir + '/' + chapter_name + '.txt'
        book_audio_file_path = os.path.join(chapter_dir, first_audio)
        json_filepath = os.path.join(chapter_dir, chapter_name + '.json')

        cli_arguments = [
            "dummy",
            book_audio_file_path,
            book_text_file_path,
            task_config,
            json_filepath,
        ]
        exit_code = ExecuteTaskCLI(use_sys=False).run(arguments=cli_arguments)
        print('Folder {} done with  exit code: {}'.format(
            os.path.basename(chapter_dir), exit_code))
        if exit_code == 0:
            set_proper_filename_id(json_filepath, os.path.basename(chapter_dir))

    print('Aeneas finished all the fodlers')
Example #6
0
def create_aeneas_csv(df=input_df,
                      book_chapter_list=book_chapter_list,
                      input_audio_dir=input_audio_dir):
    """
    Align each chapter's verses with its audio and record the verse end times.

    For every entry of ``book_chapter_list`` the function:

    1. finds the chapter audio with a glob on ``input_audio_dir``; chapters
       without audio are appended to ``missing_chapters`` and skipped;
    2. writes the matching ``verse_content`` rows of ``df`` to a plain-text
       aeneas input file in ``output_dir``;
    3. runs aeneas (CLI to write the sync map file, then ``ExecuteTask`` to
       get the sync map in memory);
    4. drops every sync map line containing ``'......'``;
    5. writes one row per verse, with its end time, through ``write_file``;
    6. applies the silence-based boundary adjustment selected on ``args``.

    Besides its parameters the function uses these module-level names:
    ``sound_find_string``, ``language_code``, ``missing_chapters``,
    ``output_dir``, ``write_file``, ``write_file_handle`` and ``args``.

    :param df: table read as ``df[column][i]`` for ``i`` in ``range(len(df))``,
               with columns ``book``, ``chapter``, ``verse_content``,
               ``fileset``, ``line_number`` and ``verse_number``
    :param book_chapter_list: chapter identifiers split on ``'_'``; three
                              parts are read as number, book, chapter,
                              otherwise the first two as book, chapter
    :param input_audio_dir: directory searched for the chapter audio files
    """
    # Function-scope imports, as in the original, so aeneas is only required
    # when the function is actually called.
    from aeneas.executetask import ExecuteTask
    from aeneas.task import Task
    from aeneas.tools.execute_task import ExecuteTaskCLI

    for each_chapter in book_chapter_list:
        parts = each_chapter.split('_')

        # Find respective audio file
        if len(parts) == 3:
            sequence = re.findall(r'\d+', parts[0])[0]
            if parts[1] == 'THESSALONIANS':
                book = 'Thess'
            else:
                book = parts[1].capitalize()
            chapter = parts[2]
            find_audio_string = chapter + '_' + sequence + book
            search_book = ' '.join(parts[0:2])
        else:
            if parts[1] == 'THESSALONIANS':
                book = 'Thess'
            else:
                book = parts[0].capitalize()
            # Always bind search_book: previously it stayed unset (or kept the
            # previous chapter's value) on the 'Thess' path.
            search_book = parts[0]
            chapter = parts[1]
            find_audio_string = chapter + '_' + book

        if sound_find_string is not None:
            if language_code == 'en':
                find_audio_string = chapter + '_' + sound_find_string
            else:
                find_audio_string = sound_find_string + '_' + chapter

        print(find_audio_string)
        audio_matches = glob.glob(input_audio_dir + '/*' + find_audio_string +
                                  '*')
        if not audio_matches:
            # Indexing the empty glob result used to raise IndexError, so the
            # missing chapter was never recorded; record it and move on.
            missing_chapters.append(each_chapter)
            continue
        chapter_audio = audio_matches[0]

        audio_stem = (chapter_audio.split('/')[-1]).split('.')[0]
        aeneas_file_name = audio_stem + '_aeneas_input.txt'
        aeneas_output_file = audio_stem + '_aeneas_out.txt'
        sync_map_file = output_dir + '/' + aeneas_output_file

        # Create aeneas text input
        chapter = chapter.lstrip('0')
        with codecs.open(output_dir + '/' + aeneas_file_name, 'w',
                         'utf-8') as aeneas_write:
            for i in range(0, len(df)):
                if ((str(df['book'][i])).strip()
                    ).upper() == search_book.upper() and int(
                        df['chapter'][i]) == int(chapter):
                    aeneas_write.write(df['verse_content'][i] + '\n')

        # Run aeneas
        config_string = u"task_adjust_boundary_percent_value=50|task_adjust_boundary_nonspeech_min=0.4|task_language=epo|is_text_type=plain|os_task_file_format=aud"
        print(config_string)

        # Save .txt file
        ExecuteTaskCLI(use_sys=False).run(arguments=[
            None,  # dummy program name argument
            chapter_audio,
            os.path.join(output_dir, aeneas_file_name),
            config_string,
            os.path.join(output_dir, aeneas_output_file)
        ])

        # Save time boundary
        # NOTE(review): this executes the same task a second time to obtain
        # the sync map in memory - confirm whether both runs are needed.
        task = Task(config_string=config_string)
        task.audio_file_path_absolute = chapter_audio
        task.text_file_path_absolute = os.path.join(output_dir,
                                                    aeneas_file_name)
        task.sync_map_file_path_absolute = os.path.join(
            output_dir, aeneas_output_file)

        # process Task
        ExecuteTask(task).execute()

        # Rewrite the sync map without the lines containing '......'; the
        # with-statement closes both files.
        with open(sync_map_file, 'r') as a, \
                open(output_dir + '/' + 'new' + aeneas_output_file, 'w') as b:
            for line in a:
                if '......' not in line:
                    b.write(line)

        shutil.move(output_dir + '/new' + aeneas_output_file, sync_map_file)

        # Keep the end time of every leaf except the first and the last one
        # (presumably the head and tail fragments - TODO confirm).
        leaves = task.sync_map_leaves()
        last = len(leaves)
        index_list = [
            leaf.end for i, leaf in enumerate(leaves) if 0 < i < last - 1
        ]

        inc = 0
        verse_list = list()
        for i in range(0, len(df)):
            if ((str(df['book'][i])).strip()
                ).upper() == search_book.upper() and int(
                    df['chapter'][i]) == int(chapter):
                write_file.writerow(
                    (df['fileset'][i], df['book'][i], df['chapter'][i],
                     df['line_number'][i], df['verse_number'][i],
                     df['verse_content'][i], index_list[inc]))
                verse_list.append(df['verse_number'][i])
                inc += 1

        print(chapter_audio)

        silence_file = output_dir + '/' + (aeneas_output_file.split(
            '/')[-1]).split('.')[0] + '_silence.txt'

        if args.move_adjustment:
            extract_silence_intervals(chapter_audio, silence_file)
            adjust_update_boundaries_with_silence(
                sync_map_file,
                silence_file,
                output_dir + '/' + audio_stem + '_sync_adjusted.txt',
                verse_list,
                input_split_field='\t',
                output_split_field='\t')

        elif args.adjust_silence:
            extract_silence_intervals(chapter_audio, silence_file)
            adjust_boundaries_with_silence(
                sync_map_file,
                silence_file,
                output_dir + '/' + audio_stem + '_adjusted.txt',
                verse_list,
                input_split_field='\t',
                output_split_field='\t')

    write_file_handle.close()

    if missing_chapters:
        with open(output_dir + '/missing_chapters.txt', 'w',
                  encoding='utf-8') as missing:
            for each_missing in missing_chapters:
                # One chapter per line; previously the names were written
                # back to back with no separator.
                missing.write(each_missing + '\n')
def main():
    """
    Command line entry point: hand ``sys.argv`` to ``ExecuteTaskCLI``,
    invoked as ``aeneas_execute_task``, to execute a Task (a pair of
    audio/text files and a configuration string).
    """
    cli = ExecuteTaskCLI(invoke="aeneas_execute_task")
    cli.run(arguments=sys.argv)
Example #8
0
outputPath = args.outputPath

# Imported here because only listdir, isfile and join are known to be in
# scope at this point of the script.
from os.path import splitext

# Assumes same filename for audio and transcript
audioFiles = [f for f in listdir(audioPath) if isfile(join(audioPath, f))]
transFiles = [f for f in listdir(transPath) if isfile(join(transPath, f))]

print("===================== # Audio Files found:      " + str(len(audioFiles)))
print("===================== # Transcript Files found: " + str(len(transFiles)) +  "\n")

# Force-align every audio file that has a transcript with the same base name.
count = 0
for audioFile in audioFiles:
    # splitext handles extensions of any length; slicing off four characters
    # only worked for three-letter extensions such as ".mp3".
    baseName = splitext(audioFile)[0]
    # "name" is bound as well, exactly as the original chained assignment
    # did; code outside this section may read it.
    transFile = name = baseName + ".txt"
    if transFile in transFiles:
        print("[START] Force aligning \"" + audioFile + "\"...")
        # Build paths with join(), like the directory listing above, so the
        # directories do not need a trailing separator.
        ExecuteTaskCLI(use_sys=False).run(arguments=[
            None,  # dummy program name argument
            unicode(join(audioPath, audioFile)),
            unicode(join(transPath, transFile)),
            u"task_language=eng|is_text_type=mplain|os_task_file_format=aud|os_task_file_levels=3",
            unicode(join(outputPath, baseName + "_fa.aud"))])
        count += 1
        print("[FINISH] Completed Files: " + str(count) + "/" + str(len(audioFiles)))

    else:
        print("ERROR: Transcript not found for audio file: " + audioFile)





Example #9
0
def get_fragments_database(mvp_clips,
                           transcripts,
                           remix,
                           debug=False,
                           force_language=None):
    """
    generate a dictionary containing segment information for every
    line produced by :func:`fragmenter`

    Each transcript is read, flattened to a single line and split by
    :func:`fragmenter`; every resulting source text is force-aligned against
    its clip with aeneas and the aligned fragments of all clips are collected
    in one dictionary keyed by fragment line.

    :parameter mvp_clips: list of input clip filenames
    :parameter transcripts: paths of the transcript files. map one-one to
        ``mvp_clips``
    :parameter remix: mapping keyed by remix line; an entry's
        ``offset_begin``/``offset_end`` values are added to the aligned
        times of that line (0 when the line has no entry)
    :parameter debug: forwarded to :func:`fragmenter`; when true the
        resulting database is also dumped to a temporary JSON file
    :parameter force_language: language passed to aeneas; when falsy the
        language is detected from the first source text of each clip
    :return: ``OrderedDict`` mapping each fragment line to a dictionary with
        ``begin``, ``end`` and ``clip``
    :raises ValueError: when remix lines are left after all transcripts
        have been processed
    """
    sources_by_clip = OrderedDict()
    remix_lines = list(remix.keys())

    for clip, transcript in zip(mvp_clips, transcripts):
        with open(transcript) as f_transcript:
            text = f_transcript.read().replace('\n', ' ').replace('  ', ' ')
        sources_by_clip[clip], remix_lines = fragmenter(text,
                                                        remix_lines,
                                                        debug=debug)
        if not remix_lines:
            break
    else:
        # reached only when the loop was never cut short by the break above
        if remix_lines:
            raise ValueError(
                "Remix verse/s not found in transcripts given:\n{}".format(
                    '\n- '.join(remix_lines)))

    fragments = OrderedDict()
    for clip, sources in sources_by_clip.items():
        l_sources = len(sources)
        for i, source in enumerate(sources, 1):
            if force_language:
                language = force_language
            elif i == 1:
                # for first iteration of the clip, autodetect the language
                # from roughly the first 100 characters, cut at a space;
                # short sources without such a space are used in full
                # (str.index used to raise ValueError for them)
                cut = source.find(' ', 100)
                snippet = source if cut == -1 else source[:cut]
                language = langdetect.detect(snippet)
                logging.info("Autodetected language for %s: %s", clip,
                             language)

            config_string = u"task_language={}|is_text_type=plain|os_task_file_format=json".format(
                language)
            with tempfile.NamedTemporaryFile('w', delete=False) as f_in:
                f_in.write(source)
            output_json = '{}.json'.format(f_in.name)
            logging.info('Forcing aligment for %s (step %s/%s)', clip, i,
                         l_sources)
            try:
                ExecuteTaskCLI(use_sys=False).run(arguments=[
                    None,
                    os.path.abspath(clip), f_in.name, config_string,
                    output_json
                ])
                with open(output_json) as f_out:
                    output = json.load(f_out)
            finally:
                # The temporary input/output files are private to this
                # step; remove them so repeated runs do not pile up files.
                for tmp_path in (f_in.name, output_json):
                    try:
                        os.remove(tmp_path)
                    except OSError:
                        # best-effort cleanup: the file may never have
                        # been created if the alignment failed
                        pass

            for f in output['fragments']:
                line = f['lines'][0]
                try:
                    offset_begin = remix[line]['offset_begin']
                    offset_end = remix[line]['offset_end']
                except KeyError:
                    offset_begin = 0
                    offset_end = 0

                fragments[line] = {
                    'begin': float(f['begin']) + offset_begin,
                    'end': float(f['end']) + offset_end,
                    'clip': clip
                }

    # Dump and return only after every clip has been processed; these
    # statements used to sit inside the clip loop, which returned after the
    # first clip and returned None when there were no clips at all.
    if debug:
        fd, dump_path = tempfile.mkstemp(suffix='.json')
        with os.fdopen(fd, 'w') as f_dump:
            json.dump(fragments, f_dump, indent=2)
        logging.debug('Segments database written to {}'.format(dump_path))
    return fragments
Example #10
0
from aeneas.tools.execute_task import ExecuteTaskCLI
import sys
# Command line: <audio file> <text file> <output file>
args = sys.argv[1:]

# aeneas task configuration, one key=value pair per entry
task_config = "|".join([
    "tts=festival",
    "task_language=eng",
    "os_task_file_format=vtt",
    "is_text_type=plain",
    "task_adjust_boundary_nonspeech_min=0.0010",
    "task_adjust_boundary_algorithm=auto",
    "level=3",
])

runner = ExecuteTaskCLI(use_sys=False)
cli_arguments = [
    None,  # dummy program name argument
    args[0],
    args[1],
    task_config,
    args[2],
]
runner.run(arguments=cli_arguments)
def create_aeneas_csv(df=input_df,
                      book_chapter_list=book_chapter_list,
                      input_audio_dir=input_audio_dir):
    """
    Align each chapter's verses with its mp3 audio and record verse end times.

    For every ``BOOK_CHAPTER`` entry of ``book_chapter_list`` the function:

    1. looks the book up in the CSV at ``book_to_audio_map`` to get the audio
       book id and globs ``input_audio_dir`` for the chapter's ``.mp3``;
       chapters without audio are appended to ``missing_chapters`` and skipped;
    2. writes the chapter's ``verse_content`` rows of ``df``, stripped of the
       characters in ``remove_chars_list``, to an aeneas input file;
    3. unless the chapter's ``_sync_adjusted.txt`` already exists in
       ``output_dir``, runs aeneas, drops sync map lines containing
       ``'......'``, writes one row per verse through ``write_file`` and
       applies the silence-based adjustment selected on ``args``.

    Any exception is caught and reported on stdout (type, file, line); it is
    not re-raised, so the remaining chapters are not processed.

    Besides its parameters the function uses these module-level names:
    ``book_to_audio_map``, ``missing_chapters``, ``output_dir``,
    ``remove_chars_list``, ``language_code``, ``write_file``,
    ``write_file_handle`` and ``args``.

    :param df: table read as ``df[column][i]`` for ``i`` in ``range(len(df))``,
               with columns ``book``, ``chapter``, ``verse_content``,
               ``fileset``, ``line_number`` and ``verse_number``
    :param book_chapter_list: chapter identifiers of the form ``BOOK_CHAPTER``
    :param input_audio_dir: directory searched for the chapter mp3 files
    """
    try:
        # Function-scope imports, as in the original, so aeneas is only
        # required when the function is actually called.
        from aeneas.executetask import ExecuteTask
        from aeneas.task import Task
        from aeneas.tools.execute_task import ExecuteTaskCLI

        for each_chapter in book_chapter_list:
            parts = each_chapter.split('_')
            search_book = parts[0]
            chapter = parts[1]
            # The original rebuilt this name character by character for
            # digit-prefixed books, which produced the very same string.
            book = search_book

            print(book, chapter)

            map_book_df = pd.read_csv(book_to_audio_map)
            # First column: book name, second column: audio book id.
            # "== True" turns missing values in the match mask into False.
            audio_book_id = (map_book_df[(
                map_book_df.iloc[:, 0].str).contains(book) == True].iloc[0, 1])

            # two-digit, zero-padded book id
            if int(audio_book_id) < 10:
                audio_book_id = '0' + str(audio_book_id)
            else:
                audio_book_id = str(audio_book_id)

            find_audio_string = audio_book_id + '*' + chapter

            print(find_audio_string)
            audio_matches = glob.glob(input_audio_dir + '/*' +
                                      find_audio_string + '*.mp3')
            if not audio_matches:
                # Indexing the empty glob result used to raise IndexError,
                # which aborted the whole run through the handler below and
                # never recorded the chapter; record it and move on.
                missing_chapters.append(each_chapter)
                continue
            chapter_audio = audio_matches[0]

            audio_stem = (chapter_audio.split('/')[-1]).split('.')[0]
            aeneas_file_name = audio_stem + '_aeneas_input.txt'
            aeneas_output_file = audio_stem + '_aeneas_out.txt'
            sync_map_file = output_dir + '/' + aeneas_output_file

            # Create aeneas text input
            chapter = chapter.lstrip('0')
            with codecs.open(output_dir + '/' + aeneas_file_name, 'w',
                             'utf-8') as aeneas_write:
                for i in range(0, len(df)):
                    if (((str(df['book'][i])).strip()).upper()).replace(
                            ' ', '') == search_book.upper() and int(
                                df['chapter'][i]) == int(chapter):
                        # "ch" instead of "i": the lambda parameter used to
                        # shadow the row index
                        aeneas_write_string = ''.join(
                            (filter(lambda ch: ch not in remove_chars_list,
                                    str(df['verse_content'][i]))))
                        aeneas_write.write(aeneas_write_string + '\n')

            # NOTE(review): find_audio_string is built as id + '*' + chapter,
            # so it can never equal '01_*01' and the head-skipping config is
            # never selected - confirm the intended pattern.
            if (args.skip_matthew1_audio_head
                    is not None) and (find_audio_string == '01_*01'):
                config_string = (
                    "is_audio_file_head_length=skip_length|task_adjust_boundary_percent_value=50|task_adjust_boundary_nonspeech_min=0.4|task_language=aeneas_lang|is_text_type=plain|os_task_file_format=aud"
                    .replace('aeneas_lang', language_code)).replace(
                        'skip_length', args.skip_matthew1_audio_head[0])
            else:
                config_string = "task_adjust_boundary_percent_value=50|task_adjust_boundary_nonspeech_min=0.4|task_language=aeneas_lang|is_text_type=plain|os_task_file_format=aud".replace(
                    'aeneas_lang', language_code)

            # Skip chapters whose adjusted sync map already exists.
            check_file = os.path.join(output_dir,
                                      audio_stem + '_sync_adjusted.txt')
            if os.path.isfile(check_file):
                continue

            print(os.path.isfile(check_file), check_file)
            print(os.path.join(output_dir, aeneas_output_file))

            # Save .txt file
            ExecuteTaskCLI(use_sys=False).run(arguments=[
                None,  # dummy program name argument
                chapter_audio,
                os.path.join(output_dir, aeneas_file_name),
                config_string,
                os.path.join(output_dir, aeneas_output_file)
            ])

            # Save time boundary
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = chapter_audio
            print(aeneas_file_name)
            task.text_file_path_absolute = os.path.join(
                output_dir, aeneas_file_name)
            task.sync_map_file_path_absolute = os.path.join(
                output_dir, aeneas_output_file)

            # process Task
            ExecuteTask(task).execute()

            # Rewrite the sync map without the lines containing '......';
            # the with-statement closes both files.
            with open(sync_map_file, 'r') as a, \
                    open(output_dir + '/' + 'new' + aeneas_output_file,
                         'w') as b:
                for line in a:
                    if '......' not in line:
                        b.write(line)

            shutil.move(output_dir + '/new' + aeneas_output_file,
                        sync_map_file)

            # Keep the end time of every leaf except the first and the last
            # one (presumably the head and tail fragments - TODO confirm).
            leaves = task.sync_map_leaves()
            last = len(leaves)
            index_list = [
                leaf.end for i, leaf in enumerate(leaves) if 0 < i < last - 1
            ]

            inc = 0
            verse_list = list()
            for i in range(0, len(df)):
                if (((str(df['book'][i])).strip()).replace(
                        ' ', '')).upper() == search_book.upper() and int(
                            df['chapter'][i]) == int(chapter):
                    write_file.writerow(
                        (df['fileset'][i], df['book'][i], df['chapter'][i],
                         df['line_number'][i], df['verse_number'][i],
                         df['verse_content'][i], index_list[inc]))
                    verse_list.append(df['verse_number'][i])
                    inc += 1

            print(chapter_audio)

            silence_file = output_dir + '/' + (
                aeneas_output_file.split('/')[-1]
            ).split('.')[0] + '_silence.txt'

            if args.no_move_adjustment:
                extract_silence_intervals(chapter_audio, silence_file)
                sound = AudioSegment.from_mp3(chapter_audio)
                framerate = sound.frame_rate

                print(verse_list)
                adjust_update_boundaries_with_silence(
                    sync_map_file,
                    silence_file,
                    output_dir + '/' + audio_stem + '_sync_adjusted.txt',
                    verse_list,
                    framerate,
                    input_split_field='\t',
                    output_split_field='\t')

            elif args.adjust_silence:
                extract_silence_intervals(chapter_audio, silence_file)
                adjust_boundaries_with_silence(
                    sync_map_file,
                    silence_file,
                    output_dir + '/' + audio_stem + '_adjusted.txt',
                    verse_list,
                    input_split_field='\t',
                    output_split_field='\t')

        write_file_handle.close()

        if missing_chapters:
            with open(output_dir + '/missing_chapters.txt',
                      'w',
                      encoding='utf-8') as missing:
                for each_missing in missing_chapters:
                    # One chapter per line; previously the names were
                    # written back to back with no separator.
                    missing.write(each_missing + '\n')
    except Exception as err:
        # Report and swallow, as before: exception type, this file and the
        # line of this function at which the exception surfaced.
        print(
            type(err).__name__,
            __file__,
            err.__traceback__.tb_lineno
        )