Esempio n. 1
0
def align_aeneas():
    """Run one Task per narration audio file of every PID in ``config.PIDs``.

    For each PID the narrations subfolder is searched for ``*.m4a`` files,
    falling back to ``*.mp3`` when none are found.  The first six characters
    of each audio file name form the file id, which selects the transcript
    (``<id>.txt`` in ``config.aeneas_transcript_dir``) and the output sync
    map (``<id>.sbv`` in ``config.aeneas_output_dir``).  Audio files whose
    transcript does not exist are reported and skipped.

    Messages are emitted with single-argument ``print(...)`` calls, which
    produce the same output under Python 2 and Python 3.
    """
    for PID in config.PIDs:

        audio_wc = os.path.join(PID, config.narrations_subfolder, "*.m4a")
        audio_file_list = glob(audio_wc)

        if not audio_file_list:
            # No m4a narrations for this PID: fall back to mp3.
            audio_wc = os.path.join(PID, config.narrations_subfolder, "*.mp3")
            audio_file_list = glob(audio_wc)

        audio_file_list.sort()

        for a in audio_file_list:
            # The file id is the first six characters of the audio file name.
            file_id = os.path.split(a)[1][0:6]
            t = os.path.join(config.aeneas_transcript_dir, file_id + ".txt")
            out_file = os.path.join(config.aeneas_output_dir, file_id + ".sbv")
            print("aligning " + a + " " + t)

            if not os.path.isfile(t):
                print(t + " not available so will not be processed.  Was it a missed ground truth file?")
                continue

            # create Task object
            config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=sbv"
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = a
            task.text_file_path_absolute = t
            task.sync_map_file_path_absolute = out_file

            # process Task
            ExecuteTask(task).execute()

            # output sync map to file
            task.output_sync_map_file()
Esempio n. 2
0
def align(text_path, audio_path, align_out_path, word_align=True):
    """Run a Task on ``text_path``/``audio_path`` and write a JSON sync map.

    :param text_path: path assigned to the task's text file.
    :param audio_path: path assigned to the task's audio file.
    :param align_out_path: path the JSON sync map is written to.
    :param word_align: when true, the task is configured with
        ``os_task_file_levels=3`` and ``is_text_type=mplain`` and executed
        with a RuntimeConfiguration that enables the two
        ``MFCC_MASK_NONSPEECH`` flags; otherwise plain text is used and no
        runtime configuration is passed.
    """
    options = [u"task_language=hi", "os_task_file_format=json"]
    if word_align:
        options.extend(["os_task_file_levels=3", "is_text_type=mplain"])
        runtime_conf = RuntimeConfiguration()
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH_L3] = True
    else:
        options.append("is_text_type=plain")
        runtime_conf = None

    job = Task(config_string="|".join(options))
    job.text_file_path_absolute = text_path
    job.audio_file_path_absolute = audio_path
    job.sync_map_file_path_absolute = align_out_path

    # Execute the task, then have it write its sync map file.
    ExecuteTask(job, rconf=runtime_conf).execute()
    job.output_sync_map_file()

    # Re-serialise the JSON with ensure_ascii=False so non-ASCII characters
    # are stored literally instead of as escape sequences.
    with open(align_out_path, 'r', encoding='utf8') as src:
        alignment = json.load(src)
    with open(align_out_path, 'w', encoding='utf8') as dst:
        json.dump(alignment, dst, ensure_ascii=False, indent=2)
Esempio n. 3
0
def main():
    """Command-line entry point: execute one Task and write its sync map.

    Expects four arguments in ``sys.argv``: audio file path, text file path,
    config string, and sync map output path.  With fewer arguments
    ``usage()`` is called and the function returns.  Progress and errors are
    printed with single-argument ``print(...)`` calls, which behave the same
    under Python 2 and Python 3.
    """
    if len(sys.argv) < 5:
        usage()
        return

    audio_file_path = sys.argv[1]
    text_file_path = sys.argv[2]
    config_string = sys.argv[3]
    sync_map_file_path = sys.argv[4]

    print("[INFO] Creating task...")
    task = Task(config_string)
    task.audio_file_path_absolute = audio_file_path
    task.text_file_path_absolute = text_file_path
    task.sync_map_file_path_absolute = sync_map_file_path
    print("[INFO] Creating task... done")

    print("[INFO] Executing task...")
    logger = Logger(tee=False)
    executor = ExecuteTask(task=task, logger=logger)
    result = executor.execute()
    print("[INFO] Executing task... done")

    if not result:
        print("[ERRO] An error occurred while executing the task")
        return

    print("[INFO] Creating output container...")
    path = task.output_sync_map_file()
    print("[INFO] Creating output container... done")

    # A None path is treated as a failure to write the sync map file.
    if path is not None:
        print("[INFO] Created %s" % path)
    else:
        print("[ERRO] An error occurred while writing the output sync map file")
Esempio n. 4
0
def force_align(audio_path,
                text_path,
                output_path,
                min_length=1.0,
                max_length=10.0,
                logging=logging):
    """Execute a Task and dump its length-filtered fragments as JSON.

    :param audio_path: path assigned to the task's audio file.
    :param text_path: path assigned to the task's text file.
    :param output_path: path of the JSON file written by this function.
    :param min_length: fragments must be strictly longer than this.
    :param max_length: fragments must be strictly shorter than this.
    :param logging: object whose ``info`` method receives progress messages.

    Only sync map leaves with non-empty text and a length strictly between
    ``min_length`` and ``max_length`` are kept; each is stored as a dict
    with ``start``, ``end``, ``length`` (floats) and ``text``.
    """
    job = Task(
        config_string=
        u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    job.audio_file_path_absolute = audio_path
    job.text_file_path_absolute = text_path
    job.sync_map_file_path_absolute = output_path

    logging.info("Aligning audio and text...")
    ExecuteTask(job).execute()
    logging.info("Aligned audio and text")

    kept = [
        {
            "start": float(leaf.begin),
            "end": float(leaf.end),
            "length": float(leaf.length),
            "text": leaf.text,
        }
        for leaf in job.sync_map_leaves()
        if leaf.length > min_length and leaf.length < max_length and leaf.text
    ]

    with open(output_path, "w") as out:
        json.dump(kept, out, indent=4)
Esempio n. 5
0
 def test_output_sync_map(self):
     """Check that a TXT sync map is written to the requested temporary path."""
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = tempfile.mkstemp(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         # Clean up the temporary file even when an assertion above fails.
         delete_file(handler, output_file_path)
Esempio n. 6
0
 def test_output_sync_map(self):
     """Check that a TXT sync map is written to the requested temporary path."""
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         # Clean up the temporary file even when an assertion above fails.
         gf.delete_file(handler, output_file_path)
Esempio n. 7
0
 def test_execute(self):
     """Execute a plain-text task with head/process lengths and write a TXT sync map."""
     job = Task(
         "task_language=en"
         "|os_task_file_format=txt"
         "|os_task_file_name=output_head.txt"
         "|is_text_type=plain"
         "|is_audio_file_head_length=11.960"
         "|is_audio_file_process_length=31.640"
     )
     job.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     job.text_file_path_absolute = "../aeneas/tests/res/inputtext/sonnet_plain_head_length.txt"
     tee_logger = Logger(tee=True)
     # Execution must report success before the sync map is written out.
     self.assertTrue(ExecuteTask(job, logger=tee_logger).execute())
     job.sync_map_file_path_absolute = "/tmp/output_head_length.txt"
     written_path = job.output_sync_map_file()
     self.assertNotEqual(written_path, None)
Esempio n. 8
0
 def test_output_sync_map(self):
     """Check that a TXT sync map is written to the requested temporary path."""
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = tempfile.mkstemp(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         # Clean up the temporary file even when an assertion above fails.
         delete_file(handler, output_file_path)
Esempio n. 9
0
 def test_execute(self):
     """Execute an unparsed-XHTML task and write its sync map as SMIL."""
     job = Task(
         "task_language=en"
         "|os_task_file_format=smil"
         "|os_task_file_name=p001.smil"
         "|os_task_file_smil_audio_ref=p001.mp3"
         "|os_task_file_smil_page_ref=p001.xhtml"
         "|is_text_type=unparsed"
         "|is_text_unparsed_id_regex=f[0-9]+"
         "|is_text_unparsed_id_sort=numeric"
     )
     job.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     job.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"
     tee_logger = Logger(tee=True)
     # Execution must report success before the sync map is written out.
     self.assertTrue(ExecuteTask(job, logger=tee_logger).execute())
     job.sync_map_file_path_absolute = "/tmp/p001.smil"
     written_path = job.output_sync_map_file()
     self.assertNotEqual(written_path, None)
Esempio n. 10
0
 def test_execute(self):
     """Run an unparsed-XHTML task end to end and emit a SMIL sync map."""
     smil_options = (
         "task_language=en",
         "os_task_file_format=smil",
         "os_task_file_name=p001.smil",
         "os_task_file_smil_audio_ref=p001.mp3",
         "os_task_file_smil_page_ref=p001.xhtml",
         "is_text_type=unparsed",
         "is_text_unparsed_id_regex=f[0-9]+",
         "is_text_unparsed_id_sort=numeric",
     )
     job = Task("|".join(smil_options))
     job.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     job.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"
     runner = ExecuteTask(job, logger=Logger(tee=True))
     succeeded = runner.execute()
     self.assertTrue(succeeded)
     # The output path is only assigned after a successful execution.
     job.sync_map_file_path_absolute = "/tmp/p001.smil"
     self.assertNotEqual(job.output_sync_map_file(), None)
 def transcribe(self, audio, transcript, output):
     """Execute a Task on ``audio`` and ``transcript`` and write a JSON sync map.

     :param audio: path assigned to the task's audio file.
     :param transcript: path assigned to the task's text file.
     :param output: path the sync map file is written to.
     """
     job = Task(
         config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
     )
     job.audio_file_path_absolute = audio
     job.text_file_path_absolute = transcript
     job.sync_map_file_path_absolute = output

     print("Processing task...\n")
     runner = ExecuteTask(job)
     runner.execute()

     print(f"Taks processed. Writing output to {output}")
     job.output_sync_map_file()
Esempio n. 12
0
 def test_output_sync_map(self):
     """Check that a TXT sync map is written to the requested temporary path."""
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         # Clean up the temporary file even when an assertion above fails.
         gf.delete_file(handler, output_file_path)
Esempio n. 13
0
def process_aeneas_map(filepath_, format_):
    """Execute a Task for the files that share the ``filepath_`` stem.

    The audio path is ``filepath_ + '.' + format_``, the text path is
    ``filepath_ + ".txt"`` and the JSON sync map is written to
    ``filepath_ + ".json"``.
    """
    job = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    )
    job.audio_file_path_absolute = filepath_ + '.' + format_
    job.text_file_path_absolute = filepath_ + ".txt"
    job.sync_map_file_path_absolute = filepath_ + ".json"

    # Run the task, then write the resulting sync map to disk.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()
Esempio n. 14
0
 def execute(self, config_string, audio_path, text_path):
     """Execute a task and assert that a non-empty sync map file is written.

     :param config_string: configuration string passed to ``Task``.
     :param audio_path: audio path, resolved with ``gf.absolute_path``
         against this file.
     :param text_path: text path, resolved the same way.
     """
     handler, tmp_path = gf.tmp_file()
     try:
         task = Task(config_string)
         task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
         task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
         executor = ExecuteTask(task)
         executor.execute()
         task.sync_map_file_path_absolute = tmp_path
         result_path = task.output_sync_map_file()
         self.assertIsNotNone(result_path)
         self.assertEqual(result_path, tmp_path)
         self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
     finally:
         # Clean up the temporary file even when a step above fails.
         gf.delete_file(handler, tmp_path)
    def align_audio_and_text(self, file_path):
        """Execute a Task on this object's audio and text files.

        The task is configured with ``Language.PAN``, plain text input and
        JSON sync map output; the sync map is written to ``file_path``.
        """
        task_conf = TaskConfiguration()
        task_conf[gc.PPN_TASK_LANGUAGE] = Language.PAN
        task_conf[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
        task_conf[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.JSON

        job = Task()
        job.configuration = task_conf
        job.audio_file_path_absolute = self.audio_file_path
        job.text_file_path_absolute = self.txt_file_path
        job.sync_map_file_path_absolute = file_path

        # Run the task, then write the resulting sync map to disk.
        runner = ExecuteTask(job)
        runner.execute()
        job.output_sync_map_file()
Esempio n. 16
0
def process_aeneas(txt_filename, wav_filename, csv_filename):
    """Execute a Task on the given text and audio and write a CSV sync map.

    :param txt_filename: path assigned to the task's text file.
    :param wav_filename: path assigned to the task's audio file.
    :param csv_filename: path the sync map file is written to.
    """
    job = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=csv"
    )
    job.audio_file_path_absolute = wav_filename
    job.text_file_path_absolute = txt_filename
    job.sync_map_file_path_absolute = csv_filename

    # Run the task, then write the resulting sync map to disk.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()
Esempio n. 17
0
 def test_output_sync_map_06(self):
     """Check that writing a SMIL sync map with only a page ref set returns None."""
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.SMIL
     task.configuration.os_file_smil_page_ref = "Text/page.xhtml"
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = tempfile.mkstemp(suffix=".smil")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNone(path)
     finally:
         # Release the descriptor and remove the file even when the
         # assertion above fails.
         os.close(handler)
         os.remove(output_file_path)
Esempio n. 18
0
def align(audio_file_path, text_file_path, syncmap_file_path):
    """Execute a Task on the given audio and text and write a TXT sync map.

    :param audio_file_path: path assigned to the task's audio file.
    :param text_file_path: path assigned to the task's text file.
    :param syncmap_file_path: path the sync map file is written to.
    :return: always ``True``.
    """
    job = Task(
        config_string="task_language=zh|is_text_type=plain|os_task_file_format=txt"
    )
    job.audio_file_path_absolute = audio_file_path
    job.text_file_path_absolute = text_file_path
    job.sync_map_file_path_absolute = syncmap_file_path

    # Run the task, then write the resulting sync map to disk.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()

    return True
Esempio n. 19
0
def run_aeneas_task(audio_filepath, lyrics_filepath, output_filepath):
    """Execute a Task on an audio file and its lyrics and write a JSON sync map.

    :param audio_filepath: path assigned to the task's audio file.
    :param lyrics_filepath: path assigned to the task's text file.
    :param output_filepath: path the sync map file is written to.
    """
    job = Task(
        config_string="task_language=eng|os_task_file_format=json|is_text_type=plain"
    )
    job.audio_file_path_absolute = audio_filepath
    job.text_file_path_absolute = lyrics_filepath
    job.sync_map_file_path_absolute = output_filepath

    # Run the task, then write the resulting sync map to disk.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()
Esempio n. 20
0
def executeAeneas(text_path, audio_path):
    """Execute a Task and write ``<audio path without extension>_syncmap.json``.

    :param text_path: path assigned to the task's text file.
    :param audio_path: path assigned to the task's audio file; its
        extension-less form is the stem of the output file name.
    """
    audio_stem = os.path.splitext(audio_path)[0]

    job = Task(
        config_string=u'task_language=tur|is_text_type=plain|os_task_file_format=json'
    )
    job.audio_file_path_absolute = audio_path
    job.text_file_path_absolute = text_path
    job.sync_map_file_path_absolute = audio_stem + "_syncmap.json"

    # Run the task, then write the resulting sync map to disk.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()
def sync_map_generator(song_name):
    """Execute a Task for ``song_name`` and write ``output/<song_name>.json``.

    The audio (``.wav``) and lyrics (``.txt``) are read from hard-coded
    ``Songs`` and ``Lyrics`` directories; progress is printed before and
    after the run.
    """
    print('Creating sync map file...')
    job = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    )
    job.audio_file_path_absolute = "C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Songs/" + song_name +".wav"
    job.text_file_path_absolute = "C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Lyrics/" + song_name +".txt"
    job.sync_map_file_path_absolute = u"output/"+ song_name + ".json"

    # Run the task, then write the resulting sync map to disk.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()
    print('Created sync map file')
Esempio n. 22
0
def force_align(book_name, chapter_index):
    """Execute a Task for one chapter of a book and write its sync map.

    The configuration string comes from
    ``properties.aeneas_configuration_string``; the audio, text and sync
    map paths are built by the ``fu.build_*_path`` helpers from
    ``book_name`` and ``chapter_index``.
    """
    print("Aligning chapter {:d}".format(chapter_index))

    job = Task(config_string=properties.aeneas_configuration_string)
    job.audio_file_path_absolute = fu.build_audio_path(book_name, chapter_index)
    job.text_file_path_absolute = fu.build_valid_text_path(book_name, chapter_index)
    job.sync_map_file_path_absolute = fu.build_syncmap_path(book_name, chapter_index)

    # Run the task, then write the resulting sync map to disk.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()
Esempio n. 23
0
def generate_epub(pub_id):
    """Execute a Task on the fixed ``./test`` files and return the sync map text.

    :param pub_id: not used by the current implementation.
    :return: the full contents of the written sync map file.
    """
    job = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    )
    job.audio_file_path_absolute = u"./test/audio.mp3"
    job.text_file_path_absolute = u"./test/sonnet_plain.txt"
    job.sync_map_file_path_absolute = u"./test/output.json"

    # Run the task, then write the resulting sync map to disk.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()

    # Read back what was just written and hand it to the caller.
    with open(job.sync_map_file_path_absolute, 'r') as sync_map_file:
        return sync_map_file.read()
Esempio n. 24
0
def make_subs(wav_path, txt_path, srt_path, start):
    """Build subtitles timed against the wav file and shift them by ``start``.

    :param wav_path: path assigned to the task's audio file.
    :param txt_path: path assigned to the task's text file.
    :param srt_path: path the SRT sync map is written to and re-read from.
    :param start: number of seconds passed to ``subs.shift``.
    :return: the shifted subtitles object returned by ``pysrt.open``.
    """
    job = Task(
        config_string="task_language=eng|is_text_type=plain|os_task_file_format=srt"
    )
    job.audio_file_path_absolute = wav_path
    job.text_file_path_absolute = txt_path
    job.sync_map_file_path_absolute = srt_path

    # Run the task, then write the SRT file.
    runner = ExecuteTask(job)
    runner.execute()
    job.output_sync_map_file()

    # Re-open the written file with pysrt and apply the offset.
    shifted = pysrt.open(srt_path)
    shifted.shift(seconds=start)
    return shifted
Esempio n. 25
0
def create_aeneas_json_file(audio_path, text_path, output_path):
    """
    Use the api aeneas to synchronize audio and text.

        Parameters:
        audio_path (str): audio filepath.
        text_path (str): text filepath.
        output_path (str): output json filepath.

        Returns:
        Boolean: True on success, False when any exception is raised while
        creating, executing or writing the task.

        A KeyboardInterrupt terminates the process instead of returning.
    """
    try:
        # create Task object
        config_string = u"task_language=por|is_text_type=plain|os_task_file_format=json|task_adjust_boundary_percent_value=50|mfcc_mask_nonspeech_l2=True"
        task = Task(config_string=config_string)
        task.audio_file_path_absolute = u"{}".format(audio_path)
        task.text_file_path_absolute = u"{}".format(text_path)
        task.sync_map_file_path_absolute = u"{}".format(output_path)

        # process Task
        ExecuteTask(task).execute()

        # output sync map to file
        task.output_sync_map_file()

    except KeyboardInterrupt:
        print("KeyboardInterrupt Detected!")
        # sys.exit() does not depend on the interactive `exit` helper
        # installed by the site module.
        sys.exit()

    except Exception:
        # Catch Exception rather than everything, so SystemExit and
        # GeneratorExit are no longer swallowed and turned into False.
        exc_type, exc_obj, exc_tb = sys.exc_info()
        exc_file = split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, exc_file, exc_tb.tb_lineno)
        return False

    return True
Esempio n. 26
0
def get_alignments():
    """Download audio for every entry of ``text_audio_pairs`` and run a Task on it.

    For each ``address``/``youtube_id`` pair the audio is fetched with
    ``D.audio_from_youtube`` into ``alignment/audio/<address>.webm`` (the
    call's return value is printed), then a Task is executed on
    ``alignment/text/<address>.txt`` and that audio, and the JSON sync map
    is written to ``alignment/syncmaps/<address>.json``.

    Uses ``items()`` and a single-argument ``print(...)`` so the function
    runs unchanged on Python 2 and Python 3.
    """
    config = u'task_language=eng|is_text_type=plain|os_task_file_format=json'

    for address, youtube_id in text_audio_pairs.items():

        # download youtube video
        print(D.audio_from_youtube(
            'https://youtu.be/' + youtube_id,
            output_file_path=u'alignment/audio/' + address + '.webm',
            preferred_format=u'webm'
            ))

        # designate text, audio, and syncmap files
        text = os.path.abspath(u'alignment/text/' + address + '.txt')
        audio = os.path.abspath(u'alignment/audio/' + address + '.webm')
        syncmap = os.path.abspath(u'alignment/syncmaps/' + address + '.json')

        # align text to audio
        task = Task(config_string=config)
        task.text_file_path_absolute = text
        task.audio_file_path_absolute = audio
        task.sync_map_file_path_absolute = syncmap
        ExecuteTask(task).execute()
        task.output_sync_map_file()
Esempio n. 27
0
def get_alignment(path_to_audio_file: str,
                  transcript: List[str],
                  force=False,
                  language='fr_FR') -> List[dict]:
    """Return the cleaned-up fragments for an audio file and its transcript.

    The transcript is written to ``CACHE_DIR`` under a name derived from its
    SHA-1 hash, and the sync map is cached in ``CACHE_DIR`` under a name
    combining that hash with the hash of the audio file.  The Task is only
    executed when ``force`` is true or no cached sync map exists.

    :param path_to_audio_file: path of the audio file.
    :param transcript: transcript lines; they are written one per line.
    :param force: rebuild the sync map even when a cached one exists.
    :param language: ``'fr_FR'`` or ``'en_US'``.
    :return: ``cleanup_fragment`` applied to every entry of the sync map's
        ``fragments`` list.
    :raises KeyError: if ``language`` is not one of the supported values.
    """
    # see https://github.com/readbeyond/aeneas/blob/9d95535ad63eef4a98530cfdff033b8c35315ee1/aeneas/ttswrappers/espeakngttswrapper.py#L45  # noqa
    language = {
        'fr_FR': 'fra',
        'en_US': 'eng',
    }[language]
    full_transcript = '\t'.join(transcript)
    full_transcript_hash = sha1(full_transcript.encode()).hexdigest()
    path_to_transcript = os.path.join(CACHE_DIR, f'{full_transcript_hash}.txt')

    with open(path_to_audio_file, 'rb') as f:
        audio_file_hash = hash_file(f)

    # Write the transcript as UTF-8 explicitly: the hash above is computed on
    # the UTF-8 bytes, and the platform default encoding may differ.
    with open(path_to_transcript, 'w', encoding='utf-8') as f:
        f.write('\n'.join(transcript))

    path_to_alignment_tmp = os.path.join(
        CACHE_DIR, f'{full_transcript_hash}_{audio_file_hash}.json')

    if force or not os.path.isfile(path_to_alignment_tmp):
        # build alignment
        task = Task(
            f'task_language={language}|os_task_file_format=json|is_text_type=plain'
        )
        task.audio_file_path_absolute = os.path.abspath(path_to_audio_file)
        task.text_file_path_absolute = path_to_transcript
        task.sync_map_file_path_absolute = path_to_alignment_tmp
        executor = ExecuteTask(task=task)
        executor.execute()
        task.output_sync_map_file()

    with open(path_to_alignment_tmp, encoding='utf-8') as source:
        return [cleanup_fragment(f) for f in json.load(source)['fragments']]
Esempio n. 28
0
from aeneas.executetask import ExecuteTask
from aeneas.task import Task

# Task configuration: the three options are joined with "|" into the
# config string passed to Task.
config_string = u"|".join((
    u"task_language=eng",
    u"is_text_type=plain",
    u"os_task_file_format=json",
))
task = Task(config_string=config_string)

# Input audio, input text and output sync map locations.
task.audio_file_path_absolute = u"./music/rapgod.mp3"
task.text_file_path_absolute = u"./lyrics/aeneas_test_task.txt"
task.sync_map_file_path_absolute = u"./output/rapgod.json"

# Execute the task ...
ExecuteTask(task).execute()

# ... and write the resulting sync map to the path set above.
task.output_sync_map_file()
Esempio n. 29
0
print("Hello World")
from aeneas.executetask import ExecuteTask
from aeneas.task import Task
import json
import os
import pandas as pd
os.chdir('C:\\Users\\User\\Desktop\\Aflorithmic Labs')

#aeneas forced alignment
config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=json"
task = Task(config_string=config_string)
task.audio_file_path_absolute = "Data/A tale of two cities/tale_of_two_cities_01_dickens_64kb_EDIT.wav"
task.text_file_path_absolute = "Data/A tale of two cities/a_tale_of_two_cities_chapter_1.txt"
task.sync_map_file_path_absolute = "Data/A tale of two cities/syncmap_atotc_c1.json"
ExecuteTask(task).execute()
task.output_sync_map_file()

#creating metadata and list of small audio clips
# NOTE(review): AudioSegment is not imported in the visible part of this
# script; presumably it comes from pydub -- confirm and import it.
# The path has a .wav extension, so use the format-agnostic loader instead
# of from_mp3.
book = AudioSegment.from_file(
    "Data/A tale of two cities/tale_of_two_cities_01_dickens_64kb_EDIT.wav")
# Load the sync map that was just written above.
with open("Data/A tale of two cities/syncmap_atotc_c1.json") as f:
    syncmap = json.load(f)

sentences = []
for fragment in syncmap['fragments']:
    if ((float(fragment['end']) * 1000) -
            float(fragment['begin']) * 1000) > 400:
        sentences.append({
            "audio":
            book[float(fragment['begin']) * 1000:float(fragment['end']) *
                 1000],
            "text":
Esempio n. 30
0
# Only the listed language codes are accepted; anything else prints a message
# and exits with status 1.
if lang not in ["eng", "hi", "hin"]:
    print("only hi and eng allowed for language")
    exit(1)

from aeneas.executetask import ExecuteTask
from aeneas.task import Task
from aeneas.runtimeconfiguration import RuntimeConfiguration

# Task configuration: chosen language, subtitles-type text in, SRT sync map out.
config_string = u"task_language=" + lang + u"|is_text_type=subtitles|os_task_file_format=srt"

# The sync map is written to a temporary file rather than directly to args.out.
# NOTE(review): tempout (the descriptor returned by mkstemp) is not closed in
# the visible part of this script -- confirm it is closed later.
tempout, tempfilename = tempfile.mkstemp()
task = Task(config_string=config_string)
task.audio_file_path_absolute = args.audio
task.text_file_path_absolute = args.txt
task.sync_map_file_path_absolute = tempfilename
rconf = RuntimeConfiguration()
# This option ignores the non-word sounds in the audio
rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
rconf[RuntimeConfiguration.MFCC_MASK_LOG_ENERGY_THRESHOLD] = 2.5

# To use a different Text-to-Speech engine
#rconf[RuntimeConfiguration.TTS] = "festival"

# process Task
ExecuteTask(task, rconf=rconf).execute()

# output sync map to file
task.output_sync_map_file()

# Open the final output file for writing; it is used by code past this point.
f = open(args.out, "w")
Esempio n. 31
0
from aeneas.executetask import ExecuteTask
from aeneas.task import Task
from env import get_list_input

import os


# Task configuration: the three options are joined with "|" into the
# config string passed to Task.
config_string = '|'.join((
    'task_language=vi',
    'is_text_type=plain',
    'os_task_file_format=json',
))
# A single Task object is created once and re-pointed at each input below.
task = Task(config_string=config_string)

inputs = get_list_input()
for item in inputs:
    # Audio, text and output sync map all live under textData/ and are
    # named after the current item.
    task.audio_file_path_absolute = f'textData/{item}.mp3'
    task.text_file_path_absolute = f'textData/{item}.txt'
    task.sync_map_file_path_absolute = f'textData/syncmap-{item}.json'

    # Execute the task for this item, then write its sync map.
    ExecuteTask(task).execute()
    task.output_sync_map_file()
Esempio n. 32
0
# based on the three experiments!/usr/bin/env python
# coding=utf-8

from aeneas.executetask import ExecuteTask
from aeneas.task import Task

# This script could provide additional functionality by:
#    -automatically taking a video and converting it to an audio file
#    -interactively asking the user for timestamps to create a "noise profile"
#    -use that profile to run SoX "noisered", creating an audio file with reduced background noise
#    -retrieving transcript from database as a text file


# The options are adjacent string literals, concatenated into one config string.
config_string = (
    u"task_language=eng|"
    u"is_text_type=plain|"
    # output file type
    u"os_task_file_format=csv|"
    # nonspeech minimum; the value here is 0.010 although the earlier note
    # said ".10 sec" -- NOTE(review): confirm the intended threshold
    u"task_adjust_boundary_nonspeech_min=0.010|"
    # remove nonspeech timestamps
    u"task_adjust_boundary_nonspeech_string=REMOVE|"
    u"task_adjust_boundary_algorithm=rateaggressive|"
    u"task_adjust_boundary_rate_value=14.000"
)
task = Task(config_string=config_string)

# The audio, transcript and output paths are currently fixed.
task.audio_file_path_absolute = u"audio/test-sox-low.mp3"
task.text_file_path_absolute = u"transcripts/test.txt"
task.sync_map_file_path_absolute = u"output/test-sox3.csv"

# Execute the task, then write the sync map to the output path above.
ExecuteTask(task).execute()
task.output_sync_map_file()
Esempio n. 33
0
    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        :rtype: int
        """
        if len(self.actual_arguments) < 1:
            return self.print_help()

        if self.has_option([u"-e", u"--examples"]):
            return self.print_examples(False)

        if self.has_option(u"--examples-all"):
            return self.print_examples(True)

        if self.has_option([u"--list-parameters"]):
            return self.print_parameters()

        parameter = self.has_option_with_value(u"--list-values")
        if parameter is not None:
            return self.print_values(parameter)
        elif self.has_option(u"--list-values"):
            return self.print_values(u"?")

        # NOTE list() is needed for Python3, where keys() is not a list!
        demo = self.has_option(list(self.DEMOS.keys()))
        demo_parameters = u""
        download_from_youtube = self.has_option([u"-y", u"--youtube"])
        largest_audio = self.has_option(u"--largest-audio")
        keep_audio = self.has_option(u"--keep-audio")
        output_html = self.has_option(u"--output-html")
        validate = not self.has_option(u"--skip-validator")
        print_faster_rate = self.has_option(u"--faster-rate")
        print_rates = self.has_option(u"--rates")
        print_zero = self.has_option(u"--zero")

        if demo:
            validate = False
            for key in self.DEMOS:
                if self.has_option(key):
                    demo_parameters = self.DEMOS[key]
                    audio_file_path = demo_parameters[u"audio"]
                    text_file_path = demo_parameters[u"text"]
                    config_string = demo_parameters[u"config"]
                    sync_map_file_path = demo_parameters[u"syncmap"]
                    # TODO allow injecting rconf options directly from DEMOS options field
                    if key == u"--example-cewsubprocess":
                        self.rconf[
                            RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True
                    elif key == u"--example-ctw-espeak":
                        self.rconf[RuntimeConfiguration.TTS] = "custom"
                        self.rconf[
                            RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK
                    elif key == u"--example-ctw-speect":
                        self.rconf[RuntimeConfiguration.TTS] = "custom"
                        self.rconf[
                            RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT
                    elif key == u"--example-festival":
                        self.rconf[RuntimeConfiguration.TTS] = "festival"
                    elif key == u"--example-mws":
                        self.rconf[
                            RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500"
                        self.rconf[
                            RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500"
                    elif key == u"--example-multilevel-tts":
                        self.rconf[RuntimeConfiguration.TTS_L1] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_L2] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_L3] = "espeak"
                    elif key == u"--example-words-festival-cache":
                        self.rconf[RuntimeConfiguration.TTS] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_CACHE] = True
                    elif key == u"--example-faster-rate":
                        print_faster_rate = True
                    elif key == u"--example-no-zero":
                        print_zero = True
                    elif key == u"--example-py":
                        self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False
                    elif key == u"--example-rates":
                        print_rates = True
                    elif key == u"--example-youtube":
                        download_from_youtube = True
                    break
        else:
            if len(self.actual_arguments) < 4:
                return self.print_help()
            audio_file_path = self.actual_arguments[0]
            text_file_path = self.actual_arguments[1]
            config_string = self.actual_arguments[2]
            sync_map_file_path = self.actual_arguments[3]

        html_file_path = None
        if output_html:
            keep_audio = True
            html_file_path = sync_map_file_path + u".html"

        if download_from_youtube:
            youtube_url = audio_file_path

        if (not download_from_youtube) and (
                not self.check_input_file(audio_file_path)):
            return self.ERROR_EXIT_CODE
        if not self.check_input_file(text_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(sync_map_file_path):
            return self.ERROR_EXIT_CODE
        if (html_file_path
                is not None) and (not self.check_output_file(html_file_path)):
            return self.ERROR_EXIT_CODE

        self.check_c_extensions()

        if demo:
            msg = []
            msg.append(u"Running example task with arguments:")
            if download_from_youtube:
                msg.append(u"  YouTube URL:   %s" % youtube_url)
            else:
                msg.append(u"  Audio file:    %s" % audio_file_path)
            msg.append(u"  Text file:     %s" % text_file_path)
            msg.append(u"  Config string: %s" % config_string)
            msg.append(u"  Sync map file: %s" % sync_map_file_path)
            if len(demo_parameters[u"options"]) > 0:
                msg.append(u"  Options:       %s" %
                           demo_parameters[u"options"])
            self.print_info(u"\n".join(msg))

        if validate:
            self.print_info(
                u"Validating config string (specify --skip-validator to bypass)..."
            )
            validator = Validator(logger=self.logger)
            result = validator.check_configuration_string(config_string,
                                                          is_job=False,
                                                          external_name=True)
            if not result.passed:
                self.print_error(u"The given config string is not valid:")
                self.print_generic(result.pretty_print())
                return self.ERROR_EXIT_CODE
            self.print_info(u"Validating config string... done")

        if download_from_youtube:
            try:
                self.print_info(u"Downloading audio from '%s' ..." %
                                youtube_url)
                downloader = Downloader(logger=self.logger)
                audio_file_path = downloader.audio_from_youtube(
                    youtube_url,
                    download=True,
                    output_file_path=None,
                    largest_audio=largest_audio)
                self.print_info(u"Downloading audio from '%s' ... done" %
                                youtube_url)
            except ImportError:
                self.print_no_pafy_error()
                return self.ERROR_EXIT_CODE
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while downloading audio from YouTube:"
                )
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE
        else:
            audio_extension = gf.file_extension(audio_file_path)
            if audio_extension.lower() not in AudioFile.FILE_EXTENSIONS:
                self.print_warning(
                    u"Your audio file path has extension '%s', which is uncommon for an audio file."
                    % audio_extension)
                self.print_warning(
                    u"Attempting at executing your Task anyway.")
                self.print_warning(
                    u"If it fails, you might have swapped the first two arguments."
                )
                self.print_warning(
                    u"The audio file path should be the first argument, the text file path the second."
                )

        try:
            self.print_info(u"Creating task...")
            task = Task(config_string, logger=self.logger)
            task.audio_file_path_absolute = audio_file_path
            task.text_file_path_absolute = text_file_path
            task.sync_map_file_path_absolute = sync_map_file_path
            self.print_info(u"Creating task... done")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while creating the task:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Executing task...")
            executor = ExecuteTask(task=task,
                                   rconf=self.rconf,
                                   logger=self.logger)
            executor.execute()
            self.print_info(u"Executing task... done")
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while executing the task:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        try:
            self.print_info(u"Creating output sync map file...")
            path = task.output_sync_map_file()
            self.print_info(u"Creating output sync map file... done")
            self.print_success(u"Created file '%s'" % path)
        except Exception as exc:
            self.print_error(
                u"An unexpected error occurred while writing the sync map file:"
            )
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        if output_html:
            try:
                parameters = {}
                parameters[gc.PPN_TASK_OS_FILE_FORMAT] = task.configuration[
                    "o_format"]
                parameters[
                    gc.PPN_TASK_OS_FILE_EAF_AUDIO_REF] = task.configuration[
                        "o_eaf_audio_ref"]
                parameters[
                    gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF] = task.configuration[
                        "o_smil_audio_ref"]
                parameters[
                    gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF] = task.configuration[
                        "o_smil_page_ref"]
                self.print_info(u"Creating output HTML file...")
                task.sync_map.output_html_for_tuning(audio_file_path,
                                                     html_file_path,
                                                     parameters)
                self.print_info(u"Creating output HTML file... done")
                self.print_success(u"Created file '%s'" % html_file_path)
            except Exception as exc:
                self.print_error(
                    u"An unexpected error occurred while writing the HTML file:"
                )
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE

        if download_from_youtube:
            if keep_audio:
                self.print_info(
                    u"Option --keep-audio set: keeping downloaded file '%s'" %
                    audio_file_path)
            else:
                gf.delete_file(None, audio_file_path)

        if print_zero:
            zero_duration = [
                l for l in task.sync_map.fragments_tree.vleaves_not_empty
                if l.begin == l.end
            ]
            if len(zero_duration) > 0:
                self.print_warning(u"Fragments with zero duration:")
                for fragment in zero_duration:
                    self.print_generic(u"  %s" % fragment)

        if print_rates:
            self.print_info(u"Fragments with rates:")
            for fragment in task.sync_map.fragments_tree.vleaves_not_empty:
                self.print_generic(u"  %s (rate: %.3f chars/s)" %
                                   (fragment, fragment.rate))

        if print_faster_rate:
            max_rate = task.configuration["aba_rate_value"]
            if max_rate is not None:
                faster = [
                    l for l in task.sync_map.fragments_tree.vleaves_not_empty
                    if l.rate >= max_rate + Decimal("0.001")
                ]
                if len(faster) > 0:
                    self.print_warning(
                        u"Fragments with rate greater than %.3f:" % max_rate)
                    for fragment in faster:
                        self.print_generic(u"  %s (rate: %.3f chars/s)" %
                                           (fragment, fragment.rate))

        return self.NO_ERROR_EXIT_CODE
Esempio n. 34
0
def main():
    """Align every audio file of a book with its text using aeneas.

    The single command line argument is a YAML file describing the book.
    The keys read from it are ``gruut.language``, ``aeneas.language``,
    ``text.file`` and ``audio`` (a mapping of audio file name to a dict with
    ``start_line``/``end_line`` and optional ``start_time``/``end_time``).
    All paths are resolved relative to the directory of the YAML file.

    For each audio file three files are written next to it:

    * ``<name>.txt``: cleaned sentences from the gruut tokenizer, one per line
    * ``<name>.raw.txt``: the corresponding raw sentences, one per line
    * ``<name>.json``: the sync map produced by aeneas
    """
    parser = argparse.ArgumentParser(prog="librivox_align.py")
    parser.add_argument("book_yml", help="YAML file with book details")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG)
    _LOGGER.debug(args)

    args.book_yml = Path(args.book_yml)
    input_dir = args.book_yml.parent

    # Text files are read and written as UTF-8 explicitly so the result does
    # not depend on the locale of the machine running the script.
    with open(args.book_yml, "r", encoding="utf-8") as book_file:
        book = yaml.safe_load(book_file)

    # Load gruut language
    gruut_lang = gruut.Language.load(book["gruut"]["language"])
    assert gruut_lang, "Unsupported language"

    language = book["aeneas"]["language"]

    # Load book text
    text_path = input_dir / book["text"]["file"]
    _LOGGER.debug("Loading book text from %s", text_path)
    with open(text_path, "r", encoding="utf-8") as text_file:
        text = text_file.readlines()

    # Process MP3 files
    for mp3_name, mp3_info in book["audio"].items():
        mp3_path = input_dir / mp3_name
        sync_path = mp3_path.with_suffix(".json")

        config_string = (
            f"task_language={language}"
            "|is_text_type=plain"
            "|os_task_file_format=json"
            "|task_adjust_boundary_no_zero=True"
        )

        start_time = float(mp3_info.get("start_time", 0))
        if start_time > 0:
            # Skip seconds at the beginning
            config_string += f"|is_audio_file_head_length={start_time}"

        end_time = float(mp3_info.get("end_time", 0))
        if end_time < 0:
            # A negative end time means "skip this many seconds at the end"
            end_time = abs(end_time)
            config_string += f"|is_audio_file_tail_length={end_time}"
        elif end_time > 0:
            # A positive end time sets the length of audio to process
            config_string += f"|is_audio_file_process_length={end_time}"

        task = Task(config_string=config_string)
        task.audio_file_path_absolute = mp3_path.absolute()
        task.sync_map_file_path_absolute = sync_path.absolute()

        # start_line/end_line are 1-based and inclusive. The text is built
        # before any output file is opened, so a bad line range fails
        # without truncating an existing text file.
        start_line, end_line = mp3_info["start_line"], mp3_info["end_line"]
        mp3_text = "".join(
            text[line_index].strip() + "\n"
            for line_index in range(start_line - 1, end_line)
        )

        # Run through gruut tokenizer to expand abbreviations, numbers, etc.
        mp3_text_path = mp3_path.with_suffix(".txt")
        raw_text_path = mp3_path.with_suffix(".raw.txt")
        with open(mp3_text_path, "w", encoding="utf-8") as mp3_text_file, \
                open(raw_text_path, "w", encoding="utf-8") as raw_text_file:
            for sentence in gruut_lang.tokenizer.tokenize(mp3_text):
                # Each sentence is on its own line in both files
                print(" ".join(sentence.clean_words), file=mp3_text_file)
                print(sentence.raw_text, file=raw_text_file)

        # Both files are closed (and therefore fully written) before the
        # text file path is handed to aeneas.
        task.text_file_path_absolute = str(mp3_text_path)

        # Generate sync map JSON file
        _LOGGER.debug("Generating %s (%s)", sync_path, mp3_path)
        ExecuteTask(task).execute()
        task.output_sync_map_file()
Esempio n. 35
0
def create_aeneas_csv(df=input_df,
                      book_chapter_list=book_chapter_list,
                      input_audio_dir=input_audio_dir):
    """Align each chapter's verses with its audio and write the timings.

    For every entry of ``book_chapter_list`` the matching audio file is
    looked up in ``input_audio_dir``, the chapter's verses are written to an
    aeneas input text file, aeneas is run, and one CSV row per verse
    (including the end time of its fragment) is written via ``write_file``.
    Chapters without a matching audio file are recorded in
    ``missing_chapters`` and skipped.

    Besides its parameters the function relies on these module-level names:
    ``output_dir``, ``write_file``, ``write_file_handle``,
    ``missing_chapters``, ``sound_find_string``, ``language_code`` and
    ``args`` (``move_adjustment`` / ``adjust_silence``).

    :param df: table with the columns ``fileset``, ``book``, ``chapter``,
        ``line_number``, ``verse_number`` and ``verse_content``, accessed as
        ``df[column][i]`` for ``i`` in ``range(len(df))``
        (presumably a pandas DataFrame with a default index; confirm).
    :param book_chapter_list: chapter identifiers split on ``_``, either
        ``<sequence>_<BOOK>_<chapter>`` or ``<BOOK>_<chapter>``.
    :param input_audio_dir: directory searched for the chapter audio files.
    """
    from aeneas.executetask import ExecuteTask
    from aeneas.task import Task
    from aeneas.tools.execute_task import ExecuteTaskCLI

    out_prefix = output_dir + '/'

    for each_chapter in book_chapter_list:
        parts = each_chapter.split('_')

        # Find respective audio file
        if len(parts) == 3:
            sequence = re.findall(r'\d+', parts[0])[0]
            if parts[1] == 'THESSALONIANS':
                book = 'Thess'
            else:
                book = parts[1].capitalize()
            chapter = parts[2]
            find_audio_string = chapter + '_' + sequence + book
            search_book = ' '.join(parts[0:2])
        else:
            # search_book is assigned unconditionally: previously the
            # 'THESSALONIANS' path left it unset (NameError) or stale from
            # the preceding chapter.
            search_book = parts[0]
            if parts[1] == 'THESSALONIANS':
                book = 'Thess'
            else:
                book = parts[0].capitalize()
            chapter = parts[1]
            find_audio_string = chapter + '_' + book

        if sound_find_string is not None:
            if language_code == 'en':
                find_audio_string = chapter + '_' + sound_find_string
            else:
                find_audio_string = sound_find_string + '_' + chapter

        print(find_audio_string)
        audio_matches = glob.glob(input_audio_dir + '/*' + find_audio_string +
                                  '*')
        if not audio_matches:
            # glob returns an empty list when nothing matches; indexing it
            # directly used to raise IndexError before the chapter could be
            # recorded as missing.
            missing_chapters.append(each_chapter)
            continue
        chapter_audio = audio_matches[0]

        # File name of the audio up to its first dot, without directories.
        audio_stem = (chapter_audio.split('/')[-1]).split('.')[0]
        aeneas_file_name = audio_stem + '_aeneas_input.txt'
        aeneas_output_file = audio_stem + '_aeneas_out.txt'
        aeneas_input_path = os.path.join(output_dir, aeneas_file_name)
        aeneas_output_path = os.path.join(output_dir, aeneas_output_file)

        chapter = chapter.lstrip('0')

        # Row positions of the verses belonging to this book and chapter;
        # used both for the aeneas input text and for the CSV rows.
        book_key = search_book.upper()
        chapter_rows = [
            i for i in range(len(df))
            if str(df['book'][i]).strip().upper() == book_key
            and int(df['chapter'][i]) == int(chapter)
        ]

        # Create aeneas text input (one verse per line)
        with codecs.open(out_prefix + aeneas_file_name, 'w',
                         'utf-8') as aeneas_write:
            for i in chapter_rows:
                aeneas_write.write(df['verse_content'][i] + '\n')

        # Run aeneas
        config_string = u"task_adjust_boundary_percent_value=50|task_adjust_boundary_nonspeech_min=0.4|task_language=epo|is_text_type=plain|os_task_file_format=aud"
        print(config_string)

        # Save .txt file
        ExecuteTaskCLI(use_sys=False).run(arguments=[
            None,  # dummy program name argument
            chapter_audio,
            aeneas_input_path,
            config_string,
            aeneas_output_path
        ])

        # Save time boundary
        # NOTE(review): this executes the same alignment a second time, only
        # to obtain the sync map in memory; kept to preserve behavior.
        task = Task(config_string=config_string)
        task.audio_file_path_absolute = chapter_audio
        task.text_file_path_absolute = aeneas_input_path
        task.sync_map_file_path_absolute = aeneas_output_path

        # process Task
        ExecuteTask(task).execute()

        # Rewrite the aeneas output without the lines containing '......'.
        filtered_path = out_prefix + 'new' + aeneas_output_file
        with open(out_prefix + aeneas_output_file, 'r',
                  encoding='utf-8') as unfiltered, \
                open(filtered_path, 'w', encoding='utf-8') as filtered:
            for line in unfiltered:
                if '......' not in line:
                    filtered.write(line)

        shutil.move(filtered_path, out_prefix + aeneas_output_file)

        # End times of all sync map leaves except the first and the last.
        leaves = task.sync_map_leaves()
        last = len(leaves)
        index_list = [
            leaf.end for i, leaf in enumerate(leaves) if 0 < i < last - 1
        ]

        # One CSV row per verse; raises IndexError if aeneas produced fewer
        # inner fragments than there are verses, as before.
        verse_list = list()
        for inc, i in enumerate(chapter_rows):
            write_file.writerow(
                (df['fileset'][i], df['book'][i], df['chapter'][i],
                 df['line_number'][i], df['verse_number'][i],
                 df['verse_content'][i], index_list[inc]))
            verse_list.append(df['verse_number'][i])

        print(chapter_audio)

        if args.move_adjustment or args.adjust_silence:
            # move_adjustment takes precedence when both options are set.
            if args.move_adjustment:
                adjust = adjust_update_boundaries_with_silence
                adjusted_suffix = '_sync_adjusted.txt'
            else:
                adjust = adjust_boundaries_with_silence
                adjusted_suffix = '_adjusted.txt'

            silence_file = out_prefix + audio_stem + '_aeneas_out_silence.txt'
            extract_silence_intervals(chapter_audio, silence_file)
            adjust(
                out_prefix + aeneas_output_file,
                silence_file,
                out_prefix + audio_stem + adjusted_suffix,
                verse_list,
                input_split_field='\t',
                output_split_field='\t')

    write_file_handle.close()

    if missing_chapters:
        with open(out_prefix + 'missing_chapters.txt', 'w',
                  encoding='utf-8') as missing:
            for each_missing in missing_chapters:
                # One chapter per line; they used to be written back to back
                # with no separator.
                missing.write(each_missing + '\n')
Esempio n. 36
0
    aligner.py --audio-file=<a> --text-file=<t> --syncmap-file=<s>

Options:
    --audio-file=<a>        Audio file path
    --text-file=<t>         Text file path
    --syncmap-file=<s>      Syncmap file path
"""
from docopt import docopt
from aeneas.executetask import ExecuteTask
from aeneas.task import Task

if __name__ == "__main__":
    # Command line options are parsed against the usage text in the module
    # docstring.
    options = docopt(__doc__)
    audio_path = options["--audio-file"]
    text_path = options["--text-file"]
    sync_map_path = options["--syncmap-file"]

    # Build the task from a fixed configuration, then attach the three paths.
    alignment = Task(
        config_string=u"task_language=kan|is_text_type=plain|os_task_file_format=json"
    )
    alignment.audio_file_path_absolute = audio_path
    alignment.text_file_path_absolute = text_path
    alignment.sync_map_file_path_absolute = sync_map_path

    # Execute the task and output the sync map to the requested file.
    ExecuteTask(alignment).execute()
    alignment.output_sync_map_file()
Esempio n. 37
0
    def perform_command(self):
        """
        Run the command selected on the command line and return its exit code.

        Informational options (help, examples, parameter and value listings)
        are served first. Otherwise a task is set up either from one of the
        built-in ``DEMOS`` or from the four positional arguments
        (audio file, text file, config string, sync map file), executed,
        and its sync map is written out. Optional reports on zero-duration
        fragments and fragment rates are printed at the end.

        :rtype: int
        """
        def report_failure(headline, error):
            # Two-line error report shared by every failing step.
            self.print_error(headline)
            self.print_error(u"%s" % error)
            return self.ERROR_EXIT_CODE

        if len(self.actual_arguments) == 0:
            return self.print_help()

        # Informational options: each one prints something and exits.
        if self.has_option([u"-e", u"--examples"]):
            return self.print_examples(False)
        if self.has_option(u"--examples-all"):
            return self.print_examples(True)
        if self.has_option([u"--list-parameters"]):
            return self.print_parameters()

        listed_parameter = self.has_option_with_value(u"--list-values")
        if listed_parameter is not None:
            return self.print_values(listed_parameter)
        if self.has_option(u"--list-values"):
            return self.print_values(u"?")

        # NOTE list() is needed for Python3, where keys() is not a list!
        demo = self.has_option(list(self.DEMOS.keys()))
        demo_parameters = u""
        download_from_youtube = self.has_option([u"-y", u"--youtube"])
        largest_audio = self.has_option(u"--largest-audio")
        keep_audio = self.has_option(u"--keep-audio")
        output_html = self.has_option(u"--output-html")
        validate = not self.has_option(u"--skip-validator")
        print_faster_rate = self.has_option(u"--faster-rate")
        print_rates = self.has_option(u"--rates")
        print_zero = self.has_option(u"--zero")

        if not demo:
            if len(self.actual_arguments) < 4:
                return self.print_help()
            (audio_file_path, text_file_path,
             config_string, sync_map_file_path) = self.actual_arguments[0:4]
        else:
            # Built-in examples skip config string validation.
            validate = False
            chosen = next(
                (name for name in self.DEMOS if self.has_option(name)),
                None
            )
            if chosen is not None:
                demo_parameters = self.DEMOS[chosen]
                audio_file_path = demo_parameters[u"audio"]
                text_file_path = demo_parameters[u"text"]
                config_string = demo_parameters[u"config"]
                sync_map_file_path = demo_parameters[u"syncmap"]

                # TODO allow injecting rconf options directly from DEMOS options field
                # Examples that tweak the runtime configuration.
                if chosen == u"--example-cewsubprocess":
                    self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True
                elif chosen == u"--example-ctw-espeak":
                    self.rconf[RuntimeConfiguration.TTS] = "custom"
                    self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK
                elif chosen == u"--example-ctw-speect":
                    self.rconf[RuntimeConfiguration.TTS] = "custom"
                    self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT
                elif chosen == u"--example-festival":
                    self.rconf[RuntimeConfiguration.TTS] = "festival"
                    self.rconf[RuntimeConfiguration.TTS_PATH] = "text2wave"
                elif chosen == u"--example-mws":
                    self.rconf[RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500"
                    self.rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500"
                elif chosen == u"--example-py":
                    self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False

                # Examples that switch on one of the local flags.
                if chosen == u"--example-faster-rate":
                    print_faster_rate = True
                elif chosen == u"--example-no-zero":
                    print_zero = True
                elif chosen == u"--example-rates":
                    print_rates = True
                elif chosen == u"--example-youtube":
                    download_from_youtube = True

        # The HTML output references the audio, so the audio is always kept.
        if output_html:
            keep_audio = True
            html_file_path = sync_map_file_path + u".html"
        else:
            html_file_path = None

        if download_from_youtube:
            # In this mode the first argument is a URL, not a local file.
            youtube_url = audio_file_path

        if not (download_from_youtube or self.check_input_file(audio_file_path)):
            return self.ERROR_EXIT_CODE
        if not self.check_input_file(text_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(sync_map_file_path):
            return self.ERROR_EXIT_CODE
        if not ((html_file_path is None) or self.check_output_file(html_file_path)):
            return self.ERROR_EXIT_CODE

        self.check_c_extensions()

        if demo:
            summary = [u"Running example task with arguments:"]
            if download_from_youtube:
                summary.append(u"  YouTube URL:   %s" % youtube_url)
            else:
                summary.append(u"  Audio file:    %s" % audio_file_path)
            summary.append(u"  Text file:     %s" % text_file_path)
            summary.append(u"  Config string: %s" % config_string)
            summary.append(u"  Sync map file: %s" % sync_map_file_path)
            demo_options = demo_parameters[u"options"]
            if len(demo_options) > 0:
                summary.append(u"  Options:       %s" % demo_options)
            self.print_info(u"\n".join(summary))

        if validate:
            self.print_info(u"Validating config string (specify --skip-validator to bypass)...")
            outcome = Validator(logger=self.logger).check_configuration_string(
                config_string,
                is_job=False,
                external_name=True
            )
            if not outcome.passed:
                self.print_error(u"The given config string is not valid:")
                self.print_generic(outcome.pretty_print())
                return self.ERROR_EXIT_CODE
            self.print_info(u"Validating config string... done")

        if download_from_youtube:
            try:
                self.print_info(u"Downloading audio from '%s' ..." % youtube_url)
                audio_file_path = Downloader(logger=self.logger).audio_from_youtube(
                    youtube_url,
                    download=True,
                    output_file_path=None,
                    largest_audio=largest_audio
                )
                self.print_info(u"Downloading audio from '%s' ... done" % youtube_url)
            except ImportError:
                self.print_no_pafy_error()
                return self.ERROR_EXIT_CODE
            except Exception as exc:
                return report_failure(
                    u"An unexpected error occurred while downloading audio from YouTube:",
                    exc
                )

        try:
            self.print_info(u"Creating task...")
            task = Task(config_string, logger=self.logger)
            task.audio_file_path_absolute = audio_file_path
            task.text_file_path_absolute = text_file_path
            task.sync_map_file_path_absolute = sync_map_file_path
            self.print_info(u"Creating task... done")
        except Exception as exc:
            return report_failure(
                u"An unexpected error occurred while creating the task:",
                exc
            )

        try:
            self.print_info(u"Executing task...")
            ExecuteTask(task=task, rconf=self.rconf, logger=self.logger).execute()
            self.print_info(u"Executing task... done")
        except Exception as exc:
            return report_failure(
                u"An unexpected error occurred while executing the task:",
                exc
            )

        try:
            self.print_info(u"Creating output sync map file...")
            written_path = task.output_sync_map_file()
            self.print_info(u"Creating output sync map file... done")
            self.print_success(u"Created file '%s'" % written_path)
        except Exception as exc:
            return report_failure(
                u"An unexpected error occurred while writing the sync map file:",
                exc
            )

        if output_html:
            try:
                html_parameters = {}
                html_parameters[gc.PPN_TASK_OS_FILE_FORMAT] = task.configuration["o_format"]
                html_parameters[gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF] = task.configuration["o_smil_audio_ref"]
                html_parameters[gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF] = task.configuration["o_smil_page_ref"]
                self.print_info(u"Creating output HTML file...")
                task.sync_map.output_html_for_tuning(
                    audio_file_path,
                    html_file_path,
                    html_parameters
                )
                self.print_info(u"Creating output HTML file... done")
                self.print_success(u"Created file '%s'" % html_file_path)
            except Exception as exc:
                return report_failure(
                    u"An unexpected error occurred while writing the HTML file:",
                    exc
                )

        if download_from_youtube:
            if not keep_audio:
                gf.delete_file(None, audio_file_path)
            else:
                self.print_info(u"Option --keep-audio set: keeping downloaded file '%s'" % audio_file_path)

        if print_zero:
            zero_duration = [
                frag for frag in task.sync_map.fragments_tree.vleaves_not_empty
                if frag.begin == frag.end
            ]
            if zero_duration:
                self.print_warning(u"Fragments with zero duration:")
                for fragment in zero_duration:
                    self.print_generic(u"  %s" % fragment)

        if print_rates:
            self.print_info(u"Fragments with rates:")
            for fragment in task.sync_map.fragments_tree.vleaves_not_empty:
                self.print_generic(u"  %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

        if print_faster_rate:
            max_rate = task.configuration["aba_rate_value"]
            if max_rate is not None:
                # A small tolerance is added to the configured maximum rate.
                faster = [
                    frag for frag in task.sync_map.fragments_tree.vleaves_not_empty
                    if frag.rate >= max_rate + Decimal("0.001")
                ]
                if faster:
                    self.print_warning(u"Fragments with rate greater than %.3f:" % max_rate)
                    for fragment in faster:
                        self.print_generic(u"  %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

        return self.NO_ERROR_EXIT_CODE