def total_FA(soundfile, mylines, myhead, mytail, config=None):
    """Align ``mylines`` against ``soundfile`` by driving Aeneas as a library.

    This function is currently not in use, because results obtained this
    way have not been reliable.

    :param soundfile: path assigned to the task as its absolute audio path
    :param mylines: iterable of ``(identifier, text)`` pairs, one per fragment
    :param myhead: value placed in ``is_audio_file_head_length``
    :param mytail: value placed in ``is_audio_file_tail_length``
    :param config: optional extra configuration appended after a ``|``
    :return: the ``fragments`` of the task's sync map after execution
    """
    # Base configuration; any caller-supplied extras are appended below.
    settings = (
        u"task_language=nor|is_text_type=plain|os_task_file_format=json|is_audio_file_head_length=%s|is_audio_file_tail_length=%s"
        % (myhead, mytail))
    if config is not None:
        settings += u"|%s" % (config,)
    print(settings)

    task = Task(config_string=settings)
    print(task)
    task.audio_file_path_absolute = soundfile

    fragments_file = TextFile()
    print(fragments_file)
    for fragment_id, fragment_text in mylines:
        fragment = TextFragment(
            fragment_id, Language.NOR, fragment_text, fragment_text)
        fragments_file.add_fragment(fragment)
    task.text_file = fragments_file
    print(len(task.text_file))

    ExecuteTask(task).execute()
    return task.sync_map.fragments
Esempio n. 2
0
def main():
    """Run a single aeneas task described by the command-line arguments.

    Reads four positional arguments from ``sys.argv``: the audio file path,
    the text file path, the task configuration string and the path of the
    sync map file to write. When fewer are given, ``usage()`` is called and
    the function returns. Progress and error messages go to standard output.
    """
    if len(sys.argv) < 5:
        usage()
        return

    audio_file_path, text_file_path, config_string, sync_map_file_path = sys.argv[1:5]

    # Each print takes one parenthesized argument, so the output is the same
    # on Python 2 (print statement) and Python 3 (print function).
    print("[INFO] Creating task...")
    task = Task(config_string)
    task.audio_file_path_absolute = audio_file_path
    task.text_file_path_absolute = text_file_path
    task.sync_map_file_path_absolute = sync_map_file_path
    print("[INFO] Creating task... done")

    print("[INFO] Executing task...")
    logger = Logger(tee=False)
    executor = ExecuteTask(task=task, logger=logger)
    result = executor.execute()
    print("[INFO] Executing task... done")

    if not result:
        print("[ERRO] An error occurred while executing the task")
        return

    print("[INFO] Creating output container...")
    path = task.output_sync_map_file()
    print("[INFO] Creating output container... done")

    if path is not None:
        print("[INFO] Created %s" % path)
    else:
        print("[ERRO] An error occurred while writing the output sync map file")
Esempio n. 3
0
    def _create_task(
            self,
            task_info,
            config_string,
            sync_map_root_directory,
            job_os_hierarchy_type
        ):
        """
        Build a task from the container entry information and the
        configuration string.

        The task language is taken from the task-level parameter of the
        configuration string, falling back to the job-level one when the
        former is missing.

        :param list task_info: the task information: ``[prefix, text_path, audio_path]``
        :param string config_string: the configuration string
        :param string sync_map_root_directory: the root directory for the sync map files
        :param job_os_hierarchy_type: type of job output hierarchy
        :type  job_os_hierarchy_type: :class:`~aeneas.hierarchytype.HierarchyType`
        :rtype: :class:`~aeneas.task.Task`
        """
        self.log(u"Converting config string to config dict")
        parameters = gf.config_string_to_dict(config_string)

        self.log(u"Creating task")
        task = Task(config_string, logger=self.logger)
        settings = task.configuration

        # The entry prefix doubles as description suffix and custom id.
        custom_id = task_info[0]
        settings["description"] = "Task %s" % custom_id
        self.log([u"Task description: %s", settings["description"]])

        try:
            settings["language"] = parameters[gc.PPN_TASK_LANGUAGE]
            self.log([u"Set language from task: '%s'", settings["language"]])
        except KeyError:
            settings["language"] = parameters[gc.PPN_JOB_LANGUAGE]
            self.log([u"Set language from job: '%s'", settings["language"]])

        settings["custom_id"] = custom_id
        self.log([u"Task custom_id: %s", settings["custom_id"]])

        task.text_file_path = task_info[1]
        self.log([u"Task text file path: %s", task.text_file_path])
        task.audio_file_path = task_info[2]
        self.log([u"Task audio file path: %s", task.audio_file_path])

        task.sync_map_file_path = self._compute_sync_map_file_path(
            sync_map_root_directory,
            job_os_hierarchy_type,
            custom_id,
            settings["o_name"]
        )
        self.log([u"Task sync map file path: %s", task.sync_map_file_path])

        # Both SMIL references go through the same placeholder substitution.
        smil_references = (
            (u"Replacing placeholder in os_file_smil_audio_ref", "o_smil_audio_ref"),
            (u"Replacing placeholder in os_file_smil_page_ref", "o_smil_page_ref"),
        )
        for message, key in smil_references:
            self.log(message)
            settings[key] = self._replace_placeholder(settings[key], custom_id)

        self.log(u"Returning task")
        return task
Esempio n. 4
0
 def test_set_text_file_path_absolute_05(self):
     # Setting the absolute path of a parsed-format text must give the task
     # a text file of length 15.
     task = Task()
     task.configuration = TaskConfiguration()
     settings = task.configuration
     settings.language = Language.EN
     settings.is_text_file_format = TextFileFormat.PARSED
     sonnet_path = get_abs_path("res/inputtext/sonnet_parsed.txt")
     task.text_file_path_absolute = sonnet_path
     self.assertNotEqual(task.text_file, None)
     self.assertEqual(len(task.text_file), 15)
Esempio n. 5
0
 def test_set_audio_file_path_absolute(self):
     # Setting the absolute audio path must give the task an audio file
     # whose size and (approximate) length match the expected values.
     task = Task()
     mp3_path = gf.absolute_path("res/container/job/assets/p001.mp3", __file__)
     task.audio_file_path_absolute = mp3_path
     self.assertIsNotNone(task.audio_file)
     self.assertEqual(task.audio_file.file_size, 426735)
     measured_length = task.audio_file.audio_length
     self.assertAlmostEqual(measured_length, TimeValue("53.3"), places=1)
Esempio n. 6
0
 def test_output_sync_map_03(self):
     # No sync map file path is set on the task here, and the output call is
     # expected to return None even though a container root path is given.
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     output_path = tempfile.mkdtemp()
     try:
         path = task.output_sync_map_file(container_root_path=output_path)
         self.assertEqual(path, None)
     finally:
         # Remove the temporary directory even when the assertion fails.
         shutil.rmtree(output_path)
Esempio n. 7
0
 def test_set_text_file_path_absolute_03(self):
     # Setting the absolute path of an unparsed XHTML text, selected by the
     # class regex "ra" with numeric id sorting, must give the task a text
     # file of length 15.
     task = Task()
     task.configuration = TaskConfiguration()
     settings = task.configuration
     settings.language = Language.EN
     settings.is_text_file_format = TextFileFormat.UNPARSED
     settings.is_text_unparsed_class_regex = "ra"
     settings.is_text_unparsed_id_sort = IDSortingAlgorithm.NUMERIC
     xhtml_path = get_abs_path("res/inputtext/sonnet_unparsed_class_id.xhtml")
     task.text_file_path_absolute = xhtml_path
     self.assertNotEqual(task.text_file, None)
     self.assertEqual(len(task.text_file), 15)
Esempio n. 8
0
def force_align():
    """Align ``data/audio.wav`` with ``data/transcript.txt``.

    Both files are looked up in the ``data`` directory, resolved to an
    absolute path from the current working directory.

    :return: the ``"fragments"`` entry of the task's sync map JSON string
    """
    work_dir = os.path.abspath("data")

    task = Task(
        config_string="task_language=rus|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = work_dir + "/audio.wav"
    task.text_file_path_absolute = work_dir + "/transcript.txt"
    ExecuteTask(task).execute()

    sync_map = json.loads(task.sync_map.json_string)
    return sync_map["fragments"]
Esempio n. 9
0
 def test_job_add_task(self):
     # A new job is empty, and each add_task call must grow its length by one.
     job = Job()
     self.assertEqual(len(job), 0)
     for expected_size in (1, 2, 3):
         job.add_task(Task())
         self.assertEqual(len(job), expected_size)
Esempio n. 10
0
 def test_output_sync_map(self):
     # Writing a TXT sync map to an absolute temporary path must return
     # that same path.
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = tempfile.mkstemp(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertNotEqual(path, None)
         self.assertEqual(path, output_file_path)
     finally:
         # Clean up the temporary file even when an assertion fails.
         delete_file(handler, output_file_path)
Esempio n. 11
0
 def test_execute(self):
     # Run a SMIL-output task on the p001 assets (unparsed XHTML text) and
     # check that execution succeeds and the sync map output returns a path.
     audio_path = "../aeneas/tests/res/container/job/assets/p001.mp3"
     text_path = "../aeneas/tests/res/container/job/assets/p001.xhtml"
     task = Task("task_language=en|os_task_file_format=smil|os_task_file_name=p001.smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml|is_text_type=unparsed|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric")
     task.audio_file_path_absolute = audio_path
     task.text_file_path_absolute = text_path
     executor = ExecuteTask(task, logger=Logger(tee=True))
     self.assertTrue(executor.execute())
     task.sync_map_file_path_absolute = "/tmp/p001.smil"
     written_path = task.output_sync_map_file()
     self.assertNotEqual(written_path, None)
Esempio n. 12
0
    def _create_task(self, task_info, config_string, sync_map_root_directory,
                     job_os_hierarchy_type):
        """
        Build a task from the container entry information and the
        configuration string.

        The task language is taken from the task-level parameter of the
        configuration string, falling back to the job-level one when the
        former is missing.

        :param task_info: the task information: ``[prefix, text_path, audio_path]``
        :type  task_info: list of strings
        :param config_string: the configuration string
        :type  config_string: string
        :param sync_map_root_directory: the root directory for the sync map files
        :type  sync_map_root_directory: string (path)
        :param job_os_hierarchy_type: type of job output hierarchy
        :type  job_os_hierarchy_type: :class:`aeneas.hierarchytype.HierarchyType`
        :rtype: :class:`aeneas.task.Task`
        """
        self._log("Converting config string to config dict")
        parameters = gf.config_string_to_dict(config_string)

        self._log("Creating task")
        task = Task(config_string)
        settings = task.configuration

        # The entry prefix doubles as description suffix and custom id.
        custom_id = task_info[0]
        settings.description = "Task %s" % custom_id
        self._log(["Task description: %s", settings.description])

        try:
            settings.language = parameters[gc.PPN_TASK_LANGUAGE]
            self._log(["Set language from task: '%s'", settings.language])
        except KeyError:
            settings.language = parameters[gc.PPN_JOB_LANGUAGE]
            self._log(["Set language from job: '%s'", settings.language])

        settings.custom_id = custom_id
        self._log(["Task custom_id: %s", settings.custom_id])

        task.text_file_path = task_info[1]
        self._log(["Task text file path: %s", task.text_file_path])
        task.audio_file_path = task_info[2]
        self._log(["Task audio file path: %s", task.audio_file_path])

        task.sync_map_file_path = self._compute_sync_map_file_path(
            sync_map_root_directory,
            job_os_hierarchy_type,
            custom_id,
            settings.os_file_name)
        self._log(["Task sync map file path: %s", task.sync_map_file_path])

        # Both SMIL references go through the same placeholder substitution.
        smil_references = (
            ("Replacing placeholder in os_file_smil_audio_ref", "os_file_smil_audio_ref"),
            ("Replacing placeholder in os_file_smil_page_ref", "os_file_smil_page_ref"),
        )
        for message, attribute in smil_references:
            self._log(message)
            replaced = self._replace_placeholder(
                getattr(settings, attribute), custom_id)
            setattr(settings, attribute, replaced)

        self._log("Returning task")
        return task
Esempio n. 13
0
 def test_output_sync_map_06(self):
     # SMIL output is configured here with a page reference only (no audio
     # reference is set), and the output call is expected to return None.
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.SMIL
     task.configuration.os_file_smil_page_ref = "Text/page.xhtml"
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = tempfile.mkstemp(suffix=".smil")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertEqual(path, None)
     finally:
         # Release the descriptor and the file even when the assertion fails.
         os.close(handler)
         os.remove(output_file_path)
Esempio n. 14
0
 def execute(self, config_string, audio_path, text_path):
     """Run a task and check that its sync map is written to a temp file.

     :param config_string: the task configuration string
     :param audio_path: audio file path, resolved relative to this file
     :param text_path: text file path, resolved relative to this file
     """
     handler, tmp_path = gf.tmp_file()
     try:
         task = Task(config_string)
         task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
         task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
         executor = ExecuteTask(task)
         executor.execute()
         task.sync_map_file_path_absolute = tmp_path
         result_path = task.output_sync_map_file()
         self.assertIsNotNone(result_path)
         self.assertEqual(result_path, tmp_path)
         self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
     finally:
         # Delete the temporary file even when execution or an assertion fails.
         gf.delete_file(handler, tmp_path)
Esempio n. 15
0
 def set_text_file(self, path, fmt, expected, id_regex=None, class_regex=None, id_sort=None):
     """Load a text file into a task and check its length.

     :param path: text file path, resolved through ``get_abs_path``
     :param fmt: value for ``is_text_file_format``
     :param expected: expected length of the loaded text file
     :param id_regex: optional ``is_text_unparsed_id_regex`` value
     :param class_regex: optional ``is_text_unparsed_class_regex`` value
     :param id_sort: optional ``is_text_unparsed_id_sort`` value
     """
     task = Task()
     task.configuration = TaskConfiguration()
     settings = task.configuration
     settings.language = Language.EN
     settings.is_text_file_format = fmt
     # Only the options the caller actually supplied are applied.
     optional_settings = (
         ("is_text_unparsed_id_regex", id_regex),
         ("is_text_unparsed_class_regex", class_regex),
         ("is_text_unparsed_id_sort", id_sort),
     )
     for attribute, value in optional_settings:
         if value is not None:
             setattr(settings, attribute, value)
     task.text_file_path_absolute = get_abs_path(path)
     self.assertNotEqual(task.text_file, None)
     self.assertEqual(len(task.text_file), expected)
Esempio n. 16
0
 def test_output_sync_map_02(self):
     # With a sync map file path and a container root path, the returned
     # path must equal the join of the root and the sync map file path.
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = tempfile.mkstemp(suffix=".txt")
     output_path = None
     try:
         task.sync_map_file_path = output_file_path
         output_path = tempfile.mkdtemp()
         path = task.output_sync_map_file(container_root_path=output_path)
         self.assertNotEqual(path, None)
         self.assertEqual(path, os.path.join(output_path, output_file_path))
     finally:
         # Clean up the temporary file and directory even on failure.
         os.close(handler)
         os.remove(output_file_path)
         if output_path is not None:
             shutil.rmtree(output_path)
Esempio n. 17
0
 def test_job_clear_tasks(self):
     # A job holding one task must have length zero after clear_tasks().
     job = Job()
     job.tasks.append(Task())
     self.assertEqual(len(job), 1)
     job.clear_tasks()
     self.assertEqual(len(job), 0)
Esempio n. 18
0
def align_aeneas():
    """Align every narration audio file with its transcript using aeneas.

    For each entry of ``config.PIDs``, the ``*.m4a`` files of the narrations
    subfolder are processed in sorted order, falling back to ``*.mp3`` files
    when no ``.m4a`` file is found. The first six characters of the audio
    file name select the transcript (``<id>.txt``) and the output
    (``<id>.sbv``). Audio files without a transcript are reported and skipped.
    """
    # The task configuration is the same for every file.
    config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=sbv"

    for PID in config.PIDs:
        narrations_dir = os.path.join(PID, config.narrations_subfolder)

        audio_file_list = glob(os.path.join(narrations_dir, "*.m4a"))
        if not audio_file_list:
            audio_file_list = glob(os.path.join(narrations_dir, "*.mp3"))

        audio_file_list.sort()

        for a in audio_file_list:
            file_id = os.path.basename(a)[:6]
            t = os.path.join(config.aeneas_transcript_dir, file_id + ".txt")
            out_file = os.path.join(config.aeneas_output_dir, file_id + ".sbv")
            # Single-argument print works on both Python 2 and Python 3.
            print("aligning " + a + " " + t)

            if not os.path.isfile(t):
                print(t + " not available so will not be processed.  Was it a missed ground truth file?")
                continue

            # create Task object
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = a
            task.text_file_path_absolute = t
            task.sync_map_file_path_absolute = out_file

            # process Task
            ExecuteTask(task).execute()

            # output sync map to file
            task.output_sync_map_file()
Esempio n. 19
0
def align(text_path, audio_path, align_out_path, word_align=True):
    """Align ``text_path`` with ``audio_path`` and write a JSON sync map.

    :param text_path: path of the text file
    :param audio_path: path of the audio file
    :param align_out_path: path of the JSON sync map to write
    :param word_align: when true, use the ``mplain`` text type with three
        output levels and enable the non-speech masking runtime options;
        otherwise use the ``plain`` text type with no runtime configuration
    """
    options = [u"task_language=hi", "os_task_file_format=json"]
    rconf = None
    if word_align:
        options.extend(["os_task_file_levels=3", "is_text_type=mplain"])
        rconf = RuntimeConfiguration()
        rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
        rconf[RuntimeConfiguration.MFCC_MASK_NONSPEECH_L3] = True
    else:
        options.append("is_text_type=plain")

    task = Task(config_string="|".join(options))
    task.text_file_path_absolute = text_path
    task.audio_file_path_absolute = audio_path
    task.sync_map_file_path_absolute = align_out_path

    ExecuteTask(task, rconf=rconf).execute()
    task.output_sync_map_file()

    # Rewrite the output without ASCII escaping so non-ASCII text stays readable.
    with open(align_out_path, 'r', encoding='utf8') as source:
        alignment = json.load(source)
    with open(align_out_path, 'w', encoding='utf8') as target:
        json.dump(alignment, target, ensure_ascii=False, indent=2)
Esempio n. 20
0
 def test_task_sync_map_leaves(self):
     # The dummy sync map must expose exactly three leaves through the task.
     task = Task()
     task.configuration = TaskConfiguration()
     settings = task.configuration
     settings["language"] = Language.ENG
     settings["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     leaves = task.sync_map_leaves()
     self.assertEqual(len(leaves), 3)
Esempio n. 21
0
def chopsounds():
    """Align the input sound with the input text and export one wav per fragment.

    Uses the module-level ``inputsound``, ``inputtext`` and ``outputdir``
    names. Every sync map leaf with a positive length is cut out of the
    sound and exported as ``<outputdir>/<identifier>.wav``.
    """
    task = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = inputsound
    task.text_file_path_absolute = inputtext
    ExecuteTask(task).execute()

    recording = AudioSegment.from_wav(inputsound)
    for leaf in task.sync_map_leaves():
        if not leaf.length > 0.0:
            continue
        # Fragment times are scaled by 1000 to form the slice bounds.
        start_ms = float(leaf.begin) * 1000
        stop_ms = float(leaf.end) * 1000
        clip = recording[start_ms:stop_ms]
        clip.export(outputdir + "/" + leaf.identifier + ".wav", format="wav")
Esempio n. 22
0
 def test_output_sync_map(self):
     # Writing a TXT sync map to an absolute temporary path must return
     # that same path.
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         # Delete the temporary file even when an assertion fails.
         gf.delete_file(handler, output_file_path)
Esempio n. 23
0
 def set_text_file(self,
                   path,
                   fmt,
                   expected,
                   id_regex=None,
                   class_regex=None,
                   id_sort=None):
     """Load a text file into a task and check its length.

     :param path: text file path, resolved through ``get_abs_path``
     :param fmt: value for ``is_text_file_format``
     :param expected: expected length of the loaded text file
     :param id_regex: optional ``is_text_unparsed_id_regex`` value
     :param class_regex: optional ``is_text_unparsed_class_regex`` value
     :param id_sort: optional ``is_text_unparsed_id_sort`` value
     """
     task = Task()
     task.configuration = TaskConfiguration()
     cfg = task.configuration
     cfg.language = Language.EN
     cfg.is_text_file_format = fmt
     # Apply, in this order, only the options that were actually given.
     for name, value in (("is_text_unparsed_id_regex", id_regex),
                         ("is_text_unparsed_class_regex", class_regex),
                         ("is_text_unparsed_id_sort", id_sort)):
         if value is None:
             continue
         setattr(cfg, name, value)
     task.text_file_path_absolute = get_abs_path(path)
     self.assertNotEqual(task.text_file, None)
     self.assertEqual(len(task.text_file), expected)
    def align_audio_and_text(self, file_path):
        """Align this object's audio and text files and write a JSON sync map.

        Reads ``self.audio_file_path`` and ``self.txt_file_path``.

        :param file_path: path of the sync map file to write
        """
        settings = TaskConfiguration()
        for key, value in (
                (gc.PPN_TASK_LANGUAGE, Language.PAN),
                (gc.PPN_TASK_IS_TEXT_FILE_FORMAT, TextFileFormat.PLAIN),
                (gc.PPN_TASK_OS_FILE_FORMAT, SyncMapFormat.JSON),
        ):
            settings[key] = value

        alignment_task = Task()
        alignment_task.configuration = settings
        alignment_task.audio_file_path_absolute = self.audio_file_path
        alignment_task.text_file_path_absolute = self.txt_file_path
        alignment_task.sync_map_file_path_absolute = file_path

        ExecuteTask(alignment_task).execute()
        alignment_task.output_sync_map_file()
Esempio n. 25
0
 def test_task_empty_on_creation(self):
     # Every listed attribute of a freshly created task must be None.
     task = Task()
     empty_attributes = (
         "configuration",
         "text_file",
         "audio_file",
         "sync_map",
         "audio_file_path",
         "audio_file_path_absolute",
         "text_file_path",
         "text_file_path_absolute",
         "sync_map_file_path",
         "sync_map_file_path_absolute",
     )
     for attribute in empty_attributes:
         self.assertIsNone(getattr(task, attribute))
Esempio n. 26
0
 def test_task_empty_on_creation(self):
     # Every listed attribute of a freshly created task must equal None.
     task = Task()
     empty_attributes = (
         "configuration",
         "text_file",
         "audio_file",
         "sync_map",
         "audio_file_path",
         "audio_file_path_absolute",
         "text_file_path",
         "text_file_path_absolute",
         "sync_map_file_path",
         "sync_map_file_path_absolute",
     )
     for attribute in empty_attributes:
         self.assertEqual(getattr(task, attribute), None)
Esempio n. 27
0
def force_align(audio_path,
                text_path,
                output_path,
                min_length=1.0,
                max_length=10.0,
                logging=logging):
    """Align audio with text and dump the usable fragments as JSON.

    :param audio_path: path of the audio file
    :param text_path: path of the text file
    :param output_path: path of the JSON file to write
    :param min_length: fragments must be strictly longer than this
    :param max_length: fragments must be strictly shorter than this
    :param logging: object whose ``info`` method receives progress messages
    """
    task = Task(
        config_string=
        u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = audio_path
    task.text_file_path_absolute = text_path
    task.sync_map_file_path_absolute = output_path

    logging.info("Aligning audio and text...")
    ExecuteTask(task).execute()
    logging.info("Aligned audio and text")

    # Keep only non-empty fragments whose length lies strictly inside the bounds.
    sentences = [
        {
            "start": float(leaf.begin),
            "end": float(leaf.end),
            "length": float(leaf.length),
            "text": leaf.text,
        }
        for leaf in task.sync_map_leaves()
        if leaf.length > min_length and leaf.length < max_length and leaf.text
    ]

    with open(output_path, "w") as out:
        json.dump(sentences, out, indent=4)
Esempio n. 28
0
def align_files_in_place(data: InputDataFiles):
    # Align the audio and transcript files named in ``data`` and write the
    # regular fragments, as (begin, end, text) triples serialized with
    # orjson, to ``data.alignment_filename``. Any exception is re-raised as
    # an HTTPException with status code 500.
    try:
        settings = TaskConfiguration()
        settings[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = data.text_file_format
        settings[gc.PPN_TASK_LANGUAGE] = data.language

        alignment_task = Task()
        alignment_task.configuration = settings
        alignment_task.audio_file_path_absolute = Path(data.audio_filename)
        alignment_task.text_file_path_absolute = Path(data.transcript_filename)

        ExecuteTask(alignment_task).execute()

        with open(data.alignment_filename, "w") as out:
            rows = [
                (str(leaf.begin), str(leaf.end), leaf.text)
                for leaf in alignment_task.sync_map_leaves()
                if leaf.is_regular
            ]
            out.write(orjson.dumps(rows).decode())
    except Exception as e:
        message = "Error during processing: " + str(e)
        raise HTTPException(status_code=500, detail=message) from e
Esempio n. 29
0
 def set_text_file(self, path, fmt, expected, id_regex=None, class_regex=None, id_sort=None):
     """Load a text file into a task and check its length.

     :param path: text file path, resolved relative to this file
     :param fmt: value for the ``i_t_format`` configuration key
     :param expected: expected length of the loaded text file
     :param id_regex: optional ``i_t_unparsed_id_regex`` value
     :param class_regex: optional ``i_t_unparsed_class_regex`` value
     :param id_sort: optional ``i_t_unparsed_id_sort`` value
     """
     task = Task()
     task.configuration = TaskConfiguration()
     settings = task.configuration
     settings["language"] = Language.ENG
     settings["i_t_format"] = fmt
     # Only the options the caller actually supplied are applied.
     optional_settings = (
         ("i_t_unparsed_class_regex", class_regex),
         ("i_t_unparsed_id_regex", id_regex),
         ("i_t_unparsed_id_sort", id_sort),
     )
     for key, value in optional_settings:
         if value is not None:
             settings[key] = value
     task.text_file_path_absolute = gf.absolute_path(path, __file__)
     self.assertIsNotNone(task.text_file)
     self.assertEqual(len(task.text_file), expected)
Esempio n. 30
0
def align_audio(
        language: LanguageEnum = Form(...),
        text_file_format: TextFileFormatEnum = Form(...),
        transcript: UploadFile = File(...),
        audio: UploadFile = File(...),
):
    # Align the uploaded audio with the uploaded transcript and return the
    # regular fragments as (begin, end, text) triples of strings. Any
    # exception is re-raised as an HTTPException with status code 500.
    try:
        # prepare config
        aeneas_config = TaskConfiguration()
        aeneas_config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = text_file_format
        aeneas_config[gc.PPN_TASK_LANGUAGE] = language

        tmp_audio = None
        tmp_transcript = None
        try:
            # get named temporary files
            tmp_audio = convert_to_tempfile(audio)
            tmp_transcript = convert_to_tempfile(transcript)

            # create task
            task = Task()
            task.configuration = aeneas_config
            task.audio_file_path_absolute = Path(tmp_audio.name)
            task.text_file_path_absolute = Path(tmp_transcript.name)

            # process Task
            ExecuteTask(task).execute()
        finally:
            # Close whichever temporary files were created, even when the
            # conversion or the alignment raised.
            for tmp_file in (tmp_audio, tmp_transcript):
                if tmp_file is not None:
                    tmp_file.close()

        return [(str(fragment.begin), str(fragment.end), fragment.text)
                for fragment in task.sync_map_leaves() if fragment.is_regular]
    except Exception as e:
        raise HTTPException(status_code=500,
                            detail="Error during processing: " + str(e)) from e
Esempio n. 31
0
 def test_task_sync_map_leaves(self):
     # The dummy sync map must expose exactly three leaves through the task.
     task = Task()
     task.configuration = TaskConfiguration()
     cfg = task.configuration
     cfg["language"] = Language.ENG
     cfg["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     leaf_count = len(task.sync_map_leaves())
     self.assertEqual(leaf_count, 3)
Esempio n. 32
0
def createSyncedLyricsFile(lyrics, file):
    """Align ``lyrics`` with the audio in ``file`` and write an ``.lrc`` file.

    The lyrics are first written to ``tempSync.txt`` in the current working
    directory. The output path is ``file`` with its last four characters
    replaced by ``.lrc``; each sync map leaf becomes one
    ``[timestamp]text`` line. On success the global ``lyricsSynced`` counter
    is incremented; on any exception the global ``errors`` counter is
    incremented instead and the exception is not propagated.

    :param lyrics: the lyrics text to align
    :param file: path of the audio file
    """
    global lyricsSynced, errors
    # Context managers make sure the handles are closed even on errors.
    with open("tempSync.txt", "w+") as f:
        f.write(lyrics)
    config = TaskConfiguration()
    config[gc.PPN_TASK_LANGUAGE] = Language.FRA
    config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
    config[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.AUDH
    task = Task()
    task.configuration = config
    try:
        task.audio_file_path_absolute = file
        task.text_file_path_absolute = "tempSync.txt"
        ExecuteTask(task).execute()
        with open(file[:-4] + ".lrc", "w+") as syncedLyricsFile:
            for fragment in task.sync_map_leaves():
                # The formatted time is trimmed by three leading characters
                # and one trailing character to form the timestamp tag.
                timestamp = gf.time_to_hhmmssmmm(fragment.interval.begin, '.')[3:-1]
                syncedLyricsFile.write('[' + timestamp + ']' + fragment.text + '\n')
        print("   Sync Added", sep=' ', end='', flush=True)
        lyricsSynced += 1
    except Exception:
        # Deliberate best effort: count the failure and carry on.
        errors += 1
        print("   Sync error", sep=' ', end='',flush=True)
Esempio n. 33
0
 def test_output_sync_map(self):
     # Writing a TXT sync map to an absolute temporary path must return
     # that same path.
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         # Delete the temporary file even when an assertion fails.
         gf.delete_file(handler, output_file_path)
 def transcribe(self, audio, transcript, output):
     """Align ``audio`` with ``transcript`` and write a JSON sync map.

     :param audio: path of the audio file
     :param transcript: path of the plain-text transcript
     :param output: path of the sync map file to write
     """
     alignment_task = Task(
         config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
     alignment_task.audio_file_path_absolute = audio
     alignment_task.text_file_path_absolute = transcript
     alignment_task.sync_map_file_path_absolute = output

     print("Processing task...\n")
     ExecuteTask(alignment_task).execute()

     print(f"Taks processed. Writing output to {output}")
     alignment_task.output_sync_map_file()
Esempio n. 35
0
 def set_text_file(self,
                   path,
                   fmt,
                   expected,
                   id_regex=None,
                   class_regex=None,
                   id_sort=None):
     """Load a text file into a task and check its length.

     :param path: text file path, resolved relative to this file
     :param fmt: value for the ``i_t_format`` configuration key
     :param expected: expected length of the loaded text file
     :param id_regex: optional ``i_t_unparsed_id_regex`` value
     :param class_regex: optional ``i_t_unparsed_class_regex`` value
     :param id_sort: optional ``i_t_unparsed_id_sort`` value
     """
     task = Task()
     task.configuration = TaskConfiguration()
     cfg = task.configuration
     cfg["language"] = Language.ENG
     cfg["i_t_format"] = fmt
     # Apply, in this order, only the options that were actually given.
     for key, value in (("i_t_unparsed_class_regex", class_regex),
                        ("i_t_unparsed_id_regex", id_regex),
                        ("i_t_unparsed_id_sort", id_sort)):
         if value is None:
             continue
         cfg[key] = value
     task.text_file_path_absolute = gf.absolute_path(path, __file__)
     self.assertIsNotNone(task.text_file)
     self.assertEqual(len(task.text_file), expected)
Esempio n. 36
0
def process_aeneas(txt_filename, wav_filename, csv_filename):
    """Align a plain-text file with an audio file and write a CSV sync map.

    :param txt_filename: path of the text file
    :param wav_filename: path of the audio file
    :param csv_filename: path of the sync map file to write
    """
    alignment_task = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=csv")
    alignment_task.audio_file_path_absolute = wav_filename
    alignment_task.text_file_path_absolute = txt_filename
    alignment_task.sync_map_file_path_absolute = csv_filename

    # Compute the alignment, then write the sync map to disk.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()
Esempio n. 37
0
def process_aeneas_map(filepath_, format_):
    """Align ``<filepath_>.<format_>`` with ``<filepath_>.txt``.

    The sync map is written to ``<filepath_>.json``.

    :param filepath_: common path prefix (without extension) of all files
    :param format_: extension of the audio file, without the dot
    """
    audio_path = filepath_ + '.' + format_
    text_path = filepath_ + ".txt"
    sync_map_path = filepath_ + ".json"

    alignment_task = Task(
        config_string=u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    alignment_task.audio_file_path_absolute = audio_path
    alignment_task.text_file_path_absolute = text_path
    alignment_task.sync_map_file_path_absolute = sync_map_path

    # Compute the alignment, then write the sync map to disk.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()
Esempio n. 38
0
def run_aeneas_task(audio_filepath, lyrics_filepath, output_filepath):
    """Align plain-text lyrics with an audio file and write a JSON sync map.

    :param audio_filepath: path of the audio file
    :param lyrics_filepath: path of the lyrics text file
    :param output_filepath: path of the sync map file to write
    """
    alignment_task = Task(
        config_string="task_language=eng|os_task_file_format=json|is_text_type=plain")
    alignment_task.audio_file_path_absolute = audio_filepath
    alignment_task.text_file_path_absolute = lyrics_filepath
    alignment_task.sync_map_file_path_absolute = output_filepath

    # Compute the alignment, then write the sync map to disk.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()
Esempio n. 39
0
def align(audio_file_path, text_file_path, syncmap_file_path):
    """Align a plain-text file with an audio file and write a TXT sync map.

    :param audio_file_path: path of the audio file
    :param text_file_path: path of the text file
    :param syncmap_file_path: path of the sync map file to write
    :return: always ``True``
    """
    alignment_task = Task(
        config_string="task_language=zh|is_text_type=plain|os_task_file_format=txt")
    alignment_task.audio_file_path_absolute = audio_file_path
    alignment_task.text_file_path_absolute = text_file_path
    alignment_task.sync_map_file_path_absolute = syncmap_file_path

    # Compute the alignment, then write the sync map to disk.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()

    return True
Esempio n. 40
0
def executeAeneas(text_path, audio_path):
    """Align a plain-text file with an audio file and write a JSON sync map.

    The sync map is written next to the audio file, as the audio path
    without its extension followed by ``_syncmap.json``.

    :param text_path: path of the text file
    :param audio_path: path of the audio file
    """
    audio_stem = os.path.splitext(audio_path)[0]

    alignment_task = Task(
        config_string=u'task_language=tur|is_text_type=plain|os_task_file_format=json')
    alignment_task.audio_file_path_absolute = audio_path
    alignment_task.text_file_path_absolute = text_path
    alignment_task.sync_map_file_path_absolute = audio_stem + "_syncmap.json"

    # Compute the alignment, then write the sync map to disk.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()
def sync_map_generator(song_name,
                       songs_dir="C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Songs/",
                       lyrics_dir="C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Lyrics/",
                       output_dir=u"output/"):
    """Align a song's lyrics to its audio and write the sync map as JSON.

    The audio is read from ``<songs_dir><song_name>.wav``, the lyrics from
    ``<lyrics_dir><song_name>.txt`` and the sync map is written to
    ``<output_dir><song_name>.json``.

    The directory parameters default to the locations that used to be
    hard-coded, so existing ``sync_map_generator(song_name)`` calls behave
    exactly as before; pass other directories to run on a different machine.

    :param song_name: file name stem shared by the audio, lyrics and output files
    :param songs_dir: directory containing the ``.wav`` files
    :param lyrics_dir: directory containing the ``.txt`` lyrics files
    :param output_dir: directory the ``.json`` sync map is written to
    """
    # local import keeps this block self-contained
    import os

    def _in_dir(directory, file_name):
        # os.path.join(directory, "") appends a separator only when one is
        # missing, so the default directories yield the original strings
        return os.path.join(directory, "") + file_name

    # create Task object
    print('Creating sync map file...')
    config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    task = Task(config_string=config_string)
    task.audio_file_path_absolute = _in_dir(songs_dir, song_name + ".wav")
    task.text_file_path_absolute = _in_dir(lyrics_dir, song_name + ".txt")
    task.sync_map_file_path_absolute = _in_dir(output_dir, song_name + ".json")

    # process Task
    ExecuteTask(task).execute()

    # output sync map to file
    task.output_sync_map_file()
    print('Created sync map file')
Esempio n. 42
0
 def test_execute(self):
     # plain-text task whose config sets a head length (11.960) and a
     # process length (31.640); TXT output
     settings = "task_language=en|os_task_file_format=txt|os_task_file_name=output_head.txt|is_text_type=plain|is_audio_file_head_length=11.960|is_audio_file_process_length=31.640"
     sonnet_task = Task(settings)
     sonnet_task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     sonnet_task.text_file_path_absolute = "../aeneas/tests/res/inputtext/sonnet_plain_head_length.txt"

     # execution must report success
     outcome = ExecuteTask(sonnet_task, logger=Logger(tee=True)).execute()
     self.assertTrue(outcome)

     # writing the sync map must return a path
     sonnet_task.sync_map_file_path_absolute = "/tmp/output_head_length.txt"
     written_path = sonnet_task.output_sync_map_file()
     self.assertNotEqual(written_path, None)
Esempio n. 43
0
 def test_execute(self):
     # unparsed XHTML input (ids matching f[0-9]+, sorted numerically);
     # SMIL output
     settings = "task_language=en|os_task_file_format=smil|os_task_file_name=p001.smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml|is_text_type=unparsed|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric"
     smil_task = Task(settings)
     smil_task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     smil_task.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"

     # execution must report success
     outcome = ExecuteTask(smil_task, logger=Logger(tee=True)).execute()
     self.assertTrue(outcome)

     # writing the sync map must return a path
     smil_task.sync_map_file_path_absolute = "/tmp/p001.smil"
     written_path = smil_task.output_sync_map_file()
     self.assertNotEqual(written_path, None)
Esempio n. 44
0
 def test_output_sync_map(self):
     # Write a dummy sync map to a temporary TXT file and check that
     # output_sync_map_file() returns the path it was given.
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = tempfile.mkstemp(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertNotEqual(path, None)
         self.assertEqual(path, output_file_path)
     finally:
         # clean up the temporary file even when writing or an assertion
         # fails, so failed runs do not leave files behind
         delete_file(handler, output_file_path)
Esempio n. 45
0
def get_alignments():
    """Download the audio for every text/video pair and align it to its text.

    For each ``address -> youtube_id`` entry of the module-level
    ``text_audio_pairs`` mapping, the audio is downloaded as
    ``alignment/audio/<address>.webm``, aligned against
    ``alignment/text/<address>.txt`` and the resulting JSON sync map is
    written to ``alignment/syncmaps/<address>.json``.

    Uses only constructs that behave the same on Python 2 and Python 3:
    ``print(...)`` with a single argument and ``.items()``.
    """
    config = u'task_language=eng|is_text_type=plain|os_task_file_format=json'

    # .items() instead of the Python-2-only .iteritems()
    for address, youtube_id in text_audio_pairs.items():

        # download youtube video and print whatever the downloader returns
        print(D.audio_from_youtube(
            'https://youtu.be/' + youtube_id,
            output_file_path=u'alignment/audio/' + address + '.webm',
            preferred_format=u'webm'
            ))

        # designate text, audio, and syncmap files
        text = os.path.abspath(u'alignment/text/' + address + '.txt')
        audio = os.path.abspath(u'alignment/audio/' + address + '.webm')
        syncmap = os.path.abspath(u'alignment/syncmaps/' + address + '.json')

        # align text to audio
        task = Task(config_string=config)
        task.text_file_path_absolute = text
        task.audio_file_path_absolute = audio
        task.sync_map_file_path_absolute = syncmap
        ExecuteTask(task).execute()
        task.output_sync_map_file()
Esempio n. 46
0
 def test_set_configuration(self):
     # after assigning a TaskConfiguration, task.configuration is not None
     configured = Task()
     fresh_conf = TaskConfiguration()
     configured.configuration = fresh_conf
     self.assertNotEqual(configured.configuration, None)
Esempio n. 47
0
 def test_set_audio_file_path_absolute_01(self):
     # setting the audio path must populate task.audio_file with the
     # sample's size and (truncated) length
     loaded = Task()
     loaded.audio_file_path_absolute = get_abs_path("res/container/job/assets/p001.mp3")
     self.assertNotEqual(loaded.audio_file, None)
     self.assertEqual(loaded.audio_file.file_size, 426735)
     whole_seconds = int(loaded.audio_file.audio_length)
     self.assertEqual(whole_seconds, 53)
Esempio n. 48
0
 def test_set_audio_file_path_absolute_02(self):
     # assigning a path that does not exist is expected to raise OSError
     unloaded = Task()
     with self.assertRaises(OSError):
         unloaded.audio_file_path_absolute = get_abs_path("not/existing.mp3")
Esempio n. 49
0
 def test_set_audio_file_path_absolute_error(self):
     # assigning a path that does not exist is expected to raise OSError
     unloaded = Task()
     with self.assertRaises(OSError):
         unloaded.audio_file_path_absolute = gf.absolute_path("not_existing.mp3", __file__)
Esempio n. 50
0
 def test_set_audio_file_path_absolute(self):
     # setting the audio path must populate task.audio_file with the
     # sample's size and a length of about 53.3
     loaded = Task()
     loaded.audio_file_path_absolute = gf.absolute_path("res/container/job/assets/p001.mp3", __file__)
     audio = loaded.audio_file
     self.assertIsNotNone(audio)
     self.assertEqual(loaded.audio_file.file_size, 426735)
     self.assertAlmostEqual(loaded.audio_file.audio_length, TimeValue("53.3"), places=1)
Esempio n. 51
0
 def test_task_sync_map_leaves_empty(self):
     # a freshly created task reports no sync map leaves
     blank = Task()
     leaves = blank.sync_map_leaves()
     self.assertEqual(len(leaves), 0)
Esempio n. 52
0
 def test_task_set_configuration(self):
     # after assigning a TaskConfiguration, task.configuration is not None
     configured = Task()
     fresh_conf = TaskConfiguration()
     configured.configuration = fresh_conf
     self.assertIsNotNone(configured.configuration)
Esempio n. 53
0
 def test_set_audio_file_path_absolute(self):
     # setting the audio path must populate task.audio_file with the
     # sample's size and a length of about 53.3
     loaded = Task()
     loaded.audio_file_path_absolute = get_abs_path("res/container/job/assets/p001.mp3")
     self.assertNotEqual(loaded.audio_file, None)
     self.assertEqual(loaded.audio_file.file_size, 426735)
     self.assertAlmostEqual(loaded.audio_file.audio_length, 53.3, places=1)
Esempio n. 54
0
    def perform_command(self):
        """
        Perform command and return the appropriate exit code.

        The informational options (help, examples, parameter and value
        listings) are handled first and return immediately.
        Otherwise the audio file, text file, config string and sync map
        file are taken either from a built-in example (``self.DEMOS``)
        or from the first four positional arguments.
        The config string is optionally validated, the audio is
        optionally downloaded from YouTube, a task is created and
        executed, its sync map (and optionally an HTML file) is written,
        and the requested fragment reports are printed.

        Every failure is reported through ``self.print_error`` and
        yields ``self.ERROR_EXIT_CODE``; a complete run yields
        ``self.NO_ERROR_EXIT_CODE``.

        :rtype: int
        """
        # --- informational options: print something and return ---
        if len(self.actual_arguments) < 1:
            return self.print_help()

        if self.has_option([u"-e", u"--examples"]):
            return self.print_examples(False)

        if self.has_option(u"--examples-all"):
            return self.print_examples(True)

        if self.has_option([u"--list-parameters"]):
            return self.print_parameters()

        # --list-values with a value lists that parameter's values;
        # without a value, u"?" is passed instead
        parameter = self.has_option_with_value(u"--list-values")
        if parameter is not None:
            return self.print_values(parameter)
        elif self.has_option(u"--list-values"):
            return self.print_values(u"?")

        # --- read the behaviour switches from the command line ---
        # NOTE list() is needed for Python3, where keys() is not a list!
        demo = self.has_option(list(self.DEMOS.keys()))
        demo_parameters = u""
        download_from_youtube = self.has_option([u"-y", u"--youtube"])
        largest_audio = self.has_option(u"--largest-audio")
        keep_audio = self.has_option(u"--keep-audio")
        output_html = self.has_option(u"--output-html")
        validate = not self.has_option(u"--skip-validator")
        print_faster_rate = self.has_option(u"--faster-rate")
        print_rates = self.has_option(u"--rates")
        print_zero = self.has_option(u"--zero")

        # --- resolve the four task arguments ---
        if demo:
            # the config string of a built-in example is never validated
            validate = False
            # use the first DEMOS key that is present on the command line
            for key in self.DEMOS:
                if self.has_option(key):
                    demo_parameters = self.DEMOS[key]
                    audio_file_path = demo_parameters[u"audio"]
                    text_file_path = demo_parameters[u"text"]
                    config_string = demo_parameters[u"config"]
                    sync_map_file_path = demo_parameters[u"syncmap"]
                    # some examples additionally tweak the runtime
                    # configuration or switch on one of the reports
                    # TODO allow injecting rconf options directly from DEMOS options field
                    if key == u"--example-cewsubprocess":
                        self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = True
                    elif key == u"--example-ctw-espeak":
                        self.rconf[RuntimeConfiguration.TTS] = "custom"
                        self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_ESPEAK
                    elif key == u"--example-ctw-speect":
                        self.rconf[RuntimeConfiguration.TTS] = "custom"
                        self.rconf[RuntimeConfiguration.TTS_PATH] = self.CTW_SPEECT
                    elif key == u"--example-festival":
                        self.rconf[RuntimeConfiguration.TTS] = "festival"
                        self.rconf[RuntimeConfiguration.TTS_PATH] = "text2wave"
                    elif key == u"--example-mws":
                        self.rconf[RuntimeConfiguration.MFCC_WINDOW_LENGTH] = "1.500"
                        self.rconf[RuntimeConfiguration.MFCC_WINDOW_SHIFT] = "0.500"
                    elif key == u"--example-faster-rate":
                        print_faster_rate = True
                    elif key == u"--example-no-zero":
                        print_zero = True
                    elif key == u"--example-py":
                        self.rconf[RuntimeConfiguration.C_EXTENSIONS] = False
                    elif key == u"--example-rates":
                        print_rates = True
                    elif key == u"--example-youtube":
                        download_from_youtube = True
                    break
        else:
            # regular invocation: four positional arguments are required
            if len(self.actual_arguments) < 4:
                return self.print_help()
            audio_file_path = self.actual_arguments[0]
            text_file_path = self.actual_arguments[1]
            config_string = self.actual_arguments[2]
            sync_map_file_path = self.actual_arguments[3]

        html_file_path = None
        if output_html:
            # --output-html forces keep_audio, so a downloaded audio file
            # is not deleted at the end
            keep_audio = True
            html_file_path = sync_map_file_path + u".html"

        if download_from_youtube:
            # with --youtube the audio argument holds the URL to download
            youtube_url = audio_file_path

        # --- check input/output paths (audio is skipped when it is a URL) ---
        if (not download_from_youtube) and (not self.check_input_file(audio_file_path)):
            return self.ERROR_EXIT_CODE
        if not self.check_input_file(text_file_path):
            return self.ERROR_EXIT_CODE
        if not self.check_output_file(sync_map_file_path):
            return self.ERROR_EXIT_CODE
        if (html_file_path is not None) and (not self.check_output_file(html_file_path)):
            return self.ERROR_EXIT_CODE

        self.check_c_extensions()

        # when running an example, show the arguments that will be used
        if demo:
            msg = []
            msg.append(u"Running example task with arguments:")
            if download_from_youtube:
                msg.append(u"  YouTube URL:   %s" % youtube_url)
            else:
                msg.append(u"  Audio file:    %s" % audio_file_path)
            msg.append(u"  Text file:     %s" % text_file_path)
            msg.append(u"  Config string: %s" % config_string)
            msg.append(u"  Sync map file: %s" % sync_map_file_path)
            if len(demo_parameters[u"options"]) > 0:
                msg.append(u"  Options:       %s" % demo_parameters[u"options"])
            self.print_info(u"\n".join(msg))

        # --- validate the config string (task-level, not job-level) ---
        if validate:
            self.print_info(u"Validating config string (specify --skip-validator to bypass)...")
            validator = Validator(logger=self.logger)
            result = validator.check_configuration_string(config_string, is_job=False, external_name=True)
            if not result.passed:
                self.print_error(u"The given config string is not valid:")
                self.print_generic(result.pretty_print())
                return self.ERROR_EXIT_CODE
            self.print_info(u"Validating config string... done")

        # --- download the audio; from here on audio_file_path is whatever
        # the downloader returned (it is later passed to the task and,
        # unless kept, to gf.delete_file) ---
        if download_from_youtube:
            try:
                self.print_info(u"Downloading audio from '%s' ..." % youtube_url)
                downloader = Downloader(logger=self.logger)
                audio_file_path = downloader.audio_from_youtube(
                    youtube_url,
                    download=True,
                    output_file_path=None,
                    largest_audio=largest_audio
                )
                self.print_info(u"Downloading audio from '%s' ... done" % youtube_url)
            except ImportError:
                # an ImportError here is reported via print_no_pafy_error()
                self.print_no_pafy_error()
                return self.ERROR_EXIT_CODE
            except Exception as exc:
                self.print_error(u"An unexpected error occurred while downloading audio from YouTube:")
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE

        # --- create the task ---
        try:
            self.print_info(u"Creating task...")
            task = Task(config_string, logger=self.logger)
            task.audio_file_path_absolute = audio_file_path
            task.text_file_path_absolute = text_file_path
            task.sync_map_file_path_absolute = sync_map_file_path
            self.print_info(u"Creating task... done")
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while creating the task:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        # --- execute the task with the (possibly tweaked) runtime configuration ---
        try:
            self.print_info(u"Executing task...")
            executor = ExecuteTask(task=task, rconf=self.rconf, logger=self.logger)
            executor.execute()
            self.print_info(u"Executing task... done")
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while executing the task:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        # --- write the sync map file ---
        try:
            self.print_info(u"Creating output sync map file...")
            path = task.output_sync_map_file()
            self.print_info(u"Creating output sync map file... done")
            self.print_success(u"Created file '%s'" % path)
        except Exception as exc:
            self.print_error(u"An unexpected error occurred while writing the sync map file:")
            self.print_error(u"%s" % exc)
            return self.ERROR_EXIT_CODE

        # --- optionally write the HTML file, passing along the task's
        # output format and SMIL references ---
        if output_html:
            try:
                parameters = {}
                parameters[gc.PPN_TASK_OS_FILE_FORMAT] = task.configuration["o_format"]
                parameters[gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF] = task.configuration["o_smil_audio_ref"]
                parameters[gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF] = task.configuration["o_smil_page_ref"]
                self.print_info(u"Creating output HTML file...")
                task.sync_map.output_html_for_tuning(audio_file_path, html_file_path, parameters)
                self.print_info(u"Creating output HTML file... done")
                self.print_success(u"Created file '%s'" % html_file_path)
            except Exception as exc:
                self.print_error(u"An unexpected error occurred while writing the HTML file:")
                self.print_error(u"%s" % exc)
                return self.ERROR_EXIT_CODE

        # --- delete the downloaded audio unless it must be kept ---
        if download_from_youtube:
            if keep_audio:
                self.print_info(u"Option --keep-audio set: keeping downloaded file '%s'" % audio_file_path)
            else:
                gf.delete_file(None, audio_file_path)

        # --- optional reports over the non-empty leaves of the sync map ---
        if print_zero:
            # fragments whose begin and end coincide
            zero_duration = [l for l in task.sync_map.fragments_tree.vleaves_not_empty if l.begin == l.end]
            if len(zero_duration) > 0:
                self.print_warning(u"Fragments with zero duration:")
                for fragment in zero_duration:
                    self.print_generic(u"  %s" % fragment)

        if print_rates:
            self.print_info(u"Fragments with rates:")
            for fragment in task.sync_map.fragments_tree.vleaves_not_empty:
                self.print_generic(u"  %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

        if print_faster_rate:
            # only meaningful when the task configuration sets a rate value
            max_rate = task.configuration["aba_rate_value"]
            if max_rate is not None:
                # a fragment is reported when its rate exceeds the
                # configured value by at least 0.001
                faster = [l for l in task.sync_map.fragments_tree.vleaves_not_empty if l.rate >= max_rate + Decimal("0.001")]
                if len(faster) > 0:
                    self.print_warning(u"Fragments with rate greater than %.3f:" % max_rate)
                    for fragment in faster:
                        self.print_generic(u"  %s (rate: %.3f chars/s)" % (fragment, fragment.rate))

        return self.NO_ERROR_EXIT_CODE