Esempio n. 1
0
 def test_job_add_task(self):
     """A new job is empty and its length grows by one per added task."""
     job = Job()
     self.assertEqual(len(job), 0)
     # Add three fresh tasks, checking the reported length after each one.
     for expected_length in (1, 2, 3):
         job.add_task(Task())
         self.assertEqual(len(job), expected_length)
def total_FA(soundfile, mylines, myhead, mytail, config=None):
    """Align ``mylines`` against ``soundfile`` by driving Aeneas as a library.

    Per the original author's note, this function is not currently in use
    because this approach has not produced reliable results.

    :param soundfile: value assigned to the task's ``audio_file_path_absolute``
    :param mylines: iterable of ``(identifier, text)`` pairs, one per fragment
    :param myhead: value substituted for ``is_audio_file_head_length``
    :param mytail: value substituted for ``is_audio_file_tail_length``
    :param config: optional extra configuration, appended after a ``|``
    :return: the ``fragments`` of the task's sync map after execution
    """
    # Base configuration; the optional caller-supplied part goes at the end.
    settings = (
        u"task_language=nor|is_text_type=plain|os_task_file_format=json|is_audio_file_head_length=%s|is_audio_file_tail_length=%s"
        % (myhead, mytail))
    if config is not None:
        settings += u"|%s" % (config,)
    print(settings)

    aligner_task = Task(config_string=settings)
    print(aligner_task)
    aligner_task.audio_file_path_absolute = soundfile

    # Build the text file in memory instead of reading it from disk.
    fragments_file = TextFile()
    print(fragments_file)
    for fragment_id, fragment_text in mylines:
        fragment = TextFragment(
            fragment_id, Language.NOR, fragment_text, fragment_text)
        fragments_file.add_fragment(fragment)
    aligner_task.text_file = fragments_file
    print(len(aligner_task.text_file))

    ExecuteTask(aligner_task).execute()
    return aligner_task.sync_map.fragments
Esempio n. 3
0
 def test_job_clear_tasks(self):
     """``clear_tasks`` empties a job that previously held one task."""
     populated_job = Job()
     # Append straight to the ``tasks`` list rather than using ``add_task``.
     populated_job.tasks.append(Task())
     self.assertEqual(len(populated_job), 1)
     populated_job.clear_tasks()
     self.assertEqual(len(populated_job), 0)
Esempio n. 4
0
def force_align(audio_path,
                text_path,
                output_path,
                min_length=1.0,
                max_length=10.0,
                logging=logging):
    """Align ``text_path`` with ``audio_path`` and dump the kept fragments as JSON.

    A fragment is kept when its length is strictly between ``min_length`` and
    ``max_length`` and its text is non-empty.

    :param audio_path: path of the audio file
    :param text_path: path of the plain-text file
    :param output_path: path of the JSON file written at the end
    :param min_length: exclusive lower bound on a fragment's length
    :param max_length: exclusive upper bound on a fragment's length
    :param logging: object whose ``info`` method receives progress messages
    """
    alignment_task = Task(
        config_string=
        u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    alignment_task.audio_file_path_absolute = audio_path
    alignment_task.text_file_path_absolute = text_path
    alignment_task.sync_map_file_path_absolute = output_path

    logging.info("Aligning audio and text...")
    ExecuteTask(alignment_task).execute()
    logging.info("Aligned audio and text")

    # Keep only fragments inside the length window that carry text.
    kept = [
        {
            "start": float(leaf.begin),
            "end": float(leaf.end),
            "length": float(leaf.length),
            "text": leaf.text,
        }
        for leaf in alignment_task.sync_map_leaves()
        if leaf.length > min_length and leaf.length < max_length and leaf.text
    ]

    with open(output_path, "w") as out_file:
        json.dump(kept, out_file, indent=4)
Esempio n. 5
0
def align_audio(
        language: LanguageEnum = Form(...),
        text_file_format: TextFileFormatEnum = Form(...),
        transcript: UploadFile = File(...),
        audio: UploadFile = File(...),
):
    """Align an uploaded transcript with an uploaded audio file.

    Both uploads are converted to named temporary files, which are used as
    the task's text and audio inputs.

    :param language: value stored under ``gc.PPN_TASK_LANGUAGE``
    :param text_file_format: value stored under ``gc.PPN_TASK_IS_TEXT_FILE_FORMAT``
    :param transcript: uploaded transcript file
    :param audio: uploaded audio file
    :return: list of ``(begin, end, text)`` tuples, with ``begin`` and ``end``
        as strings, for every regular leaf fragment of the sync map
    :raises HTTPException: with status 500 if any step fails
    """
    tmp_audio = None
    tmp_transcript = None
    try:
        # prepare config
        aeneas_config = TaskConfiguration()
        aeneas_config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = text_file_format
        aeneas_config[gc.PPN_TASK_LANGUAGE] = language

        # get named temporary files
        tmp_audio = convert_to_tempfile(audio)
        tmp_transcript = convert_to_tempfile(transcript)

        # create task
        task = Task()
        task.configuration = aeneas_config
        task.audio_file_path_absolute = Path(tmp_audio.name)
        task.text_file_path_absolute = Path(tmp_transcript.name)

        # process Task
        ExecuteTask(task).execute()

        return [(str(fragment.begin), str(fragment.end), fragment.text)
                for fragment in task.sync_map_leaves() if fragment.is_regular]
    except Exception as e:
        raise HTTPException(status_code=500,
                            detail="Error during processing: " + str(e)) from e
    finally:
        # Close the temporary files on every path; previously they were
        # left open whenever the alignment raised.
        for tmp_file in (tmp_audio, tmp_transcript):
            if tmp_file is not None:
                tmp_file.close()
Esempio n. 6
0
def align(text_path, audio_path, align_out_path, word_align=True):
    """Align a Hindi text with its audio and write the sync map as JSON.

    :param text_path: path of the text file
    :param audio_path: path of the audio file
    :param align_out_path: path of the JSON sync map to write
    :param word_align: when true, use the ``mplain`` text type with three
        output levels and enable the non-speech masking runtime options;
        otherwise use the ``plain`` text type with no runtime configuration
    """
    options = [u"task_language=hi", "|os_task_file_format=json"]
    runtime_conf = None
    if not word_align:
        options.append("|is_text_type=plain")
    else:
        options.append("|os_task_file_levels=3")
        options.append("|is_text_type=mplain")
        runtime_conf = RuntimeConfiguration()
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH] = True
        runtime_conf[RuntimeConfiguration.MFCC_MASK_NONSPEECH_L3] = True

    alignment_task = Task(config_string="".join(options))
    alignment_task.text_file_path_absolute = text_path
    alignment_task.audio_file_path_absolute = audio_path
    alignment_task.sync_map_file_path_absolute = align_out_path

    ExecuteTask(alignment_task, rconf=runtime_conf).execute()
    alignment_task.output_sync_map_file()

    # Reload and rewrite the file with ensure_ascii=False so non-ASCII text
    # is stored literally rather than as \uXXXX escapes.
    with open(align_out_path, 'r', encoding='utf8') as source:
        sync_map = json.load(source)
    with open(align_out_path, 'w', encoding='utf8') as target:
        json.dump(sync_map, target, ensure_ascii=False, indent=2)
Esempio n. 7
0
 def test_task_sync_map_leaves(self):
     """The dummy sync map attached to a task exposes three leaves."""
     task = Task()
     task.configuration = TaskConfiguration()
     configuration = task.configuration
     configuration["language"] = Language.ENG
     configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     leaves = task.sync_map_leaves()
     self.assertEqual(len(leaves), 3)
Esempio n. 8
0
def align_files_in_place(data: InputDataFiles):
    """Align the files named in ``data`` and write the result as JSON.

    :param data: carries ``text_file_format``, ``language``,
        ``audio_filename``, ``transcript_filename`` and ``alignment_filename``
    :raises HTTPException: with status 500 if any step fails

    The output is a JSON array of ``[begin, end, text]`` entries, with
    ``begin`` and ``end`` as strings, one per regular leaf fragment.
    """
    try:
        # prepare config
        aeneas_config = TaskConfiguration()
        aeneas_config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = data.text_file_format
        aeneas_config[gc.PPN_TASK_LANGUAGE] = data.language

        # create task
        task = Task()
        task.configuration = aeneas_config
        task.audio_file_path_absolute = Path(data.audio_filename)
        task.text_file_path_absolute = Path(data.transcript_filename)

        # process Task
        ExecuteTask(task).execute()

        # Build the payload before opening the output so a failure here does
        # not leave a truncated file behind.
        alignment = [(str(fragment.begin), str(fragment.end), fragment.text)
                     for fragment in task.sync_map_leaves()
                     if fragment.is_regular]

        # orjson.dumps returns UTF-8 bytes. Write them unchanged in binary
        # mode instead of decoding and re-encoding with the locale's default
        # encoding, which can fail or corrupt non-ASCII text.
        with open(data.alignment_filename, "wb") as f:
            f.write(orjson.dumps(alignment))
    except Exception as e:
        raise HTTPException(status_code=500,
                            detail="Error during processing: " + str(e)) from e
Esempio n. 9
0
def createSyncedLyricsFile(lyrics, file):
    """Align ``lyrics`` with the audio at ``file`` and write an ``.lrc`` file.

    The lyrics are written to ``tempSync.txt`` in the current working
    directory, which is then used as the task's text file. One line per
    sync-map leaf is written as ``[<timestamp>]<text>``.

    On success the module-level ``lyricsSynced`` counter is incremented. On
    any exception during alignment or output, ``errors`` is incremented and
    the exception is swallowed (best effort).

    :param lyrics: the lyrics text
    :param file: path of the audio file; the output path is this path with
        its extension replaced by ``.lrc``
    """
    global lyricsSynced, errors
    import os  # local import: only needed to derive the output path

    # aeneas expects UTF-8 text input, so do not rely on the locale default.
    with open("tempSync.txt", "w+", encoding="utf-8") as lyrics_file:
        lyrics_file.write(lyrics)

    config = TaskConfiguration()
    config[gc.PPN_TASK_LANGUAGE] = Language.FRA
    config[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
    config[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.AUDH
    task = Task()
    task.configuration = config
    try:
        task.audio_file_path_absolute = file
        task.text_file_path_absolute = "tempSync.txt"
        ExecuteTask(task).execute()
        # splitext handles extensions of any length; the previous
        # ``file[:-4]`` was only right for three-character extensions.
        lrc_path = os.path.splitext(file)[0] + ".lrc"
        # ``with`` closes the file even if writing a fragment fails.
        with open(lrc_path, "w+") as syncedLyricsFile:
            for fragment in task.sync_map_leaves():
                # [3:-1] drops the first three characters and the last one
                # of the formatted time (presumably the hours prefix and the
                # final millisecond digit -- TODO confirm).
                syncedLyricsFile.write(
                    str('[' + gf.time_to_hhmmssmmm(fragment.interval.begin, '.')[3:-1] + ']' + fragment.text + '\n'))
        print("   Sync Added", sep=' ', end='', flush=True)
        lyricsSynced += 1
    except Exception:
        # Deliberately best effort: count the failure and carry on.
        errors += 1
        print("   Sync error", sep=' ', end='', flush=True)
Esempio n. 10
0
def align_aeneas():
    """Align every narration audio file of every PID with its transcript.

    For each entry of ``config.PIDs`` the narrations subfolder is searched
    for ``*.m4a`` files, falling back to ``*.mp3`` when none are found. The
    first six characters of each audio file name form the file id used to
    locate ``<id>.txt`` in ``config.aeneas_transcript_dir`` and to name the
    ``<id>.sbv`` output in ``config.aeneas_output_dir``. Audio files without
    a transcript are reported and skipped.
    """
    for PID in config.PIDs:

        narrations_dir = os.path.join(PID, config.narrations_subfolder)
        audio_file_list = glob(os.path.join(narrations_dir, "*.m4a"))

        if not audio_file_list:
            audio_file_list = glob(os.path.join(narrations_dir, "*.mp3"))

        audio_file_list.sort()

        for a in audio_file_list:
            file_id = os.path.basename(a)[0:6]
            t = os.path.join(config.aeneas_transcript_dir, file_id + ".txt")
            out_file = os.path.join(config.aeneas_output_dir, file_id + ".sbv")
            # Single-argument print calls give the same output on Python 2
            # and Python 3.
            print("aligning " + a + " " + t)

            if not os.path.isfile(t):
                print(t + " not available so will not be processed.  Was it a missed ground truth file?")
                continue

            # create Task object
            config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=sbv"
            task = Task(config_string=config_string)
            task.audio_file_path_absolute = a
            task.text_file_path_absolute = t
            task.sync_map_file_path_absolute = out_file

            # process Task
            ExecuteTask(task).execute()

            # output sync map to file
            task.output_sync_map_file()
Esempio n. 11
0
 def test_set_audio_file_path_absolute(self):
     """Setting the absolute audio path populates ``task.audio_file``."""
     task = Task()
     mp3_path = get_abs_path("res/container/job/assets/p001.mp3")
     task.audio_file_path_absolute = mp3_path
     loaded_audio = task.audio_file
     self.assertNotEqual(loaded_audio, None)
     self.assertEqual(loaded_audio.file_size, 426735)
     self.assertAlmostEqual(loaded_audio.audio_length, 53.3, places=1)
Esempio n. 12
0
 def test_set_audio_file_path_absolute(self):
     """Setting the absolute audio path populates ``task.audio_file``."""
     task = Task()
     mp3_path = gf.absolute_path("res/container/job/assets/p001.mp3",
                                 __file__)
     task.audio_file_path_absolute = mp3_path
     loaded_audio = task.audio_file
     self.assertIsNotNone(loaded_audio)
     self.assertEqual(loaded_audio.file_size, 426735)
     expected_length = TimeValue("53.3")
     self.assertAlmostEqual(loaded_audio.audio_length, expected_length,
                            places=1)
Esempio n. 13
0
def force_align():
    """Align ``data/audio.wav`` with ``data/transcript.txt``.

    Both paths are resolved against the absolute path of the ``data``
    directory.

    :return: the ``"fragments"`` entry of the sync map's JSON representation
    """
    data_dir = os.path.abspath("data")

    task = Task(
        config_string=
        "task_language=rus|is_text_type=plain|os_task_file_format=json")
    task.audio_file_path_absolute = f"{data_dir}/audio.wav"
    task.text_file_path_absolute = f"{data_dir}/transcript.txt"

    ExecuteTask(task).execute()
    # The sync map is decoded from its JSON string rather than read from disk.
    sync_map = json.loads(task.sync_map.json_string)
    return sync_map["fragments"]
Esempio n. 14
0
 def test_output_sync_map(self):
     """``output_sync_map_file`` returns the path it was configured with."""
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration.language = Language.EN
     task.configuration.os_file_format = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = tempfile.mkstemp(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertNotEqual(path, None)
         self.assertEqual(path, output_file_path)
     finally:
         # Remove the temporary file even when an assertion above fails.
         delete_file(handler, output_file_path)
Esempio n. 15
0
 def test_execute(self):
     """Run a SMIL task on the p001 assets and write its sync map."""
     task = Task(
         "task_language=en|os_task_file_format=smil|os_task_file_name=p001.smil|os_task_file_smil_audio_ref=p001.mp3|os_task_file_smil_page_ref=p001.xhtml|is_text_type=unparsed|is_text_unparsed_id_regex=f[0-9]+|is_text_unparsed_id_sort=numeric"
     )
     task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     task.text_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.xhtml"
     # The logger is created with tee=True, as in the original test.
     succeeded = ExecuteTask(task, logger=Logger(tee=True)).execute()
     self.assertTrue(succeeded)
     task.sync_map_file_path_absolute = "/tmp/p001.smil"
     written_path = task.output_sync_map_file()
     self.assertNotEqual(written_path, None)
 def transcribe(self, audio, transcript, output):
     """Align ``transcript`` with ``audio`` and write a JSON sync map.

     :param audio: path of the audio file
     :param transcript: path of the plain-text transcript
     :param output: path of the JSON sync map to write
     """
     alignment_task = Task(
         config_string=
         u"task_language=eng|is_text_type=plain|os_task_file_format=json")
     alignment_task.audio_file_path_absolute = audio
     alignment_task.text_file_path_absolute = transcript
     alignment_task.sync_map_file_path_absolute = output

     print("Processing task...\n")
     ExecuteTask(alignment_task).execute()

     print(f"Taks processed. Writing output to {output}")
     alignment_task.output_sync_map_file()
Esempio n. 17
0
 def test_execute(self):
     """Run a plain-text task with head/process lengths set and write its sync map."""
     task = Task(
         "task_language=en|os_task_file_format=txt|os_task_file_name=output_head.txt|is_text_type=plain|is_audio_file_head_length=11.960|is_audio_file_process_length=31.640"
     )
     task.audio_file_path_absolute = "../aeneas/tests/res/container/job/assets/p001.mp3"
     task.text_file_path_absolute = "../aeneas/tests/res/inputtext/sonnet_plain_head_length.txt"
     # The logger is created with tee=True, as in the original test.
     succeeded = ExecuteTask(task, logger=Logger(tee=True)).execute()
     self.assertTrue(succeeded)
     task.sync_map_file_path_absolute = "/tmp/output_head_length.txt"
     written_path = task.output_sync_map_file()
     self.assertNotEqual(written_path, None)
Esempio n. 18
0
 def test_output_sync_map(self):
     """``output_sync_map_file`` returns the path it was configured with."""
     task = Task()
     task.configuration = TaskConfiguration()
     task.configuration["language"] = Language.ENG
     task.configuration["o_format"] = SyncMapFormat.TXT
     task.sync_map = self.dummy_sync_map()
     handler, output_file_path = gf.tmp_file(suffix=".txt")
     try:
         task.sync_map_file_path_absolute = output_file_path
         path = task.output_sync_map_file()
         self.assertIsNotNone(path)
         self.assertEqual(path, output_file_path)
     finally:
         # Remove the temporary file even when an assertion above fails.
         gf.delete_file(handler, output_file_path)
Esempio n. 19
0
 def test_task_empty_on_creation(self):
     """Every checked attribute of a freshly built ``Task`` equals ``None``."""
     task = Task()
     # Checked in the same order as the original one-assert-per-line version.
     for attribute in (
             "configuration",
             "text_file",
             "audio_file",
             "sync_map",
             "audio_file_path",
             "audio_file_path_absolute",
             "text_file_path",
             "text_file_path_absolute",
             "sync_map_file_path",
             "sync_map_file_path_absolute",
     ):
         self.assertEqual(getattr(task, attribute), None)
Esempio n. 20
0
 def test_task_empty_on_creation(self):
     """Every checked attribute of a freshly built ``Task`` is ``None``."""
     task = Task()
     # Checked in the same order as the original one-assert-per-line version.
     for attribute in (
             "configuration",
             "text_file",
             "audio_file",
             "sync_map",
             "audio_file_path",
             "audio_file_path_absolute",
             "text_file_path",
             "text_file_path_absolute",
             "sync_map_file_path",
             "sync_map_file_path_absolute",
     ):
         self.assertIsNone(getattr(task, attribute))
Esempio n. 21
0
    def _create_task(self, task_info, config_string, sync_map_root_directory,
                     job_os_hierarchy_type):
        """
        Build a :class:`aeneas.task.Task` from a container entry.

        The task is configured from ``config_string`` and then filled in with
        the description, language, custom id and file paths derived from
        ``task_info``. The language is taken from the task-level parameter
        and falls back to the job-level one when the former is missing.

        :param task_info: the task information: ``[prefix, text_path, audio_path]``
        :type  task_info: list of strings
        :param config_string: the configuration string
        :type  config_string: string
        :param sync_map_root_directory: the root directory for the sync map files
        :type  sync_map_root_directory: string (path)
        :param job_os_hierarchy_type: type of job output hierarchy
        :type  job_os_hierarchy_type: :class:`aeneas.hierarchytype.HierarchyType`
        :rtype: :class:`aeneas.task.Task`
        """
        self._log("Converting config string to config dict")
        parameters = gf.config_string_to_dict(config_string)

        self._log("Creating task")
        task = Task(config_string)
        configuration = task.configuration

        # The prefix doubles as the description suffix and the custom id.
        custom_id = task_info[0]
        configuration.description = "Task %s" % custom_id
        self._log(["Task description: %s", configuration.description])

        try:
            configuration.language = parameters[gc.PPN_TASK_LANGUAGE]
            self._log(
                ["Set language from task: '%s'", configuration.language])
        except KeyError:
            # No task-level language: fall back to the job-level one.
            configuration.language = parameters[gc.PPN_JOB_LANGUAGE]
            self._log(
                ["Set language from job: '%s'", configuration.language])

        configuration.custom_id = custom_id
        self._log(["Task custom_id: %s", configuration.custom_id])

        task.text_file_path = task_info[1]
        self._log(["Task text file path: %s", task.text_file_path])

        task.audio_file_path = task_info[2]
        self._log(["Task audio file path: %s", task.audio_file_path])

        task.sync_map_file_path = self._compute_sync_map_file_path(
            sync_map_root_directory,
            job_os_hierarchy_type,
            custom_id,
            configuration.os_file_name)
        self._log(["Task sync map file path: %s", task.sync_map_file_path])

        self._log("Replacing placeholder in os_file_smil_audio_ref")
        audio_ref = configuration.os_file_smil_audio_ref
        configuration.os_file_smil_audio_ref = self._replace_placeholder(
            audio_ref, custom_id)

        self._log("Replacing placeholder in os_file_smil_page_ref")
        page_ref = configuration.os_file_smil_page_ref
        configuration.os_file_smil_page_ref = self._replace_placeholder(
            page_ref, custom_id)

        self._log("Returning task")
        return task
Esempio n. 22
0
def process_aeneas(txt_filename, wav_filename, csv_filename):
    """Align an English plain-text file with audio and write a CSV sync map.

    :param txt_filename: path of the text file
    :param wav_filename: path of the audio file
    :param csv_filename: path of the CSV sync map to write
    """
    alignment_task = Task(
        config_string=
        u"task_language=eng|is_text_type=plain|os_task_file_format=csv")
    alignment_task.audio_file_path_absolute = wav_filename
    alignment_task.text_file_path_absolute = txt_filename
    alignment_task.sync_map_file_path_absolute = csv_filename

    # Run the alignment, then write the sync map to the configured path.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()
Esempio n. 23
0
 def execute(self, config_string, audio_path, text_path):
     """Run a task and check that a non-empty sync map file is written.

     :param config_string: the task configuration string
     :param audio_path: audio path, resolved with ``gf.absolute_path``
         against this file
     :param text_path: text path, resolved the same way
     """
     handler, tmp_path = gf.tmp_file()
     try:
         task = Task(config_string)
         task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
         task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
         executor = ExecuteTask(task)
         executor.execute()
         task.sync_map_file_path_absolute = tmp_path
         result_path = task.output_sync_map_file()
         self.assertIsNotNone(result_path)
         self.assertEqual(result_path, tmp_path)
         self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
     finally:
         # Remove the temporary file even when execution or an assertion
         # fails; previously it was left behind in those cases.
         gf.delete_file(handler, tmp_path)
    def align_audio_and_text(self, file_path):
        """Align ``self.txt_file_path`` with ``self.audio_file_path``.

        The task uses ``Language.PAN``, the plain text format and the JSON
        sync map format.

        :param file_path: path of the sync map file to write
        """
        task_configuration = TaskConfiguration()
        task_configuration[gc.PPN_TASK_LANGUAGE] = Language.PAN
        task_configuration[gc.PPN_TASK_IS_TEXT_FILE_FORMAT] = TextFileFormat.PLAIN
        task_configuration[gc.PPN_TASK_OS_FILE_FORMAT] = SyncMapFormat.JSON

        alignment_task = Task()
        alignment_task.configuration = task_configuration
        alignment_task.audio_file_path_absolute = self.audio_file_path
        alignment_task.text_file_path_absolute = self.txt_file_path
        alignment_task.sync_map_file_path_absolute = file_path

        # Run the alignment, then write the sync map to ``file_path``.
        ExecuteTask(alignment_task).execute()
        alignment_task.output_sync_map_file()
Esempio n. 25
0
def process_aeneas_map(filepath_, format_):
    """Align ``<filepath_>.txt`` with ``<filepath_>.<format_>``.

    The sync map is written to ``<filepath_>.json``.

    :param filepath_: common path prefix, without extension, of all three files
    :param format_: extension of the audio file, without the leading dot
    """
    audio_path = filepath_ + '.' + format_
    text_path = filepath_ + ".txt"
    sync_map_path = filepath_ + ".json"

    alignment_task = Task(
        config_string=
        u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    alignment_task.audio_file_path_absolute = audio_path
    alignment_task.text_file_path_absolute = text_path
    alignment_task.sync_map_file_path_absolute = sync_map_path

    # Run the alignment, then write the sync map to the configured path.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()
Esempio n. 26
0
def run_aeneas_task(audio_filepath, lyrics_filepath, output_filepath):
    """Align English plain-text lyrics with audio and write a JSON sync map.

    :param audio_filepath: path of the audio file
    :param lyrics_filepath: path of the lyrics text file
    :param output_filepath: path of the JSON sync map to write
    """
    alignment_task = Task(
        config_string=
        "task_language=eng|os_task_file_format=json|is_text_type=plain")
    alignment_task.audio_file_path_absolute = audio_filepath
    alignment_task.text_file_path_absolute = lyrics_filepath
    alignment_task.sync_map_file_path_absolute = output_filepath

    # Run the alignment, then write the sync map to the configured path.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()
Esempio n. 27
0
def align(audio_file_path, text_file_path, syncmap_file_path):
    """Align a plain-text file with audio (``task_language=zh``), writing a TXT sync map.

    :param audio_file_path: path of the audio file
    :param text_file_path: path of the text file
    :param syncmap_file_path: path of the sync map to write
    :return: always ``True``
    """
    alignment_task = Task(
        config_string=
        "task_language=zh|is_text_type=plain|os_task_file_format=txt")
    alignment_task.audio_file_path_absolute = audio_file_path
    alignment_task.text_file_path_absolute = text_file_path
    alignment_task.sync_map_file_path_absolute = syncmap_file_path

    # Run the alignment, then write the sync map to the configured path.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()

    return True
Esempio n. 28
0
def executeAeneas(text_path, audio_path):
    """Align a plain-text file with audio (``task_language=tur``).

    The JSON sync map is written next to the audio file, as
    ``<audio path without extension>_syncmap.json``.

    :param text_path: path of the text file
    :param audio_path: path of the audio file
    """
    audio_stem = os.path.splitext(audio_path)[0]

    alignment_task = Task(
        config_string=
        u'task_language=tur|is_text_type=plain|os_task_file_format=json')
    alignment_task.audio_file_path_absolute = audio_path
    alignment_task.text_file_path_absolute = text_path
    alignment_task.sync_map_file_path_absolute = audio_stem + "_syncmap.json"

    # Run the alignment, then write the sync map to the configured path.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()
def sync_map_generator(
        song_name,
        songs_dir="C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Songs/",
        lyrics_dir="C:/Users/jaysh/Desktop/VandyHacks/Hack Prototype/Lyrics/",
        output_dir=u"output/"):
    """Align a song's lyrics with its audio and write a JSON sync map.

    The directory arguments are used as plain string prefixes, so each must
    end with a path separator. Their defaults are the locations that were
    previously hard-coded, so existing one-argument calls behave as before.

    :param song_name: base name, without extension, shared by the ``.wav``,
        ``.txt`` and ``.json`` files
    :param songs_dir: prefix of ``<song_name>.wav``
    :param lyrics_dir: prefix of ``<song_name>.txt``
    :param output_dir: prefix of the ``<song_name>.json`` sync map
    """
    # create Task object
    print('Creating sync map file...')
    config_string = u"task_language=eng|is_text_type=plain|os_task_file_format=json"
    task = Task(config_string=config_string)
    task.audio_file_path_absolute = songs_dir + song_name + ".wav"
    task.text_file_path_absolute = lyrics_dir + song_name + ".txt"
    task.sync_map_file_path_absolute = output_dir + song_name + ".json"

    # process Task
    ExecuteTask(task).execute()

    # output sync map to file
    task.output_sync_map_file()
    print('Created sync map file')
    print('Created sync map file')
Esempio n. 30
0
def generate_epub(pub_id):
    """Align the fixed ``./test`` sample files and return the sync map text.

    :param pub_id: not used by the body
    :return: the full contents of the JSON sync map file that was written
    """
    alignment_task = Task(
        config_string=
        u"task_language=eng|is_text_type=plain|os_task_file_format=json")
    alignment_task.audio_file_path_absolute = u"./test/audio.mp3"
    alignment_task.text_file_path_absolute = u"./test/sonnet_plain.txt"
    alignment_task.sync_map_file_path_absolute = u"./test/output.json"

    # Run the alignment, then write the sync map to the configured path.
    ExecuteTask(alignment_task).execute()
    alignment_task.output_sync_map_file()

    # Read the file back through the path stored on the task.
    with open(alignment_task.sync_map_file_path_absolute, 'r') as sync_map_file:
        return sync_map_file.read()