Python align_audio 예제들, readalongs.align.align_audio Python 예제들

예제 #1

0

파일 보기

    def test_align_with_invalid_preg2p(self):
        """Invalid pre-g2p'd ARPABET input is reported by both g2p and align_audio.

        Writes a small XML document whose last word carries a bogus ARPABET
        attribute, runs the ``g2p`` command on it, then checks that
        ``align_audio`` raises ``RuntimeError`` for the same file.
        """
        xml_text = """<document><s xml:lang="und">
            <w>word</w>
            <w ARPABET="G OW D">good</w>
            <w ARPABET="NOT ARPABET">error</w>
        </s></document>"""
        xml_path = os.path.join(self.tempdir, "pre-g2p.xml")
        with open(xml_path, "w", encoding="utf8") as xml_file:
            xml_file.write(xml_text + "\n")

        # The CLI must fail, yet still print the document with IDs assigned.
        cli_result = self.runner.invoke(g2p, [xml_path, "-"])
        self.assertNotEqual(cli_result.exit_code, 0)
        cli_output = cli_result.output
        expected_fragments = (
            "could not be g2p",
            '<w id="s0w0" ARPABET="W OW D D">word</w>',
            '<w ARPABET="G OW D" id="s0w1">good</w>',
            '<w ARPABET="NOT ARPABET" id="s0w2">error</w>',
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, cli_output)

        # Aligning the same input must raise rather than return results.
        audio_path = os.path.join(self.data_dir, "ej-fra.m4a")
        with self.assertRaises(RuntimeError) as raised:
            align_audio(xml_path, audio_path)
        self.assertIn("could not be g2p'd", str(raised.exception))

예제 #2

0

파일 보기

파일: test_force_align.py 프로젝트: littell/ReadAlong-Studio

    def testAlign(self):
        """Align the ej-fra sample at word level and compare IDs with the reference XML."""
        data_dir = self.data_dir
        results = align_audio(
            os.path.join(data_dir, "ej-fra.xml"),
            os.path.join(data_dir, "ej-fra.m4a"),
            unit="w",
        )

        # The aligned words must carry the same IDs, in the same order, as the
        # <w> elements of the reference converted file.
        reference_root = etree.parse(
            os.path.join(data_dir, "ej-fra-converted.xml")
        ).getroot()
        aligned_words = results["words"]
        reference_words = reference_root.xpath(".//w")
        self.assertEqual(len(aligned_words), len(reference_words))
        for aligned, reference in zip(aligned_words, reference_words):
            self.assertEqual(reference.attrib["id"], aligned["id"])

예제 #3

0

파일 보기

 def test_align_with_preg2p(self):
     """Align previously g2p'd text and inspect the saved dictionary file."""
     data_dir = self.data_dir
     xml_path = os.path.join(data_dir, "mixed-langs.tokenized.xml")
     m4a_path = os.path.join(data_dir, "ej-fra.m4a")
     temps_prefix = os.path.join(self.tempdir, "foo")
     stubbed_segments = ("t0b0d0p0s0w0:920:1620", "t0b0d0p0s1w0:1620:1690")
     with SoundSwallowerStub(*stubbed_segments):
         align_audio(xml_path, m4a_path, save_temps=temps_prefix)

     # save_temps is expected to leave a pronunciation dictionary next to the
     # prefix; read it fully before asserting on its contents.
     with open(temps_prefix + ".dict", "r", encoding="utf8") as dict_fh:
         dict_contents = dict_fh.read()

     expected_entries = (
         "S AH S IY",  # "ceci" in fra
         "DH IH S",  # "this" in eng
         "HH EH Y",  # "Hej" in dan
         "D G IY T UW P IY D",  # pre-g2p'd OOV
     )
     for entry in expected_entries:
         self.assertIn(entry, dict_contents)

예제 #4

0

파일 보기

파일: test_force_align.py 프로젝트: littell/ReadAlong-Studio

    def testAlignText(self):
        """Convert the plain-text ej-fra sample to TEI, align it, and compare word IDs."""
        data_dir = self.data_dir
        text_path = os.path.join(data_dir, "ej-fra.txt")
        audio_path = os.path.join(data_dir, "ej-fra.m4a")
        # NOTE(review): the returned handle is deliberately kept bound for the
        # whole test -- presumably the generated file only lives as long as
        # the handle does; confirm against create_input_tei.
        tei_handle, tei_path = create_input_tei(
            input_file_name=text_path, text_language="fra", save_temps=None
        )
        results = align_audio(tei_path, audio_path, unit="w", save_temps=None)

        # The aligned words must carry the same IDs, in the same order, as the
        # <w> elements of the reference converted file.
        reference_root = etree.parse(
            os.path.join(data_dir, "ej-fra-converted.xml")
        ).getroot()
        aligned_words = results["words"]
        reference_words = reference_root.xpath(".//w")
        self.assertEqual(len(aligned_words), len(reference_words))
        for aligned, reference in zip(aligned_words, reference_words):
            self.assertEqual(reference.attrib["id"], aligned["id"])

예제 #5

0

파일 보기

파일: test_force_align.py 프로젝트: ReadAlongs/Studio

    def test_align_text(self):
        """Basic alignment test case with plain text input"""
        data_dir = self.data_dir
        text_path = os.path.join(data_dir, "ej-fra.txt")
        audio_path = os.path.join(data_dir, "ej-fra.m4a")
        # NOTE(review): the returned handle is deliberately kept bound for the
        # whole test -- presumably the generated file only lives as long as
        # the handle does; confirm against create_input_tei.
        tei_handle, tei_path = create_input_tei(
            input_file_name=text_path, text_languages=("fra",), save_temps=None
        )
        results = align_audio(tei_path, audio_path, unit="w", save_temps=None)

        # The aligned words must carry the same IDs, in the same order, as the
        # <w> elements of the reference converted file.
        reference_root = etree.parse(
            os.path.join(data_dir, "ej-fra-converted.xml")
        ).getroot()
        aligned_words = results["words"]
        reference_words = reference_root.xpath(".//w")
        self.assertEqual(len(aligned_words), len(reference_words))
        for aligned, reference in zip(aligned_words, reference_words):
            self.assertEqual(reference.attrib["id"], aligned["id"])

예제 #6

0

파일 보기

    def test_anchors_inner_only(self):
        """Test aligning with anchors only between existing text"""
        # ej-fra-anchors has anchors between words/sentences only
        xml_path = os.path.join(self.data_dir, "ej-fra-anchors.xml")
        audio_path = os.path.join(self.data_dir, "ej-fra.m4a")
        aligned = align_audio(xml_path, audio_path)["words"]

        # The input text file has 99 words, so should the aligned segments.
        self.assertEqual(len(aligned), 99)

        # Each row describes one anchor: the word just before it must end by
        # `latest_end`, and the word just after it must start at or after
        # `earliest_start`.
        anchor_boundaries = (
            # (index before, latest_end, index after, earliest_start)
            (0, 1.62, 1, 1.62),
            (8, 3.81, 9, 3.82),
            (21, 6.74, 22, 6.74),
        )
        for before, latest_end, after, earliest_start in anchor_boundaries:
            self.assertLessEqual(aligned[before]["end"], latest_end)
            self.assertGreaterEqual(aligned[after]["start"], earliest_start)

예제 #7

0

파일 보기

    def test_anchors_outer_too(self):
        """Test aligning with anchors before the first and after the last word too"""
        # ej-fra-anchors2 also has anchors before the first word and after the last word
        temps_prefix = os.path.join(self.tempdir, "anchors2-temps")
        xml_path = os.path.join(self.data_dir, "ej-fra-anchors2.xml")
        audio_path = os.path.join(self.data_dir, "ej-fra.m4a")
        aligned = align_audio(xml_path, audio_path, save_temps=temps_prefix)["words"]

        # The input text file has 99 words, so should the aligned segments.
        self.assertEqual(len(aligned), 99)

        # Aligned segments must stay on the right side of their anchors,
        # including the initial and final ones inserted into anchors2.xml.
        at_least = self.assertGreaterEqual
        at_most = self.assertLessEqual
        boundary_checks = (
            # (assertion, word index, timestamp key, bound)
            (at_least, 0, "start", 0.5),
            (at_most, 0, "end", 1.2),
            (at_least, 1, "start", 1.2),
            (at_most, 8, "end", 3.6),
            (at_least, 9, "start", 3.9),
            (at_most, 21, "end", 7.0),
            (at_least, 22, "start", 7.0),
            (at_most, -1, "end", 33.2),
        )
        for check, index, key, bound in boundary_checks:
            check(aligned[index][key], bound)

        # The audio segment temp files must have been written and be non-empty.
        for suffix in ("", ".2", ".3", ".4"):
            wav_segment = temps_prefix + ".wav" + suffix
            self.assertTrue(
                os.path.exists(wav_segment), f"{wav_segment} should exist"
            )
            segment_size = os.path.getsize(wav_segment)
            self.assertGreater(
                segment_size, 0, f"{wav_segment} should not be empty"
            )

예제 #8

0

파일 보기

def align(**kwargs):
    """Align TEXTFILE and AUDIOFILE and create output files as OUTPUT_BASE.* in directory
    OUTPUT_BASE/.

    TEXTFILE:    Input text file path (in XML, or plain text with -i)

    AUDIOFILE:   Input audio file path, in any format supported by ffmpeg

    OUTPUT_BASE: Base name for output files
    """
    # NOTE(review): the docstring above is presumably rendered as the CLI help
    # text (the click decorators are outside this block), so it is left
    # untouched; implementation notes live in comments instead.
    #
    # kwargs read here: config, output_base, force_overwrite, save_temps,
    # debug, text_input, language, textfile, audiofile, output_xhtml, unit,
    # bare, text_grid, closed_captioning.
    #
    # Raises click.BadParameter / click.UsageError for invalid options or
    # unusable output locations, and exits with status 1 when align_audio
    # raises RuntimeError.

    # --- Optional JSON configuration ---------------------------------------
    config = None
    config_file = kwargs.get("config", None)
    if config_file:
        if not config_file.endswith("json"):
            raise click.BadParameter(
                f"Config file '{config_file}' must be in JSON format"
            )
        try:
            with open(config_file, encoding="utf8") as f:
                config = json.load(f)
        except json.decoder.JSONDecodeError as e:
            # Fail fast: merely logging here would let the file *path* be
            # passed to align_audio in place of the parsed configuration.
            raise click.BadParameter(
                f"Config file at {config_file} is not valid json."
            ) from e

    # --- Output directory --------------------------------------------------
    output_dir = kwargs["output_base"]
    if os.path.exists(output_dir):
        if not os.path.isdir(output_dir):
            raise click.UsageError(
                f"Output folder '{output_dir}' already exists but is a not a directory."
            )
        if not kwargs["force_overwrite"]:
            raise click.UsageError(
                f"Output folder '{output_dir}' already exists, use -f to overwrite."
            )
    else:
        os.mkdir(output_dir)

    # Make sure we can write to the output directory, for early error checking and user
    # friendly error messages.
    try:
        with TemporaryFile(dir=output_dir):
            pass
    except Exception as e:
        raise click.UsageError(
            f"Cannot write into output folder '{output_dir}'. Please verify permissions."
        ) from e

    # All outputs are named <output_dir>/<basename of output_dir>.<ext>.
    output_basename = os.path.basename(output_dir)
    output_base = os.path.join(output_dir, output_basename)

    # Temporary files, when requested, go under <output_dir>/tempfiles/.
    temp_base = None
    if kwargs["save_temps"]:
        temp_dir = os.path.join(output_dir, "tempfiles")
        if not os.path.isdir(temp_dir):
            # A non-directory entry in the way is removed only with -f.
            if os.path.exists(temp_dir) and kwargs["force_overwrite"]:
                os.unlink(temp_dir)
            os.mkdir(temp_dir)
        temp_base = os.path.join(temp_dir, output_basename)

    if kwargs["debug"]:
        LOGGER.setLevel("DEBUG")

    # --- Plain text input is first converted to XML ------------------------
    if kwargs["text_input"]:
        if not kwargs["language"]:
            LOGGER.warning("No input language provided, using undetermined mapping")
        # NOTE(review): the returned handle is kept bound under its own name
        # for the rest of the function -- presumably the generated file only
        # lives as long as the handle does; confirm against create_input_tei.
        tei_handle, kwargs["textfile"] = create_input_tei(
            input_file_name=kwargs["textfile"],
            text_language=kwargs["language"],
            save_temps=temp_base,
        )

    # --- Output paths, refusing to clobber existing files without -f -------
    if kwargs["output_xhtml"]:
        tokenized_xml_path = f"{output_base}.xhtml"
    else:
        input_ext = os.path.splitext(kwargs["textfile"])[1]
        tokenized_xml_path = f"{output_base}{input_ext}"
    smil_path = output_base + ".smil"
    audio_ext = os.path.splitext(kwargs["audiofile"])[1]
    audio_path = output_base + audio_ext
    for planned_output in (tokenized_xml_path, smil_path, audio_path):
        if os.path.exists(planned_output) and not kwargs["force_overwrite"]:
            raise click.BadParameter(
                f"Output file {planned_output} exists already, use -f to overwrite."
            )

    # --- Alignment ---------------------------------------------------------
    # The key may be present with a None value, so .get()'s default alone is
    # not enough to guarantee a usable unit.
    unit = kwargs.get("unit") or "w"
    bare = kwargs.get("bare", False)
    try:
        results = align_audio(
            kwargs["textfile"],
            kwargs["audiofile"],
            unit=unit,
            bare=bare,
            config=config,
            save_temps=temp_base,
        )
    except RuntimeError as e:
        LOGGER.error(e)
        # Same exit status as before, without relying on the `exit` helper
        # that the `site` module injects into builtins.
        raise SystemExit(1) from e

    # --- Optional extra output formats -------------------------------------
    if kwargs["text_grid"]:
        audio = read_audio_from_file(kwargs["audiofile"])
        duration = audio.frame_count() / audio.frame_rate
        words, sentences = return_words_and_sentences(results)
        textgrid = write_to_text_grid(words, sentences, duration)
        textgrid.to_file(output_base + ".TextGrid")
        textgrid.to_eaf().to_file(output_base + ".eaf")

    if kwargs["closed_captioning"]:
        words, sentences = return_words_and_sentences(results)
        webvtt_sentences = write_to_subtitles(sentences)
        webvtt_sentences.save(output_base + "_sentences.vtt")
        webvtt_sentences.save_as_srt(output_base + "_sentences.srt")
        webvtt_words = write_to_subtitles(words)
        webvtt_words.save(output_base + "_words.vtt")
        webvtt_words.save_as_srt(output_base + "_words.srt")

    if kwargs["output_xhtml"]:
        convert_to_xhtml(results["tokenized"])

    # --- Main outputs: index page, tokenized text, audio copy, SMIL --------
    tokenized_name = os.path.basename(tokenized_xml_path)
    audio_name = os.path.basename(audio_path)
    save_minimal_index_html(
        os.path.join(output_dir, "index.html"),
        tokenized_name,
        os.path.basename(smil_path),
        audio_name,
    )

    save_xml(tokenized_xml_path, results["tokenized"])
    smil = make_smil(tokenized_name, audio_name, results)
    shutil.copy(kwargs["audiofile"], audio_path)
    save_txt(smil_path, smil)

예제 #9

0

파일 보기

파일: cli.py 프로젝트: ReadAlongs/Studio

def align(**kwargs):
    """Align TEXTFILE and AUDIOFILE and create output files as OUTPUT_BASE.* in directory
    OUTPUT_BASE/.

    TEXTFILE:    Input text file path (in XML or plain text)

    \b
    If TEXTFILE has a .xml extension or starts with an XML declaration line,
    it is parsed as XML and can be in one of three formats:
     - the output of 'readalongs prepare',
     - the output of 'readalongs tokenize', or
     - the output of 'readalongs g2p'.

    \b
    If TEXTFILE has a .txt extension or does not start with an XML declaration
    line, is it read as plain text with the following conventions:
     - The text should be plain UTF-8 text without any markup.
     - Paragraph breaks are indicated by inserting one blank line.
     - Page breaks are indicated by inserting two blank lines.

    One can add the known ARPABET phonetics in the XML for words (<w> elements)
    that are not correctly handled by g2p in the output of 'readalongs tokenize'
    or 'readalongs g2p', via the ARPABET attribute.

    One can add anchor elements in the XML, e.g., '<anchor time="2.345s"/>', to
    mark known anchor points between the audio and text stream.

    AUDIOFILE:   Input audio file path, in any format supported by ffmpeg

    OUTPUT_BASE: Output files will be saved as OUTPUT_BASE/OUTPUT_BASE.*
    """
    # NOTE(review): the docstring above is presumably rendered as the CLI help
    # text (the click decorators are outside this block), so it is left
    # untouched; implementation notes live in comments instead.
    #
    # kwargs read here: config, output_base, force_overwrite, g2p_fallback,
    # save_temps, debug, text_input, textfile, language, lang_no_append_und,
    # bare, audiofile, g2p_verbose, output_formats.
    #
    # Raises click.BadParameter / click.UsageError for invalid or obsolete
    # options, unusable output locations, and RuntimeErrors reported by
    # create_input_tei or align_audio.

    # --- Optional JSON configuration ---------------------------------------
    config_file = kwargs.get("config", None)
    config = None
    if config_file:
        if not config_file.endswith("json"):
            raise click.BadParameter(
                f"Config file '{config_file}' must be in JSON format")
        try:
            with open(config_file, encoding="utf8") as f:
                config = json.load(f)
        except json.decoder.JSONDecodeError as e:
            raise click.BadParameter(
                f"Config file at {config_file} is not in valid JSON format."
            ) from e

    # --- Output directory --------------------------------------------------
    output_dir = kwargs["output_base"]
    if os.path.exists(output_dir):
        if not os.path.isdir(output_dir):
            raise click.UsageError(
                f"Output folder '{output_dir}' already exists but is a not a directory."
            )
        if not kwargs["force_overwrite"]:
            raise click.UsageError(
                f"Output folder '{output_dir}' already exists, use -f to overwrite."
            )
    else:
        os.mkdir(output_dir)

    # Make sure we can write to the output directory, for early error checking and user
    # friendly error messages.
    try:
        with TemporaryFile(dir=output_dir):
            pass
    except Exception as e:
        raise click.UsageError(
            f"Cannot write into output folder '{output_dir}'. Please verify permissions."
        ) from e

    if kwargs["g2p_fallback"] is not None:
        raise click.BadParameter(
            "The --g2p-fallback option is obsolete.\n"
            "Specify multiple languages with the -l/--language option instead,\n"
            "or by adding the 'fallback-langs' attribute where relevant in your XML input."
        )

    # Temporary files, when requested, go under <output_dir>/tempfiles/.
    output_basename = os.path.basename(output_dir)
    temp_base = None
    if kwargs["save_temps"]:
        temp_dir = os.path.join(output_dir, "tempfiles")
        if not os.path.isdir(temp_dir):
            # A non-directory entry in the way is removed only with -f.
            if os.path.exists(temp_dir) and kwargs["force_overwrite"]:
                os.unlink(temp_dir)
            os.mkdir(temp_dir)
        temp_base = os.path.join(temp_dir, output_basename)

    if kwargs["debug"]:
        LOGGER.setLevel("DEBUG")

    if kwargs["text_input"] is not None:
        raise click.BadParameter(
            "The -i option is obsolete. .txt files are now read as plain text, "
            ".xml as XML, and other files based on whether they start with <?xml or not."
        )

    # --- Determine if the file is plain text or XML ------------------------
    textfile_name = kwargs["textfile"]
    if textfile_name.endswith(".xml"):
        textfile_is_plaintext = False  # .xml is XML
    elif textfile_name.endswith(".txt"):
        textfile_is_plaintext = True  # .txt is plain text
    else:
        # Files other than .xml or .txt are parsed using etree. If the parse is
        # successful or the first syntax error is past the first line, the file
        # is assumed to be XML. Plain text files will yield an error in the
        # first few characters of line 1, typically complaining about not
        # finding "<" at the start.
        # There are many valid "magic numbers" for XML files, depending on
        # their encoding (utf8, utf16, endianness, etc). If we looked for
        # "<?xml " at the beginning, that would only catch some of the valid
        # XML encodings that etree can parse.
        # We could also use python-magic or filetype, but why introduce another
        # dependency when we can ask the library we're already using!?
        try:
            etree.parse(textfile_name)
            textfile_is_plaintext = False
        except etree.XMLSyntaxError as e:
            # (line, column) of the first error; at or before column 10 of
            # line 1 is treated as "not XML at all".
            textfile_is_plaintext = e.position <= (1, 10)

    # --- Plain text input is first converted to XML ------------------------
    if textfile_is_plaintext:
        if not kwargs["language"]:
            raise click.BadParameter(
                "No input language specified for plain text input. "
                "Please provide the -l/--language switch.")
        # Work on a copy: appending to the option value itself would mutate
        # the caller's object and fails outright if it is a tuple.
        languages = list(kwargs["language"])
        if not kwargs["lang_no_append_und"] and "und" not in languages:
            languages.append("und")
        plain_textfile = kwargs["textfile"]
        try:
            # NOTE(review): the returned handle is kept bound under its own
            # name for the rest of the function -- presumably the generated
            # file only lives as long as the handle does; confirm against
            # create_input_tei.
            tei_handle, xml_textfile = create_input_tei(
                input_file_name=plain_textfile,
                text_languages=languages,
                save_temps=temp_base,
            )
        except RuntimeError as e:
            # click exceptions take a message string, not an exception object.
            raise click.UsageError(str(e)) from e
    else:
        xml_textfile = kwargs["textfile"]

    # --- Alignment ---------------------------------------------------------
    bare = kwargs.get("bare", False)

    try:
        results = align_audio(
            xml_textfile,
            kwargs["audiofile"],
            bare=bare,
            config=config,
            save_temps=temp_base,
            verbose_g2p_warnings=kwargs["g2p_verbose"],
        )
    except RuntimeError as e:
        raise click.UsageError(str(e)) from e

    # --- Save all requested outputs ----------------------------------------
    output_formats = kwargs["output_formats"]

    save_readalong(
        align_results=results,
        output_dir=output_dir,
        output_basename=output_basename,
        config=config,
        audiofile=kwargs["audiofile"],
        audiosegment=results["audio"],
        output_formats=output_formats,
    )