コード例 #1
0
def proscript_segments_to_textgrid(proscript,
                                   output_dir,
                                   file_prefix="",
                                   speaker_segmented=False,
                                   no_write=False):
    """Write the segments of a proscript out as Praat TextGrid file(s).

    proscript -- proscript object whose segments are exported
    output_dir -- directory where TextGrid files are written
    file_prefix -- prefix used when building new TextGrid filenames
    speaker_segmented -- if True, write one TextGrid per speaker;
        otherwise write a single TextGrid with one 'segments' tier
    no_write -- if True, only compute filenames; do not write files

    Returns the list of TextGrid file paths (written or planned).
    """
    output_files = []
    # Bug fix: ``assert (cond, msg)`` asserts a non-empty tuple, which is
    # always true. Use the proper two-argument assert form.
    assert proscript.duration > 0.0, "Proscript duration is 0"

    fix_segment_overlaps(proscript)

    if speaker_segmented:
        proscript.populate_speaker_ids()
        assert len(proscript.speaker_ids) > 0, \
            "No speaker info set on proscript"
        for speaker_index, speaker_id in enumerate(proscript.speaker_ids):
            try:
                textgrid_file = proscript.speaker_textgrid_files[speaker_index]
                known_file = True
            except (IndexError, AttributeError):
                # No file recorded for this speaker yet; derive one.
                textgrid_file = os.path.join(
                    output_dir, "%s-%s.TextGrid" % (file_prefix, speaker_id))
                known_file = False
            if not no_write:
                tg = tgio.Textgrid()
                segment_entry_list = [
                    (segment.start_time, segment.end_time, segment.transcript)
                    for segment in proscript.get_speaker_segments(speaker_id)
                ]
                segment_tier = tgio.IntervalTier('%s' % speaker_id,
                                                 segment_entry_list, 0,
                                                 proscript.duration)
                tg.addTier(segment_tier)
                saveTextGridWithTags(tg, textgrid_file)
            output_files.append(textgrid_file)
            # Bug fix: only record files that were not already tracked; the
            # original re-appended known files, growing the list every call.
            if not known_file:
                proscript.speaker_textgrid_files.append(textgrid_file)
    else:
        if proscript.textgrid_file:
            textgrid_file = proscript.textgrid_file
        else:
            textgrid_file = os.path.join(output_dir,
                                         "%s.TextGrid" % (file_prefix))
            proscript.textgrid_file = textgrid_file

        if not no_write:
            tg = tgio.Textgrid()
            segment_entry_list = [(segment.start_time, segment.end_time,
                                   segment.transcript)
                                  for segment in proscript.segment_list]
            segment_tier = tgio.IntervalTier('segments', segment_entry_list, 0,
                                             proscript.duration)

            tg.addTier(segment_tier)
            saveTextGridWithTags(tg, textgrid_file)
        output_files.append(textgrid_file)
    return output_files
コード例 #2
0
def write_intervals(wav_dir, align_dir, intervals):
    """For each audio file, write a sentence TextGrid, split the wav on it,
    and write one transcription text file per interval.

    wav_dir -- Path to the directory holding <audio_name>.wav files
    align_dir -- Path under which per-audio output folders are created
    intervals -- mapping of audio_name -> list of interval entries
    """
    for audio_name, entries in intervals.items():
        audio_align_dir = align_dir / audio_name
        audio_align_dir.mkdir(parents=True, exist_ok=True)

        grid = tgio.Textgrid()
        tier = tgio.IntervalTier("sentences", entries)
        grid.addTier(tier)

        grid_path = audio_align_dir / f"{audio_name}.TextGrid"
        grid.save(str(grid_path))
        logger.debug(f"Wrote {grid_path}")

        # Split audio
        wav_path = wav_dir / f"{audio_name}.wav"
        audio_wav_dir = audio_align_dir / "wav"
        logger.debug(f"Splitting {wav_path}")
        splitAudioOnTier(str(wav_path), str(grid_path), "sentences", str(audio_wav_dir))

        # Write transcriptions
        text_align_dir = audio_align_dir / "text"
        text_align_dir.mkdir(parents=True, exist_ok=True)

        # Bug fix: the original ``ceil(log10(len(entries)))`` raised
        # ValueError for an empty entry list. The largest index written is
        # len(entries) - 1, so its decimal width gives the same zero-padding
        # for every non-empty list.
        num_zeros = len(str(max(len(entries) - 1, 0)))
        n_format = "{0:0" + str(num_zeros) + "d}"
        for i, interval in enumerate(entries):
            n = n_format.format(i)
            text_path = text_align_dir / f"{audio_name}_{n}.txt"
            text_path.write_text(interval.label.strip())
            logger.debug(f"Wrote {text_path}")
コード例 #3
0
def alignment_to_textgrid(alignment, path):
    """
    Build a word-level Textgrid for the wav at *path* from *alignment*,
    inserting empty intervals so the tier covers the whole file with no gaps.

    alignment -- object exposing to_json() with a 'words' list, where each
        word has 'start', 'end', and 'alignedWord' keys
    path -- path of the corresponding wav file (used for total duration)

    Returns a tgio.Textgrid with a single 'word' tier.
    """
    # Total audio duration, used to pad the trailing gap.
    with contextlib.closing(wave.open(path, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)
    rearranged_words = []
    file_ons = 0
    try:
        content = json.loads(alignment.to_json())
        words = content['words']
        all_ons = words[0]['start']
        for ix, word in enumerate(words):
            word_ons = word['start']
            word_off = word['end']
            target = word['alignedWord']
            if ix == 0:
                # Empty interval from file start to the first word.
                rearranged_words.append((file_ons, all_ons, ''))
            else:
                # Fill any silence between the previous word and this one.
                # (The original also tested ``ix < len(words)``, which is
                # always true inside the loop; dropped.)
                prev_off = words[ix - 1]['end']
                if word_ons > prev_off:
                    rearranged_words.append((prev_off, word_ons, ''))
            rearranged_words.append((word_ons, word_off, target))
        if word_off < duration:
            rearranged_words.append((word_off, duration, ''))
    except Exception:
        # Best-effort fallback for a missing/empty/malformed alignment:
        # a single empty interval spanning the whole file. (Narrowed from
        # a bare ``except:`` so KeyboardInterrupt/SystemExit propagate.)
        rearranged_words = [(0, duration, '')]
    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('word', rearranged_words))
    return tg
コード例 #4
0
ファイル: utils.py プロジェクト: yhgon/mass-dataset
def create_textgrid_obj(textgrid_list):
    """Assemble a Textgrid with ORT/KAN/MAU interval tiers.

    ORT gets each element's orthographic interval, KAN its phonetic form,
    and MAU its per-phone intervals; elements without phonetic detail fall
    back to their plain interval on both KAN and MAU.
    """
    tier_names = ["ORT", "KAN", "MAU"]
    max_time = textgrid_list[-1].interval.end

    tiers = {}
    for name in tier_names:
        tier = tgio.TextgridTier(name, [], 0.0, max_time)
        tier.tierType = tgio.INTERVAL_TIER
        tiers[name] = tier

    for element in textgrid_list:
        tiers["ORT"].entryList.append(element.interval)
        try:
            kan_entry = element.phonetic
            mau_entries = element.phones_list
        except AttributeError:
            # No phonetic detail on this element; reuse the word interval.
            kan_entry = element.interval
            mau_entries = [element.interval]

        tiers["KAN"].entryList.append(kan_entry)
        tiers["MAU"].entryList.extend(mau_entries)

    textgrid_obj = tgio.Textgrid()
    for name in tier_names:
        textgrid_obj.addTier(tiers[name])

    return textgrid_obj
コード例 #5
0
def syllabifyTextgrids(tgPath, islePath):
    """Syllabify every TextGrid in *tgPath* using the ISLE dictionary.

    Output grids (words, phones, and tonic tiers) are written into a
    'syllabifiedTGs' subfolder; files already present there are skipped.
    """
    isleDict = isletool.LexicalTool(islePath)

    outputPath = join(tgPath, "syllabifiedTGs")
    utils.makeDir(outputPath)
    skipLabelList = ["<VOCNOISE>", "xx", "<SIL>", "{B_TRANS}", '{E_TRANS}']

    for fn in utils.findFiles(tgPath, filterExt=".TextGrid"):
        if os.path.exists(join(outputPath, fn)):
            continue  # already syllabified

        tg = tgio.openTextgrid(join(tgPath, fn))
        syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "words",
                                                  "phones",
                                                  skipLabelList=skipLabelList)

        outputTG = tgio.Textgrid()
        for tier in (tg.tierDict["words"],
                     tg.tierDict["phones"],
                     syllableTG.tierDict["tonic"]):
            outputTG.addTier(tier)

        outputTG.save(join(outputPath, fn))
コード例 #6
0
def ctm2tg(wavdir, outdir):
    '''Convert CTM alignment files to Praat's TextGrid format.

    Args:
    wavdir -- path to the directory containing speech wav files
    outdir -- path to output the textgrid files in
    '''
    # Bug fix: the original used Python 2 print statements, which are a
    # SyntaxError under Python 3; converted to print() calls (the trailing
    # comma of the first print meant "no newline", preserved via end=" ").
    print("Converting ctm files to Praat Textgrids...", end=" ")
    words = readCSV(os.path.join(outdir, 'wordlvl.ctm'))
    phones = readCSV(os.path.join(outdir, 'phonelvl.ctm'))
    word_dict = csv2tgdict(words)
    phone_dict = csv2tgdict(phones)
    wavscp = wavscp2dict(readCSV(os.path.join(outdir, 'wav.scp')))
    tg_dir = os.path.join(outdir, 'tg')
    if not os.path.exists(tg_dir):
        os.makedirs(tg_dir)
    for utt in wavscp:
        tg = tgio.Textgrid()
        wordTier = tgio.IntervalTier('words',
                                     word_dict[utt],
                                     0,
                                     pairedWav=wavscp[utt])
        phoneTier = tgio.IntervalTier('phones',
                                      phone_dict[utt],
                                      0,
                                      pairedWav=wavscp[utt])
        tg.addTier(wordTier)
        tg.addTier(phoneTier)
        tg.save(os.path.join(tg_dir, utt + '.TextGrid'))
    print("stored in " + tg_dir)
コード例 #7
0
def Create_textgrid(phones, out_path, raw=False):
    '''
    Create a textgrid based on the alignment.

    phones: a pd.DataFrame of an alignment file (column 0 = utterance id,
        columns 1/2 = start/duration, columns 3/4 = raw start/duration,
        last column = phone label)
    out_path: the output directory; the file is named after the
        utterance id in the first column
    raw: if True, take start/duration from columns 3 and 4 instead of 1 and 2
    '''
    # Bug fix: the original body referenced an undefined name ``sample``;
    # the DataFrame is passed in as ``phones``.
    tg = tgio.Textgrid()
    syl_tier = tgio.IntervalTier('phones', [], 0,
                                 phones.iloc[-1, 1] + phones.iloc[-1, 2])

    # Both branches differ only in which columns carry start/duration.
    start_col, dur_col = (3, 4) if raw else (1, 2)
    entries = [(phones.iloc[i, start_col],
                phones.iloc[i, start_col] + phones.iloc[i, dur_col],
                phones.iloc[i, -1])
               for i in range(len(phones))]

    syl_tier = syl_tier.new(entryList=entries)
    tg.addTier(syl_tier)
    out_path = os.path.join(out_path, phones.iloc[0, 0] + '.TextGrid')
    tg.save(out_path)
コード例 #8
0
ファイル: ctm2tg.py プロジェクト: zhuangweiji/DAE
def main():
    '''Convert CTM alignment files to Praat's TextGrid format.
    Args:
    wavscp -- path to the directory containing speech wav files
    outdir -- path to output the textgrid files in
    '''
    if len(sys.argv) < 3:
        print("Usage:%s <wavscp> <outdir> <cmt-1>...<cmt-n>\n" % (sys.argv[0]))
        exit(1)
    print("Converting ctm files to Praat Textgrids...\n")
    wavscp = sys.argv[1]
    outdir = sys.argv[2]
    wavdict = wavscp2dict(readCSV(wavscp))
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Parse every CTM file once up front; the original re-read and
    # re-parsed each CTM for every utterance.
    tier_specs = [(arg, csv2tgdict(readCSV(arg))) for arg in sys.argv[3:]]

    for utt in wavdict:
        if not os.path.isfile(wavdict[utt]):
            # Bug fix: skip missing wavs entirely; the original broke out
            # of the tier loop but still saved an empty TextGrid, and it
            # also re-copied the wav once per CTM argument.
            print("%s not exist!" % (wavdict[utt]))
            continue
        fname = os.path.basename(wavdict[utt])
        shutil.copyfile(wavdict[utt], os.path.join(outdir, fname))

        tg = tgio.Textgrid()
        for tier_name, tgdict in tier_specs:
            intervalTier = tgio.IntervalTier(tier_name,
                                             tgdict[utt],
                                             0,
                                             pairedWav=wavdict[utt])
            tg.addTier(intervalTier)
        tg.save(os.path.join(outdir, utt + '.TextGrid'))
    print("stored in %s" % (outdir))
コード例 #9
0
def outputStereoTextgrid(outputFN, duration, leftEntryList, rightEntryList,
                         leftChannelName, rightChannelName):
    """Save a two-tier textgrid, one tier per stereo channel.

    Each entry is relabeled with its chronological index. Entries that end
    past *duration* or have a non-positive length are dropped.

    Bug fix: the original sorted the callers' lists in place; ``sorted()``
    leaves the arguments untouched.
    """

    def _prepEntries(entryList):
        # Label entries with their order of occurrence.
        labeled = [(entry[0], entry[1], str(i))
                   for i, entry in enumerate(sorted(entryList))]
        # Defensive filter kept from the original ("shouldn't be necessary"):
        # drop out-of-range or zero/negative-length intervals.
        return [entry for entry in labeled
                if entry[1] <= duration and entry[0] < entry[1]]

    leftTier = tgio.IntervalTier(leftChannelName, _prepEntries(leftEntryList),
                                 0, duration)
    rightTier = tgio.IntervalTier(rightChannelName,
                                  _prepEntries(rightEntryList), 0, duration)

    outputTG = tgio.Textgrid()
    outputTG.addTier(leftTier)
    outputTG.addTier(rightTier)

    outputTG.save(outputFN)
コード例 #10
0
def convert_json_to_textgrid(wav_file_path, transcript_file_path):
    """Convert a .tlog JSON transcript (times in ms) to a TextGrid saved
    alongside the transcript file."""
    textgrid_file_path = transcript_file_path.replace(".tlog", ".TextGrid")

    with open(transcript_file_path) as json_file:
        json_data = json.load(json_file)

    # Each tlog record carries millisecond timestamps; tiers use seconds.
    textgrid_entries_list = [(float(item["start"] / 1000),
                              float(item["end"] / 1000),
                              item["transcript"])
                             for item in json_data]

    utterance_tier = tgio.IntervalTier('utterance',
                                       textgrid_entries_list,
                                       0,
                                       pairedWav=wav_file_path)
    tg = tgio.Textgrid()
    tg.addTier(utterance_tier)
    tg.save(textgrid_file_path,
            useShortForm=False,
            outputFormat='textgrid')

    print("Textgrid of transcription saved to %s" % textgrid_file_path)
コード例 #11
0
def markTranscriptForAnnotations(tgFN,
                                 tierName,
                                 outputTGFN,
                                 proportion=1 / 5.0):
    '''
    Prep a noisy silence annotation for an annotation task

    Voice activity detectors are liable to segment speech into very small
    chunks (fragments of speech and silence).  This code re-segments a
    recording into larger units that could be used in a speech
    transcription task.

    Assumes the speaker is speaking for most of the recording.
    '''
    tg = tgio.openTextgrid(tgFN)

    duration = tg.maxTimestamp
    numEntries = int(math.ceil(duration * proportion))

    # Collect silent intervals, keyed by length so a reverse sort puts the
    # longest first; skip any that touch the file's start or end.
    silences = [(stop - start, start, stop, label)
                for start, stop, label in tg.tierDict[tierName].entryList
                if label == "silent"]
    silences = [entry for entry in silences
                if entry[1] != 0 and entry[2] != duration]
    silences.sort(reverse=True)

    # The midpoints of the longest n silences become segment boundaries.
    boundaries = sorted(start + ((stop - start) / 2.0)
                        for _, start, stop, _ in silences[:numEntries])
    boundaries = [0.0] + boundaries + [duration]

    # Consecutive boundaries delimit the intervals to transcribe.
    newEntryList = [(boundaries[i], boundaries[i + 1], "%d" % i)
                    for i in range(len(boundaries) - 1)]

    outputTG = tgio.Textgrid()
    outputTG.addTier(tgio.IntervalTier("toTranscribe", newEntryList, 0,
                                       duration))

    outputTG.save(outputTGFN)
コード例 #12
0
def create_textgrid(wav_dictionary: Dict[str, str],
                    ctm_dictionary: dict,
                    output_directory: str) -> None:
    """Write one TextGrid per utterance, pairing each utterance's CTM
    entries with its wav file; files are named utterance-<index>.TextGrid."""
    for index, utterance_id in enumerate(wav_dictionary):
        wav_path = str(Path(wav_dictionary[utterance_id]))

        grid = tgio.Textgrid()
        grid.addTier(tgio.IntervalTier(name='default',
                                       entryList=ctm_dictionary[utterance_id],
                                       minT=0,
                                       pairedWav=wav_path))

        out_file = Path(output_directory, f"utterance-{index}.TextGrid")
        grid.save(str(out_file))
コード例 #13
0
ファイル: io_tests.py プロジェクト: tudou2015/praatIO
    def test_save_with_force_larger_value_as_maximum_time(self):
        """Forcing a maxTimestamp beyond the tier's own maximum should pad
        the final unlabeled interval out to that forced maximum (3.0)."""
        source = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
        expected = [[0.3, 0.4, ''], [0.4, 0.6, 'A'], [0.6, 0.8, ''],
                    [0.8, 1.0, 'E'], [1.0, 1.2, ''], [1.2, 1.3, 'I'],
                    [1.3, 3.0, '']]

        grid = tgio.Textgrid()
        grid.addTier(tgio.IntervalTier('test', source, 0.3, 2.0))

        self.assertEqual(expected, run_save(grid, maxTimestamp=3.0))
コード例 #14
0
ファイル: io_tests.py プロジェクト: tudou2015/praatIO
    def test_save_with_minimum_time_stamp(self):
        """Saving with the tier's own min/max (0.3/2.0) fills all the gaps
        between labeled entries with empty intervals."""
        source = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
        expected = [[0.3, 0.4, ''], [0.4, 0.6, 'A'], [0.6, 0.8, ''],
                    [0.8, 1.0, 'E'], [1.0, 1.2, ''], [1.2, 1.3, 'I'],
                    [1.3, 2.0, '']]

        grid = tgio.Textgrid()
        grid.addTier(tgio.IntervalTier('test', source, 0.3, 2.0))

        self.assertEqual(expected, run_save(grid))
コード例 #15
0
def outputTextgrid(outputFN, duration, entryList, tierName):
    """Save *entryList* as a single interval tier, relabeling each entry
    with its chronological index.

    Bug fix: the original called ``entryList.sort()`` and so mutated the
    caller's list; ``sorted()`` leaves the argument untouched.
    """
    newEntryList = [(entry[0], entry[1], str(i))
                    for i, entry in enumerate(sorted(entryList))]

    # Output textgrid
    tierSpeech = tgio.IntervalTier(tierName, newEntryList, 0, duration)

    tg = tgio.Textgrid()
    tg.addTier(tierSpeech)
    tg.save(outputFN)
コード例 #16
0
ファイル: io_tests.py プロジェクト: tudou2015/praatIO
    def test_save_with_force_too_large_minimum_time(self):
        """Forcing an out-of-range timestamp on save raises AssertionError."""
        # If you choose to force save to use a minTimestamp, all
        # of your entries must be higher than that minTimestamp
        # NOTE(review): the name/comment talk about minTimestamp, but the
        # call below forces maxTimestamp=1.0 (below the tier's entries,
        # which reach 1.3) — confirm which bound this is meant to exercise.
        userEntryList = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
        # NOTE(review): expectedEntryList is never asserted against in this
        # test; it appears to be left over from a sibling test.
        expectedEntryList = [[0, 0.4, ''], [0.4, 0.6, 'A'], [0.6, 0.8, ''],
                             [0.8, 1.0, 'E'], [1.0, 1.2, ''], [1.2, 1.3, 'I'],
                             [1.3, 2.0, '']]

        tier = tgio.IntervalTier('test', userEntryList, 0.3, 2.0)
        tg = tgio.Textgrid()
        tg.addTier(tier)

        self.assertRaises(AssertionError, run_save, tg, maxTimestamp=1.0)
コード例 #17
0
def convert_to_textgrid_file(text, wav_file, tg_file):
    """Save *text* as a one-interval 'utterance' tier spanning *wav_file*."""
    duration = get_wav_duration(wav_file)

    # A single entry covering the whole recording.
    utteranceTier = tgio.IntervalTier('utterance',
                                      [(0.0, duration, text)],
                                      0,
                                      pairedWav=wav_file)

    tg = tgio.Textgrid()
    tg.addTier(utteranceTier)
    tg.save(tg_file)
コード例 #18
0
ファイル: io_tests.py プロジェクト: tudou2015/praatIO
    def test_save_with_minimum_interval_length(self):
        """Intervals shorter than minimumIntervalLength get absorbed.

        The unlabeled region in front of the first entry is only 0.05s
        ([0.30, 0.35]) — below the 0.06 threshold — so the first entry is
        stretched back to cover it.  (The region starts at 0.3 rather than
        0 because the tier's minTimestamp is 0.3.)
        """
        source = [[0.35, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
        expected = [[0.3, 0.6, 'A'], [0.6, 0.8, ''], [0.8, 1.0, 'E'],
                    [1.0, 1.2, ''], [1.2, 1.3, 'I'], [1.3, 2.0, '']]

        grid = tgio.Textgrid()
        grid.addTier(tgio.IntervalTier('test', source, 0.3, 2.0))

        self.assertEqual(expected,
                         run_save(grid, minimumIntervalLength=0.06))
コード例 #19
0
def audiosplitOnTone(inputPath,
                     fn,
                     pitchPath,
                     tgPath,
                     subwavPath,
                     minPitch,
                     maxPitch,
                     toneFrequency,
                     minEventDuration,
                     praatEXE,
                     praatScriptPath,
                     forceRegen,
                     generateWavs=False):
    """Segment one audio file on occurrences of a pure tone.

    Extracts a pitch track with Praat, finds beep/speech/silence regions
    around *toneFrequency*, writes them as a three-tier TextGrid into
    *tgPath*, and (optionally) extracts the audio between tones to
    *subwavPath*.

    inputPath/fn -- directory and filename of the audio file to process
    pitchPath -- output directory for the extracted pitch/intensity track
    tgPath -- output directory for the resulting TextGrid
    subwavPath -- output directory for extracted sub-wavs (generateWavs)
    minPitch/maxPitch -- pitch-range bounds passed to Praat's extractor
    toneFrequency -- frequency (Hz) of the marker tone to split on
    minEventDuration -- minimum duration for a detected tone event
    praatEXE -- path to the Praat executable
    praatScriptPath -- NOTE(review): accepted but unused in this body —
        confirm whether callers rely on it
    forceRegen -- if True, re-extract pitch even if a cached file exists
    generateWavs -- if True, also write the audio portions between tones
    """

    utils.makeDir(pitchPath)
    utils.makeDir(tgPath)
    utils.makeDir(subwavPath)

    name = os.path.splitext(fn)[0]
    piSamplingRate = 100  # Samples per second

    # Extract pitch and find patterns in the file
    outputFN = os.path.splitext(fn)[0] + ".txt"
    sampleStep = 1 / float(piSamplingRate)
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE,
                                                 minPitch,
                                                 maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=forceRegen)
    # entry = (time, pitchVal, intVal)
    pitchList = [float(entry[1]) for entry in motherPIList]
    timeDict = split_on_tone.splitFileOnTone(pitchList, piSamplingRate,
                                             toneFrequency, minEventDuration)

    # Output result as textgrid: one tier per detected region type.
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tg = tgio.Textgrid()
    for key in ['beep', 'speech', 'silence']:
        entryList = timeDict[key]
        tier = tgio.IntervalTier(key, entryList, 0, duration)
        tg.addTier(tier)
    tg.save(join(tgPath, name + ".TextGrid"))

    # Output audio portions between tones
    if generateWavs:
        split_on_tone.extractSubwavs(timeDict, inputPath, fn, subwavPath)
コード例 #20
0
ファイル: sppas_util.py プロジェクト: toddrme2178/praatIO
def generateSingleIPUTextgrids(wavPath,
                               txtPath,
                               outputPath,
                               nameMod=None,
                               addPause=True):
    '''
    Generates a textgrid with a single IPU for each wave file

    This constitutes the first step of SPPAS, chunking a recording into
    utterances.  In the cases when there is only a single utterance, SPPAS
    sometimes makes errors (or is not configured properly by the user).  This
    script is strictly for those situations.

    If there are multiple audio files for each transcript, you can derive the
    transcript name using /nameMod/

    If there is a chance of even a slight segment of silence on the edges of
    the audio file, /addPause/ should be True.
    '''
    if nameMod is None:
        def nameMod(x):
            return x

    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    for wavName in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        transcriptName = nameMod(wavName)

        # Read the transcript paired with this recording.
        with io.open(join(txtPath, transcriptName + ".txt"), "r") as fd:
            txt = fd.read()

        # Bracket the transcript with short pauses.
        if addPause is True:
            txt = "+ %s +" % txt.lower()

        wavFN = join(wavPath, wavName + ".wav")
        dur = praatio_scripts.audioio.WavQueryObj(wavFN).getDuration()

        # One interval spanning the whole recording.
        tg = tgio.Textgrid()
        tg.addTier(tgio.IntervalTier("ipu", [(0, dur, txt)], 0, dur))
        tg.save(join(outputPath, wavName + ".TextGrid"))
コード例 #21
0
def wavFileToGrid(wavFile, outputFile):
    """Extract the pitch track of *wavFile* and store it as a 'Pitch'
    interval tier in *outputFile* (appending when the grid exists)."""
    snd = parselmouth.Sound(wavFile)
    print("Get entryList for TextGrid From {file} by pitch".format(file=wavFile))
    entryList = pitchToEntryList(snd.to_pitch())

    print("Save TextGrid to {output} ".format(output=outputFile))
    tierName = "Pitch"
    if not os.path.isfile(outputFile):
        tg = tgio.Textgrid()
    else:
        # Append to the existing grid; avoid tier-name collisions by
        # suffixing a timestamp.
        tg = tgio.openTextgrid(outputFile)
        if tierName in tg.tierDict:
            tierName += datetime.now().strftime("%m%d%Y%H%M%S")

    tg.addTier(tgio.IntervalTier(tierName, entryList, 0, pairedWav=wavFile))
    tg.save(outputFile)
コード例 #22
0
def make_textgrid(df, out_name, orig_name=None, word2phone=None):
    """Write phone and syllable tiers (plus word/break tiers when possible)
    to the TextGrid at *out_name*.

    df -- DataFrame with 'start', 'end', 'phone' columns; phones ending in
        '_X' are syllable finals, 'spn'/'sil' are noise/silence markers
    orig_name -- existing TextGrid to extend (a fresh one is made otherwise)
    word2phone -- mapping used to rebuild the word tier; only used together
        with *orig_name*

    Returns (number of matched words, number of unmatched words).
    """
    if orig_name:
        tg = tgio.openTextgrid(orig_name)
    else:
        tg = tgio.Textgrid()

    # Bug fix: these were previously assigned only inside the
    # ``orig_name and word2phone`` branch, so the breaks handling and the
    # return statement raised NameError in every other configuration.
    word_list, unmatched_words, break_list = [], [], []

    phones_list = []
    syllables_list = []
    curr_syllable = []
    initial_start = None
    for tup in df[['start', 'end', 'phone']].itertuples():
        phones_list.append((tup.start, tup.end, tup.phone))
        if tup.phone in {'spn', 'sil'}:
            syllables_list.append((tup.start, tup.end, tup.phone))
            curr_syllable = []
            initial_start = None
        elif len(tup.phone) > 2 and tup.phone[-2] == '_':  # final
            curr_syllable.append(tup.phone)
            # Bug fix: a final with no preceding initial used to read an
            # unbound ``initial_start``; fall back to the final's own start.
            start = initial_start if initial_start is not None else tup.start
            syllables_list.append((start, tup.end, ' '.join(curr_syllable)))
            curr_syllable = []
            initial_start = None
        else:  # initial
            curr_syllable.append(tup.phone)
            initial_start = tup.start

    phone_tier = tgio.IntervalTier('phone', phones_list)
    syllable_tier = tgio.IntervalTier('syllable\_phones', syllables_list)
    if orig_name and word2phone:
        ipus, xmins, xmaxs = get_ipus(tg)
        word_list, unmatched_words, break_list = make_word_list(
            syllable_tier, ipus, word2phone, out_name, xmaxs)
        word_tier = tgio.IntervalTier('word', word_list)
        tg.addTier(word_tier)

    tg.addTier(phone_tier)
    tg.addTier(syllable_tier)

    # Bug fix: a freshly created Textgrid has no 'breaks' tier at all, so
    # the original lookup raised KeyError; only replace an existing, empty
    # 'breaks' tier, and write a fresh one otherwise.
    if 'breaks' not in tg.tierDict or not tg.tierDict['breaks'].entryList:
        if 'breaks' in tg.tierDict:
            tg.removeTier('breaks')
        break_tier = tgio.PointTier('break', break_list)
        tg.addTier(break_tier)
    else:
        print(out_name, 'has break tier, did not write new one')
    os.makedirs(os.path.dirname(out_name), exist_ok=True)
    tg.save(out_name, useShortForm=False)
    print('wrote to {}, # matched: {}, # unmatched: {}'.format(
        out_name, len(word_list), len(unmatched_words)))
    return len(word_list), len(unmatched_words)
コード例 #23
0
def textgridMorphDuration(fromTGFN, toTGFN):
    '''
    A convenience function.  Morphs interval durations of one tg to another.

    This assumes the two textgrids have the same number of segments.
    '''
    sourceTG = tgio.openTextgrid(fromTGFN)
    targetTG = tgio.openTextgrid(toTGFN)

    morphedTG = tgio.Textgrid()
    for name in sourceTG.tierNameList:
        # Morph each source tier toward its same-named target tier.
        morphedTier = sourceTG.tierDict[name].morph(targetTG.tierDict[name])
        morphedTG.addTier(morphedTier)

    return morphedTG
コード例 #24
0
ファイル: fileutils.py プロジェクト: synthbot-anon/synthbot
def write_normalized_transcript(transcript: str, audio: NormalizedAudio,
                                output_path: str):
    """Save *transcript* as a single-interval 'utt' tier spanning *audio*.

    Raises:
        ValueError: if *output_path* contains spaces or *transcript*
            contains newlines/tabs (both would break downstream tooling).
    """
    # Validate with explicit exceptions rather than ``assert``: asserts
    # are stripped when Python runs with -O, silently skipping the checks.
    if ' ' in output_path:
        raise ValueError(
            'Please remove spaces from output path for {}'.format(output_path))
    if '\n' in transcript or '\t' in transcript:
        raise ValueError(
            'Please remove the newlines and tabs in transcript for [{}]'.format(
                transcript))

    if datapipes.__verbose__:
        print('writing normalized transcript to {}'.format(output_path))

    duration = audio.duration
    textgrid = tgio.Textgrid()
    utterance = tgio.IntervalTier('utt', [], 0, duration)
    utterance.insertEntry(
        tgio.Interval(start=0, end=duration, label=transcript))
    textgrid.addTier(utterance)

    textgrid.save(output_path, useShortForm=False)
コード例 #25
0
def textgridManipulateDuration(tgFN, ratioList):
    """Stretch/compress every tier of the textgrid at *tgFN* by the given
    duration ratios and return the adjusted Textgrid."""
    sourceTG = tgio.openTextgrid(tgFN)
    resultTG = tgio.Textgrid()

    for name in sourceTG.tierNameList:
        tier = sourceTG.tierDict[name]

        if isinstance(tier, tgio.IntervalTier):
            morphed = _morphIntervalTier(tier, ratioList)
        elif isinstance(tier, tgio.PointTier):
            morphed = _morphPointTier(tier, ratioList)
        else:
            morphed = None

        # Every tier must be one of the two known kinds.
        assert morphed is not None
        resultTG.addTier(morphed)

    return resultTG
コード例 #26
0
def lab2praat(file_name, praat_align_file,state_number=5):
    """
    Convert a state-level alignment lab file into a Praat TextGrid file.

    file_name -- state alignment file (*.lab); each line is
        "<start> <end> <full_label>" with times in 100ns units and the
        state index appended to the label as "[k]"
    praat_align_file -- output TextGrid path, for visualization
    state_number -- number of states per senone; when a phone's first
        state is seen, the following state_number-1 lines are summed to
        get the whole phone's duration
    """
    fid = open(file_name)
    utt_labels = fid.readlines()
    fid.close()
    current_index = 0
    label_number = len(utt_labels)
    duration_phone_list=[]
    for line in utt_labels:
        line = line.strip()
        if len(line) < 1:
            continue  # skip blank lines
        temp_list = re.split('\s+', line)
        start_time = int(temp_list[0])
        end_time = int(temp_list[1])
        # 50000 units of 100ns = one 5ms frame.
        frame_number = int((end_time - start_time) / 50000)  # all frame number of this phone
        full_label = temp_list[2]
        full_label_length = len(full_label) - 3  # remove state information [k]
        state_index = full_label[full_label_length + 1]
        state_index = int(state_index) - 1
        full_label = full_label[0:full_label_length]
        # The phone identity sits between '-' and '+' in the full context label.
        match = re.match(r"^.*?\-(.*?)\+.*?$",full_label,re.M|re.I)
        phone_identity = match.group(1)
        if state_index == 1:
            # First state of a phone: accumulate the frames of the
            # remaining state_number-1 lines to get the phone duration.
            phone_duration = frame_number
            for i in range(state_number - 1):
                line = utt_labels[current_index + i + 1].strip()
                temp_list = re.split('\s+', line)
                phone_duration += (int(temp_list[1]) - int(temp_list[0])) / 50000
            # Convert 100ns units to seconds; frames to seconds via 5ms.
            start_time = start_time/10000000.0
            end_time = start_time+phone_duration*0.005
            duration_phone_list.append((str(start_time),str(end_time),phone_identity))
        current_index+=1
    setTG = tgio.Textgrid()
    phoneTier = tgio.IntervalTier('phone', duration_phone_list)
    setTG.addTier(phoneTier)
    setTG.save(praat_align_file)
コード例 #27
0
def do_all(ELAN_name, individual_notes, note_phrases, frequencies,
           relative_notes, relative_phrases):
    """Build the combined TextGrid for *ELAN_name*: five tiers filled from
    the analysis results, plus six blank tiers for manual annotation."""

    file_textgrid = tgio.Textgrid()

    # Tiers populated from the analysis results, in display order.
    for tier_name, entries in [('Individual Notes', individual_notes),
                               ('Note Phrases', note_phrases),
                               ('Frequencies', frequencies),
                               ('Relative Notes', relative_notes),
                               ('Relative Phrases', relative_phrases)]:
        file_textgrid.addTier(tgio.IntervalTier(tier_name, entries))

    # Blank placeholder tiers for later manual annotation
    # (a single empty interval; nothing is left truly blank).
    nothing = [[0, 1, '']]
    for tier_name in ['Karim', 'Karim Translation',
                      'Anthony', 'Anthony Translation',
                      'Emile', 'Emile Translation']:
        file_textgrid.addTier(tgio.IntervalTier(tier_name, nothing))

    file_textgrid.save('textgrid_data/' + ELAN_name + '_combined.TextGrid')
コード例 #28
0
def merge_adjacent(path, fn, outputPath):
    '''
    Goes through every tier of a textgrid; combines adjacent filled intervals.

    Intervals sharing a boundary are merged and their labels joined with
    " - ".  The merged grid is saved under *outputPath* with the same name.
    '''

    assert(path != outputPath)

    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    outputTG = tgio.Textgrid()

    tg = tgio.openTextgrid(join(path, fn))
    for tierName in tg.tierNameList:
        tier = tg.tierDict[tierName]

        newEntryList = []
        # Bug fix: an empty tier made ``tier.entryList[0]`` raise
        # IndexError; empty tiers are now copied through unchanged.
        if tier.entryList:
            currentEntry = list(tier.entryList[0])
            for nextEntry in tier.entryList[1:]:
                # Is a boundary shared?
                if currentEntry[1] == nextEntry[0]:
                    currentEntry[1] = nextEntry[1]  # Old end = new end
                    currentEntry[2] += " - " + nextEntry[2]
                # If not
                else:
                    newEntryList.append(currentEntry)
                    currentEntry = list(nextEntry)

            newEntryList.append(currentEntry)

        replacementTier = tgio.IntervalTier(tierName,
                                            newEntryList,
                                            tier.minTimestamp,
                                            tier.maxTimestamp)
        outputTG.addTier(replacementTier)

    outputTG.save(join(outputPath, fn))
コード例 #29
0
def mlf2praat(mlf, praat_align_file):
    """Transform a cuprosody MLF file into a Praat Textgrid file.

    mlf format (times in 100ns units; a 5-column row whose 3rd and 5th
    fields match is a standalone segment, an initial 5-column row opens a
    syllable and the following 4-column row closes it):
        0 4400000 sil -2160.365723 sil
        4400000 5200000 I_g -504.555634 gaa
        5200000 6600000 F_aa -960.479187
        6600000 7300000 I_j -543.072876 jau
        7300000 8500000 F_au -856.253418
        ...
    """
    # Use a context manager so the file handle is always closed (the
    # original left it open).
    with open(mlf, 'r') as fid:
        lines = fid.readlines()
    duration_phone_list = []
    for line in lines:
        tmp_split = re.split(r'\s+', line.strip())
        if len(tmp_split) == 5 and tmp_split[2] == tmp_split[4]:
            # Standalone segment (e.g. sil): emit it directly.
            start_time = int(tmp_split[0])
            end_time = int(tmp_split[1])
            phone_identity = tmp_split[4]
            start_time = start_time / 10000000.0
            end_time = end_time / 10000000.0
            duration_phone_list.append((str(start_time), str(end_time), phone_identity))
        elif len(tmp_split) == 4:
            # Closing half of a syllable: the end time comes from this row;
            # start_time and phone_identity were set by the opening row.
            end_time = int(tmp_split[1])
            start_time = start_time / 10000000.0
            end_time = end_time / 10000000.0
            duration_phone_list.append((str(start_time), str(end_time), phone_identity))
        else:
            # Opening half: remember the start and identity for the closer.
            start_time = int(tmp_split[0])
            phone_identity = tmp_split[4]
    setTG = tgio.Textgrid()
    # Bug fix: removed a leftover ``pdb.set_trace()`` debugger breakpoint
    # that halted every run of this function.
    phoneTier = tgio.IntervalTier('syllable', duration_phone_list)
    setTG.addTier(phoneTier)
    setTG.save(praat_align_file)
コード例 #30
0
def lab2tg(input_filename, output_filename, wav_duration, tiername=None):
    """Convert an HTK-style label file (times in 100ns units) into a
    single-tier TextGrid spanning *wav_duration* seconds."""
    lab = []
    with open(input_filename, 'r') as fid:
        for raw_line in fid.readlines():
            start, end, label = raw_line.rstrip().split()
            # HTK times are expressed in 100-nanosecond units.
            lab.append((float(start) / 10000000.,
                        float(end) / 10000000.,
                        label))

    if not lab:
        print('Unable to convert empty lab for {0}'.format(input_filename))
        return

    if not tiername:
        tiername = 'tier_1'

    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier(tiername, lab, 0, wav_duration))
    tg.save(output_filename)