Example #1
def ctm2tg(wavdir, outdir):
    '''Convert CTM alignment files to Praat's TextGrid format.

    Args:
    wavdir -- path to the directory containing speech wav files
    outdir -- path to output the textgrid files in
    '''
    print "Converting ctm files to Praat Textgrids...",
    words = readCSV(os.path.join(outdir, 'wordlvl.ctm'))
    phones = readCSV(os.path.join(outdir, 'phonelvl.ctm'))
    word_dict = csv2tgdict(words)
    phone_dict = csv2tgdict(phones)
    wavscp = wavscp2dict(readCSV(os.path.join(outdir, 'wav.scp')))
    if not os.path.exists(os.path.join(outdir, 'tg')):
        os.makedirs(os.path.join(outdir, 'tg'))
    for utt in wavscp.keys():
        tg = tgio.Textgrid()
        wordTier = tgio.IntervalTier('words',
                                     word_dict[utt],
                                     0,
                                     pairedWav=wavscp[utt])
        phoneTier = tgio.IntervalTier('phones',
                                      phone_dict[utt],
                                      0,
                                      pairedWav=wavscp[utt])
        tg.addTier(wordTier)
        tg.addTier(phoneTier)
        tg.save(os.path.join(outdir, 'tg', utt + '.TextGrid'))
    print "stored in " + os.path.join(outdir, 'tg')
Example #2
def outputStereoTextgrid(outputFN, duration, leftEntryList, rightEntryList,
                         leftChannelName, rightChannelName):

    # Give all entries a label indicating their order of occurrence
    leftEntryList.sort()
    newLeftEntryList = [(entry[0], entry[1], str(i))
                        for i, entry in enumerate(leftEntryList)]

    rightEntryList.sort()
    newRightEntryList = [(entry[0], entry[1], str(i))
                         for i, entry in enumerate(rightEntryList)]

    # This shouldn't be necessary
    newLeftEntryList = [
        entry for entry in newLeftEntryList
        if entry[1] <= duration and entry[0] < entry[1]
    ]
    newRightEntryList = [
        entry for entry in newRightEntryList
        if entry[1] <= duration and entry[0] < entry[1]
    ]

    # Output textgrid
    leftTier = tgio.IntervalTier(leftChannelName, newLeftEntryList, 0,
                                 duration)
    rightTier = tgio.IntervalTier(rightChannelName, newRightEntryList, 0,
                                  duration)

    outputTG = tgio.Textgrid()
    outputTG.addTier(leftTier)
    outputTG.addTier(rightTier)

    outputTG.save(outputFN)
Example #3
def proscript_segments_to_textgrid(proscript,
                                   output_dir,
                                   file_prefix="",
                                   speaker_segmented=False,
                                   no_write=False):
    output_files = []
    assert proscript.duration > 0.0, "Proscript duration is 0"

    fix_segment_overlaps(proscript)

    if speaker_segmented:
        proscript.populate_speaker_ids()
        assert len(proscript.speaker_ids) > 0, "No speaker info set on proscript"
        for speaker_index, speaker_id in enumerate(proscript.speaker_ids):
            try:
                textgrid_file = proscript.speaker_textgrid_files[speaker_index]
            except Exception:
                textgrid_file = os.path.join(
                    output_dir, "%s-%s.TextGrid" % (file_prefix, speaker_id))
            if not no_write:
                tg = tgio.Textgrid()
                segment_entry_list = [
                    (segment.start_time, segment.end_time, segment.transcript)
                    for segment in proscript.get_speaker_segments(speaker_id)
                ]
                segment_tier = tgio.IntervalTier('%s' % speaker_id,
                                                 segment_entry_list, 0,
                                                 proscript.duration)
                tg.addTier(segment_tier)
                saveTextGridWithTags(tg, textgrid_file)
            output_files.append(textgrid_file)
            proscript.speaker_textgrid_files.append(textgrid_file)
    else:
        if proscript.textgrid_file:
            textgrid_file = proscript.textgrid_file
        else:
            textgrid_file = os.path.join(output_dir,
                                         "%s.TextGrid" % (file_prefix))
            proscript.textgrid_file = textgrid_file

        if not no_write:
            tg = tgio.Textgrid()
            segment_entry_list = [(segment.start_time, segment.end_time,
                                   segment.transcript)
                                  for segment in proscript.segment_list]
            segment_tier = tgio.IntervalTier('segments', segment_entry_list, 0,
                                             proscript.duration)

            tg.addTier(segment_tier)
            saveTextGridWithTags(tg, textgrid_file)
        output_files.append(textgrid_file)
    return output_files
Example #4
def Create_textgrid(sample, out_path, raw=False):
    '''
    Create a TextGrid based on the alignment
        sample: a pandas DataFrame of an alignment file
        out_path: the output path
    '''
    tg = tgio.Textgrid()
    syl_tier = tgio.IntervalTier('phones', [], 0,
                                 sample.iloc[-1, 1] + sample.iloc[-1, 2])
    entries = []

    if raw:
        for i in range(len(sample)):

            ph = (sample.iloc[i, 3], sample.iloc[i, 3] + sample.iloc[i, 4],
                  sample.iloc[i, -1])
            entries.append(ph)
    else:
        for i in range(len(sample)):
            ph = (sample.iloc[i, 1], sample.iloc[i, 1] + sample.iloc[i, 2],
                  sample.iloc[i, -1])
            entries.append(ph)

    syl_tier = syl_tier.new(entryList=entries)
    tg.addTier(syl_tier)
    out_path = os.path.join(out_path, sample.iloc[0, 0] + '.TextGrid')
    tg.save(out_path)
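A minimal usage sketch, assuming the alignment file is a whitespace-separated table that pandas can read (the file name and column layout are hypothetical):

import pandas as pd

# Hypothetical alignment file; the column order must match what Create_textgrid indexes
sample = pd.read_csv('alignment.ctm', sep=r'\s+', header=None)
Create_textgrid(sample, 'textgrids')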
Example #5
def main():
    '''Convert CTM alignment files to Praat's TextGrid format.
    Args:
    wavscp -- path to the wav.scp file mapping utterances to wav files
    outdir -- path to output the textgrid files in
    '''
    if (len(sys.argv) < 3):
        print("Usage:%s <wavscp> <outdir> <cmt-1>...<cmt-n>\n" % (sys.argv[0]))
        exit(1)
    print("Converting ctm files to Praat Textgrids...\n")
    wavscp = sys.argv[1]
    outdir = sys.argv[2]
    #absOutDir = os.path.abspath(outdir)
    wavdict = wavscp2dict(readCSV(wavscp))
    if not os.path.exists(os.path.join(outdir)):
        os.makedirs(os.path.join(outdir))

    for utt in wavdict.keys():
        tg = tgio.Textgrid()
        for num in range(3, len(sys.argv)):
            ctmcsv = readCSV(sys.argv[num])
            tgdict = csv2tgdict(ctmcsv)
            if not os.path.isfile(wavdict[utt]):
                print("%s not exist!" % (wavdict[utt]))
                break
            else:
                fpath, fname = os.path.split(wavdict[utt])
                shutil.copyfile(wavdict[utt], os.path.join(outdir, fname))
                intervalTier = tgio.IntervalTier(sys.argv[num],
                                                 tgdict[utt],
                                                 0,
                                                 pairedWav=wavdict[utt])
                tg.addTier(intervalTier)
        tg.save(os.path.join(outdir, utt + '.TextGrid'))
    print("stored in %s" % (outdir))
Example #6
def convert_json_to_textgrid(wav_file_path, transcript_file_path):

    textgrid_file_path = transcript_file_path.replace(".tlog", ".TextGrid")

    with open(transcript_file_path) as json_file:
        textgrid_entries_list = []
        json_data = json.load(json_file)
        for transcript in json_data:
            start_seconds = float(transcript["start"]) / 1000
            end_seconds = float(transcript["end"]) / 1000
            textgrid_entry = (start_seconds, end_seconds,
                              transcript["transcript"])
            textgrid_entries_list.append(textgrid_entry)

        utterance_tier = tgio.IntervalTier('utterance',
                                           textgrid_entries_list,
                                           0,
                                           pairedWav=wav_file_path)
        tg = tgio.Textgrid()
        tg.addTier(utterance_tier)
        tg.save(textgrid_file_path,
                useShortForm=False,
                outputFormat='textgrid')

        print("Textgrid of transcription saved to %s" % textgrid_file_path)
Example #7
def alignment_to_textgrid(alignment, path):
    """
    Take a filename and its associated transcription and fill in all the gaps
    """
    with contextlib.closing(wave.open(path, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)
    rearranged_words = []
    file_ons = 0
    try:
        content = json.loads(alignment.to_json())
        all_ons = content['words'][0]['start']
        for ix, word in enumerate(content['words']):
            word_ons = word['start']
            word_off = word['end']
            target = word['alignedWord']
            if (ix >= 1) and (ix < (len(content['words']))):
                prev_word = content['words'][ix - 1]
                prev_off = prev_word['end']
                if word['start'] > prev_off:
                    rearranged_words.append((prev_off, word_ons, ''))
            elif ix == 0:
                rearranged_words.append(
                    (file_ons, all_ons, ''))  # make empty first tier
            rearranged_words.append((word_ons, word_off, target))
        if word_off < duration:
            rearranged_words.append((word_off, duration, ''))
    except Exception:
        rearranged_words = [(0, duration, '')]
    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('word', rearranged_words))
    return tg
Example #8
def write_intervals(wav_dir, align_dir, intervals):
    for audio_name, entries in intervals.items():
        audio_align_dir = align_dir / audio_name
        audio_align_dir.mkdir(parents=True, exist_ok=True)

        grid = tgio.Textgrid()
        tier = tgio.IntervalTier("sentences", entries)
        grid.addTier(tier)

        grid_path = audio_align_dir / f"{audio_name}.TextGrid"
        grid.save(str(grid_path))
        logger.debug(f"Wrote {grid_path}")

        # Split audio
        wav_path = wav_dir / f"{audio_name}.wav"
        audio_wav_dir = audio_align_dir / "wav"
        logger.debug(f"Splitting {wav_path}")
        splitAudioOnTier(str(wav_path), str(grid_path), "sentences", str(audio_wav_dir))

        # Write transcriptions
        text_align_dir = audio_align_dir / "text"
        text_align_dir.mkdir(parents=True, exist_ok=True)

        num_zeros = int(math.ceil(math.log10(len(entries))))
        n_format = "{0:0" + str(num_zeros) + "d}"
        for i, interval in enumerate(entries):
            n = n_format.format(i)
            text_path = text_align_dir / f"{audio_name}_{n}.txt"
            text_path.write_text(interval.label.strip())
            logger.debug(f"Wrote {text_path}")
Example #9
def make_textgrid(df, out_name, orig_name=None, word2phone=None):
    if orig_name:
        tg = tgio.openTextgrid(orig_name)
    else:
        tg = tgio.Textgrid()
    phones_list = []
    syllables_list = []
    curr_syllable = []
    for tup in df[['start', 'end', 'phone']].itertuples():
        phones_list.append((tup.start, tup.end, tup.phone))
        if tup.phone in set(['spn', 'sil']):
            # pass
            syllables_list.append((tup.start, tup.end, tup.phone))
            curr_syllable = []
        elif len(tup.phone) > 2 and tup.phone[-2] == '_':  # final
            curr_syllable.append(tup.phone)
            syllables_list.append(
                (initial_start, tup.end, ' '.join(curr_syllable)))
            curr_syllable = []
        else:  # initial
            curr_syllable.append(tup.phone)
            initial_start = tup.start

    phone_tier = tgio.IntervalTier('phone', phones_list)
    syllable_tier = tgio.IntervalTier('syllable_phones', syllables_list)
    if orig_name and word2phone:
        ipus, xmins, xmaxs = get_ipus(tg)
        word_list, unmatched_words, break_list = make_word_list(
            syllable_tier, ipus, word2phone, out_name, xmaxs)
        word_tier = tgio.IntervalTier('word', word_list)
        tg.addTier(word_tier)

    tg.addTier(phone_tier)
    tg.addTier(syllable_tier)

    if not tg.tierDict['breaks'].entryList:
        tg.removeTier('breaks')
        break_tier = tgio.PointTier('break', break_list)
        tg.addTier(break_tier)
    else:
        print(out_name, 'has break tier, did not write new one')
    os.makedirs(os.path.dirname(out_name), exist_ok=True)
    tg.save(out_name, useShortForm=False)
    print('wrote to {}, # matched: {}, # unmatched: {}'.format(
        out_name, len(word_list), len(unmatched_words)))
    return len(word_list), len(unmatched_words)
Example #10
def spellCheckEntries(tg,
                      targetTierName,
                      newTierName,
                      checkFunction,
                      printEntries=False):
    '''
    Spell checks words in a textgrid

    Entries can contain one or more words, separated by whitespace.
    If a misspelling is found, it is noted in a special tier and optionally
    printed to the screen.

    checkFunction is user-defined.  There are robust spell check libraries
    for python like whoosh or pyenchant.  I have already written a naive
    spell checker in the pysle.praattools library.

    checkFunction: should return True if a word is spelled correctly and
                   False otherwise
    '''
    punctuationList = [
        '_',
        ',',
        "'",
        '"',
        '!',
        '?',
        '.',
        ';',
    ]

    tg = tg.new()
    tier = tg.tierDict[targetTierName]

    mispelledEntryList = []
    for startT, stopT, label in tier.entryList:

        # Remove punctuation
        for char in punctuationList:
            label = label.replace(char, "")

        wordList = label.split()
        mispelledList = []
        for word in wordList:
            if not checkFunction(word):
                mispelledList.append(word)

        if len(mispelledList) > 0:
            mispelledTxt = u", ".join(mispelledList)
            mispelledEntryList.append((startT, stopT, mispelledTxt))

            if printEntries is True:
                print((startT, stopT, mispelledTxt))

    tier = tgio.IntervalTier(newTierName, mispelledEntryList, tg.minTimestamp,
                             tg.maxTimestamp)
    tg.addTier(tier)

    return tg
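A minimal usage sketch with a toy checkFunction (the vocabulary and file names are hypothetical; any callable that returns True for correctly spelled words works):

# Hypothetical word list; a real checkFunction might wrap pyenchant instead
knownWords = {'the', 'cat', 'sat', 'on', 'mat'}

def naiveCheck(word):
    return word.lower() in knownWords

tg = tgio.openTextgrid('utterance.TextGrid')
checkedTG = spellCheckEntries(tg, 'words', 'misspellings', naiveCheck, printEntries=True)
checkedTG.save('utterance_checked.TextGrid')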
Example #11
    def test_mintimestamp_behaviour(self):
        userEntryList = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]

        # By default, the min and max timestamp values come from the entry list
        tier = tgio.IntervalTier('test', userEntryList)
        self.assertEqual(0.4, tier.minTimestamp)
        self.assertEqual(1.3, tier.maxTimestamp)

        # The user can specify the min and max timestamp
        tier = tgio.IntervalTier('test', userEntryList, 0.2, 2.0)
        self.assertEqual(0.2, tier.minTimestamp)
        self.assertEqual(2.0, tier.maxTimestamp)

        # When the user specified min/max timestamps are less/greater
        # than the min/max specified in the entry list, use the values
        # specified in the entry list
        tier = tgio.IntervalTier('test', userEntryList, 1.0, 1.1)
        self.assertEqual(0.4, tier.minTimestamp)
        self.assertEqual(1.3, tier.maxTimestamp)
Example #12
def markTranscriptForAnnotations(tgFN,
                                 tierName,
                                 outputTGFN,
                                 proportion=1 / 5.0):
    '''
    Prep a noisy silence annotation for an annotation task
    
    Voice activity detectors are liable to segment speech into very small
    chunks (fragments of speech and silence).  The point of this code is
    to segment a recording into larger units that could be used in a
    speech transcription task.
    
    Assumes the speaker is speaking for most of the recording.
    '''
    tg = tgio.openTextgrid(tgFN)

    duration = tg.maxTimestamp
    numEntries = int(math.ceil(duration * proportion))
    entryList = tg.tierDict[tierName].entryList

    # Get all silent intervals
    entryList = [(stop - start, start, stop, label)
                 for start, stop, label in entryList if label == "silent"]

    # Remove silent intervals at the start or end of the file
    entryList = [
        entry for entry in entryList if entry[1] != 0 and entry[2] != duration
    ]

    # Put longest intervals first
    entryList.sort(reverse=True)

    # Get the mid point of the longest n intervals and convert them
    # into intervals to be transcribed
    entryList = entryList[:numEntries]
    pointList = [
        start + ((stop - start) / 2.0) for _, start, stop, _ in entryList
    ]
    pointList.sort()

    pointList = [
        0.0,
    ] + pointList + [
        duration,
    ]

    newEntryList = []
    for i in range(len(pointList) - 1):
        newEntryList.append((pointList[i], pointList[i + 1], "%d" % i))

    outputTG = tgio.Textgrid()
    tier = tgio.IntervalTier("toTranscribe", newEntryList, 0, duration)
    outputTG.addTier(tier)

    outputTG.save(outputTGFN)
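A minimal usage sketch (the file and tier names are hypothetical; the chosen tier must contain intervals labelled "silent", since that is what the function filters on):

# Hypothetical files; 'silences' is a tier whose pauses are labelled "silent"
markTranscriptForAnnotations('recording_vad.TextGrid', 'silences',
                             'recording_toTranscribe.TextGrid', proportion=1 / 5.0)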
Example #13
def create_textgrid(wav_dictionary: Dict[str, str],
                    ctm_dictionary: dict,
                    output_directory: str) -> None:
    for index, utterance_id in enumerate(wav_dictionary.keys()):
        textgrid = tgio.Textgrid()
        tier = tgio.IntervalTier(name='default',
                                 entryList=ctm_dictionary[utterance_id],
                                 minT=0,
                                 pairedWav=str(Path(wav_dictionary[utterance_id])))
        textgrid.addTier(tier)
        textgrid.save(str(Path(output_directory, f"utterance-{index}.TextGrid")))
Example #14
    def test_save_with_force_larger_value_as_maximum_time(self):
        userEntryList = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
        expectedEntryList = [[0.3, 0.4, ''], [0.4, 0.6, 'A'], [0.6, 0.8, ''],
                             [0.8, 1.0, 'E'], [1.0, 1.2, ''], [1.2, 1.3, 'I'],
                             [1.3, 3.0, '']]

        tier = tgio.IntervalTier('test', userEntryList, 0.3, 2.0)
        tg = tgio.Textgrid()
        tg.addTier(tier)
        actualEntryList = run_save(tg, maxTimestamp=3.0)

        self.assertEqual(expectedEntryList, actualEntryList)
Example #15
    def test_save_with_minimum_time_stamp(self):
        userEntryList = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
        expectedEntryList = [[0.3, 0.4, ''], [0.4, 0.6, 'A'], [0.6, 0.8, ''],
                             [0.8, 1.0, 'E'], [1.0, 1.2, ''], [1.2, 1.3, 'I'],
                             [1.3, 2.0, '']]

        tier = tgio.IntervalTier('test', userEntryList, 0.3, 2.0)
        tg = tgio.Textgrid()
        tg.addTier(tier)
        actualEntryList = run_save(tg)

        self.assertEqual(expectedEntryList, actualEntryList)
Example #16
def outputTextgrid(outputFN, duration, entryList, tierName):

    # Give all entries a label indicating their order of occurrence
    entryList.sort()
    newEntryList = [(entry[0], entry[1], str(i))
                    for i, entry in enumerate(entryList)]

    # Output textgrid
    tierSpeech = tgio.IntervalTier(tierName, newEntryList, 0, duration)

    tg = tgio.Textgrid()
    tg.addTier(tierSpeech)
    tg.save(outputFN)
Example #17
    def test_save_with_force_too_large_minimum_time(self):
        # If you choose to force save to use a minTimestamp, all
        # of your entries must be higher than that minTimestamp
        userEntryList = [[0.4, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
        expectedEntryList = [[0, 0.4, ''], [0.4, 0.6, 'A'], [0.6, 0.8, ''],
                             [0.8, 1.0, 'E'], [1.0, 1.2, ''], [1.2, 1.3, 'I'],
                             [1.3, 2.0, '']]

        tier = tgio.IntervalTier('test', userEntryList, 0.3, 2.0)
        tg = tgio.Textgrid()
        tg.addTier(tier)

        self.assertRaises(AssertionError, run_save, tg, maxTimestamp=1.0)
Example #18
def convert_to_textgrid_file(text, wav_file, tg_file):
    tg = tgio.Textgrid()
    tg_entries_list = []

    tg_entry = (0.0, get_wav_duration(wav_file), text)
    tg_entries_list.append(tg_entry)

    utteranceTier = tgio.IntervalTier('utterance',
                                      tg_entries_list,
                                      0,
                                      pairedWav=wav_file)
    tg.addTier(utteranceTier)
    tg.save(tg_file)
Example #19
    def test_save_with_minimum_interval_length(self):
        # The first entry will be stretched to fill the unlabeled region in
        # front of it: [0.30, 0.35, ''] (The unlabeled region starts at 0.3
        # instead of 0 because the minTimestamp for this tg is 0.3)
        userEntryList = [[0.35, 0.6, 'A'], [0.8, 1.0, 'E'], [1.2, 1.3, 'I']]
        expectedEntryList = [[0.3, 0.6, 'A'], [0.6, 0.8, ''], [0.8, 1.0, 'E'],
                             [1.0, 1.2, ''], [1.2, 1.3, 'I'], [1.3, 2.0, '']]

        tier = tgio.IntervalTier('test', userEntryList, 0.3, 2.0)
        tg = tgio.Textgrid()
        tg.addTier(tier)
        actualEntryList = run_save(tg, minimumIntervalLength=0.06)

        self.assertEqual(expectedEntryList, actualEntryList)
Example #20
def addEpochsToTextgrids(tgPath, epochPath, outputPath):
    
    utils.makeDir(outputPath)
    
    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
        print(name)
        tg = tgio.openTextgrid(join(tgPath, name+".TextGrid"))

        entryList = utils.openCSV(epochPath, name+".txt")
        entryList = [(float(start), float(end), label) for label, start, end in entryList]
        
        tier = tgio.IntervalTier("epochs", entryList, minT=0, maxT=tg.maxTimestamp)
        
        tg.addTier(tier)
        tg.save(join(outputPath, name+".TextGrid"))
Example #21
def _xsampaToIPATier(tg, tierName):

    tier = tg.tierDict[tierName]
    entryList = []
    for start, stop, label in tier.entryList:
        try:
            label = xsampa.xs2uni(label)
        except AssertionError:
            pass
        entryList.append((start, stop, label))

    tier = tgio.IntervalTier(tierName, entryList, 0, tg.maxTimestamp)
    tg.replaceTier(tierName, tier)

    return tg
Example #22
def audiosplitOnTone(inputPath,
                     fn,
                     pitchPath,
                     tgPath,
                     subwavPath,
                     minPitch,
                     maxPitch,
                     toneFrequency,
                     minEventDuration,
                     praatEXE,
                     praatScriptPath,
                     forceRegen,
                     generateWavs=False):

    utils.makeDir(pitchPath)
    utils.makeDir(tgPath)
    utils.makeDir(subwavPath)

    name = os.path.splitext(fn)[0]
    piSamplingRate = 100  # Samples per second

    # Extract pitch and find patterns in the file
    outputFN = os.path.splitext(fn)[0] + ".txt"
    sampleStep = 1 / float(piSamplingRate)
    motherPIList = pitch_and_intensity.extractPI(join(inputPath, fn),
                                                 join(pitchPath, outputFN),
                                                 praatEXE,
                                                 minPitch,
                                                 maxPitch,
                                                 sampleStep=sampleStep,
                                                 forceRegenerate=forceRegen)
    # entry = (time, pitchVal, intVal)
    pitchList = [float(entry[1]) for entry in motherPIList]
    timeDict = split_on_tone.splitFileOnTone(pitchList, piSamplingRate,
                                             toneFrequency, minEventDuration)

    # Output result as textgrid
    duration = audio_scripts.getSoundFileDuration(join(inputPath, fn))
    tg = tgio.Textgrid()
    for key in ['beep', 'speech', 'silence']:
        entryList = timeDict[key]
        tier = tgio.IntervalTier(key, entryList, 0, duration)
        tg.addTier(tier)
    tg.save(join(tgPath, name + ".TextGrid"))

    # Output audio portions between tones
    if generateWavs:
        split_on_tone.extractSubwavs(timeDict, inputPath, fn, subwavPath)
Example #23
def generateSingleIPUTextgrids(wavPath,
                               txtPath,
                               outputPath,
                               nameMod=None,
                               addPause=True):
    '''
    Generates a textgrid with a single IPU for each wave file
    
    This constitutes the first step of SPPAS, chunking a recording into
    utterances.  In the cases when there is only a single utterance, SPPAS
    sometimes makes errors (or is not configured properly by the user).  This
    script is strictly for those situations.
    
    If there are multiple audio files for each transcript, you can derive the
    transcript name using /nameMod/
    
    If there is a chance of even a slight segment of silence on the edges of
    the audio file, /addPause/ should be True.
    '''
    if nameMod is None:
        nameMod = lambda x: x

    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    wavList = utils.findFiles(wavPath, filterExt=".wav", stripExt=True)

    for wavName in wavList:

        transcriptName = nameMod(wavName)

        # Add initial and final small pauses to each transcript
        with io.open(join(txtPath, transcriptName + ".txt"), "r") as fd:
            txt = fd.read()

        if addPause is True:
            txt = "+ %s +" % txt.lower()

        wavFN = join(wavPath, wavName + ".wav")
        dur = praatio_scripts.audioio.WavQueryObj(wavFN).getDuration()
        tg = tgio.Textgrid()
        tier = tgio.IntervalTier("ipu", [
            (0, dur, txt),
        ], 0, dur)

        tg.addTier(tier)
        tg.save(join(outputPath, wavName + ".TextGrid"))
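A minimal usage sketch (the paths are hypothetical; the nameMod lambda illustrates deriving one transcript name from several wav files, e.g. 'storyA_take1.wav' -> 'storyA.txt'):

# Hypothetical layout: wav/ holds 'storyA_take1.wav', txt/ holds 'storyA.txt'
generateSingleIPUTextgrids('wav', 'txt', 'tg_out',
                           nameMod=lambda name: name.split('_')[0],
                           addPause=True)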
Example #24
def wavFileToGrid(wavFile, outputFile):
    snd = parselmouth.Sound(wavFile)
    pitch = snd.to_pitch()
    print("Get entryList for TextGrid From {file} by pitch".format(file=wavFile))
    entryList = pitchToEntryList(pitch)

    print("Save TextGrid to {output}".format(output=outputFile))
    tierName = "Pitch"
    if os.path.isfile(outputFile):
        tg = tgio.openTextgrid(outputFile)
        if tierName in tg.tierDict:
            tierName = tierName + datetime.now().strftime("%m%d%Y%H%M%S")
    else:
        tg = tgio.Textgrid()
    wordTier = tgio.IntervalTier(tierName, entryList, 0, pairedWav=wavFile)
    tg.addTier(wordTier)
    tg.save(outputFile)
Example #25
def write_normalized_transcript(transcript: str, audio: NormalizedAudio,
                                output_path: str):
    assert ' ' not in output_path, \
        'Please remove spaces from output path for {}'.format(output_path)
    assert '\n' not in transcript and '\t' not in transcript, \
        'Please remove the newlines and tabs in transcript for [{}]'.format(
            transcript)

    if datapipes.__verbose__:
        print('writing normalized transcript to {}'.format(output_path))

    duration = audio.duration
    textgrid = tgio.Textgrid()
    utterance = tgio.IntervalTier('utt', [], 0, duration)
    utterance.insertEntry(
        tgio.Interval(start=0, end=duration, label=transcript))
    textgrid.addTier(utterance)

    textgrid.save(output_path, useShortForm=False)
Example #26
def lab2praat(file_name, praat_align_file, state_number=5):
    """
    Convert a state alignment lab file to a Praat alignment file.
    file_name should be a state alignment file ending with *.lab,
    praat_align_file should be the Praat TextGrid file for visualization,
    and state_number is the number of states per senone.
    """
    fid = open(file_name)
    utt_labels = fid.readlines()
    fid.close()
    current_index = 0
    label_number = len(utt_labels)
    duration_phone_list=[]
    for line in utt_labels:
        line = line.strip()
        if len(line) < 1:
            continue
        temp_list = re.split('\s+', line)
        start_time = int(temp_list[0])
        end_time = int(temp_list[1])
        frame_number = int((end_time - start_time) / 50000)  # all frame number of this phone
        full_label = temp_list[2]
        full_label_length = len(full_label) - 3  # remove state information [k]
        state_index = full_label[full_label_length + 1]
        state_index = int(state_index) - 1
        full_label = full_label[0:full_label_length]
        match = re.match(r"^.*?\-(.*?)\+.*?$",full_label,re.M|re.I)
        phone_identity = match.group(1)
        if state_index == 1:
            phone_duration = frame_number
            for i in range(state_number - 1):
                line = utt_labels[current_index + i + 1].strip()
                temp_list = re.split('\s+', line)
                phone_duration += (int(temp_list[1]) - int(temp_list[0])) / 50000
            start_time = start_time/10000000.0
            end_time = start_time+phone_duration*0.005
            duration_phone_list.append((str(start_time),str(end_time),phone_identity))
        current_index+=1
    setTG = tgio.Textgrid()
    # pdb.set_trace()
    phoneTier = tgio.IntervalTier('phone', duration_phone_list)
    setTG.addTier(phoneTier)
    setTG.save(praat_align_file)
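A minimal usage sketch (the file names are hypothetical; the input is expected to be an HTK-style state-alignment lab file with times in 100 ns units):

# Hypothetical files; each senone is assumed to have 5 states (the default)
lab2praat('utt0001.lab', 'utt0001.TextGrid', state_number=5)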
Example #27
def isolateMotherSpeech(path, filterGrid, outputPath):
    '''
    Removes mother speech when the child is also speaking
    '''
    
    utils.makeDir(outputPath)
    
    for fn in utils.findFiles(path, filterExt=".TextGrid"):
        
        tg = tgio.openTextgrid(join(path, fn))
        motherTier = tg.tierDict["Mother"]
        
        newEntryList = []
        for start, stop, label in motherTier.entryList:
            croppedTG = tg.crop(False, False, start, stop)
            entryList = croppedTG.tierDict[filterGrid].entryList
            
            resultList = [(start, stop, label),]
            
            for subStart, subStop, subLabel in entryList:
                
                i = 0
                while i < len(resultList):
                    tmpStart = resultList[i][0]
                    tmpEnd = resultList[i][1]
                    tmpResultList = subtractOverlap(tmpStart,
                                                    tmpEnd,
                                                    label,
                                                    subStart,
                                                    subStop)
                     # Replace if there has been a change
                    if tmpResultList != [[tmpStart, tmpEnd, label],]:
                        resultList = resultList[:i] + tmpResultList
                        i += len(tmpResultList) - 1
                    i += 1

            newEntryList.extend(resultList)

        newMotherTier = tgio.IntervalTier("Mother", newEntryList)
        tg.replaceTier("Mother", newMotherTier.entryList)
        tg.save(join(outputPath, fn))
Example #28
def merge_adjacent(path, fn, outputPath):
    '''
    Goes through every tier of a textgrid; combines adjacent filled intervals
    '''
    
    assert(path != outputPath)
    
    if not os.path.exists(outputPath):
        os.mkdir(outputPath)
    
    outputTG = tgio.Textgrid()
    
    tg = tgio.openTextgrid(join(path, fn))
    for tierName in tg.tierNameList:
        tier = tg.tierDict[tierName]
        
        newEntryList = []
        currentEntry = list(tier.entryList[0])
        for nextEntry in tier.entryList[1:]:

            # Is a boundary shared?
            if currentEntry[1] == nextEntry[0]:
                currentEntry[1] = nextEntry[1]  # Old end = new end
                currentEntry[2] += " - " + nextEntry[2]
            # If not
            else:
                newEntryList.append(currentEntry)
                currentEntry = list(nextEntry)
                
        newEntryList.append(currentEntry)
        
        replacementTier = tgio.IntervalTier(tierName,
                                            newEntryList,
                                            tier.minTimestamp,
                                            tier.maxTimestamp)
        outputTG.addTier(replacementTier)
    
    outputTG.save(join(outputPath, fn))
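A minimal usage sketch (the directory and file names are hypothetical):

# Hypothetical layout; writes the merged textgrid to textgrids_merged/session01.TextGrid
merge_adjacent('textgrids', 'session01.TextGrid', 'textgrids_merged')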
Example #29
def mlf2praat(mlf, praat_align_file):
    """
    Transform a CUProsody mlf file to a TextGrid file.

    mlf format:
        0 4400000 sil -2160.365723 sil
        4400000 5200000 I_g -504.555634 gaa
        5200000 6600000 F_aa -960.479187
        6600000 7300000 I_j -543.072876 jau
        7300000 8500000 F_au -856.253418
        8500000 8900000 I_d -320.236786 daai
        8900000 10400000 F_aai -789.435547
        10400000 11300000 I_s -523.623901 si
    """
    fid = open(mlf, 'r')
    lines = fid.readlines()
    duration_phone_list = []
    for line in lines:
        tmp_split = re.split('\s+', line.strip())
        if len(tmp_split) == 5 and tmp_split[2] == tmp_split[4]:
            start_time = int(tmp_split[0])
            end_time = int(tmp_split[1])
            phone_identity = tmp_split[4]
            start_time = start_time / 10000000.0
            end_time = end_time / 10000000.0
            duration_phone_list.append((str(start_time), str(end_time), phone_identity))
        elif len(tmp_split) == 4:
            end_time = int(tmp_split[1])
            start_time = start_time / 10000000.0
            end_time = end_time / 10000000.0
            duration_phone_list.append((str(start_time), str(end_time), phone_identity))
        else:
            start_time = int(tmp_split[0])
            phone_identity = tmp_split[4]
    setTG = tgio.Textgrid()
    # pdb.set_trace()
    phoneTier = tgio.IntervalTier('syllable', duration_phone_list)
    setTG.addTier(phoneTier)
    setTG.save(praat_align_file)
Example #30
def lab2tg(input_filename, output_filename, wav_duration, tiername=None):

    lab = []

    with open(input_filename, 'r') as fid:
        for line in fid.readlines():
            start, end, label = line.rstrip().split()
            start = float(start)/10000000.
            end   = float(end)/10000000.
            lab.append((start, end, label))

    if len(lab) <= 0:
        print('Unable to convert empty lab for {0}'.format(input_filename))
        return

    if not tiername:
        tiername = 'tier_1'

    tg = tgio.Textgrid()
    tier = tgio.IntervalTier(tiername, lab, 0, wav_duration)

    tg.addTier(tier)
    tg.save(output_filename)