Ejemplo n.º 1
0
def ctm_to_textgrid(phone_ctm, out_directory, utt2dur, frameshift=0.01):
    """Write one TextGrid with a single 'phones' tier per utterance.

    Args:
        phone_ctm: Mapping of utterance name to a list of mutable
            ``[begin, end, label]`` intervals. The intervals are modified in
            place (end-time snapping and label clean-up).
        out_directory: Directory receiving ``<utterance>.TextGrid`` files; it
            is created when missing.
        utt2dur: Passed to ``generate_utt2dur`` to obtain the
            utterance -> duration mapping.
        frameshift: An interval whose end lies within this distance of the
            utterance duration is stretched to end exactly at the duration.

    Failures while building or writing a TextGrid do not abort the run; their
    tracebacks are collected and written to ``output_errors.txt`` inside
    ``out_directory``.
    """
    textgrid_write_errors = {}
    frameshift = Decimal(str(frameshift))
    if not os.path.exists(out_directory):
        os.makedirs(out_directory)

    utt2dur_mapping = generate_utt2dur(utt2dur)
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    # Compiled once instead of once per interval.
    digits = re.compile(r"\d+")

    for k, v in sorted(phone_ctm.items()):
        maxtime = Decimal(str(utt2dur_mapping[k]))
        try:
            tg = TextGrid(maxTime=maxtime)
            phonetier = IntervalTier(name='phones', maxTime=maxtime)
            for interval in v:
                # Snap an end time that falls just short of the utterance end.
                # NOTE(review): assumes interval times are Decimal-compatible.
                if maxtime - interval[1] < frameshift:
                    interval[1] = maxtime
                # remove B E I and stress (0,1) information from phoneme
                interval[2] = digits.sub("", interval[2].split('_')[0])
                phonetier.add(*interval)
            tg.append(phonetier)
            outpath = os.path.join(out_directory, k + '.TextGrid')
            tg.write(outpath)
        except Exception:
            # Best effort: remember the traceback and continue with the next
            # utterance.
            textgrid_write_errors[k] = '\n'.join(
                traceback.format_exception(*sys.exc_info()))
    if textgrid_write_errors:
        error_log = os.path.join(out_directory, 'output_errors.txt')
        with io_open(error_log, 'w', encoding='utf-8') as f:
            f.write(
                u'The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n'
            )
            for k, v in textgrid_write_errors.items():
                f.write(u'{}:\n'.format(k))
                f.write(u'{}\n\n'.format(v))
def reorg_noncollapsed(f):
    """Rewrite a TextGrid from ``noncollapsed_dir`` into ``data_dir``.

    Every non-empty interval is widened by 0.1 on both sides (clamped to
    ``[0, maxTime]``) and its mark is normalised with ``sub_pattern``. When a
    widened interval cannot be added (``ValueError``), it is merged into the
    previously added interval instead.

    Args:
        f: File name of the TextGrid, relative to ``noncollapsed_dir``.
    """
    pad = 0.1
    print(f)
    source_path = os.path.join(noncollapsed_dir, f)
    target_path = source_path.replace(noncollapsed_dir, data_dir)

    source_tg = TextGrid()
    source_tg.read(source_path)
    total = source_tg.maxTime
    target_tg = TextGrid(maxTime=total)

    for old_tier in source_tg.tiers:
        rebuilt = IntervalTier(name=old_tier.name, maxTime=total)
        for segment in old_tier:
            label = sub_pattern.sub(' ', segment.mark).strip()
            if label:
                start = max(segment.minTime - pad, 0)
                stop = min(segment.maxTime + pad, total)
                try:
                    rebuilt.add(start, stop, label)
                except ValueError:
                    # Could not be added as-is: extend the previous interval
                    # and append this label to it.
                    previous = rebuilt[-1]
                    previous.maxTime = stop
                    previous.mark += ' ' + label
        print(len(rebuilt))
        target_tg.append(rebuilt)
    target_tg.write(target_path)
    def export_segments(self, output_directory):
        """Write the corpus VAD segments as one TextGrid per audio file.

        Each file gets a single tier named 'segments' whose intervals are
        all marked 'speech'. Segment ends beyond the wav duration are clamped
        to the duration.

        Args:
            output_directory: Root output directory. Files listed in
                ``self.corpus.file_directory_mapping`` go into the mapped
                sub-directory, all others directly into the root.
        """
        from decimal import Decimal
        from textgrid import TextGrid, IntervalTier

        tier_name = 'segments'
        label = 'speech'

        # filename -> tier name -> list of [begin, end, mark]
        per_file = {}
        for _utt, (wav_name, start, stop) in self.corpus.vad_segments.items():
            entry = [Decimal(start), Decimal(stop), label]
            per_file.setdefault(wav_name, {}).setdefault(tier_name,
                                                         []).append(entry)

        for wav_name, tiers in per_file.items():
            try:
                subdir = self.corpus.file_directory_mapping[wav_name]
            except KeyError:
                target_dir = output_directory
            else:
                target_dir = os.path.join(output_directory, subdir)
            os.makedirs(target_dir, exist_ok=True)

            duration = self.corpus.get_wav_duration(wav_name)
            grid = TextGrid(maxTime=duration)
            for name in sorted(tiers.keys()):
                tier = IntervalTier(name=name, maxTime=duration)
                for entry in tiers[name]:
                    if entry[1] > duration:
                        entry[1] = duration
                    tier.add(*entry)
                grid.append(tier)
            grid.write(os.path.join(target_dir, wav_name + '.TextGrid'))
Ejemplo n.º 4
0
 def loadOrGenerate(self):
     """Load the current file's TextGrid, or create a default one.

     When no '.TextGrid' file level is registered, a new TextGridFile
     spanning the audio duration is created with a single "text" tier and
     its path is registered. Afterwards, unless a non-empty alignment tier
     (one named in ALIGNMENT_TIER_NAMES) already exists, the frames tier is
     generated; an empty alignment tier is removed first.
     """
     existing = self.app.Data.checkFileLevel('.TextGrid', shoulderror=False)
     if not existing:
         start = 0.
         if not hasattr(self.app.Audio, 'duration'):
             self.app.Audio.reset()
         end = self.app.Audio.duration
         self.TextGrid = TextGridFile(maxTime=end)
         text_tier = IntervalTier("text")
         text_tier.add(start, end, "text")
         self.TextGrid.tiers.append(text_tier)
         new_path = self.app.Data.unrelativize(
             self.app.Data.getCurrentFilename() + '.TextGrid')
         self.app.Data.setFileLevel('.TextGrid', new_path)
     else:
         self.TextGrid = self.fromFile(existing)

     for idx, tier_name in enumerate(self.TextGrid.getNames()):
         if tier_name not in ALIGNMENT_TIER_NAMES:
             continue
         if len(self.TextGrid[idx]) != 0:
             # A populated alignment tier already exists; nothing to build.
             return
         # Drop the empty alignment tier and regenerate it below.
         self.TextGrid.pop(idx)
         break
     self.genFramesTier()
Ejemplo n.º 5
0
 def loadOrGenerate(self):
     """Load the current file's TextGrid, or create a default one.

     When no '.TextGrid' file level is registered, a new TextGridFile is
     created spanning the audio duration (1 second if the duration cannot be
     read), and its path is registered. A "text" tier is added unless the
     file has both '.ult' and '.txt' levels. Afterwards the first tier named
     in ALIGNMENT_TIER_NAMES is inspected: if it has content, its name is
     stored in ``self.frameTierName`` and nothing else happens; if it is
     empty it is removed and the frames tier is regenerated. The frames tier
     is also generated when no such tier exists.
     """
     fname = self.app.Data.checkFileLevel('.TextGrid', shoulderror=False)
     if fname:
         self.TextGrid = self.fromFile(fname)
     else:
         minTime = 0.
         if not hasattr(self.app.Audio, 'duration'):
             self.app.Audio.reset()
         try:
             maxTime = self.app.Audio.duration
         except Exception:
             # Deliberately broad (best effort), but no longer a bare
             # ``except`` that would also swallow KeyboardInterrupt and
             # SystemExit.
             warn(
                 'Audio has no duration attribute after calling reset(), defaulting to 1 second'
             )
             maxTime = 1.
         self.TextGrid = TextGridFile(maxTime=maxTime)
         keys = self.app.Data.getFileLevel('all')
         if not ('.ult' in keys and '.txt' in keys):
             sentenceTier = IntervalTier("text")
             sentenceTier.add(minTime, maxTime, "text")
             self.TextGrid.append(sentenceTier)
         fname = self.app.Data.unrelativize(
             self.app.Data.getCurrentFilename() + '.TextGrid')
         self.app.Data.setFileLevel('.TextGrid', fname)
     names = self.TextGrid.getNames()
     for i, n in enumerate(names):
         if n in ALIGNMENT_TIER_NAMES:
             if len(self.TextGrid[i]) == 0:
                 # Empty alignment tier: drop it and rebuild it below.
                 self.TextGrid.pop(i)
                 break
             else:
                 self.frameTierName = n
                 return
     self.genFramesTier()
def clean_tier(old_tier):
    """Return a new IntervalTier holding the cleaned intervals of ``old_tier``.

    The new tier keeps the old tier's name; each interval is passed through
    ``clean_interval`` before being added.
    """
    cleaned = IntervalTier(name=old_tier.name)
    for original in old_tier:
        cleaned.addInterval(clean_interval(original))
    return cleaned
Ejemplo n.º 7
0
 def setUp(self):
     """Create the fixtures shared by the tests.

     Builds an empty utterance, empty matrix/segment messages and a
     two-interval tier ("a" on 0-1, "b" on 1-2).
     """
     self.utt = utterance.Utterance()
     self.float_mat = FloatMatrix()
     self.int_mat = Int32Matrix()
     self.bool_mat = BinaryMatrix()
     self.seg = Segment()

     tier = IntervalTier('test', 0, 2)
     for begin, end, label in ((0, 1, "a"), (1, 2, "b")):
         tier.add(begin, end, label)
     self.tier = tier
Ejemplo n.º 8
0
def createTextGrid(data, tierName = "words"):
	"""Build a TextGrid with one tier of back-to-back intervals.

	Each item of ``data`` is a ``(name, time, dur, words)`` tuple; only
	``dur`` and ``words`` are used. Intervals are laid out consecutively
	starting at 0, each lasting ``dur`` and labelled with
	``makeSentence(words)``.
	"""
	grid = TextGrid()
	word_tier = IntervalTier(tierName)
	cursor = 0
	for _name, _time, dur, words in data:
		end = cursor + dur
		word_tier.add(cursor, end, makeSentence(words))
		cursor = end
	grid.append(word_tier)
	return grid
    def export_classification(self, output_directory):
        """Export speaker classification results below ``output_directory``.

        First runs clustering (``self.cluster`` truthy) or collects
        classification statistics. Then:

        * If the corpus has segments, one TextGrid per audio file is written
          with one tier per speaker (from ``utt2spk``), each interval marked
          with the utterance text. Interval ends beyond the wav duration are
          clamped to the duration.
        * Otherwise, one directory per speaker (from ``spk2utt``) is created
          containing ``utterances.txt`` with one utterance id per line.

        Args:
            output_directory: Root directory for the exported files.
        """
        if self.cluster:
            self.cluster_utterances()
        else:
            self.get_classification_stats()
        from decimal import Decimal
        from textgrid import TextGrid, IntervalTier
        spk2utt_path = os.path.join(self.classify_directory, 'spk2utt')
        utt2spk_path = os.path.join(self.classify_directory, 'utt2spk')
        if self.corpus.segments:
            utt2spk = load_scp(utt2spk_path)
            # filename -> speaker -> list of [begin, end, text]
            file_dict = {}
            for utt, segment in self.corpus.segments.items():

                filename, utt_begin, utt_end = segment.split(' ')
                utt_begin = Decimal(utt_begin)
                utt_end = Decimal(utt_end)
                if filename not in file_dict:
                    file_dict[filename] = {}
                speaker = utt2spk[utt]
                text = self.corpus.text_mapping[utt]
                if speaker not in file_dict[filename]:
                    file_dict[filename][speaker] = []
                file_dict[filename][speaker].append([utt_begin, utt_end, text])
            for filename, speaker_dict in file_dict.items():
                try:
                    speaker_directory = os.path.join(
                        output_directory,
                        self.corpus.file_directory_mapping[filename])
                except KeyError:
                    speaker_directory = output_directory
                # The target directory may not exist yet; without this the
                # write below fails for a fresh output directory.
                os.makedirs(speaker_directory, exist_ok=True)
                max_time = self.corpus.get_wav_duration(filename)
                tg = TextGrid(maxTime=max_time)
                for speaker in sorted(speaker_dict.keys()):
                    words = speaker_dict[speaker]
                    tier = IntervalTier(name=speaker, maxTime=max_time)
                    for w in words:
                        if w[1] > max_time:
                            w[1] = max_time
                        tier.add(*w)
                    tg.append(tier)
                tg.write(
                    os.path.join(speaker_directory, filename + '.TextGrid'))

        else:
            spk2utt = load_scp(spk2utt_path)
            for speaker, utts in spk2utt.items():
                speaker_dir = os.path.join(output_directory, speaker)
                os.makedirs(speaker_dir, exist_ok=True)
                with open(os.path.join(speaker_dir, 'utterances.txt'),
                          'w',
                          encoding='utf8') as f:
                    for u in utts:
                        f.write('{}\n'.format(u))
def convert_ctm_to_textgrid(ctm, textgrid):
    """Convert a CTM file into a TextGrid with 'words' and 'phonemes' tiers.

    Each non-blank CTM line is split on whitespace; field 2 is the begin
    time, field 3 the duration and field 4 the label. Lines whose first field
    starts with '@' are phonemes, all others are words. Phoneme labels
    matching ``besi`` lose their last two characters. Times are rounded to
    two decimals. Segments the tier rejects with ``ValueError`` are reported
    on stdout and skipped.

    Args:
        ctm: Path of the CTM file (read as UTF-8).
        textgrid: Destination passed to ``TextGrid.write``.
    """
    words = []
    phonemes = []
    with open(ctm, encoding='utf-8') as f:
        for line in f:
            tok = line.split()
            if not tok:
                # Blank lines carry no segment; they used to crash on tok[4].
                continue
            text = tok[4]
            beg = float(tok[2])
            dur = float(tok[3])
            if tok[0][0] == '@':
                if besi.match(text):
                    text = text[:-2]
                phonemes.append((text, beg, dur))
            else:
                words.append((text, beg, dur))

    tg = TextGrid()
    for tier_name, kind, segments in (('words', 'word', words),
                                      ('phonemes', 'phoneme', phonemes)):
        tier = IntervalTier(name=tier_name)
        for text, beg, dur in segments:
            try:
                tier.add(round(beg, 2), round(beg + dur, 2), text)
            except ValueError:
                print("Error in " + kind + " seg: " + text)
        tg.append(tier)
    tg.write(textgrid)
Ejemplo n.º 11
0
def saveSyllableInTextGrid(sylIntervals, gridobj, f):
    """Append a 'Syllable' tier built from ``sylIntervals`` and write the grid.

    The tier spans from the first interval's ``minTime`` to the last
    interval's ``maxTime``. Nothing is appended or written when
    ``sylIntervals`` is empty.

    Args:
        sylIntervals: Sequence of interval objects to store.
        gridobj: TextGrid-like object receiving the tier.
        f: Destination passed to ``gridobj.write``.
    """
    if not len(sylIntervals):
        return  # no interval to save

    first, last = sylIntervals[0], sylIntervals[-1]
    tier = IntervalTier('Syllable', first.minTime, last.maxTime)
    for syllable in sylIntervals:
        tier.addInterval(syllable)

    gridobj.append(tier)
    gridobj.write(f)
Ejemplo n.º 12
0
def test_tier_duplication():
    """Run ``check()`` on a document built from a grid with one tier "A"."""
    error_log.flush()
    grid = TextGrid()
    grid.tiers = [IntervalTier("A", minTime=0, maxTime=10)]
    document = SingleAnnotatorTextGrid.from_textgrid(grid, [], None)
    document.check()
Ejemplo n.º 13
0
def generator_textgrid(maxtime, lines, output):
    """Build a single-tier TextGrid from "start end mark" lines and write it.

    Args:
        maxtime: Maximum time of the TextGrid and of its "line" tier.
        lines: Iterable of strings, each holding exactly three
            whitespace-separated fields: start time, end time and mark.
        output: Destination passed to ``TextGrid.write``.

    Raises:
        ValueError: If a line does not split into exactly three fields or a
            time field is not a number.
    """
    # Download Praat: https://www.fon.hum.uva.nl/praat/
    # Small offset added to every start time.
    # NOTE(review): presumably keeps adjacent intervals from touching - confirm.
    margin = 0.0001

    tg = TextGrid(maxTime=maxtime)
    linetier = IntervalTier(name="line", maxTime=maxtime)

    for line in lines:
        start, end, mark = line.split()
        linetier.add(minTime=float(start) + margin,
                     maxTime=float(end),
                     mark=mark)

    tg.append(linetier)
    tg.write(output)
    # Report success only after the file has actually been written.
    print("successfully generator {}".format(output))
Ejemplo n.º 14
0
def createNew(textgrid, tier_name, VERBOSE=False):
	"""Return a new TextGrid holding a cleaned copy of one tier.

	The first tier named ``tier_name`` is copied into a tier called
	``tier_name + '_clean'``: intervals for which ``isPause`` reports True
	get an empty mark, all others get ``fixString(mark)``. Interval times
	are kept unchanged.

	Args:
		textgrid: Source TextGrid.
		tier_name: Name of the tier to clean.
		VERBOSE: When equal to True, print the old and the new tier.
	"""
	source_tier = textgrid.getList(tier_name)[0]
	cleaned_tier = IntervalTier(tier_name+'_clean')
	result = TextGrid()
	if VERBOSE == True:
		print ("Old tier: %s" % source_tier)
	for segment in source_tier:
		if isPause(segment.mark) == True:
			label = ''
		else:
			label = fixString(segment.mark)
		cleaned_tier.add(segment.minTime, segment.maxTime, label)
	result.append(cleaned_tier)
	if VERBOSE == True:
		print ("New tier: %s" % cleaned_tier)
	return result
Ejemplo n.º 15
0
def read_tg_from_str(tg_str, round_digits=DEFAULT_TEXTGRID_PRECISION):
    """
    Read the tiers contained in the Praat-formatted string tg_str into a
    TextGrid object.
    Times are rounded to the specified precision.

    Both the long and the short text format are handled; which one applies
    is reported by ``parse_header``. Interval tiers keep only intervals with
    ``min < max``. Any tier that is not an "IntervalTier" is read as a point
    tier.

    Adapted from TextGrid.read()

    Raises:
        ValueError: If the header does not declare a "TextGrid".
    """
    source = StringIO(tg_str)
    tg = TextGrid()

    file_type, short = parse_header(source)
    if file_type != "TextGrid":
        raise ValueError("The file could not be parsed as a TextGrid as it is "
                         "lacking a proper header.")
    tg.minTime = parse_line(source.readline(), short, round_digits)
    tg.maxTime = parse_line(source.readline(), short, round_digits)
    source.readline()  # More header junk
    if short:
        m = int(source.readline().strip())  # Will be tg.n
    else:
        m = int(source.readline().strip().split()[2])  # Will be tg.n
        source.readline()  # Long format only: one more header line
    for _ in range(m):  # Loop over grids
        if not short:
            source.readline()  # Long format only: per-tier header line
        if parse_line(source.readline(), short,
                      round_digits) == "IntervalTier":
            inam = parse_line(source.readline(), short, round_digits)
            imin = parse_line(source.readline(), short, round_digits)
            imax = parse_line(source.readline(), short, round_digits)
            itie = IntervalTier(inam, imin, imax)
            itie.strict = tg.strict
            n = int(parse_line(source.readline(), short, round_digits))
            for _ in range(n):
                if not short:
                    source.readline()  # Long format only: per-interval header
                jmin = parse_line(source.readline(), short, round_digits)
                jmax = parse_line(source.readline(), short, round_digits)
                jmrk = get_mark(source, short)
                if jmin < jmax:  # Non-null
                    itie.addInterval(Interval(jmin, jmax, jmrk))
            tg.append(itie)
        else:  # PointTier
            inam = parse_line(source.readline(), short, round_digits)
            # The tier bounds must be consumed even though PointTier is
            # created from the name alone.
            imin = parse_line(source.readline(), short, round_digits)
            imax = parse_line(source.readline(), short, round_digits)
            itie = PointTier(inam)
            n = int(parse_line(source.readline(), short, round_digits))
            for _ in range(n):
                if not short:
                    # Only the long format has a per-point header line.
                    # Skipping a line unconditionally swallowed the point's
                    # time in short-format input.
                    source.readline()
                jtim = parse_line(source.readline(), short, round_digits)
                jmrk = get_mark(source, short)
                itie.addPoint(Point(jtim, jmrk))
            tg.append(itie)
    return tg
Ejemplo n.º 16
0
def ctm_to_textgrid(word_ctm,
                    phone_ctm,
                    out_directory,
                    corpus,
                    dictionary,
                    frameshift=0.01):
    """Write word and phone alignments as one TextGrid per audio file.

    For every file in ``word_ctm`` a TextGrid is written with two tiers per
    speaker, named '<speaker> - words' and '<speaker> - phones'. Consecutive
    silence phones are merged into one interval, and overlaps between a
    silence and its neighbour are resolved by trimming the silence.

    Args:
        word_ctm: filename -> speaker -> iterable of (begin, end, word).
        phone_ctm: filename -> speaker -> iterable of (begin, end, phone).
        out_directory: Root output directory (created if missing). Each file
            is written into the sub-directory given by
            ``corpus.file_directory_mapping``.
        corpus: Provides ``get_wav_duration``, ``file_directory_mapping`` and
            ``speaker_ordering``.
        dictionary: Provides ``optional_silence`` and ``nonoptional_silence``.
        frameshift: Accepted for backward compatibility; not used here.

    Failures for individual files do not abort the run; their tracebacks are
    written to ``output_errors.txt`` inside ``out_directory``.
    """
    textgrid_write_errors = {}
    os.makedirs(out_directory, exist_ok=True)

    silences = {dictionary.optional_silence, dictionary.nonoptional_silence}
    for filename, speaker_dict in sorted(word_ctm.items()):
        maxtime = corpus.get_wav_duration(filename)
        try:
            speaker_directory = os.path.join(
                out_directory, corpus.file_directory_mapping[filename])
            # The per-file sub-directory may not exist yet; without this
            # every write into a fresh sub-directory ends up in the error log.
            os.makedirs(speaker_directory, exist_ok=True)
            tg = TextGrid(maxTime=maxtime)
            for speaker in corpus.speaker_ordering[filename]:
                words = speaker_dict[speaker]
                word_tier_name = '{} - words'.format(speaker)
                phone_tier_name = '{} - phones'.format(speaker)
                word_tier = IntervalTier(name=word_tier_name, maxTime=maxtime)
                phone_tier = IntervalTier(name=phone_tier_name,
                                          maxTime=maxtime)
                for w in words:
                    word_tier.add(*w)
                for p in phone_ctm[filename][speaker]:
                    if len(phone_tier) > 0:
                        last = phone_tier[-1]
                        if last.mark in silences and p[2] in silences:
                            # Two silences in a row: extend the previous one.
                            last.maxTime = p[1]
                            continue
                        if p[2] in silences and p[0] < last.maxTime:
                            # Silence overlapping the previous phone: start
                            # it where that phone ends.
                            p = last.maxTime, p[1], p[2]
                        elif (p[2] not in silences and p[0] < last.maxTime
                              and last.mark in silences):
                            # Phone overlapping a preceding silence: cut the
                            # silence short.
                            last.maxTime = p[0]
                    phone_tier.add(*p)
                tg.append(word_tier)
                tg.append(phone_tier)
            tg.write(os.path.join(speaker_directory, filename + '.TextGrid'))
        except Exception:
            # Best effort: remember the traceback and continue.
            textgrid_write_errors[filename] = '\n'.join(
                traceback.format_exception(*sys.exc_info()))
    if textgrid_write_errors:
        error_log = os.path.join(out_directory, 'output_errors.txt')
        with open(error_log, 'w', encoding='utf8') as f:
            f.write(
                'The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n'
            )
            for k, v in textgrid_write_errors.items():
                f.write('{}:\n'.format(k))
                f.write('{}\n\n'.format(v))
Ejemplo n.º 17
0
    def gen_template_tg(self, duration: float, filename: str):
        """Return an empty template TextGrid named ``filename``.

        The grid spans 0.0 to ``duration`` and holds one empty IntervalTier
        of the same span for every key in ``self.tiers_specs``.
        """
        template = TextGrid(name=filename, minTime=0.0, maxTime=duration)
        for name in self.tiers_specs.keys():
            template.append(
                IntervalTier(name=name, minTime=0.0, maxTime=duration))
        return template
Ejemplo n.º 18
0
    def read(self, f):
        """
        Read the tiers contained in the Praat-formated TextGrid file
        indicated by string f

        Only the long text format is parsed (values are taken from fixed
        token positions of each line). Times are rounded to 5 decimals and
        tier names are passed through ``self.name_filter``. Interval tiers
        keep only intervals with ``min < max``.

        NOTE(review): point tiers are parsed but their points are never
        stored and the tier is never appended, so they are absent from the
        result - confirm whether that is intended.
        """
        source = readFile(f)
        try:
            self.minTime = round(float(source.readline().split()[2]), 5)
            self.maxTime = round(float(source.readline().split()[2]), 5)
            source.readline()  # more header junk
            m = int(source.readline().rstrip().split()[2])  # will be self.n
            source.readline()
            for _ in range(m):  # loop over grids
                source.readline()
                if source.readline().rstrip().split()[2] == '"IntervalTier"':
                    inam = source.readline().rstrip().split(' = ')[1].strip(
                        '"')
                    inam = self.name_filter(inam)
                    # Tier bounds are read (and validated as numbers) to
                    # advance the stream; the tier is built from its name.
                    imin = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    imax = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    itie = IntervalTier(inam)
                    count = int(source.readline().rstrip().split()[3])
                    for _ in range(count):
                        source.readline().rstrip().split()  # header junk
                        jmin = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmax = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmrk = _getMark(source)
                        if jmin < jmax:  # non-null
                            itie.addInterval(Interval(jmin, jmax, jmrk))
                    self.append(itie)
                else:  # pointTier
                    inam = source.readline().rstrip().split(' = ')[1].strip(
                        '"')
                    inam = self.name_filter(inam)
                    imin = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    imax = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    itie = PointTier(inam)
                    n = int(source.readline().rstrip().split()[3])
                    for _ in range(n):
                        source.readline().rstrip()  # header junk
                        jtim = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmrk = source.readline().rstrip().split()[2][1:-1]
        finally:
            # Close the file even when a malformed line makes parsing raise.
            source.close()
Ejemplo n.º 19
0
def convert_ctm_to_textgrid(ctms, textgrid):
    """Convert several CTM files into one TextGrid, one tier per CTM file.

    Each tier is named after the CTM file's stem. Every non-blank CTM line
    is split on whitespace; field 2 is the begin time, field 3 the duration
    and field 4 the label. Times are rounded to two decimals. Segments the
    tier rejects with ``ValueError`` are reported on stdout and skipped.

    Previously a fresh TextGrid was created and written for every CTM file,
    so each one overwrote the previous and only the last tier survived. Now
    all tiers are collected and the file is written once.

    Args:
        ctms: Iterable of path objects (``.stem`` is used) of CTM files.
        textgrid: Destination passed to ``TextGrid.write``. Nothing is
            written when ``ctms`` is empty.
    """
    tiers = []
    for ctm in ctms:
        tier = IntervalTier(name=ctm.stem)
        with open(ctm, encoding='utf-8') as f:
            for line in f:
                tok = line.split()
                if not tok:
                    # Blank lines carry no segment; they used to crash on
                    # tok[4].
                    continue
                word = tok[4]
                beg = float(tok[2])
                dur = float(tok[3])
                try:
                    tier.add(round(beg, 2), round(beg + dur, 2), word)
                except ValueError:
                    print("Error in seg: " + word)
        tiers.append(tier)

    if not tiers:
        return
    tg = TextGrid()
    for tier in tiers:
        tg.append(tier)
    tg.write(textgrid)
Ejemplo n.º 20
0
def create_grid(wav_file: Path, text: str, tier_name: str,
                n_digits: int) -> TextGrid:
    """Create a TextGrid for ``wav_file`` with one tier built from ``text``.

    The grid and its tier span 0 to the wav duration rounded to
    ``n_digits``. The tier's intervals come from ``get_intervals``, called
    with the individual characters of ``text``.

    Args:
        wav_file: Existing wav file (asserted).
        text: Non-empty text (asserted).
        tier_name: Name of the created tier.
        n_digits: Rounding precision for the duration; also forwarded to
            ``get_intervals``.
    """
    assert wav_file.is_file()
    assert len(text) > 0
    total_s = round(get_wav_duration_s(wav_file), n_digits)
    grid = TextGrid(None, 0, total_s)
    char_tier = IntervalTier(tier_name, 0, total_s)
    char_tier.intervals.extend(
        get_intervals(list(text), total_s, n_digits))
    grid.append(char_tier)
    return grid
Ejemplo n.º 21
0
def read_segment(val: Segment) -> IntervalTier:
    """Convert a Segment message into an IntervalTier.

    Args:
        val: A Segment message as defined in data_utterance.pb.

    Returns:
        An IntervalTier spanning from the first start time to the last end
        time, with one interval per symbol.

    Raises:
        ValueError: If the number of symbols, start times and end times do
            not all equal ``val.num_item``.
    """
    labels = val.symbol
    starts = mat_to_numpy(val.start_time)
    ends = mat_to_numpy(val.end_time)
    expected = val.num_item

    sizes = (len(labels), len(starts), len(ends))
    if any(size != expected for size in sizes):
        raise ValueError("Interval item number is not consistent!")

    tier = IntervalTier(minTime=starts[0], maxTime=ends[-1])
    for label, begin, end in zip(labels, starts, ends):
        tier.add(begin, end, label)
    return tier
Ejemplo n.º 22
0
def parseFile(path, fn, out_dir="/Users/elias/Desktop/TextGrids"):
	"""Convert one annotation file into a TextGrid with word and phone tiers.

	The file is read fully; segments come from ``getMAU`` (using the value
	returned by ``getSAM``) and words from ``getWords``. Nothing is written
	when ``getMAU`` returns None or ``getMaxTime`` returns -1.

	Args:
		path: Path of the annotation file to read.
		fn: File name; the part before the first "." names the output.
		out_dir: Directory receiving ``<name>.TextGrid``. Defaults to the
			location that was previously hard-coded.
	"""
	filename = fn.split(".")[0]  # just name of file
	with open(path, "r") as f1:
		lines = f1.readlines()
	SAM = getSAM(lines)
	allSegs = getMAU(lines, SAM, filename)
	if allSegs is None:
		return
	segs = [getSegInfo(seg) for seg in allSegs]
	words = getWords(lines, allSegs, filename)

	maxtime = getMaxTime(allSegs)
	if maxtime == -1:
		return
	tg = TextGrid(maxTime = maxtime)
	wordtier = IntervalTier(name = 'words', maxTime = maxtime)
	phonetier = IntervalTier(name = 'phones', maxTime = maxtime)
	for interval in words:
		wordtier.add(*interval)
	for interval in segs:
		phonetier.add(*interval)
	tg.append(wordtier)
	tg.append(phonetier)
	outpath = "%s/%s.TextGrid" % (out_dir, filename)
	tg.write(outpath)
Ejemplo n.º 23
0
    def read(self, f):
        """
        Read the tiers contained in the Praat-formated TextGrid file
        indicated by string f

        Only the long text format is parsed (values are taken from fixed
        token positions of each line). Times are rounded to 5 decimals.
        Interval tiers keep only intervals with ``min < max``.

        NOTE(review): point tiers are parsed but their points are never
        stored and the tier is never appended, so they are absent from the
        result - confirm whether that is intended.
        """
        source = readFile(f)
        try:
            self.minTime = round(float(source.readline().split()[2]), 5)
            self.maxTime = round(float(source.readline().split()[2]), 5)
            source.readline()  # more header junk
            m = int(source.readline().rstrip().split()[2])  # will be self.n
            source.readline()
            for _ in range(m):  # loop over grids
                source.readline()
                if source.readline().rstrip().split()[2] == '"IntervalTier"':
                    inam = source.readline().rstrip().split(' = ')[1].strip(
                        '"')
                    # Tier bounds are read (and validated as numbers) to
                    # advance the stream; the tier is built from its name.
                    imin = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    imax = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    itie = IntervalTier(inam)
                    count = int(source.readline().rstrip().split()[3])
                    for _ in range(count):
                        source.readline().rstrip().split()  # header junk
                        jmin = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmax = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmrk = self._getMark(source)
                        if jmin < jmax:  # non-null
                            itie.addInterval(Interval(jmin, jmax, jmrk))
                    self.append(itie)
                else:  # pointTier
                    inam = source.readline().rstrip().split(' = ')[1].strip(
                        '"')
                    imin = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    imax = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    itie = PointTier(inam)
                    n = int(source.readline().rstrip().split()[3])
                    for _ in range(n):
                        source.readline().rstrip()  # header junk
                        jtim = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmrk = source.readline().rstrip().split()[2][1:-1]
        finally:
            # Close the file even when a malformed line makes parsing raise.
            source.close()
Ejemplo n.º 24
0
def time_to_frame_interval_tier(time_tier: IntervalTier,
                                shift) -> IntervalTier:
    """Convert an IntervalTier in time to frame.

    A segment that would be empty after conversion is stretched to one
    frame, and that frame is then taken from the start of the following
    segment.

    Args:
        time_tier: IntervalTier represented in seconds.
        shift: Window shift in ms.

    Returns:
        frame_tier: IntervalTier represented in frames.

    Raises:
        ValueError: If a stretched segment would end beyond the last frame
            of the tier.
    """
    last_frame = time_to_frame(time_tier.maxTime, shift)
    converted = IntervalTier(time_tier.name, 0, last_frame)

    # Frames borrowed by the previous segment that the current one must give
    # up at its start.
    carry = 0
    for segment in time_tier.intervals:
        first = time_to_frame(segment.minTime, shift)
        if carry > 0:
            logging.warning(
                "Last segment is too short, have to cut the %d frame(s) "
                "from the beginning of the current segment.", carry)
            first += carry
            carry = 0
        last = time_to_frame(segment.maxTime, shift)
        if first >= last:
            # Too short to occupy a frame: force a one-frame segment.
            last = first + 1
            carry = last - first
            logging.warning(
                "The current segment is too short, extend it for %d "
                "frame(s).", carry)
        if last > converted.maxTime:
            raise ValueError(
                "Extreme short segments in the tier, please fix these.")
        converted.add(first, last, segment.mark)
    return converted
Ejemplo n.º 25
0
    def genFramesTier(self):
        """Generate the 'frames' point tier and save the TextGrid.

        One point per frame time reported by ``self.app.Dicom`` is added,
        marked with the frame index. The grid's ``maxTime`` is raised when
        the frames extend past it. When the file has both '.ult' and '.txt'
        levels and the '.txt' file is non-empty, its first line becomes a
        "sentence" tier placed first in the grid. Finally the grid is
        written to the registered '.TextGrid' path and reloaded from it.
        """
        debug('generating frames tier for %s' %
              self.app.Data.getCurrentFilename())
        self.frameTierName = 'frames'
        times = self.app.Dicom.getFrameTimes()
        self.app.Data.setFileLevel("NumberOfFrames", len(times))
        try:
            maxTime = max(self.app.Audio.duration, times[-1])
        except AttributeError:
            # Audio may have no duration; fall back to the last frame time.
            maxTime = times[-1]
        tier = PointTier('frames', maxTime=maxTime)
        for f, t in enumerate(times):
            tier.addPoint(Point(t, str(f)))
        if not self.TextGrid.maxTime or maxTime > self.TextGrid.maxTime:
            self.TextGrid.maxTime = maxTime
        self.TextGrid.append(tier)

        keys = self.app.Data.getFileLevel('all')
        if '.ult' in keys and '.txt' in keys:
            fname = self.app.Data.unrelativize(
                self.app.Data.getFileLevel('.txt'))
            # Context manager so the handle is closed even if reading or
            # decoding fails.
            with open(fname, 'rb') as txt_file:
                s = util.decode_bytes(txt_file.read())
            if s:
                line = s.splitlines()[0]
                sentenceTier = IntervalTier("sentence")
                # Same guard as above: when Audio has no duration, use the
                # span already computed for the frames.
                sentence_end = getattr(self.app.Audio, 'duration', maxTime)
                sentenceTier.add(0, sentence_end, line)
                self.TextGrid.append(sentenceTier)
                # Move the just-appended sentence tier to the front.
                self.TextGrid.tiers = [self.TextGrid.tiers[-1]
                                       ] + self.TextGrid.tiers[:-1]

        path = self.app.Data.unrelativize(
            self.app.Data.getFileLevel('.TextGrid'))
        self.TextGrid.write(path)
        self.TextGrid = TextGridFile.fromFile(path)
Ejemplo n.º 26
0
 def test_time_to_frame_interval_tier_short_seg(self):
     """A 0.003 s segment followed by a 0.007 s one maps to frames 0-1, 1-2."""
     time_tier = IntervalTier('test', 0, 0.01)
     for begin, end, label in ((0, 0.003, "a"), (0.003, 0.01, "b")):
         time_tier.add(begin, end, label)

     result = utterance.time_to_frame_interval_tier(time_tier, 5)

     self.assertEqual(result.minTime, 0)
     self.assertEqual(result.maxTime, 2)
     for idx, (first, last) in enumerate(((0, 1), (1, 2))):
         self.assertEqual(result.intervals[idx].minTime, first)
         self.assertEqual(result.intervals[idx].maxTime, last)
    def export_transcriptions(self, output_directory, source=None):
        """Export transcripts as .lab files or as per-file TextGrids.

        * Corpus without segments: each utterance's transcript is written to
          ``<utt>.lab`` in the utterance's mapped sub-directory (or in
          ``output_directory`` when the mapping is empty).
        * Corpus with segments: for every file in
          ``self.corpus.speaker_ordering`` a TextGrid is written with one
          tier per speaker, holding that file's utterances at their segment
          times.

        Args:
            output_directory: Root directory for the exported files.
            source: Forwarded to ``self._load_transcripts``.
        """
        transcripts = self._load_transcripts(source)
        # NOTE(review): looks like leftover debug output - kept so stdout
        # stays unchanged; consider removing.
        print(self.corpus.file_directory_mapping)
        if not self.corpus.segments:
            for utt, t in transcripts.items():
                relative = self.corpus.file_directory_mapping[utt]
                if relative:
                    speaker_directory = os.path.join(output_directory,
                                                     relative)
                else:
                    speaker_directory = output_directory
                os.makedirs(speaker_directory, exist_ok=True)
                outpath = os.path.join(speaker_directory, utt + '.lab')
                with open(outpath, 'w', encoding='utf8') as f:
                    f.write(t)

        else:
            for filename in self.corpus.speaker_ordering.keys():
                maxtime = self.corpus.get_wav_duration(filename)
                try:
                    speaker_directory = os.path.join(
                        output_directory,
                        self.corpus.file_directory_mapping[filename])
                except KeyError:
                    speaker_directory = output_directory
                # Create the target directory as the .lab branch does;
                # otherwise the write below fails for a fresh directory.
                os.makedirs(speaker_directory, exist_ok=True)
                tiers = {}
                for speaker in self.corpus.speaker_ordering[filename]:
                    tiers[speaker] = IntervalTier(name=speaker,
                                                  maxTime=maxtime)

                tg = TextGrid(maxTime=maxtime)
                for utt_name, text in transcripts.items():
                    utt_filename, begin, end = self.corpus.segments[
                        utt_name].split(' ')
                    if utt_filename != filename:
                        continue
                    speaker = self.corpus.utt_speak_mapping[utt_name]
                    begin = float(begin)
                    end = float(end)
                    tiers[speaker].add(begin, end, text)
                for t in tiers.values():
                    tg.append(t)
                tg.write(
                    os.path.join(speaker_directory, filename + '.TextGrid'))
Ejemplo n.º 28
0
def merge_and_mark_tiers(tg_file="", output_file="", tiers=()):
    """
    Write a copy of a TextGrid with two extra tiers built from ``tiers``.

    The TextGrid at ``tg_file`` is read, every pair of the selected tiers is
    passed to ``validate_overlapping_tiers``, and two new tiers are inserted
    at positions 1 and 2 of the tier list:

    * ``"Marked"``: one interval per non-empty source interval, labelled
      with the name of the tier the interval came from.
    * ``"Merged"``: the same intervals, keeping their original marks.

    :param tg_file: path of the TextGrid to read.
    :param output_file: path the augmented TextGrid is written to.
    :param tiers: names of the tiers to combine; each one is looked up with
        ``TextGrid.getFirst``. At least one name is required, otherwise the
        ``min``/``max`` over the tiers raises ``ValueError``.
    """
    tg = textgrid.TextGrid()
    tg.read(f=tg_file)

    # Resolve each tier name once instead of on every use.
    source_tiers = [tg.getFirst(tier_name) for tier_name in tiers]

    for first, second in combinations(source_tiers, 2):
        validate_overlapping_tiers(first, second)

    span_start = min(tier.minTime for tier in source_tiers)
    # The new tiers must reach the latest end time of the sources: adding an
    # interval that ends after a tier's maxTime is rejected by the textgrid
    # library, so the earliest end time would break on uneven tiers.
    span_end = max(tier.maxTime for tier in source_tiers)

    merged_tier = IntervalTier(name="Merged",
                               minTime=span_start,
                               maxTime=span_end)
    marked_tier = IntervalTier(name="Marked",
                               minTime=span_start,
                               maxTime=span_end)

    for source in source_tiers:
        for interval in source:
            # Intervals with an empty mark carry no label and are not copied.
            if not interval.mark:
                continue
            marked_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=source.name))
            merged_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=interval.mark))

    tg.tiers.insert(1, marked_tier)
    tg.tiers.insert(2, merged_tier)

    # Explicit UTF-8 so non-ASCII marks do not depend on the locale encoding.
    with open(output_file, "w", encoding="utf-8") as f:
        tg.write(f)
def reorg_original(f):
    """
    Split the 'Sentences' tier of a TextGrid into one tier per speaker.

    The file ``f`` is read from ``noncollapsed_dir``. Each non-blank
    sentence is assigned to the speaker whose 'Speakers' interval contains
    the sentence midpoint; a sentence starting exactly where the speaker's
    previous one ended is merged into it. The result is written next to the
    input with ``_original.TextGrid`` replaced by ``.TextGrid``.
    """
    print(f)
    source_path = os.path.join(noncollapsed_dir, f)
    source_tg = TextGrid()
    source_tg.read(source_path)
    output_tg = TextGrid(maxTime=source_tg.maxTime)
    output_path = source_path.replace('_original.TextGrid', '.TextGrid')
    sentence_tier = source_tg.getFirst('Sentences')
    speaker_tier = source_tg.getFirst('Speakers')

    # One output tier per usable speaker label; empty labels, labels that
    # contain a comma and the 'Tân.' spelling get no tier of their own.
    speaker_tiers = {}
    for span in speaker_tier:
        label = span.mark
        if label == '' or ',' in label or label == 'Tân.':
            continue
        speaker_tiers[label] = IntervalTier(label, maxTime=source_tg.maxTime)

    for sentence in sentence_tier:
        if not sentence.mark.strip():
            continue
        length = sentence.maxTime - sentence.minTime
        owner = speaker_tier.intervalContaining(sentence.minTime + length / 2)
        speaker = owner.mark
        if speaker == 'Tân.':
            # Alternate spelling is folded into the 'Tan' tier.
            speaker = 'Tan'
        if speaker == '':
            continue
        tier = speaker_tiers[speaker]
        if len(tier) > 0 and tier[-1].maxTime == sentence.minTime:
            # Back-to-back sentences: extend the previous interval.
            previous = tier[-1]
            previous.maxTime = sentence.maxTime
            previous.mark = previous.mark + ' ' + sentence.mark
        else:
            tier.addInterval(sentence)

    for name in sorted(speaker_tiers):
        output_tg.append(speaker_tiers[name])
    print(speaker_tiers.keys())
    output_tg.write(output_path)
Ejemplo n.º 30
0
    def save_text_file(self, file_name):
        """
        Rewrite the TextGrid behind ``file_name`` from the stored utterances.

        Does nothing when ``self.segments`` is empty. Otherwise the TextGrid
        of the file's first utterance is read, its tiers are replaced by one
        tier per speaker built from the segment times (rounded to four
        decimals) and utterance texts, and it is written back in place.
        """
        if not self.segments:
            return

        utterances = self.file_utt_mapping[file_name]
        text_file_path = self.utt_text_file_mapping[utterances[0]]
        tg = TextGrid()
        tg.read(text_file_path)

        tiers = {}
        for utt in utterances:
            # Segment entries are "<file> <begin> <end>"; the file is unused.
            _, raw_begin, raw_end = self.segments[utt].split()
            begin = round(float(raw_begin), 4)
            end = round(float(raw_end), 4)
            text = self.text_mapping[utt]
            speaker = self.utt_speak_mapping[utt]
            tier = tiers.get(speaker)
            if tier is None:
                tier = IntervalTier(name=speaker, maxTime=tg.maxTime)
                tiers[speaker] = tier
            tier.add(begin, end, text)

        tg.tiers = list(tiers.values())
        tg.write(text_file_path)
Ejemplo n.º 31
0
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus, dictionary, frameshift=0.01):
    """
    Write word and phone alignments as TextGrid files under ``out_directory``.

    Without ``corpus.segments`` every entry of ``word_ctm`` yields a TextGrid
    with a 'words' and a 'phones' tier for its first speaker. With segments,
    each file gets a '<speaker> - words' and '<speaker> - phones' tier per
    speaker in ``corpus.speaker_ordering``. Files are placed in the
    sub-directory given by ``corpus.file_directory_mapping`` when there is one.

    Failures while building or writing a TextGrid do not abort the run; their
    tracebacks are collected and written to ``output_errors.txt``.

    :param word_ctm: mapping of file name to ``{speaker: [interval, ...]}``,
        each interval being the arguments of ``IntervalTier.add``.
    :param phone_ctm: same layout as ``word_ctm``, for phones.
    :param out_directory: root output directory, created if missing.
    :param corpus: provides ``segments``, ``get_wav_duration``,
        ``file_directory_mapping`` and ``speaker_ordering``.
    :param dictionary: provides ``optional_silence`` and
        ``nonoptional_silence``; only read when the corpus has segments.
    :param frameshift: tolerance used without segments: an interval ending
        less than this far before the file duration is snapped to the
        duration. The interval is modified in place.
    """
    textgrid_write_errors = {}
    frameshift = Decimal(str(frameshift))
    if not os.path.exists(out_directory):
        os.makedirs(out_directory, exist_ok=True)
    if not corpus.segments:
        for k, v in sorted(word_ctm.items()):
            maxtime = Decimal(str(corpus.get_wav_duration(k)))
            speaker = list(v.keys())[0]
            v = list(v.values())[0]
            try:
                tg = TextGrid(maxTime=maxtime)
                wordtier = IntervalTier(name='words', maxTime=maxtime)
                phonetier = IntervalTier(name='phones', maxTime=maxtime)
                for interval in v:
                    if maxtime - interval[1] < frameshift:  # Fix rounding issues
                        interval[1] = maxtime
                    wordtier.add(*interval)
                for interval in phone_ctm[k][speaker]:
                    if maxtime - interval[1] < frameshift:
                        interval[1] = maxtime
                    phonetier.add(*interval)
                tg.append(wordtier)
                tg.append(phonetier)
                relative = corpus.file_directory_mapping[k]
                if relative:
                    speaker_directory = os.path.join(out_directory, relative)
                else:
                    speaker_directory = out_directory
                os.makedirs(speaker_directory, exist_ok=True)
                outpath = os.path.join(speaker_directory, k + '.TextGrid')
                tg.write(outpath)
            except Exception:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                textgrid_write_errors[k] = '\n'.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
    else:
        silences = {dictionary.optional_silence, dictionary.nonoptional_silence}
        for filename, speaker_dict in sorted(word_ctm.items()):
            maxtime = corpus.get_wav_duration(filename)
            try:
                speaker_directory = os.path.join(out_directory, corpus.file_directory_mapping[filename])
                # The sub-directory may not exist yet; create it as the
                # non-segments branch does, otherwise the write below fails.
                os.makedirs(speaker_directory, exist_ok=True)
                tg = TextGrid(maxTime=maxtime)
                for speaker in corpus.speaker_ordering[filename]:
                    words = speaker_dict[speaker]
                    word_tier_name = '{} - words'.format(speaker)
                    phone_tier_name = '{} - phones'.format(speaker)
                    word_tier = IntervalTier(name=word_tier_name, maxTime=maxtime)
                    phone_tier = IntervalTier(name=phone_tier_name, maxTime=maxtime)
                    for w in words:
                        word_tier.add(*w)
                    for p in phone_ctm[filename][speaker]:
                        if len(phone_tier) > 0 and phone_tier[-1].mark in silences and p[2] in silences:
                            # Consecutive silences collapse into one interval.
                            phone_tier[-1].maxTime = p[1]
                        else:
                            if len(phone_tier) > 0 and p[2] in silences and p[0] < phone_tier[-1].maxTime:
                                # Silence overlapping the previous phone:
                                # start it where that phone ends.
                                p = phone_tier[-1].maxTime, p[1], p[2]
                            elif len(phone_tier) > 0 and p[2] not in silences and p[0] < phone_tier[-1].maxTime and \
                                            phone_tier[-1].mark in silences:
                                # Phone overlapping a preceding silence:
                                # shorten the silence instead.
                                phone_tier[-1].maxTime = p[0]
                            phone_tier.add(*p)
                    tg.append(word_tier)
                    tg.append(phone_tier)
                tg.write(os.path.join(speaker_directory, filename + '.TextGrid'))
            except Exception:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                textgrid_write_errors[filename] = '\n'.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
    if textgrid_write_errors:
        error_log = os.path.join(out_directory, 'output_errors.txt')
        with open(error_log, 'w', encoding='utf8') as f:
            f.write('The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n')
            for k, v in textgrid_write_errors.items():
                f.write('{}:\n'.format(k))
                f.write('{}\n\n'.format(v))
		maxtime = tg.maxTime
		for i, ti in enumerate(tg.tiers):
			if i == 0:
				for x in ti:
					x.maxTime += cur_dur
					x.minTime += cur_dur
					wordintervals.append(x)

			elif i == 1:
				for x in ti:
					x.maxTime += cur_dur
					x.minTime += cur_dur
					phoneintervals.append(x)
				cur_dur += maxtime

	words = IntervalTier(name='words')
	for i in wordintervals:
		words.addInterval(i)
	phones = IntervalTier(name='phones')
	for i in phoneintervals:
		phones.addInterval(i)
	tg1 = TextGrid(maxTime = cur_dur)
	tg1.append(words)
	tg1.append(phones)
	tg1.write(chapteroutpath1, null = '')

	speaker_tier = IntervalTier(name=speaker)
	for i in range(len(groupedwavfiles)):
		if i == 1:
			speaker_tier.add(0.0, wavfiletimes[0], groupedlabtext[0])
		else:
Ejemplo n.º 33
0
def parse_transcript(path):
    """
    Convert one time-stamped transcript text file into a TextGrid.

    The output path is ``path`` with the ``txt`` folder under ``orig_dir``
    replaced by ``output_dir`` and the ``.txt`` suffix by ``.TextGrid``.
    Lines carrying a speaker code plus start/end stamps open or extend an
    interval on that speaker's tier; lines without stamps are appended to
    the most recent speaker's last interval. An utterance that starts less
    than 0.1 after the previous one on the same tier ended is merged into it.

    :param path: path of the transcript text file.
    """
    file_name = os.path.splitext(os.path.basename(path))[0]
    tg_path = path.replace(os.path.join(orig_dir, 'txt'),
                           output_dir).replace('.txt', '.TextGrid')
    tg = TextGrid()
    tiers = {}
    prev_speaker = None
    # Most recently parsed speaker; None until the first stamped line.
    speaker = None
    with open(path, 'r', encoding='utf8') as f:
        for i, line in enumerate(f):
            line = line.strip()
            # Skip the first line, blanks, <I> markers and '&' lines.
            if i == 0:
                continue
            if not line:
                continue
            if line in ['<I>', '</I>']:
                continue
            if line.startswith('&'):
                continue
            m = re.match(
                r'^<\$(\w)>.*<start=?([0-9:.;l ]+) end6?=([0-9>:.;l ]*)>?[?]?\s+<#>(.+)$',
                line)
            if m is None:
                # Unstamped line: continue the current speaker's last
                # interval, or drop it when there is nothing to attach to.
                text = parse_text(line)
                if speaker is None or len(tiers[speaker]) == 0:
                    continue
                tiers[speaker][-1].mark += ' ' + text
                continue

            speaker_code, start, end, text = m.groups()
            if speaker_code == 'Z':
                continue
            try:
                speaker = file_code_to_speaker[(file_name, speaker_code)]
            except KeyError:
                speaker = 'unknown_{}_{}'.format(file_name, speaker_code)
            if speaker not in tiers:
                tiers[speaker] = IntervalTier(speaker)
            start = parse_time(start)
            end = parse_time(end)
            text = parse_text(text)
            if text == "Again he's quoting":
                continue
            if not text:
                continue
            if end is None:
                continue
            # A missing start time only makes sense as a continuation of the
            # same speaker's previous utterance.
            continuation = start is None
            if continuation and prev_speaker != speaker:
                continue
            if '<' in text.replace('<beep_sound>',
                                   '').replace('<unk>', ''):
                # Report leftover markup that parse_text did not remove.
                print(file_name, start, end, text)
                print(line)
            if continuation or (len(tiers[speaker]) > 0 and
                                start - tiers[speaker][-1].maxTime < 0.1):
                # The text is appended exactly once here; a continuation
                # keeps the end time of the interval it extends.
                tiers[speaker][-1].mark += ' ' + text
                if not continuation:
                    tiers[speaker][-1].maxTime = end
            else:
                tiers[speaker].add(start, end, text)

            prev_speaker = speaker
    print(tiers.keys(), [len(x) for x in tiers.values()])
    for v in tiers.values():
        tg.append(v)
    tg.write(tg_path)
Ejemplo n.º 34
0
    return norm_value * (max - min) + min

# Walk every TextGrid in data_dir and prepare a 'segments' tier covering the
# span of its 'words' tier.
seg_ind = 0
for f in sorted(os.listdir(data_dir)):
    if not f.endswith('.TextGrid'):
        continue
    print(f)
    # The audio path is derived from the TextGrid name with an '.adc.wav'
    # suffix, in the same directory.
    wav_file = f.replace('.TextGrid', '.adc.wav')
    textgrid_path = os.path.join(data_dir, f)
    wav_path = os.path.join(data_dir, wav_file)
    tg = TextGrid()
    tg.read(textgrid_path)

    word_tier = tg.getFirst('words')

    segmentation_tier = IntervalTier('segments', 0, word_tier.maxTime)
    # Durations of all word intervals that carry a non-empty mark.
    durations = []
    for interval in word_tier:
        if interval.mark == '':
            continue
        durations.append(interval.maxTime - interval.minTime)
    # NOTE(review): max()/min() raise ValueError when a tier has no labelled
    # words.
    max_duration = max(durations)
    min_duration = min(durations)

    # NOTE(review): presumably thresholds in the tier's time unit; their use
    # is outside this view — confirm.
    min_thresh = 0.01
    max_thresh = 0.05
    segs = []

    for interval in word_tier:
        # Unlabelled intervals are ignored.
        if interval.mark == '':
            continue
Ejemplo n.º 35
0
def ctm_to_textgrid(word_ctm,
                    phone_ctm,
                    out_directory,
                    corpus,
                    dictionary,
                    frameshift=0.01):
    """
    Export word and phone alignments to TextGrid files.

    Without ``corpus.segments`` each file gets a 'words' and a 'phones' tier
    for its first speaker, written into a per-speaker sub-directory when
    ``corpus.speaker_directories`` is set. With segments each file gets a
    '<speaker> - words' and '<speaker> - phones' tier per speaker and is
    written directly into ``out_directory``.

    Any exception raised while building or writing a TextGrid is recorded,
    and all recorded tracebacks are written to ``output_errors.txt``.
    """
    failures = {}
    tolerance = Decimal(str(frameshift))

    def remember_failure(key):
        # Store the traceback of the exception currently being handled.
        failures[key] = '\n'.join(
            traceback.format_exception(*sys.exc_info()))

    def fill_clamped(tier, intervals, limit):
        # Snap ends that fall within the tolerance of the file end (fixes
        # rounding issues), then add the interval to the tier.
        for entry in intervals:
            if limit - entry[1] < tolerance:
                entry[1] = limit
            tier.add(*entry)

    if not os.path.exists(out_directory):
        os.makedirs(out_directory, exist_ok=True)

    if corpus.segments:
        silences = {
            dictionary.optional_silence, dictionary.nonoptional_silence
        }
        for filename, speaker_dict in sorted(word_ctm.items()):
            maxtime = corpus.get_wav_duration(filename)
            try:
                grid = TextGrid(maxTime=maxtime)
                for speaker, words in speaker_dict.items():
                    word_tier = IntervalTier(
                        name='{} - words'.format(speaker), maxTime=maxtime)
                    phone_tier = IntervalTier(
                        name='{} - phones'.format(speaker), maxTime=maxtime)
                    for word in words:
                        word_tier.add(*word)
                    for phone in phone_ctm[filename][speaker]:
                        if len(phone_tier) > 0:
                            last = phone_tier[-1]
                            if last.mark in silences and phone[2] in silences:
                                # Consecutive silences become one interval.
                                last.maxTime = phone[1]
                                continue
                            if phone[2] in silences:
                                if phone[0] < last.maxTime:
                                    # Overlapping silence starts where the
                                    # previous phone ends.
                                    phone = last.maxTime, phone[1], phone[2]
                            elif (phone[0] < last.maxTime
                                  and last.mark in silences):
                                # A phone overlapping a silence shortens it.
                                last.maxTime = phone[0]
                        phone_tier.add(*phone)
                    grid.append(word_tier)
                    grid.append(phone_tier)
                grid.write(os.path.join(out_directory, filename + '.TextGrid'))
            except Exception:
                remember_failure(filename)
    else:
        for name, per_speaker in sorted(word_ctm.items()):
            maxtime = Decimal(str(corpus.get_wav_duration(name)))
            speaker = list(per_speaker.keys())[0]
            word_intervals = list(per_speaker.values())[0]
            try:
                grid = TextGrid(maxTime=maxtime)
                word_tier = IntervalTier(name='words', maxTime=maxtime)
                phone_tier = IntervalTier(name='phones', maxTime=maxtime)
                fill_clamped(word_tier, word_intervals, maxtime)
                fill_clamped(phone_tier, phone_ctm[name][speaker], maxtime)
                grid.append(word_tier)
                grid.append(phone_tier)
                target_directory = out_directory
                if corpus.speaker_directories:
                    target_directory = os.path.join(
                        out_directory, corpus.utt_speak_mapping[name])
                os.makedirs(target_directory, exist_ok=True)
                grid.write(os.path.join(target_directory, name + '.TextGrid'))
            except Exception:
                remember_failure(name)

    if failures:
        log_path = os.path.join(out_directory, 'output_errors.txt')
        with open(log_path, 'w', encoding='utf8') as log:
            log.write(
                'The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n'
            )
            for key, trace in failures.items():
                log.write('{}:\n{}\n\n'.format(key, trace))
Ejemplo n.º 36
0
        label = ss[4]
        result.append([begin, end, label])
    return result


if __name__ == '__main__':
    # Command-line entry point: combine a words CTM and a phones CTM into a
    # single TextGrid with a "words" and a "phones" tier.
    parser = argparse.ArgumentParser()
    for positional in ('words_ctm', 'phones_ctm', 'output_textgrid'):
        parser.add_argument(positional)
    args = parser.parse_args()

    words = read_ctm(args.words_ctm)
    phones = read_ctm(args.phones_ctm)

    # The end of the last phone entry is used as the length of the TextGrid.
    max_time = phones[-1][1]
    tg = TextGrid(maxTime=max_time)
    word_tier = IntervalTier(name="words", maxTime=max_time)
    phone_tier = IntervalTier(name="phones", maxTime=max_time)

    for tier, entries in ((word_tier, words), (phone_tier, phones)):
        for entry in entries:
            tier.add(*entry)
        tg.append(tier)

    tg.write(args.output_textgrid)
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus):
    """
    Export word and phone alignments to TextGrid files.

    Without ``corpus.segments`` every key of ``word_ctm`` becomes its own
    TextGrid with a 'words' and a 'phones' tier, placed in a per-speaker
    sub-directory when ``corpus.speaker_directories`` is set. With segments,
    utterances are grouped by recording: each utterance's intervals are
    shifted by its segment start and added to that recording's
    '<speaker> - words' and '<speaker> - phones' tiers.

    A ``ValueError`` raised while building a non-segment TextGrid, or while
    writing any TextGrid, is printed and does not stop the export.
    """

    def report(name, error):
        print('Could not write textgrid for {}'.format(name))
        print(error)

    def tier_named(grid, label, maxtime):
        # Reuse the tier when the grid already has one with this name.
        tier = grid.getFirst(label)
        if tier is None:
            tier = IntervalTier(name = label, maxTime = maxtime)
            grid.append(tier)
        return tier

    if not os.path.exists(out_directory):
        os.makedirs(out_directory, exist_ok=True)

    if corpus.segments:
        grids = {}
        for utt, word_intervals in word_ctm.items():
            # Segment entries are "<recording> <begin> <end>".
            recording, begin, end = corpus.segments[utt].split(' ')
            maxtime = corpus.get_wav_duration(utt)
            if recording not in grids:
                grids[recording] = TextGrid(maxTime = maxtime)
            grid = grids[recording]
            offset = float(begin)
            speaker = corpus.utt_speak_mapping[utt]
            word_tier = tier_named(grid, '{} - words'.format(speaker), maxtime)
            phone_tier = tier_named(grid, '{} - phones'.format(speaker), maxtime)
            # CTM times are relative to the utterance; shift them to
            # recording time before adding.
            for entry in word_intervals:
                word_tier.add(entry[0] + offset, entry[1] + offset, entry[2])
            for entry in phone_ctm[utt]:
                phone_tier.add(entry[0] + offset, entry[1] + offset, entry[2])
        for recording, grid in grids.items():
            destination = os.path.join(out_directory, recording + '.TextGrid')
            try:
                grid.write(destination)
            except ValueError as error:
                report(recording, error)
    else:
        for name, word_intervals in word_ctm.items():
            maxtime = corpus.get_wav_duration(name)
            try:
                grid = TextGrid(maxTime = maxtime)
                word_tier = IntervalTier(name = 'words', maxTime = maxtime)
                phone_tier = IntervalTier(name = 'phones', maxTime = maxtime)
                for entry in word_intervals:
                    word_tier.add(*entry)
                for entry in phone_ctm[name]:
                    phone_tier.add(*entry)
                grid.append(word_tier)
                grid.append(phone_tier)
                target_directory = out_directory
                if corpus.speaker_directories:
                    target_directory = os.path.join(out_directory, corpus.utt_speak_mapping[name])
                os.makedirs(target_directory, exist_ok=True)
                grid.write(os.path.join(target_directory, name + '.TextGrid'))
            except ValueError as error:
                report(name, error)