def clean_tier(old_tier):
    """
    Return a new IntervalTier named after old_tier whose intervals are the
    results of clean_interval() applied to each interval of old_tier, in
    iteration order.
    """
    cleaned = IntervalTier(name=old_tier.name)
    for entry in old_tier:
        cleaned.addInterval(clean_interval(entry))
    return cleaned
def read_tg_from_str(tg_str, round_digits=DEFAULT_TEXTGRID_PRECISION):
    """
    Read the tiers contained in the Praat-formatted string tg_str into a
    TextGrid object.
    Times are rounded to the specified precision.

    Both the long and the short text layout are handled; parse_header()
    reports which one applies. Intervals whose start is not strictly before
    their end are dropped. Any tier that is not an "IntervalTier" is read as
    a point tier.

    Raises ValueError if the header does not announce a TextGrid.

    Adapted from TextGrid.read()
    """
    source = StringIO(tg_str)
    tg = TextGrid()

    file_type, short = parse_header(source)
    if file_type != "TextGrid":
        raise ValueError("The file could not be parsed as a TextGrid as it is "
                         "lacking a proper header.")
    tg.minTime = parse_line(source.readline(), short, round_digits)
    tg.maxTime = parse_line(source.readline(), short, round_digits)
    source.readline()  # More header junk
    if short:
        m = int(source.readline().strip())  # Will be tg.n
    else:
        m = int(source.readline().strip().split()[2])  # Will be tg.n
        source.readline()  # Long layout only: one more line before the tiers
    for _ in range(m):  # Loop over grids
        if not short:
            source.readline()  # Long layout only: per-tier header line
        if parse_line(source.readline(), short,
                      round_digits) == "IntervalTier":
            inam = parse_line(source.readline(), short, round_digits)
            imin = parse_line(source.readline(), short, round_digits)
            imax = parse_line(source.readline(), short, round_digits)
            itie = IntervalTier(inam, imin, imax)
            itie.strict = tg.strict
            n = int(parse_line(source.readline(), short, round_digits))
            for _ in range(n):
                if not short:
                    source.readline()  # Per-interval header junk
                jmin = parse_line(source.readline(), short, round_digits)
                jmax = parse_line(source.readline(), short, round_digits)
                jmrk = get_mark(source, short)
                if jmin < jmax:  # Non-null
                    itie.addInterval(Interval(jmin, jmax, jmrk))
            tg.append(itie)
        else:  # PointTier
            inam = parse_line(source.readline(), short, round_digits)
            # The tier bounds are consumed to stay aligned with the stream;
            # the PointTier itself is built from the name alone.
            parse_line(source.readline(), short, round_digits)
            parse_line(source.readline(), short, round_digits)
            itie = PointTier(inam)
            n = int(parse_line(source.readline(), short, round_digits))
            for _ in range(n):
                # Mirror the interval branch: only the long layout has a
                # per-item header line. Skipping a line unconditionally
                # would swallow the point's time in short-format input.
                if not short:
                    source.readline()  # Per-point header junk
                jtim = parse_line(source.readline(), short, round_digits)
                jmrk = get_mark(source, short)
                itie.addPoint(Point(jtim, jmrk))
            tg.append(itie)
    return tg
Beispiel #3
0
def saveSyllableInTextGrid(sylIntervals, gridobj, f):
    """
    Append a 'Syllable' IntervalTier holding sylIntervals to gridobj and
    write gridobj to f.

    The tier runs from the minTime of the first interval to the maxTime of
    the last one. When sylIntervals is empty nothing is appended or written.
    """
    count = len(sylIntervals)
    if count == 0:
        # No interval to save
        return

    # Tier bounds come from the first and the last interval
    tier_start = sylIntervals[0].minTime
    tier_end = sylIntervals[count - 1].maxTime

    tier = IntervalTier('Syllable', tier_start, tier_end)
    for syllable in sylIntervals:
        tier.addInterval(syllable)

    # Attach the tier, then persist the updated grid
    gridobj.append(tier)
    gridobj.write(f)
Beispiel #4
0
    def read(self, f):
        """
        Read the tiers contained in the Praat-formatted TextGrid file
        indicated by string f.

        Sets self.minTime / self.maxTime from the file and appends one
        IntervalTier per interval tier found. Tier names are passed through
        self.name_filter, times are rounded to 5 decimal places, and
        intervals whose start is not strictly before their end are skipped.

        The source returned by readFile(f) is closed even if parsing fails.

        NOTE(review): point tiers are parsed only to advance through the
        file; the PointTier built for them is never filled or appended to
        self. Confirm whether that is intentional before relying on it.
        """
        source = readFile(f)
        try:
            self.minTime = round(float(source.readline().split()[2]), 5)
            self.maxTime = round(float(source.readline().split()[2]), 5)
            source.readline()  # more header junk
            m = int(source.readline().rstrip().split()[2])  # will be self.n
            source.readline()
            for i in range(m):  # loop over grids
                source.readline()
                if source.readline().rstrip().split()[2] == '"IntervalTier"':
                    inam = source.readline().rstrip().split(' = ')[1].strip(
                        '"')
                    inam = self.name_filter(inam)
                    # Tier bounds are parsed but not handed to the tier.
                    imin = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    imax = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    itie = IntervalTier(inam)
                    n = int(source.readline().rstrip().split()[3])
                    for j in range(n):
                        source.readline()  # header junk
                        jmin = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmax = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmrk = _getMark(source)
                        if jmin < jmax:  # non-null
                            itie.addInterval(Interval(jmin, jmax, jmrk))
                    self.append(itie)
                else:  # pointTier
                    inam = source.readline().rstrip().split(' = ')[1].strip(
                        '"')
                    inam = self.name_filter(inam)
                    imin = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    imax = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    itie = PointTier(inam)
                    n = int(source.readline().rstrip().split()[3])
                    for j in range(n):
                        source.readline()  # header junk
                        # Parsed to consume (and validate) the lines; the
                        # values are not stored anywhere.
                        jtim = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmrk = source.readline().rstrip().split()[2][1:-1]
        finally:
            # Release the source even when a malformed file raises mid-parse.
            source.close()
Beispiel #5
0
def merge_and_mark_tiers(tg_file="", output_file="", tiers=()):
    """
    Write a copy of a TextGrid with two IntervalTiers added, both built
    from the tiers named in `tiers`.

    Every interval with a non-empty mark in the named tiers is copied into
    both new tiers:

    * "Marked" labels the interval with the name of the tier it came from.
    * "Merged" keeps the interval's own mark.

    The new tiers span from the smallest minTime to the largest maxTime of
    the named tiers. They are inserted at positions 1 ("Marked") and 2
    ("Merged") of the grid's tier list. Each pair of named tiers is passed
    to validate_overlapping_tiers() before merging.

    tg_file: path of the TextGrid to read.
    output_file: path the augmented TextGrid is written to.
    tiers: names of the tiers to merge; each is looked up with getFirst().

    Raises ValueError if `tiers` is empty.
    """
    tg = textgrid.TextGrid()
    tg.read(f=tg_file)

    # Resolve every requested name once instead of on each use.
    source_tiers = [tg.getFirst(tier_name) for tier_name in tiers]
    if not source_tiers:
        raise ValueError("tiers must name at least one tier to merge")

    for first, second in combinations(source_tiers, 2):
        validate_overlapping_tiers(first, second)

    # The new tiers must cover every source tier, so the upper bound is the
    # largest maxTime. Taking the smallest would leave the tier ending
    # before some of the intervals it is asked to hold.
    start = min(tier.minTime for tier in source_tiers)
    end = max(tier.maxTime for tier in source_tiers)

    merged_tier = IntervalTier(name="Merged", minTime=start, maxTime=end)
    marked_tier = IntervalTier(name="Marked", minTime=start, maxTime=end)

    for tier in source_tiers:
        for interval in tier:
            if not interval.mark:
                continue  # unlabelled intervals are not carried over

            marked_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=tier.name))

            merged_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=interval.mark))

    tg.tiers.insert(1, marked_tier)
    tg.tiers.insert(2, merged_tier)

    with open(output_file, "w") as f:
        tg.write(f)
Beispiel #6
0
    def read(self, f):
        """
        Read the tiers contained in the Praat-formatted TextGrid file
        indicated by string f.

        Sets self.minTime / self.maxTime from the file and appends one
        IntervalTier per interval tier found. Times are rounded to 5 decimal
        places, and intervals whose start is not strictly before their end
        are skipped.

        The source returned by readFile(f) is closed even if parsing fails.

        NOTE(review): point tiers are parsed only to advance through the
        file; the PointTier built for them is never filled or appended to
        self. Confirm whether that is intentional before relying on it.
        """
        source = readFile(f)
        try:
            self.minTime = round(float(source.readline().split()[2]), 5)
            self.maxTime = round(float(source.readline().split()[2]), 5)
            source.readline()  # more header junk
            m = int(source.readline().rstrip().split()[2])  # will be self.n
            source.readline()
            for i in range(m):  # loop over grids
                source.readline()
                if source.readline().rstrip().split()[2] == '"IntervalTier"':
                    inam = source.readline().rstrip().split(' = ')[1].strip(
                        '"')
                    # Tier bounds are parsed but not handed to the tier.
                    imin = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    imax = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    itie = IntervalTier(inam)
                    n = int(source.readline().rstrip().split()[3])
                    for j in range(n):
                        source.readline()  # header junk
                        jmin = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmax = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmrk = self._getMark(source)
                        if jmin < jmax:  # non-null
                            itie.addInterval(Interval(jmin, jmax, jmrk))
                    self.append(itie)
                else:  # pointTier
                    inam = source.readline().rstrip().split(' = ')[1].strip(
                        '"')
                    imin = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    imax = round(float(source.readline().rstrip().split()[2]),
                                 5)
                    itie = PointTier(inam)
                    n = int(source.readline().rstrip().split()[3])
                    for j in range(n):
                        source.readline()  # header junk
                        # Parsed to consume (and validate) the lines; the
                        # values are not stored anywhere.
                        jtim = round(
                            float(source.readline().rstrip().split()[2]), 5)
                        jmrk = source.readline().rstrip().split()[2][1:-1]
        finally:
            # Release the source even when a malformed file raises mid-parse.
            source.close()
			if i == 0:
				for x in ti:
					x.maxTime += cur_dur
					x.minTime += cur_dur
					wordintervals.append(x)

			elif i == 1:
				for x in ti:
					x.maxTime += cur_dur
					x.minTime += cur_dur
					phoneintervals.append(x)
				cur_dur += maxtime

	words = IntervalTier(name='words')
	for i in wordintervals:
		words.addInterval(i)
	phones = IntervalTier(name='phones')
	for i in phoneintervals:
		phones.addInterval(i)
	tg1 = TextGrid(maxTime = cur_dur)
	tg1.append(words)
	tg1.append(phones)
	tg1.write(chapteroutpath1, null = '')

	speaker_tier = IntervalTier(name=speaker)
	for i in range(len(groupedwavfiles)):
		if i == 1:
			speaker_tier.add(0.0, wavfiletimes[0], groupedlabtext[0])
		else:
			speaker_tier.add(wavfiletimes[i-2], wavfiletimes[i-1], groupedlabtext[i-1])
	tg2 = TextGrid(maxTime = duration)
            if i == 0:
                for x in ti:
                    x.maxTime += cur_dur
                    x.minTime += cur_dur
                    wordintervals.append(x)

            elif i == 1:
                for x in ti:
                    x.maxTime += cur_dur
                    x.minTime += cur_dur
                    phoneintervals.append(x)
                cur_dur += maxtime

    words = IntervalTier(name='words')
    for i in wordintervals:
        words.addInterval(i)
    phones = IntervalTier(name='phones')
    for i in phoneintervals:
        phones.addInterval(i)
    tg1 = TextGrid(maxTime=cur_dur)
    tg1.append(words)
    tg1.append(phones)
    tg1.write(chapteroutpath1, null='')

    speaker_tier = IntervalTier(name=speaker)
    for i in range(len(groupedwavfiles)):
        if i == 1:
            speaker_tier.add(0.0, wavfiletimes[0], groupedlabtext[0])
        else:
            speaker_tier.add(wavfiletimes[i - 2], wavfiletimes[i - 1],
                             groupedlabtext[i - 1])
 for s in sorted(speaker_word_tiers.keys()):
     w_tier = IntervalTier('{} - word'.format(s), 0, duration)
     p_tier = IntervalTier('{} - phone'.format(s), 0, duration)
     for w in sorted(speaker_word_tiers[s]):
         if len(w_tier) and w_tier[-1].mark in [
                 'sp', '{OOV}'
         ] and w_tier[-1].maxTime > w.minTime:
             w_tier[-1].maxTime = w.minTime
         if len(w_tier) and w.mark in [
                 'sp', '{OOV}'
         ] and w_tier[-1].maxTime > w.minTime:
             w.minTime = w_tier[-1].maxTime
         #print(w)
         if w.maxTime > duration:
             w.maxTime = duration
         w_tier.addInterval(w)
     for p in sorted(speaker_phone_tiers[s]):
         if len(p_tier) and p_tier[
                 -1].mark == 'sil' and p_tier[-1].maxTime > p.minTime:
             p_tier[-1].maxTime = p.minTime
         if len(p_tier) and p.mark == 'sil' and p_tier[
                 -1].maxTime > p.minTime:
             p.minTime = p_tier[-1].maxTime
         #print(p)
         if p.maxTime > duration:
             p.maxTime = duration
         try:
             p_tier.addInterval(p)
         except ValueError:
             pass
     new_tg.append(w_tier)