def getSyllebleFromPhones(self, phones, fileNm): syllabelList = [] global utterence_not_found if (len(phones) == 0): print "Some issues with file, blank phone ", fileNm return syllabelList for utterence in self.utterences: #print "Trying to match phone:", phones, " with Utterence", utterence syllabelList = [] dict_phone_index = 0 txGrd_phone_index = 0 start_time = phones[0].minTime end_time = 0.0 syllableName = '' matchFail = False while (dict_phone_index < len(utterence)): dictPhone = utterence[dict_phone_index] #if syllable marker found, create a new Interval with start time, end time and combined mark if (dictPhone == '-'): sylInterval = Interval(start_time, end_time, syllableName) syllabelList.append(sylInterval) syllableName = '' dict_phone_index += 1 dictPhone = utterence[dict_phone_index] start_time = txGrdPhone.maxTime if (txGrd_phone_index >= len(phones)): print "Some issues with file ", fileNm, " around the place", phones else: txGrdPhone = phones[txGrd_phone_index] if (txGrdPhone.mark.strip() == dictPhone.strip()): end_time = txGrdPhone.maxTime syllableName += dictPhone else: matchFail = True break dict_phone_index += 1 txGrd_phone_index += 1 if (not matchFail): end_time = phones[len(phones) - 1].maxTime sylInterval = Interval(start_time, end_time, syllableName) syllabelList.append(sylInterval) break #matched utterence if (matchFail): print "Could not match word", self.word, " with phone ", phones, " with Utterence", utterence, fileNm utterence_not_found[self.word] = phones start_time = phones[0].minTime end_time = phones[len(phones) - 1].maxTime syllabelList = [(Interval(start_time, end_time, self.word + '_unMatched'))] return syllabelList
def convert_textgrid(tg_old, sil):
    """Return a new TextGrid holding converted copies of two tiers.

    The ``'WORDS'`` tier of ``tg_old`` becomes the ``'words'`` tier and the
    ``'PHONEMES'`` tier becomes the ``'phones'`` tier.  Interval boundaries
    are copied unchanged; every mark is passed through
    ``convert_mark(mark, sil)``.  In the result the word tier is appended
    before the phone tier.

    :param tg_old: source grid; indexed by tier name and read through its
        ``start`` / ``end`` attributes.
    :param sil: forwarded as the second argument of ``convert_mark``.
    :returns: the newly built TextGrid.
    """
    def _rebuild(new_name, old_name):
        # One converted copy of the tier stored under ``old_name``.
        old_tier = tg_old[old_name]
        relabelled = [
            Interval(item.start, item.end, convert_mark(item.mark, sil))
            for item in old_tier
        ]
        return Tier(new_name, old_tier.start, old_tier.end, 'Interval',
                    relabelled)

    tg_new = TextGrid(tg_old.start, tg_old.end)
    phone_tier = _rebuild('phones', 'PHONEMES')
    word_tier = _rebuild('words', 'WORDS')
    for tier in (word_tier, phone_tier):
        tg_new.append_tier(tier)
    return tg_new
def generatePhoneSyllableRelationship(gridobj, dictMap, fileNm):
    """Collect syllable intervals for every non-empty word of a grid.

    The first tier named ``'words'`` is walked interval by interval; words
    whose mark is the empty string are ignored.  For each remaining word the
    upper-cased mark is looked up in ``dictMap``:

    * found: the entry's ``getSyllebleFromPhones(phones, fileNm)`` result is
      added to the output;
    * not found: the marks of the word's phones are stored in the
      module-level ``word_not_found`` dict under the upper-cased word, and a
      single interval covering the word, marked ``<WORD>_unknown``, is added.

    :param gridobj: grid object; must provide ``getFirst('words')``.
    :param dictMap: mapping from upper-cased word to its syllable-dictionary
        entry.
    :param fileNm: file name, forwarded to ``getSyllebleFromPhones``.
    :returns: flat list of intervals in word order.
    """
    global word_not_found
    collected = []
    for entry in gridobj.getFirst('words').intervals:
        if entry.mark == '':
            continue
        word_phones = getPhonesForWord(entry, gridobj)
        key = entry.mark.upper()
        if key not in dictMap:
            # Word missing from the syllable dictionary: remember its phones
            # and emit one placeholder interval for the whole word.
            word_not_found[key] = [p.mark for p in word_phones]
            collected.append(
                Interval(entry.minTime, entry.maxTime, key + '_unknown'))
            continue
        # Align the TextGrid timing with the dictionary's syllable split.
        collected.extend(
            dictMap[key].getSyllebleFromPhones(word_phones, fileNm))
    return collected
def get_intervals(symbols: List[str], total_duration_s: float, n_digits: int) -> Generator[Interval, None, None]:
    """Yield one interval per symbol, splitting the duration evenly.

    Symbol number ``k`` (zero-based) of ``N`` covers the time from
    ``k / N * total_duration_s`` to ``(k + 1) / N * total_duration_s``; both
    boundaries are rounded to ``n_digits`` decimals before the interval is
    created.  Nothing is yielded for an empty ``symbols`` list.
    """
    count = len(symbols)
    for position, label in enumerate(symbols):
        begin = position / count * total_duration_s
        finish = (position + 1) / count * total_duration_s
        yield Interval(round(begin, n_digits), round(finish, n_digits), label)
def read_tg_from_str(tg_str, round_digits=DEFAULT_TEXTGRID_PRECISION):
    """
    Read the tiers contained in the Praat-formatted string tg_str into a
    TextGrid object. Times are rounded to the specified precision.

    Adapted from TextGrid.read()

    :param tg_str: full text of a TextGrid file.
    :param round_digits: forwarded to ``parse_line`` for every value read.
    :returns: a TextGrid with ``minTime``/``maxTime`` set and one tier
        appended per tier found in the text.  Intervals whose start is not
        strictly before their end are dropped.
    :raises ValueError: if the header does not declare a "TextGrid".
    """
    source = StringIO(tg_str)
    tg = TextGrid()
    file_type, short = parse_header(source)
    if file_type != "TextGrid":
        raise ValueError("The file could not be parsed as a TextGrid as it is "
                         "lacking a proper header.")

    def next_value():
        # Consume exactly one line and return its parsed value.
        return parse_line(source.readline(), short, round_digits)

    tg.minTime = next_value()
    tg.maxTime = next_value()
    source.readline()  # More header junk
    if short:
        tier_count = int(source.readline().strip())  # Will be tg.n
    else:
        tier_count = int(source.readline().strip().split()[2])  # Will be tg.n
        source.readline()  # Long format only: one more line before the tiers

    for _ in range(tier_count):  # Loop over grids
        if not short:
            source.readline()  # Long format only: per-tier header line
        tier_class = next_value()
        tier_name = next_value()
        tier_min = next_value()
        tier_max = next_value()
        if tier_class == "IntervalTier":
            tier = IntervalTier(tier_name, tier_min, tier_max)
            tier.strict = tg.strict
            item_count = int(next_value())
            for _ in range(item_count):
                if not short:
                    source.readline()  # Header junk
                lower = next_value()
                upper = next_value()
                mark = get_mark(source, short)
                if lower < upper:  # Non-null
                    tier.addInterval(Interval(lower, upper, mark))
        else:  # PointTier
            # The tier bounds are read (to stay in step with the text) but,
            # as before, not handed to PointTier.
            tier = PointTier(tier_name)
            item_count = int(next_value())
            for _ in range(item_count):
                # Only the long format puts a header line in front of each
                # point.  Skipping a line unconditionally, as this branch used
                # to, swallowed the point's time value in short-format files.
                if not short:
                    source.readline()  # Header junk
                time = next_value()
                mark = get_mark(source, short)
                tier.addPoint(Point(time, mark))
        tg.append(tier)
    return tg
def merge_and_mark_tiers(tg_file="", output_file="", tiers=()):
    """
    Creates a new TextGrid file with two added IntervalTiers.

    Every interval with a non-empty mark from the tiers named in ``tiers``
    is copied into both new tiers:

    * ``"Marked"`` (inserted at index 1): the copy is labelled with the name
      of the tier it came from;
    * ``"Merged"`` (inserted at index 2): the copy keeps its original mark.

    Before merging, every pair of the named tiers is passed to
    ``validate_overlapping_tiers``.

    :param tg_file: path of the TextGrid to read.
    :param output_file: path the extended TextGrid is written to.
    :param tiers: names of the tiers to merge; must be re-iterable and
        non-empty (``min``/``max`` raise ``ValueError`` on an empty sequence).
    """
    tg = textgrid.TextGrid()
    tg.read(f=tg_file)
    for t1_name, t2_name in combinations(tiers, 2):
        validate_overlapping_tiers(tg.getFirst(t1_name), tg.getFirst(t2_name))

    source_tiers = [tg.getFirst(tier_name) for tier_name in tiers]
    # The new tiers receive intervals from all source tiers, so they must span
    # from the earliest start to the LATEST end.  The upper bound used to be
    # taken with min(), which made the new tiers too short whenever the source
    # tiers did not all end at the same time.
    span_start = min(tier.minTime for tier in source_tiers)
    span_end = max(tier.maxTime for tier in source_tiers)
    merged_tier = IntervalTier(
        name="Merged", minTime=span_start, maxTime=span_end)
    marked_tier = IntervalTier(
        name="Marked", minTime=span_start, maxTime=span_end)

    for tier in source_tiers:
        for interval in tier:
            if not interval.mark:
                continue  # unlabelled intervals are not carried over
            marked_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=tier.name))
            merged_tier.addInterval(
                Interval(minTime=interval.minTime,
                         maxTime=interval.maxTime,
                         mark=interval.mark))

    tg.tiers.insert(1, marked_tier)
    tg.tiers.insert(2, merged_tier)
    with open(output_file, "w") as f:
        tg.write(f)
def get_intervals(sig, fs, winlen=0.05, threshold=0.005, minlen=0.01):
    """Segment a signal into alternating '#' and 'X' intervals.

    ``detect_transients`` is asked for (start, end) pairs; each pair becomes
    an ``'X'`` interval and the stretches before, between and after the pairs
    become ``'#'`` intervals.  All boundaries are divided by ``fs``, and the
    final interval ends at ``len(sig) / fs``.  When no pairs are returned the
    result is a single ``'#'`` interval covering the whole signal.

    Arguments:
    :param sig: the signal; only its length is used here, the rest is left
        to ``detect_transients``
    :param fs: divisor applied to every boundary (presumably the sampling
        rate, with boundaries given in samples -- confirm against
        ``detect_transients``)
    :param winlen: windowlength for the rms in seconds
    :param threshold: energy cutoff
    :param minlen: minimum length of an interval in seconds
    :returns: list of ``Interval`` objects in time order
    """
    # NOTE(review): under Python 2 without ``from __future__ import division``
    # the divisions below truncate when both operands are plain ints.
    found = detect_transients(sig, fs, winlen, threshold, minlen)
    if found.shape[0] == 0:
        return [Interval(0.0, len(sig) / fs, '#')]

    segments = []
    cursor = 0.0
    for onset, offset in found:
        onset_s = onset / fs
        offset_s = offset / fs
        segments.append(Interval(cursor, onset_s, '#'))
        segments.append(Interval(onset_s, offset_s, 'X'))
        cursor = offset_s
    segments.append(Interval(cursor, len(sig) / fs, '#'))
    return segments
except OSError: pass for tgfile in sorted(list(data.rglob(indir, '*.TextGrid'))): basename = path.splitext(path.basename(tgfile))[0] print basename tg = TextGrid.read(tgfile) try: tier = tg['CALLS'] except KeyError as e: print 'KeyError in', basename raise e intervals = [i for i in tg['CALLS'] if not i.mark in ['#', 'X']] start, end = tg.start, tg.end prev_end = start new_intervals = [] for interval in intervals: new_intervals.append(Interval(prev_end, interval.start, '')) new_intervals.append(interval) prev_end = interval.end new_intervals.append(Interval(prev_end, end, '')) new_tier = Tier('', tg['CALLS'].start, tg['CALLS'].end, 'Interval', intervals) new_tg = TextGrid(start, end, [new_tier]) outfile = path.join(outdir, basename + '.TextGrid') with open(outfile, 'w') as fid: new_tg.write(fid, fmt='long')
def clean_interval(old_interval):
    """Return a copy of ``old_interval`` whose mark went through ``clean_mark``.

    ``minTime`` and ``maxTime`` are carried over unchanged; the input
    interval itself is not modified.
    """
    return Interval(
        minTime=old_interval.minTime,
        maxTime=old_interval.maxTime,
        mark=clean_mark(old_interval.mark),
    )