def save_intervalframe_to_textgrid(framedict, filepath, encoding='utf-8'):
    """Write a dict of IntervalFrames to a textgrid file.

    Arguments:
    framedict -- Dictionary of dataframes. The keys become tier names in
                 the textgrid file. A frame with three columns is written
                 as an interval tier (one interval per row, built from the
                 three column values in order); a frame with two columns
                 is written as a point tier (one point per row).
    filepath  -- Path + filename of the file to be written.

    Keyword arguments:
    encoding -- character encoding to save textgrid file

    If framedict is empty, a message is printed and nothing is written.
    Frames with any other number of columns are reported and skipped.
    """
    if not framedict:
        print("invalid data!")
        return

    mytextgrid = tgt.TextGrid()
    for tier_name, frame in framedict.items():
        n_columns = len(frame.columns)
        # Rows are read positionally, so repeated index labels or column
        # names cannot turn a cell lookup into a Series.
        if n_columns == 3:
            mytier = tgt.IntervalTier(name=tier_name)
            for start, end, text in frame.itertuples(index=False, name=None):
                mytier.add_interval(tgt.Interval(start, end, text))
        elif n_columns == 2:
            mytier = tgt.PointTier(name=tier_name)
            for time, text in frame.itertuples(index=False, name=None):
                mytier.add_point(tgt.Point(time, text))
        else:
            # Without this branch the previous tier would be added a second
            # time (or an unbound name would be used for the first frame).
            print("skipping tier %r: expected 2 or 3 columns, got %d"
                  % (tier_name, n_columns))
            continue
        mytextgrid.add_tier(mytier)

    tgt.write_to_file(mytextgrid, filepath, encoding=encoding, format="long")
def save_annotations(self, filename, tiers=('cycles', 'holds'),
                     filetype='textgrid', merge_holds=False):
    """Save annotations to file.

    Parameters
    ----------
    filename : path of the file to write.
    tiers : names of the tiers to save; 'cycles' selects ``self.segments``
        and 'holds' selects ``self.holds``.
    filetype : one of 'textgrid', 'eaf' or 'table'. 'textgrid' is passed
        to ``tgt.write_to_file`` as format 'short'; the other values are
        passed through unchanged.
    merge_holds : if true, no separate holds tier is written and the
        cycles tier is the result of
        ``self.merge_holds(self.segments, self.holds)``.

    Raises
    ------
    ValueError
        If ``filetype`` is not one of the supported values.

    Nothing is written if no tier was selected.
    """
    if filetype not in ['textgrid', 'eaf', 'table']:
        raise ValueError('Unsupported file type: {}'.format(filetype))

    tg = tgt.TextGrid()

    # Merged holds live inside the cycles tier, so they get no tier of
    # their own.
    if 'holds' in tiers and not merge_holds:
        tg.add_tier(self.holds)

    if 'cycles' in tiers:
        if merge_holds:
            tg.add_tier(self.merge_holds(self.segments, self.holds))
        else:
            tg.add_tier(self.segments)

    if len(tg.tiers):
        filetype = 'short' if filetype == 'textgrid' else filetype
        tgt.write_to_file(tg, filename, format=filetype)
def main():
    """Extract the part of a TextGrid between two time offsets and save it.

    The command-line arguments come from ``parse_arguments()``, which must
    provide the keys 'tg_path', 'offset_start', 'offset_end' and 'outpath'.
    A missing start offset defaults to the start time of the TextGrid and
    a missing end offset to its end time. If no output path is given, the
    result is written to '<input file name without extension>_part.TextGrid'
    (a relative path, i.e. resolved against the working directory).

    Raises:
        ValueError: if neither offset_start nor offset_end is specified.
    """
    # Parse the command-line arguments.
    args = parse_arguments()
    tg_path = args['tg_path']
    offset_start = args['offset_start']
    offset_end = args['offset_end']
    outpath = args['outpath']

    # Reject unusable arguments before touching the file system.
    if offset_start is None and offset_end is None:
        raise ValueError(
            'At least one of offset_start and offset_end must be specified.')

    # Read the TextGrid
    tg = tgt.read_textgrid(tg_path)
    if offset_start is None:
        offset_start = tg.start_time
    if offset_end is None:
        offset_end = tg.end_time

    tg_part = tgt.TextGrid()
    for tr in tg:
        annotations_part = tr.get_annotations_between_timepoints(
            offset_start, offset_end)
        # Rebuild each tier with its own class so that the annotations of
        # a point tier are not forced into an interval tier.
        tier_part = tr.__class__(name=tr.name,
                                 start_time=tr.start_time,
                                 end_time=tr.end_time,
                                 objects=annotations_part)
        tg_part.add_tier(tier_part)

    if outpath is None:
        tg_filename = os.path.basename(tg_path)
        outpath = os.path.splitext(tg_filename)[0] + '_part.TextGrid'

    tgt.write_to_file(tg_part, outpath)
def get_textgrid(self):
    """Return the word and phoneme segments as a long-format TextGrid export.

    A 'Word' tier is built from ``self.words.segments`` and a 'Phoneme'
    tier from ``self.phonemes.segments``; every segment contributes one
    interval made of its ``start``, ``end`` and ``text``. The value
    returned is whatever ``tgt.io.export_to_long_textgrid`` produces for
    the resulting grid.
    """
    def build_tier(tier_name, segments):
        # One interval per segment, in the order the segments are stored.
        built = tgt.IntervalTier(name=tier_name)
        for seg in segments:
            built.add_interval(tgt.Interval(seg.start, seg.end, seg.text))
        return built

    grid = tgt.TextGrid()
    grid.add_tier(build_tier('Word', self.words.segments))
    grid.add_tier(build_tier('Phoneme', self.phonemes.segments))
    return tgt.io.export_to_long_textgrid(grid)
def prepare_textgrid(self, df, offset):
    """Build a TextGrid with a single 'Context' tier from a data frame.

    Every row of ``df`` yields one interval. Its boundaries are the row's
    'coq_word_starttime_1' and 'coq_word_endtime_1' values, each reduced
    by ``offset``, and its text is the row's 'coq_word_label_1' value.

    Returns the TextGrid that contains the tier.
    """
    columns = ("coq_word_starttime_1",
               "coq_word_endtime_1",
               "coq_word_label_1")

    textgrid = tgt.TextGrid()
    context_tier = tgt.IntervalTier()
    context_tier.name = "Context"
    textgrid.add_tier(context_tier)

    for row_label in df.index:
        row = df.loc[row_label]
        word_start, word_end, word_label = [row[col] for col in columns]
        word_interval = tgt.Interval(word_start - offset, word_end - offset)
        # The label is assigned after construction, exactly as before.
        word_interval.text = word_label
        context_tier.add_interval(word_interval)

    return textgrid
def init_textgrid(infile, duration, *tiers):
    """Return a TextGrid followed by one fresh interval tier per tier name.

    If ``infile`` is not None the TextGrid is read from that file,
    otherwise an empty one is used. For every name in ``tiers`` a tier of
    that name already present in the TextGrid is deleted, and a new
    interval tier spanning 0 to ``duration`` is created. The new tiers are
    returned alongside the TextGrid; they are not added to it here.

    Returns a tuple ``(textgrid, tier_1, ..., tier_n)``.
    """
    textgrid = tgt.TextGrid()
    if infile is not None:
        logging.info("reading TextGrid %s" % infile)
        textgrid = tgt.io.read_textgrid(infile)

    fresh_tiers = []
    for tier_name in tiers:
        if textgrid.has_tier(tier_name):
            logging.info("overwriting tier %s" % tier_name)
            textgrid.delete_tier(tier_name)
        fresh_tiers.append(
            tgt.IntervalTier(name=tier_name, start_time=0,
                             end_time=duration))

    return (textgrid,) + tuple(fresh_tiers)
def main():
    """Shift all boundaries of a TextGrid file and write the result.

    Command line: a float ``shift`` and the input ``file``, plus the
    options ``--encoding``, ``--format`` and ``--outfile``. Without
    ``--outfile`` the result goes to the input path with '.shifted'
    inserted before the extension. If reading the input raises IOError,
    a message is printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'shift',
        type=float,
        help='offset by which to shift the boundaries (positive or negative)')
    parser.add_argument('file', type=str, help='the textgrid file')
    parser.add_argument('-e', '--encoding',
                        type=str,
                        default='utf-8',
                        help='file encoding (default "utf-8")')
    parser.add_argument('-f', '--format',
                        type=str,
                        default='short',
                        help='the output format (default "short")')
    parser.add_argument(
        '-o', '--outfile',
        type=str,
        help='the output file (defaults to inputfile.shifted.Extension)')
    cli = parser.parse_args()

    # Load the input grid.
    try:
        source_grid = tgt.read_textgrid(filename=cli.file,
                                        encoding=cli.encoding)
    except IOError:
        print('An error occurred reading file {file}'.format(file=cli.file))
        sys.exit(1)

    # Decide where the shifted grid is written.
    if cli.outfile is not None:
        target_path = cli.outfile
    else:
        stem, suffix = os.path.splitext(cli.file)
        target_path = stem + '.shifted' + suffix
    shifted_grid = tgt.TextGrid(filename=target_path)

    # Shift every tier and collect the results.
    for tier in source_grid:
        shifted_grid.add_tier(tgt.util.shift_boundaries(tier, cli.shift, 0))

    tgt.write_to_file(textgrid=shifted_grid,
                      filename=shifted_grid.filename,
                      format=cli.format,
                      encoding=cli.encoding)
def writeTextGrid(outfile, word_alignments):
    """Write word and phone alignments to a long-format TextGrid file.

    Each element of ``word_alignments`` is a sequence whose first item is
    the word and whose remaining items are ``(phone, start, end)``
    triples. A word without phones is skipped. Otherwise the word
    interval runs from the start of its first phone to the end of its
    last phone, and every phone gets its own interval. The file contains
    the 'phone' tier followed by the 'word' tier.
    """
    textgrid = tgt.TextGrid()
    phones = tgt.IntervalTier(name='phone')
    words = tgt.IntervalTier(name='word')

    for entry in word_alignments:
        label, segments = entry[0], entry[1:]
        if len(segments) == 0:
            continue
        first_start = segments[0][1]
        last_end = segments[-1][2]
        words.add_interval(tgt.Interval(first_start, last_end, text=label))
        for phone_label, phone_start, phone_end in segments:
            phones.add_interval(
                tgt.Interval(phone_start, phone_end, text=phone_label))

    for tier in (phones, words):
        textgrid.add_tier(tier)
    tgt.io.write_to_file(textgrid, outfile, format='long')
def format(self, syncmap):
    """Render the fragments of ``syncmap`` as TextGrid text.

    Every fragment becomes one interval of a tier named "Token", running
    from ``float(fragment.begin)`` to ``float(fragment.end)``; a fragment
    whose text is the empty string is labelled "SIL". The grid is
    exported in the long format when ``self.variant == self.DEFAULT`` and
    in the short format otherwise, and the export is passed through
    ``gf.safe_unicode`` before being returned.

    A failing import of ``tgt`` is reported through ``self.log_exc``.
    """
    try:
        import tgt
    except ImportError as exc:
        self.log_exc(u"Python module tgt is not installed", exc, True,
                     ImportError)

    # from https://github.com/hbuschme/TextGridTools/blob/master/tgt/io.py
    grid = tgt.TextGrid()
    token_tier = tgt.IntervalTier(name="Token")
    for frag in syncmap.fragments:
        start = float(frag.begin)
        stop = float(frag.end)
        label = frag.text_fragment.text
        label = u"SIL" if label == u"" else label
        token_tier.add_interval(tgt.Interval(start, stop, text=label))
    grid.add_tier(token_tier)

    if self.variant == self.DEFAULT:
        exporter = tgt.io.export_to_long_textgrid
    else:
        exporter = tgt.io.export_to_short_textgrid
    return gf.safe_unicode(exporter(grid))
def stitch_textgrid(batch_title, sequenced_title, input2b_path, input2_path,
                    output3_path):
    """Concatenate the TextGrids of several recordings into one TextGrid.

    For every title in ``sequenced_title`` the duration of
    ``<input2b_path>/<title>.wav`` is measured and the word and phone
    annotations of ``<input2_path>/<title>.TextGrid`` are shifted by the
    total duration of the preceding recordings. The word and phone tiers
    are the first tiers whose names contain 'words' and 'phones'.
    'sil' and 'sp' phone intervals are dropped, except for the very last
    phone interval of the last recording; the gaps that remain in the
    combined phone tier are then filled with intervals labelled 'sil'.
    The result is written in short format to
    ``<output3_path>/<batch_title>.TextGrid``; its tiers carry the names
    found in the last recording's TextGrid.

    Raises:
        ValueError: if ``sequenced_title`` is empty.
    """
    if len(sequenced_title) == 0:
        raise ValueError('sequenced_title must contain at least one title')

    new_tg = tgt.TextGrid()
    new_phone_tier = tgt.IntervalTier()
    final_phone_tier = tgt.IntervalTier()
    new_word_tier = tgt.IntervalTier()
    last_title_index = len(sequenced_title) - 1
    last_dur = 0.0

    for i, title in enumerate(sequenced_title):
        wave_file = wave.open(os.path.join(input2b_path, title + '.wav'), 'rb')
        try:
            # float() keeps the duration fractional under Python 2, where
            # dividing two ints would truncate it.
            dur = wave_file.getnframes() / float(wave_file.getframerate())
        finally:
            wave_file.close()
        f0_start_time = 0.0
        f0_end_time = dur
        # Latest end time allowed for this recording on the combined axis.
        segment_end = last_dur + f0_end_time

        tg = tgt.read_textgrid(os.path.join(input2_path, title + '.TextGrid'))

        # Load name of all tiers
        tier_names = tg.get_tier_names()
        words_tier_name = [name for name in tier_names if 'words' in name][0]
        words_tier = tg.get_tier_by_name(words_tier_name)
        phones_tier_name = [name for name in tier_names
                            if 'phones' in name][0]
        phones_tier = tg.get_tier_by_name(phones_tier_name)

        word_annotations = words_tier.get_annotations_between_timepoints(
            f0_start_time, f0_end_time)
        phone_annotations = phones_tier.get_annotations_between_timepoints(
            f0_start_time, f0_end_time)

        word_intervals = []
        for interval in word_annotations:
            interval.end_time = interval.end_time + last_dur
            interval.start_time = interval.start_time + last_dur
            word_intervals.append(interval)
        # Clip the last word to the audio; a recording may have no words.
        if word_intervals and word_intervals[-1].end_time > segment_end:
            word_intervals[-1].end_time = segment_end

        phone_intervals = []
        last_phone_index = len(phone_annotations) - 1
        for j, interval in enumerate(phone_annotations):
            interval.end_time = interval.end_time + last_dur
            interval.start_time = interval.start_time + last_dur
            if interval.text != 'sil' and interval.text != 'sp':
                phone_intervals.append(interval)
            elif i == last_title_index and j == last_phone_index:
                # Keep the trailing silence of the final recording.
                phone_intervals.append(interval)
        # A recording consisting only of silence leaves this list empty.
        if phone_intervals and phone_intervals[-1].end_time > segment_end:
            phone_intervals[-1].end_time = segment_end

        new_word_tier.add_annotations(word_intervals)
        new_phone_tier.add_annotations(phone_intervals)
        last_dur += dur

    phones_tier_copy = new_phone_tier.get_copy_with_gaps_filled(
        start_time=None, end_time=None, empty_string='')

    # Label every filled gap as 'sil'. The silences are collected
    # separately from the other phones and added after them.
    sil_intervals = []
    phone_intervals = []
    for interval in phones_tier_copy:
        if interval.text == '':
            interval.text = 'sil'
            sil_intervals.append(interval)
        else:
            phone_intervals.append(interval)
    final_phone_tier.add_annotations(phone_intervals)
    final_phone_tier.add_annotations(sil_intervals)

    final_phone_tier.name = phones_tier_name
    new_word_tier.name = words_tier_name
    new_tg.add_tier(new_word_tier)
    new_tg.add_tier(final_phone_tier)
    tgt.write_to_file(new_tg,
                      os.path.join(output3_path, batch_title + '.TextGrid'),
                      format='short')
def prepare_textgrids(self, order=None, one_grid_per_match=False,
                      remember_time=False):
    """
    Create one empty TextGrid per grid key and add the tiers to it.

    The method resets ``self.feature_timing`` and fills it with a mapping
    from a feature name to a ``(start_label, end_label)`` tuple that
    names the features holding the timing for it. Only tiers are created
    here; no annotations are added.

    Parameters
    ----------
    order: list
        A list of columns that specifies the order of the text grid
        tiers. If it is None or empty, ``options.cfg.selected_features``
        is used instead.
    one_grid_per_match: bool
        If True, a grid key consists of the values of the columns named
        by ``self.resource.file_name`` and ``self.resource.corpus_id``;
        otherwise only of the ``self.resource.file_name`` value. It is
        forced to True if ``self.df`` has no column
        "coquery_invisible_origin_id".
    remember_time: bool
        If True, a point tier named "Original timing" is added to every
        grid.

    Returns
    -------
    grids: dict
        A dictionary that maps each grid key (a tuple) to a
        ``tgt.TextGrid``.
    """
    self.feature_timing = dict()
    grids = {}

    if "coquery_invisible_origin_id" not in self.df.columns:
        one_grid_per_match = True

    if one_grid_per_match:
        key_columns = [self.resource.file_name, self.resource.corpus_id]
    else:
        key_columns = [self.resource.file_name]

    # NOTE(review): the index labels are used as positions for .iloc,
    # which assumes that self.file_data has a default 0..n-1 index --
    # confirm.
    for i in self.file_data.index:
        grid_id = tuple(self.file_data.iloc[i][key_columns])
        grids[grid_id] = tgt.TextGrid()

    if ("corpus_starttime" in options.cfg.selected_features
            and "corpus_endtime" in options.cfg.selected_features):
        self.feature_timing["corpus_id"] = ("corpus_starttime",
                                            "corpus_endtime")

    if order:
        features = []
        for column in order:
            if column.startswith("coq_"):
                # keep the text after the last "coq_" and drop the last
                # underscore-separated part, e.g. "coq_word_label_1"
                # becomes "word_label"
                name = column.rpartition("coq_")[-1].rpartition("_")[0]
            else:
                name = column
            if (name not in features
                    and not name.startswith("coquery_invisible")):
                features.append(name)
    else:
        features = options.cfg.selected_features

    # names of the tiers that have already been added to the grids
    tiers = set([])
    for rc_feature in [
            x for x in features
            if (not x.startswith(("func_", "coquery_", "db_")))
    ]:
        hashed, tab, feature = (
            self.resource.split_resource_feature(rc_feature))

        if tab == "segment":
            # the segment table is hard-wired:
            start_label = "{}_starttime".format(tab)
            end_label = "{}_endtime".format(tab)
            self.feature_timing[rc_feature] = (start_label, end_label)
        else:
            # determine the table that contains timing information by
            # following the table path:
            self.resource.lexicon.joined_tables = ["corpus"]
            self.resource.lexicon.table_list = ["corpus"]
            self.resource.lexicon.add_table_path("corpus_id",
                                                 "{}_id".format(tab))

            # NOTE(review): if joined_tables were empty at this point,
            # start_label and end_label would keep the values of the
            # previous feature (or be unbound for the first one) --
            # confirm that add_table_path() never leaves it empty.
            for current_tab in self.resource.lexicon.joined_tables:
                # check if timing information has been selected for the
                # current table from the table path:
                start_label = "{}_starttime".format(current_tab)
                end_label = "{}_endtime".format(current_tab)

                # if so, set the timing entry for the current feature
                # to these timings:
                if (start_label in options.cfg.selected_features
                        and end_label in options.cfg.selected_features
                        ) and not (rc_feature.endswith(
                            ("endtime", "starttime"))):
                    self.feature_timing[rc_feature] = (start_label,
                                                       end_label)

        rc_feat = "{}_{}".format(tab, feature)
        if hashed is not None:
            link, res = get_by_hash(hashed)
            # NOTE(review): the format string has three replacement
            # fields but only two arguments, so this line raises
            # IndexError whenever it is reached. The intended third
            # value is not evident from this method -- confirm and fix.
            tier_name = "{}.{}_{}".format(res.db_name, link.rc_to)
        else:
            tier_name = rc_feat
        # add a tier for the feature to every grid ...
        if (rc_feat not in [start_label, end_label]
                and tier_name not in tiers):
            # ... but only if it is not containing timing information
            for x in grids:
                grids[x].add_tier(tgt.IntervalTier(name=tier_name))
            tiers.add(tier_name)

    for col in [
            x for x in features if x.startswith(("func", "coquery", "db"))
    ]:
        # FIXME:
        # db and func columns are never treated as lexicalized columns.
        # The fix for this is probably not quite trivial.
        tier_name = self.session.translate_header(col)
        for x in grids:
            grids[x].add_tier(tgt.IntervalTier(name=tier_name))
        tiers.add(tier_name)

    # if there is no tier in the grids, but the corpus times were
    # selected, add a tier for the corpus IDs in all grids:
    # NOTE(review): only the first grid is inspected, and the lookup
    # raises IndexError if no grid was created; self._artificial_corpus_id
    # is read before it is assigned here, so it is presumably initialised
    # elsewhere -- confirm.
    if (not (grids[list(grids.keys())[0]].tiers)
            and ("corpus_starttime" in options.cfg.selected_features
                 and "corpus_endtime" in options.cfg.selected_features)
            and not self._artificial_corpus_id):
        self._artificial_corpus_id = True
        for f in grids:
            grids[f].add_tier(tgt.IntervalTier(name="corpus_id"))

    if remember_time:
        for f in grids:
            grids[f].add_tier(tgt.PointTier(name="Original timing"))

    return grids
# Usage: python segment_laughter.py <input_audio_file> <stored_model_path> <output_folder> <save_to_textgrid>
if __name__ == '__main__':
    # Parse the inputs once and reuse the result; a second call would
    # repeat the parsing and any messages it prints.
    inputs = parse_inputs()
    if inputs:
        (input_path, model_path, output_path, threshold, min_length,
         save_to_textgrid) = inputs
        min_length = seconds_to_frames(min_length)

        laughs = laugh_segmenter.segment_laughs(input_path, model_path,
                                                output_path, threshold,
                                                min_length, save_to_textgrid)
        print("found %d laughs." % (len(laughs)))

        if not save_to_textgrid:
            for laugh in laughs:
                print(laugh)
        else:
            # One 'laugh' interval per detected segment, on a single tier.
            tg = tgt.TextGrid()
            laughs_tier = tgt.IntervalTier(
                name='laughter',
                objects=[
                    tgt.Interval(laugh['start'], laugh['end'], 'laugh')
                    for laugh in laughs
                ])
            tg.add_tier(laughs_tier)

            # The output file is named after the input audio file.
            fname = os.path.splitext(os.path.basename(input_path))[0]
            textgrid_path = os.path.join(output_path,
                                         fname + '_laughter.TextGrid')
            tgt.write_to_file(tg, textgrid_path)
            print('Saved laughter segments in {}'.format(textgrid_path))
import tgt

# Minimal example: a grid with one interval tier ("mot", spanning 0 to 5)
# that holds a single interval from 2 to 3 labelled "word", written to a
# fixed location.
OUTPUT_PATH = "/home/leferrae/Desktop/These/test.textgrid"

grid = tgt.TextGrid(filename="test")
tier = tgt.IntervalTier(name="mot", start_time=0, end_time=5)
label = tgt.core.Interval(start_time=2, end_time=3, text="word")
tier.add_annotation(label)
grid.add_tier(tier)

tgt.write_to_file(grid, OUTPUT_PATH)