Beispiel #1
0
def save_intervalframe_to_textgrid(framedict, filepath, encoding='utf-8'):
    """Write a dict of IntervalFrames to a TextGrid file.

       Arguments:
       framedict    --  Dictionary of dataframes. The keys become tier
                        names in the textgrid file. A frame with three
                        columns (start, end, label) becomes an interval
                        tier; a frame with two columns (time, label)
                        becomes a point tier.
       filepath     --  Path + filename of the file to be written.

       Keyword arguments:
       encoding: character encoding to save textgrid file

       Raises:
       ValueError   --  if a frame has neither two nor three columns.

       Nothing is written (and a message is printed) if framedict is empty.

    """

    if not framedict:
        print("invalid data!")
        return

    # Validate every frame before building anything, so that an unsupported
    # frame is reported clearly instead of silently re-adding the previous
    # tier (or failing with an unbound local variable).
    for tier_name, frame in framedict.items():
        n_columns = len(frame.columns)
        if n_columns not in (2, 3):
            raise ValueError(
                "tier {!r}: expected 2 or 3 columns, got {}".format(
                    tier_name, n_columns))

    mytextgrid = tgt.TextGrid()
    for tier_name, frame in framedict.items():
        # Positional row iteration: does not depend on unique index labels
        # or unique column names.
        rows = frame.itertuples(index=False, name=None)
        if len(frame.columns) == 3:
            mytier = tgt.IntervalTier(name=tier_name)
            for start, end, text in rows:
                mytier.add_interval(tgt.Interval(start, end, text))
        else:
            mytier = tgt.PointTier(name=tier_name)
            for time, text in rows:
                mytier.add_point(tgt.Point(time, text))
        mytextgrid.add_tier(mytier)
    tgt.write_to_file(mytextgrid, filepath, encoding=encoding, format="long")
Beispiel #2
0
    def save_annotations(self, filename, tiers=('cycles', 'holds'),
                         filetype='textgrid', merge_holds=False):
        """Save annotations to file.

        Arguments:
        filename    -- path of the file to be written.
        tiers       -- names of the tiers to save; 'cycles' and 'holds'
                       are recognised.
        filetype    -- one of 'textgrid', 'eaf' or 'table'. 'textgrid' is
                       written in the short TextGrid format.
        merge_holds -- if True, the holds are not saved as a tier of their
                       own; instead the 'cycles' tier is the result of
                       self.merge_holds(self.segments, self.holds).

        Raises ValueError for an unsupported filetype. No file is written
        if no tier was selected.
        """

        if filetype not in ('textgrid', 'eaf', 'table'):
            raise ValueError('Unsupported file type: {}'.format(filetype))

        tg = tgt.TextGrid()

        # The holds only get their own tier when they are not merged into
        # the cycles tier.
        if 'holds' in tiers and not merge_holds:
            tg.add_tier(self.holds)

        if 'cycles' in tiers:
            if merge_holds:
                tg.add_tier(self.merge_holds(self.segments, self.holds))
            else:
                tg.add_tier(self.segments)

        if tg.tiers:
            # 'textgrid' is this method's name for tgt's 'short' format;
            # the other file types are passed through unchanged.
            output_format = 'short' if filetype == 'textgrid' else filetype
            tgt.write_to_file(tg, filename, format=output_format)
def main():
    """Extract the part of a TextGrid between two time points.

    Reads the command-line arguments via parse_arguments(), keeps from each
    tier only the annotations between offset_start and offset_end, and
    writes the result to outpath. If outpath is not given, the output is
    written to '<input name>_part.TextGrid' in the current directory.

    Raises ValueError if neither offset_start nor offset_end is specified.
    """

    # Parse the command-line arguments.
    args = parse_arguments()
    tg_path = args['tg_path']
    offset_start = args['offset_start']
    offset_end = args['offset_end']
    outpath = args['outpath']

    # Fail early, before touching the file system.
    if offset_start is None and offset_end is None:
        raise ValueError(
            'At least one of offset_start and offset_end must be specified.')

    # Read the TextGrid
    tg = tgt.read_textgrid(tg_path)

    # A missing offset defaults to the corresponding end of the TextGrid.
    if offset_start is None:
        offset_start = tg.start_time
    if offset_end is None:
        offset_end = tg.end_time

    tg_part = tgt.TextGrid()
    for tr in tg:
        annotations = tr.get_annotations_between_timepoints(
            offset_start, offset_end)
        # Keep the tier type: point annotations cannot be stored in an
        # interval tier.
        if isinstance(tr, tgt.PointTier):
            tier_class = tgt.PointTier
        else:
            tier_class = tgt.IntervalTier
        tg_part.add_tier(tier_class(name=tr.name,
                                    start_time=tr.start_time,
                                    end_time=tr.end_time,
                                    objects=annotations))

    if outpath is None:
        tg_filename = os.path.basename(tg_path)
        outpath = os.path.splitext(tg_filename)[0] + '_part.TextGrid'

    tgt.write_to_file(tg_part, outpath)
Beispiel #4
0
    def get_textgrid(self):
        """Return the word and phoneme segments as a long-format TextGrid.

        The result has two interval tiers, 'Word' (from self.words.segments)
        followed by 'Phoneme' (from self.phonemes.segments), and is returned
        as the output of tgt.io.export_to_long_textgrid.
        """
        grid = tgt.TextGrid()
        # (tier name, attribute holding the segments), in output order.
        for tier_name, attribute in (('Word', 'words'),
                                     ('Phoneme', 'phonemes')):
            tier = tgt.IntervalTier(name=tier_name)
            for segment in getattr(self, attribute).segments:
                tier.add_interval(
                    tgt.Interval(segment.start, segment.end, segment.text))
            grid.add_tier(tier)

        return tgt.io.export_to_long_textgrid(grid)
Beispiel #5
0
 def prepare_textgrid(self, df, offset):
     """Build a TextGrid with a single "Context" tier from a data frame.

     Every row of df contributes one interval. Its boundaries are the
     values of "coq_word_starttime_1" and "coq_word_endtime_1", each
     reduced by offset, and its label is the value of "coq_word_label_1".
     """
     grid = tgt.TextGrid()
     context_tier = tgt.IntervalTier()
     context_tier.name = "Context"
     grid.add_tier(context_tier)
     for label in df.index:
         row = df.loc[label]
         word_start = row["coq_word_starttime_1"]
         word_end = row["coq_word_endtime_1"]
         word_label = row["coq_word_label_1"]
         word_interval = tgt.Interval(word_start - offset, word_end - offset)
         word_interval.text = word_label
         context_tier.add_interval(word_interval)
     return grid
Beispiel #6
0
def init_textgrid(infile, duration, *tiers):
    """Return a TextGrid together with one fresh interval tier per name.

    The TextGrid is read from infile, or is a new empty one if infile is
    None. For every name in tiers, an existing tier of that name is deleted
    from the TextGrid and a new interval tier spanning 0..duration is
    created. The new tiers are returned alongside the TextGrid; they are
    not added to it here.

    Returns a tuple (textgrid, tier_1, ..., tier_n).
    """
    if infile is None:
        grid = tgt.TextGrid()
    else:
        logging.info("reading TextGrid %s" % infile)
        grid = tgt.io.read_textgrid(infile)

    fresh_tiers = []
    for tier_name in tiers:
        if grid.has_tier(tier_name):
            logging.info("overwriting tier %s" % tier_name)
            grid.delete_tier(tier_name)
        fresh_tiers.append(
            tgt.IntervalTier(name=tier_name, start_time=0, end_time=duration))
    return (grid,) + tuple(fresh_tiers)
Beispiel #7
0
def main():
    """Shift the boundaries of every tier of a TextGrid file.

    Command-line arguments: the shift amount, the input file, and the
    optional --encoding, --format and --outfile. Without --outfile the
    result is written next to the input as '<name>.shifted<extension>'.
    Exits with status 1 if the input file cannot be read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'shift',
        type=float,
        help='offset by which to shift the boundaries (positive or negative)')
    parser.add_argument('file', type=str, help='the textgrid file')
    parser.add_argument(
        '-e', '--encoding',
        type=str,
        default='utf-8',
        help='file encoding (default "utf-8")')
    parser.add_argument(
        '-f', '--format',
        type=str,
        default='short',
        help='the output format (default "short")')
    parser.add_argument(
        '-o', '--outfile',
        type=str,
        help='the output file (defaults to inputfile.shifted.Extension)')
    options = parser.parse_args()

    # Load the input TextGrid.
    try:
        source_grid = tgt.read_textgrid(filename=options.file,
                                        encoding=options.encoding)
    except IOError:
        print('An error occurred reading file {file}'.format(
            file=options.file))
        sys.exit(1)

    # Decide where the shifted TextGrid goes.
    target_path = options.outfile
    if target_path is None:
        stem, suffix = os.path.splitext(options.file)
        target_path = stem + '.shifted' + suffix
    shifted_grid = tgt.TextGrid(filename=target_path)

    # Shift each tier and collect the results.
    for tier in source_grid:
        shifted_grid.add_tier(
            tgt.util.shift_boundaries(tier, options.shift, 0))

    # Save the shifted TextGrid.
    tgt.write_to_file(textgrid=shifted_grid,
                      filename=shifted_grid.filename,
                      format=options.format,
                      encoding=options.encoding)
Beispiel #8
0
def writeTextGrid(outfile, word_alignments):
    """Write word and phone alignments to a long-format TextGrid file.

    Each item of word_alignments is a sequence whose first element is the
    word and whose remaining elements are (phone, start, end) triples.
    A word spans from the start of its first phone to the end of its last
    phone; words without phones are skipped. The file contains the 'phone'
    tier followed by the 'word' tier.
    """
    words = tgt.IntervalTier(name='word')
    phones = tgt.IntervalTier(name='phone')

    for entry in word_alignments:
        label, segments = entry[0], entry[1:]
        if len(segments) == 0:
            continue

        word_start = segments[0][1]
        word_end = segments[-1][2]
        words.add_interval(tgt.Interval(word_start, word_end, text=label))

        for phone_label, phone_start, phone_end in segments:
            phones.add_interval(
                tgt.Interval(phone_start, phone_end, text=phone_label))

    grid = tgt.TextGrid()
    grid.add_tier(phones)
    grid.add_tier(words)

    tgt.io.write_to_file(grid, outfile, format='long')
Beispiel #9
0
 def format(self, syncmap):
     """Render the fragments of syncmap as TextGrid text.

     Each fragment becomes one interval of a "Token" tier; fragments with
     empty text are labelled "SIL". The long TextGrid format is used when
     self.variant equals self.DEFAULT, the short format otherwise.
     """
     try:
         import tgt
     except ImportError as exc:
         self.log_exc(u"Python module tgt is not installed", exc, True, ImportError)
     # from https://github.com/hbuschme/TextGridTools/blob/master/tgt/io.py
     token_tier = tgt.IntervalTier(name="Token")
     for fragment in syncmap.fragments:
         start, stop = float(fragment.begin), float(fragment.end)
         raw_text = fragment.text_fragment.text
         label = u"SIL" if raw_text == u"" else raw_text
         token_tier.add_interval(tgt.Interval(start, stop, text=label))

     grid = tgt.TextGrid()
     grid.add_tier(token_tier)

     if self.variant == self.DEFAULT:
         exporter = tgt.io.export_to_long_textgrid
     else:
         exporter = tgt.io.export_to_short_textgrid
     return gf.safe_unicode(exporter(grid))
Beispiel #10
0
def stitch_textgrid(batch_title, sequenced_title, input2b_path, input2_path,
                    output3_path):
    """Concatenate the TextGrids of several recordings into a single one.

    For every title in sequenced_title, '<title>.wav' in input2b_path gives
    the duration of the recording and '<title>.TextGrid' in input2_path
    gives its annotations. The word and phone annotations of each recording
    are shifted by the summed duration of the preceding recordings. 'sil'
    and 'sp' phone intervals are dropped (except the very last phone of the
    last recording) and the resulting gaps are refilled with 'sil'.

    The result is written in short format to
    '<output3_path>/<batch_title>.TextGrid'. The output tiers are named
    after the words/phones tiers of the last recording.

    Raises ValueError if sequenced_title is empty.
    """
    if len(sequenced_title) == 0:
        raise ValueError('sequenced_title must contain at least one title')

    new_tg = tgt.TextGrid()

    new_phone_tier = tgt.IntervalTier()
    final_phone_tier = tgt.IntervalTier()
    new_word_tier = tgt.IntervalTier()

    last_title_index = len(sequenced_title) - 1
    last_dur = 0.0

    for i, title in enumerate(sequenced_title):

        # Only the duration is needed from the audio; close the file even
        # if reading the header fails.
        wave_file = wave.open(os.path.join(input2b_path, title + '.wav'), 'rb')
        try:
            # float() keeps this a true division on Python 2 as well.
            dur = wave_file.getnframes() / float(wave_file.getframerate())
        finally:
            wave_file.close()

        tg = tgt.read_textgrid(os.path.join(input2_path, title + '.TextGrid'))

        # Load name of all tiers
        tier_names = tg.get_tier_names()

        words_tier_name = [name for name in tier_names if 'words' in name][0]
        words_tier = tg.get_tier_by_name(words_tier_name)

        phones_tier_name = [name for name in tier_names if 'phones' in name][0]
        phones_tier = tg.get_tier_by_name(phones_tier_name)

        word_annotations = words_tier.get_annotations_between_timepoints(
            0.0, dur)
        phone_annotations = phones_tier.get_annotations_between_timepoints(
            0.0, dur)

        # End of this recording on the stitched time line.
        segment_end = last_dur + dur

        word_intervals = []
        for interval in word_annotations:
            interval.end_time = interval.end_time + last_dur
            interval.start_time = interval.start_time + last_dur
            word_intervals.append(interval)
        # Clip the last word to the end of the audio (nothing to clip if
        # the recording has no word annotations).
        if word_intervals and word_intervals[-1].end_time > segment_end:
            word_intervals[-1].end_time = segment_end

        last_phone_index = len(phone_annotations) - 1
        phone_intervals = []
        for j, interval in enumerate(phone_annotations):
            interval.end_time = interval.end_time + last_dur
            interval.start_time = interval.start_time + last_dur

            if interval.text != 'sil' and interval.text != 'sp':
                phone_intervals.append(interval)

            # Keep a trailing silence only at the very end of the batch.
            elif i == last_title_index and j == last_phone_index:
                phone_intervals.append(interval)
        if phone_intervals and phone_intervals[-1].end_time > segment_end:
            phone_intervals[-1].end_time = segment_end

        new_word_tier.add_annotations(word_intervals)
        new_phone_tier.add_annotations(phone_intervals)

        last_dur += dur

    phones_tier_copy = new_phone_tier.get_copy_with_gaps_filled(
        start_time=None, end_time=None, empty_string='')

    # Replace all sil and sp intervals with <sil> tag
    # store these intervals to a list so that we can add them to the other
    # tiers
    sil_intervals = []
    phone_intervals = []
    for interval in phones_tier_copy:
        if interval.text == '':
            interval.text = 'sil'
            sil_intervals.append(interval)
        else:
            phone_intervals.append(interval)

    final_phone_tier.add_annotations(phone_intervals)
    final_phone_tier.add_annotations(sil_intervals)

    final_phone_tier.name = phones_tier_name
    new_word_tier.name = words_tier_name

    new_tg.add_tier(new_word_tier)
    new_tg.add_tier(final_phone_tier)

    tgt.write_to_file(new_tg,
                      os.path.join(output3_path, batch_title + '.TextGrid'),
                      format='short')
Beispiel #11
0
    def prepare_textgrids(self,
                          order=None,
                          one_grid_per_match=False,
                          remember_time=False):
        """
        Create the empty text grids and add a tier for each feature.

        One ``tgt.TextGrid`` is created for every row of ``self.file_data``.
        ``self.feature_timing`` is reset and refilled with the
        ``(start_label, end_label)`` pair that provides the timing of each
        feature, and tiers are added to every grid.

        Parameters
        ----------
        order: list
            A list of columns that specifies the order of the text grid tiers.
            If it is empty or None, ``options.cfg.selected_features`` is
            used instead.
        one_grid_per_match: bool
            If True, the grids are keyed by the values of the columns
            ``self.resource.file_name`` and ``self.resource.corpus_id``;
            otherwise by ``self.resource.file_name`` only. Forced to True
            if the data frame has no "coquery_invisible_origin_id" column.
        remember_time: bool
            If True, a point tier named "Original timing" is added to
            every grid.

        Returns
        -------
        grids: dict
            Maps each key tuple to its ``tgt.TextGrid``.
        """
        self.feature_timing = dict()
        grids = {}

        # Without origin IDs in the data frame, always use one grid per
        # match.
        if "coquery_invisible_origin_id" not in self.df.columns:
            one_grid_per_match = True

        if one_grid_per_match:
            key_columns = [self.resource.file_name, self.resource.corpus_id]
        else:
            key_columns = [self.resource.file_name]

        # One empty grid per row of file_data; rows with identical key
        # values share (and re-create) the same entry.
        for i in self.file_data.index:
            grid_id = tuple(self.file_data.iloc[i][key_columns])
            grids[grid_id] = tgt.TextGrid()

        if ("corpus_starttime" in options.cfg.selected_features
                and "corpus_endtime" in options.cfg.selected_features):
            self.feature_timing["corpus_id"] = ("corpus_starttime",
                                                "corpus_endtime")

        if order:
            # Derive the feature names from the ordered columns: for
            # "coq_" columns, keep what follows the last "coq_" and drop
            # the part after the final underscore. Duplicates and
            # "coquery_invisible" names are skipped.
            features = []
            for column in order:
                if column.startswith("coq_"):
                    name = column.rpartition("coq_")[-1].rpartition("_")[0]
                else:
                    name = column
                if (name not in features
                        and not name.startswith("coquery_invisible")):

                    features.append(name)
        else:
            features = options.cfg.selected_features

        # Names of the tiers added so far, used to avoid duplicate tiers.
        tiers = set([])
        for rc_feature in [
                x for x in features
                if (not x.startswith(("func_", "coquery_", "db_")))
        ]:
            hashed, tab, feature = (
                self.resource.split_resource_feature(rc_feature))

            if tab == "segment":
                # the segment table is hard-wired:
                start_label = "{}_starttime".format(tab)
                end_label = "{}_endtime".format(tab)
                self.feature_timing[rc_feature] = (start_label, end_label)
            else:
                # determine the table that contains timing information by
                # following the table path:
                self.resource.lexicon.joined_tables = ["corpus"]
                self.resource.lexicon.table_list = ["corpus"]
                self.resource.lexicon.add_table_path("corpus_id",
                                                     "{}_id".format(tab))
                for current_tab in self.resource.lexicon.joined_tables:
                    # check if timing information has been selected for the
                    # current table from the table path:
                    start_label = "{}_starttime".format(current_tab)
                    end_label = "{}_endtime".format(current_tab)

                    # if so, set the timing entry for the current feature
                    # to these timings:
                    if (start_label in options.cfg.selected_features
                            and end_label in options.cfg.selected_features
                        ) and not (rc_feature.endswith(
                            ("endtime", "starttime"))):
                        self.feature_timing[rc_feature] = (start_label,
                                                           end_label)

            rc_feat = "{}_{}".format(tab, feature)
            if hashed is not None:
                link, res = get_by_hash(hashed)
                # NOTE(review): the format string below has three
                # placeholders but only two arguments, so str.format()
                # raises IndexError whenever this branch runs. The
                # intended arguments cannot be determined from this
                # method -- confirm and fix.
                tier_name = "{}.{}_{}".format(res.db_name, link.rc_to)
            else:
                tier_name = rc_feat
            # NOTE(review): in the non-segment case, start_label and
            # end_label hold the values from the last iteration over
            # joined_tables above -- confirm that this is intended.
            if (rc_feat not in [start_label, end_label]
                    and tier_name not in tiers):

                # ... but only if it is not containing timing information
                for x in grids:
                    grids[x].add_tier(tgt.IntervalTier(name=tier_name))
                    tiers.add(tier_name)

        for col in [
                x for x in features if x.startswith(("func", "coquery", "db"))
        ]:
            # FIXME:
            # db and func columns are never treated as lexicalized columns.
            # The fix for this is probably not quite trivial.
            tier_name = self.session.translate_header(col)
            for x in grids:
                grids[x].add_tier(tgt.IntervalTier(name=tier_name))
                tiers.add(tier_name)

        # if there is no tier in the grids, but the corpus times were
        # selected, add a tier for the corpus IDs in all grids:
        # NOTE(review): only the first grid is inspected, and the lookup
        # raises IndexError if grids is empty -- confirm that file_data
        # always has at least one row.
        if (not (grids[list(grids.keys())[0]].tiers)
                and ("corpus_starttime" in options.cfg.selected_features
                     and "corpus_endtime" in options.cfg.selected_features)
                and not self._artificial_corpus_id):
            self._artificial_corpus_id = True
            for f in grids:
                grids[f].add_tier(tgt.IntervalTier(name="corpus_id"))
        if remember_time:
            for f in grids:
                grids[f].add_tier(tgt.PointTier(name="Original timing"))

        return grids
# Usage: python segment_laughter.py <input_audio_file> <stored_model_path> <output_folder> <save_to_textgrid>

if __name__ == '__main__':
    # Parse the inputs once and reuse the result; the script only proceeds
    # if parse_inputs() returns a truthy value.
    inputs = parse_inputs()
    if inputs:
        (input_path, model_path, output_path, threshold, min_length,
         save_to_textgrid) = inputs
        min_length = seconds_to_frames(min_length)

        laughs = laugh_segmenter.segment_laughs(input_path, model_path,
                                                output_path, threshold,
                                                min_length, save_to_textgrid)
        print("found %d laughs." % (len(laughs)))

        if not save_to_textgrid:
            for laugh in laughs:
                print(laugh)
        else:
            # Store every detected laugh as an interval of a single
            # 'laughter' tier.
            tg = tgt.TextGrid()
            laughs_tier = tgt.IntervalTier(
                name='laughter',
                objects=[
                    tgt.Interval(laugh['start'], laugh['end'], 'laugh')
                    for laugh in laughs
                ])
            tg.add_tier(laughs_tier)

            # The TextGrid is named after the input audio file.
            fname = os.path.splitext(os.path.basename(input_path))[0]
            textgrid_path = os.path.join(output_path,
                                         fname + '_laughter.TextGrid')
            tgt.write_to_file(tg, textgrid_path)

            print('Saved laughter segments in {}'.format(textgrid_path))
Beispiel #13
0
import tgt

# Minimal example: one interval tier with a single labelled interval,
# written to a TextGrid file.

# The annotation: an interval from 2 to 3 labelled "word".
label = tgt.core.Interval(2, 3, "word")

# The tier "mot" spans 0 to 5 and holds the annotation.
tier = tgt.IntervalTier(name="mot", start_time=0, end_time=5)
tier.add_annotation(label)

# Put the tier into a grid and save it.
grid = tgt.TextGrid(filename="test")
grid.add_tier(tier)
tgt.write_to_file(grid, "/home/leferrae/Desktop/These/test.textgrid")