def load_textgrid(path):
    """Read the TextGrid stored at ``path`` and return it.

    A ``ValueError`` raised while reading is reported on stdout rather than
    propagated, so the caller still receives the TextGrid object (in whatever
    state the failed read left it).
    """
    grid = TextGrid()
    try:
        grid.read(path)
    except ValueError as err:
        message = 'The file {} could not be parsed: {}'.format(path, str(err))
        print(message)
    return grid
def load_textgrid(self, path):
    """Load a TextGrid, keeping one tier for every duplicated tier name.

    When several tiers share a name, only the one holding the most items is
    kept (the first of them on a tie); tiers with unique names are left alone
    and the original tier order is preserved.

    :param path: path of the TextGrid file to read
    :return: the loaded TextGrid with duplicate tiers removed
    :raises Exception: any error raised while reading is re-raised after a
        short message naming the file has been printed
    """
    tg = TextGrid(strict=False)
    try:
        tg.read(path)
        name_counts = Counter(t.name for t in tg.tiers)
        # Index of the tier to keep for each duplicated name. Starting from
        # "no candidate yet" (instead of index 0) guarantees that a tier with
        # that name is kept even when every duplicate is empty.
        keep_index = {}
        for i, t in enumerate(tg.tiers):
            if name_counts[t.name] < 2:
                continue
            best = keep_index.get(t.name)
            if best is None or len(t) > len(tg.tiers[best]):
                keep_index[t.name] = i
        tg.tiers = [
            t for i, t in enumerate(tg.tiers)
            if keep_index.get(t.name, i) == i
        ]
        return tg
    except Exception:
        print('There was an issue parsing {}:'.format(path))
        raise
def main():
    """Command-line entry point: score the similarity of two TextGrid tiers.

    Parses the arguments, checks that the three input files exist, cleans the
    requested tier of each TextGrid with ``createNew`` and prints the score
    computed by ``smart_evaluate``.

    :return: 0 on success, 1 when an input file is missing
    """
    argparser = argparse.ArgumentParser(
        description='Assign a score based on similarity of two .TextGrid files.')
    # All three inputs are dereferenced unconditionally below, so they are
    # declared as required instead of failing later on ``None``.
    argparser.add_argument('-first', '--first', nargs=2, required=True,
                           metavar=('PATH', 'TIER_NAME'),
                           help='Path to the first .TextGrid file.')
    argparser.add_argument('-second', '--second', nargs=2, required=True,
                           metavar=('PATH', 'TIER_NAME'),
                           help='Path to the second .TextGrid file.')
    argparser.add_argument('-d', '--d', nargs=1, required=True,
                           help='Path to dictionary file.')
    argparser.add_argument('-close', '--close', nargs='?', default=500, const=500,
                           help='Define how close, close is in milliseconds. Used for equality test.')
    args = argparser.parse_args()

    # Report every missing input, then stop instead of crashing while loading.
    inputs = (('First', args.first[0]),
              ('Second', args.second[0]),
              ('Dictionary', args.d[0]))
    missing = False
    for label, file_path in inputs:
        if not op.isfile(file_path):
            print("%s file '%s' does not exist." % (label, file_path))
            missing = True
    if missing:
        return 1

    first = TextGrid.fromFile(args.first[0])
    second = TextGrid.fromFile(args.second[0])
    first_clean = createNew(first, args.first[1])
    second_clean = createNew(second, args.second[1])
    # Still loaded so a broken dictionary file is reported; it is only
    # consumed by the alternative ``evaluate_skip_some`` scorer.
    dictionary = getDictionary(args.d[0])

    try:
        closeness = int(args.close) / float(1000)
    except (TypeError, ValueError):
        print("(-close or --close) argument should be integer.\nDefaulting to 500.")
        closeness = 0.5

    result_smart = smart_evaluate(first_clean, second_clean, closeness)
    # The original printed ``result_skips``, which is never assigned.
    print("Smart evaluation:\n\tR=%.4f " % result_smart)
    return 0
def reorg_noncollapsed(f):
    """Rewrite one TextGrid from ``noncollapsed_dir`` into ``data_dir``.

    Every non-empty interval (after ``sub_pattern`` substitution and
    stripping) is widened by 0.1 on both sides, clamped to ``[0, maxTime]``.
    When the widened interval cannot be added (``ValueError``), it is folded
    into the previously added interval instead: that interval's end is moved
    and the mark is appended to its mark.
    """
    padding = 0.1
    print(f)
    source_path = os.path.join(noncollapsed_dir, f)
    source = TextGrid()
    source.read(source_path)
    total = source.maxTime
    result = TextGrid(maxTime=total)
    destination_path = source_path.replace(noncollapsed_dir, data_dir)
    for tier in source.tiers:
        padded_tier = IntervalTier(name=tier.name, maxTime=total)
        for interval in tier:
            cleaned = sub_pattern.sub(' ', interval.mark).strip()
            if cleaned:
                start = max(interval.minTime - padding, 0)
                end = min(interval.maxTime + padding, total)
                try:
                    padded_tier.add(start, end, cleaned)
                except ValueError:
                    # Could not add: extend the previous interval instead.
                    padded_tier[-1].maxTime = end
                    padded_tier[-1].mark += ' ' + cleaned
        print(len(padded_tier))
        result.append(padded_tier)
    result.write(destination_path)
def test_tier_duplication():
    """Run ``check()`` on a document built from a grid with one tier "A"."""
    error_log.flush()
    lone_tier = IntervalTier("A", minTime=0, maxTime=10)
    grid = TextGrid()
    grid.tiers = [lone_tier]
    document = SingleAnnotatorTextGrid.from_textgrid(grid, [], None)
    document.check()
def check_times_merging(self):
    """Try to merge every pair of top/bottom tiers of the textgrid.

    For each tier name of the checking scheme, the tier suffixed with
    ``TOP_GROUP_SUFFIX`` is merged with the one suffixed with
    ``BOTTOM_GROUP_SUFFIX``; pairs with a missing tier are skipped. Merge
    conflicts are logged to ``error_log``.

    Returns the partially merged textgrid and the ``MergeResults`` object.
    """
    from .tasks.double import MergeResults

    source_tg = self.textgrid
    merged_times_tg = TextGrid(name=source_tg.name,
                               maxTime=source_tg.maxTime,
                               minTime=source_tg.minTime)
    merge_results = MergeResults()

    for base_name in self.checking_scheme.all_tiers_names:
        top_tier = source_tg.getFirst(base_name + self.TOP_GROUP_SUFFIX)
        bottom_tier = source_tg.getFirst(base_name + self.BOTTOM_GROUP_SUFFIX)
        # Only pairs where both tiers exist can be merged.
        if top_tier is not None and bottom_tier is not None:
            combined, tier_merge = self.merge_tiers(top_tier, bottom_tier)
            merge_results.tiers_merges.append(tier_merge)
            combined.name = base_name
            merged_times_tg.append(combined)

    # Conflicts are logged as errors so they can be shown to the annotator.
    for conflict in merge_results.to_merge_conflicts_errors():
        error_log.log_merge(conflict)

    return merged_times_tg, merge_results
def fromFile(self, filename):
    """Load a TextGrid file, retrying with legacy encodings on failure.

    The file is first parsed as-is. If that raises ``TextGridError`` or
    ``UnicodeDecodeError``, its bytes are decoded with the first of
    Windows-1251, Windows-1252 and ISO-8859-1 that succeeds, re-encoded as
    UTF-8 into a temporary file, and parsed again from there.

    :param filename: path of the TextGrid file
    :return: the parsed TextGrid, or ``None`` when the textgrid library is
        not installed or the re-encoded file still cannot be parsed
    :raises: the original parsing error when no fallback encoding can decode
        the file
    """
    if not LIBS_INSTALLED:
        error("can't load from file: textgrid lib not installed")
        return None

    try:
        return TextGridFile.fromFile(filename)
    except (TextGridError, UnicodeDecodeError) as e:
        error(e)
        with open(filename, 'rb') as f:
            raw = f.read()
        # The context manager removes the temporary file on every exit path,
        # including a failed second parse.
        with tempfile.NamedTemporaryFile() as tmp:
            found = False
            for encoding in ['Windows-1251', 'Windows-1252', 'ISO-8859-1']:
                try:
                    text = raw.decode(encoding)
                except UnicodeDecodeError as decode_err:
                    error(decode_err)
                    continue
                tmp.write(text.encode('utf-8'))
                # seek() also flushes, so the data is on disk before the
                # file is reopened by name below.
                tmp.seek(0)
                found = True
                break
            if not found:
                # Re-raise the parsing error currently being handled.
                raise
            try:
                return TextGridFile.fromFile(tmp.name)
            except TextGridError as parse_err:
                error(parse_err)
                return None
def get_intervals(textgrid_fn, tier_name):
    """Return the tier called ``tier_name`` from the TextGrid file.

    Raises ``TextGridError`` when the file has no tier with that name.
    """
    grid = TextGrid()
    grid.read(textgrid_fn)
    available = grid.getNames()
    if tier_name not in available:
        raise TextGridError("Cannot find tier named " + tier_name)
    position = available.index(tier_name)
    return grid[position]
def createTextGrid(data, tierName = "words"):
    """Build a TextGrid with one tier of back-to-back intervals.

    ``data`` yields 4-tuples ``(name, time, dur, words)``; only ``dur`` and
    ``words`` are used. Each interval starts where the previous one ended,
    lasts ``dur`` and is labelled with ``makeSentence(words)``.
    """
    cursor = 0
    tier = IntervalTier(tierName)
    for _name, _time, dur, words in data:
        tier.add(cursor, cursor + dur, makeSentence(words))
        cursor += dur
    grid = TextGrid()
    grid.append(tier)
    return grid
def gen_template_tg(self, duration: float, filename: str):
    """Create a TextGrid with one empty tier per entry of ``tiers_specs``.

    The grid is named ``filename``; the grid and all tiers span
    ``0.0`` to ``duration``.
    """
    template = TextGrid(name=filename, minTime=0.0, maxTime=duration)
    for tier_name in self.tiers_specs:
        empty_tier = IntervalTier(name=tier_name, minTime=0.0, maxTime=duration)
        template.append(empty_tier)
    return template
def get_label(path):
    """
    Load the TextGrid at ``path`` and return its array representation.

    The grid is read with ``Fs`` set to the integer part of
    ``audio_sample_rate / window_size``.

    :param path: Path to the TextGrid
    :return: The ``FsArrayCombined`` attribute of the loaded TextGrid.
    """
    fs = int(audio_sample_rate / window_size)
    grid = TextGrid(name=path)
    grid.read(path, Fs=fs)
    return grid.FsArrayCombined
def gop_feat_simple(gop_vals, textgrid_file):
    """
    Calculate gop statistics on vowels, consonants and syllables

    The intervals of the second tier of the TextGrid (``tiers[1]``) are
    walked twice: once to build a space-separated phone string that is
    handed to ``syllabifier.syllabify``, and once to collect the entries of
    ``gop_vals`` that belong to vowels, consonants and each syllable.
    Intervals marked ``'SIL'`` or ``'SPN'`` are never collected.

    :param gop_vals: gop values of one utterance extracted from gop files;
        indexed by ``phn_idx`` (see the notes in the loop for how it advances)
    :param textgrid_file: textgrid file
    :return: a two-element tuple: the list of mean vowel, consonant and
        syllable gop values, and the list of the matching feature names
    """
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-mangled source -- confirm against the original file.
    textgrid = TextGrid()
    textgrid.read(textgrid_file)
    # First pass: phone sequence (without SIL/SPN) used for syllabification.
    phn_seq = ""
    for intervals in textgrid.tiers[1]:
        if intervals.mark is not None:
            if intervals.mark != 'SIL' and intervals.mark != 'SPN':
                phn_seq += " " + intervals.mark
    language = syllabifier.English
    syllables = syllabifier.syllabify(language, str(phn_seq))
    vowel_gop, consonants_gop, syllable_gop = [], [], []
    syllable_idx = 0  # determine which syllable current phoneme is in
    phn_idx = 0  # position in gop_vals of the current interval
    syllable_phn_gop = []  # gop values gathered for the syllable in progress
    # Second pass: distribute gop values over the three categories.
    for intervals in textgrid.tiers[1]:
        # vowels and consonants
        if intervals.mark is not None:
            if intervals.mark != 'SIL' and intervals.mark != 'SPN':
                if intervals.mark in vowels:
                    vowel_gop.append(gop_vals[phn_idx])
                elif intervals.mark in consonants:
                    consonants_gop.append(gop_vals[phn_idx])
                else:
                    # NOTE(review): this skips the phn_idx increment at the
                    # bottom of the loop for unknown marks -- confirm that
                    # gop_vals has no entry for them.
                    continue
        # syllables
        if syllable_idx < len(syllables):
            # Elements 1, 2 and 3 of the syllable entry concatenated into one
            # phone sequence (presumably onset, nucleus, coda -- TODO confirm).
            current_syllable = syllables[syllable_idx][1] + syllables[syllable_idx][2] + syllables[syllable_idx][3]
            if intervals.mark is not None:
                if intervals.mark != 'SIL' and intervals.mark != 'SPN':
                    if intervals.mark in current_syllable:
                        syllable_phn_gop.append(gop_vals[phn_idx])
                        # The syllable is closed when its last phone is seen.
                        if intervals.mark == current_syllable[-1]:
                            syllable_idx += 1
                            syllable_gop.append(np.mean(syllable_phn_gop))
                            syllable_phn_gop = []
        # SIL/SPN intervals also advance the index; only None marks do not.
        if intervals.mark is not None:
            phn_idx += 1
    # Empty categories get a single placeholder 1, so their mean is 1.
    if not vowel_gop:
        vowel_gop.append(1)
    if not consonants_gop:
        consonants_gop.append(1)
    if not syllable_gop:
        syllable_gop.append(1)
    return [np.mean(vowel_gop), np.mean(consonants_gop), np.mean(syllable_gop)], ["gop_avgV", "gop_avgC", "gop_avgSyl"]
def main(text_grid_filename, output_label):
    """Write the onset/offset of a TextGrid's second interval to a label file.

    Reads the second interval (index 1) of the first tier, scales its start
    and end by 200 (rounding the start up and the end down) and writes them
    on the second line of ``output_label``, after a ``1 2`` header line.
    Nothing happens when ``text_grid_filename`` does not exist.

    :param text_grid_filename: path of the TextGrid to read
    :param output_label: path of the label file to (over)write
    """
    if not os.path.exists(text_grid_filename):
        return

    t = TextGrid()
    t.read(text_grid_filename)
    # The TextGrid class in use only exposes these fields as name-mangled
    # private attributes.
    interval = t._TextGrid__tiers[0]._IntervalTier__intervals[1]
    onset = interval._Interval__xmin
    offset = interval._Interval__xmax
    onset_i = np.ceil(onset * 100 * 2)  # extract every 5 ms
    offset_i = np.floor(offset * 100 * 2)  # extract every 5 ms

    # ``with`` closes the file, so the data is flushed even on error.
    with open(output_label, 'w') as f:
        f.write('1 2\n')
        f.write(str(onset_i) + ' ' + str(offset_i) + '\n')
def compute_gamma(self, ref_tg: TextGrid, target_tg: TextGrid) -> Optional[float]:
    """Compute the gamma agreement between the two grids for this tier.

    The intervals of the tier named ``self.name`` are added to a continuum
    as annotators "ref" and "target" (time segments only, no labels), then
    compared with a positional sporadic dissimilarity.
    """
    continuum = Continuum()
    for annotator, grid in (("ref", ref_tg), ("target", target_tg)):
        for annot in grid.getFirst(self.name):
            segment = Segment(annot.minTime, annot.maxTime)
            continuum.add(annotator, segment)
    dissimilarity = PositionalSporadicDissimilarity(delta_empty=1)
    results = continuum.compute_gamma(dissimilarity,
                                      n_samples=10,
                                      precision_level="medium")
    return results.gamma
def create_grid(wav_file: Path, text: str, tier_name: str, n_digits: int) -> TextGrid:
    """Create a one-tier TextGrid covering the duration of ``wav_file``.

    The duration is rounded to ``n_digits`` digits. The tier is filled with
    the intervals ``get_intervals`` produces for the individual characters
    of ``text``.
    """
    assert wav_file.is_file()
    assert len(text) > 0
    duration_s = round(get_wav_duration_s(wav_file), n_digits)
    tier = IntervalTier(tier_name, 0, duration_s)
    characters = list(text)
    tier.intervals.extend(get_intervals(characters, duration_s, n_digits))
    grid = TextGrid(None, 0, duration_s)
    grid.append(tier)
    return grid
def fixTiers(source, tierlist, outfile):
    """Rewrite TextGrids with their tiers in a new order.

    ``source`` lists the input TextGrid paths (one per line), ``tierlist``
    yields one Python list expression of tier names per TextGrid, and
    ``outfile`` lists the output paths (one per line). For each triple a new
    TextGrid holding the requested tiers, in the requested order, is written
    to the output path. Nothing is returned.
    """
    input_lines = source.readlines()
    output_lines = outfile.readlines()
    for line, tier, out in zip(input_lines, tierlist, output_lines):
        oldtg = TextGridFromFile(line.rstrip('\n'))
        # SECURITY NOTE(review): eval() executes arbitrary code from the tier
        # list; only use trusted input, or consider ast.literal_eval.
        wanted_names = eval(tier)
        destination = out.rstrip('\n')
        newtg = TextGrid('newtg')
        for tier_name in wanted_names:
            newtg.append(oldtg.getFirst(tier_name))
        newtg.write(destination)
def main(text_grid_filename, output_label):
    """Write the onset/offset of a TextGrid's second interval to a label file.

    Reads the second interval (index 1) of the first tier, scales its start
    and end by 200 (rounding the start up and the end down) and writes them
    on the second line of ``output_label``, after a ``1 2`` header line.
    Nothing happens when ``text_grid_filename`` does not exist.

    :param text_grid_filename: path of the TextGrid to read
    :param output_label: path of the label file to (over)write
    """
    if not os.path.exists(text_grid_filename):
        return

    t = TextGrid()
    t.read(text_grid_filename)
    # The TextGrid class in use only exposes these fields as name-mangled
    # private attributes.
    interval = t._TextGrid__tiers[0]._IntervalTier__intervals[1]
    onset = interval._Interval__xmin
    offset = interval._Interval__xmax
    onset_i = np.ceil(onset * 100 * 2)  # extract every 5 ms
    offset_i = np.floor(offset * 100 * 2)  # extract every 5 ms

    # ``with`` closes the file, so the data is flushed even on error.
    with open(output_label, 'w') as f:
        f.write('1 2\n')
        f.write(str(onset_i) + ' ' + str(offset_i) + '\n')
def compute_gamma(self, ref_tg: TextGrid, target_tg: TextGrid):
    """Compute the gamma agreement between the two grids for this tier.

    The intervals of the tier named ``self.name`` are added to a continuum
    as annotators "ref" and "target", each with its time segment and its
    mark, then compared with a combined categorical dissimilarity.
    """
    continuum = Continuum()
    for annotator, grid in (("ref", ref_tg), ("target", target_tg)):
        for annot in grid.getFirst(self.name):
            segment = Segment(annot.minTime, annot.maxTime)
            continuum.add(annotator, segment, annot.mark)
    dissimilarity = CombinedCategoricalDissimilarity(alpha=1, beta=1)
    results = continuum.compute_gamma(dissimilarity,
                                      n_samples=10,
                                      precision_level="medium")
    return results.gamma
def get_word_set(corpus, include_bracketed=False):
    """Collect the words used by the corpus, including wav-less transcripts.

    Starts from ``corpus.word_set`` (updated in place) and adds the words of
    every file in ``corpus.transcriptions_without_wavs``: ``.lab`` files are
    read as plain text, everything else as a TextGrid whose interval tiers
    (except tiers named "notes") are scanned. Files that cannot be decoded
    or read are skipped and listed in a warning.

    When ``include_bracketed`` is false, the result is a list without the
    words for which ``check_bracketed`` is true.
    """
    word_set = corpus.word_set
    undecodable = []
    unreadable = {}

    for file_path in corpus.transcriptions_without_wavs:
        if file_path.endswith('.lab'):
            try:
                text = load_text(file_path)
            except UnicodeDecodeError:
                undecodable.append(file_path)
            else:
                word_set.update(parse_transcription(text))
            continue

        tg = TextGrid()
        try:
            tg.read(file_path)
        except Exception:
            # Keep the formatted traceback so the failure can be inspected.
            unreadable[file_path] = '\n'.join(
                traceback.format_exception(*sys.exc_info()))
            continue
        for ti in tg.tiers:
            if ti.name.lower() == 'notes' or not isinstance(ti, IntervalTier):
                continue
            for interval in ti:
                words = parse_transcription(interval.mark.lower().strip())
                if words:
                    word_set.update(words)

    if undecodable:
        print(
            'WARNING: The following files were not able to be decoded using utf8:\n\n'
            '{}'.format('\n'.join(undecodable)))
    if unreadable:
        print(
            'WARNING: The following TextGrid files were not able to be read:\n\n'
            '{}'.format('\n'.join(unreadable)))
    print(
        'Generating transcriptions for the {} word types found in the corpus...'
        .format(len(word_set)))
    if include_bracketed:
        return word_set
    return [x for x in word_set if not check_bracketed(x)]
def gen_merged_times(self):
    """Build a grid pairing each merged tier with its "-target" tier.

    Runs ``check_times_merging`` and, for every tier name of the checking
    scheme, appends a copy of the merged tier (renamed ``<name>-merged``)
    followed by a copy of the textgrid's ``<name>-target`` tier.

    Returns the new grid and the merge results.
    """
    merged_times_tg, merge_results = self.check_times_merging()
    paired_tg = TextGrid(name=merged_times_tg.name,
                         maxTime=merged_times_tg.maxTime,
                         minTime=merged_times_tg.minTime)
    for tier_name in self.checking_scheme.all_tiers_names:
        # Copies keep the source grids untouched by the renaming below.
        merged_copy: IntervalTier = deepcopy(merged_times_tg.getFirst(tier_name))
        target_copy: IntervalTier = deepcopy(
            self.textgrid.getFirst(tier_name + "-target"))
        merged_copy.name = tier_name + "-merged"
        paired_tg.append(merged_copy)
        paired_tg.append(target_copy)
    return paired_tg, merge_results
def convert_textgrid(tg_old, sil):
    """Convert a grid with PHONEMES/WORDS tiers into one with words/phones.

    Every mark is passed through ``convert_mark(mark, sil)``; interval and
    tier boundaries are copied unchanged. The new grid holds the "words"
    tier first, then the "phones" tier.
    """
    def rebuild(old_name, new_name):
        # Copy one tier under a new name, converting every mark.
        old_tier = tg_old[old_name]
        intervals = [Interval(x.start, x.end, convert_mark(x.mark, sil))
                     for x in old_tier]
        return Tier(new_name, old_tier.start, old_tier.end, 'Interval', intervals)

    tg_new = TextGrid(tg_old.start, tg_old.end)
    phones = rebuild('PHONEMES', 'phones')
    words = rebuild('WORDS', 'words')
    tg_new.append_tier(words)
    tg_new.append_tier(phones)
    return tg_new
def check_integrity(f):
    """Check the integrity of the TextGrid obtained from ``f``.

    Verifies, in order: the names of the first three tiers ('diph', 'point',
    'word'), that the 'diph' and 'word' tiers each hold 32 non-empty items
    and have transcripts of equal length, the project-specific content checks
    (``all_there``, ``unique``, ``checkwords``, ``all_matches``), and that
    the 'point' tier holds 64 entries matching the diphthongs.

    A failed check raises ``AssertionError`` (or whatever the helper raises).
    Progress messages use the parenthesised print form, which prints the
    same text on Python 2 and Python 3.
    """
    gridobj = TextGrid(get_grid(f))
    diph_tier, point_tier, word_tier = gridobj.tiers[0], gridobj.tiers[1], gridobj.tiers[2]

    # Check if tiers are OK
    print("\tChecking proper tier names...")
    assert diph_tier.nameid == 'diph'
    assert point_tier.nameid == 'point'
    assert word_tier.nameid == 'word'

    # Check number of words and diphthongs
    print("\tChecking if the tiers contain 32 nonempty items...")
    diph, word = get_nonempty(diph_tier), get_nonempty(word_tier)
    assert len(word) == 32
    assert len(diph) == 32

    print("\tChecking that the number of empty and nonempty tiers is the same...")
    assert len(diph_tier.simple_transcript) == len(word_tier.simple_transcript)

    print("\tChecking if all tiers have valid text...")
    all_there(diph, 'diph')
    all_there(word, 'word')

    print("\tChecking if the diphthongs have pairs...")
    unique(diph)

    print("\tChecking if all words are present...")
    checkwords(word)

    print("\tChecking if the words and diphthongs match...")
    all_matches(diph, word)

    print("\tChecking if the number of intervals is 64...")
    intervals = point_tier.simple_transcript
    assert len(intervals) == 64

    print("\tChecking if intervals match diphthongs...")
    check_intervals_match(diph, intervals)
def maus_annotations(tgfile, corpusid, itemid):
    """Read annotations from a MAUS generated TextGrid file and
    generate a collection of annotation objects.

    Each row of each tier becomes one annotation whose type is looked up
    from the tier name ('MAU', 'ORT' or 'KAN'); empty labels are replaced by
    "#". Consecutive annotations of a tier are chained with ``set_next``,
    and the ORT annotations are finally linked to their KAN and MAU
    children.

    :param tgfile: TextGrid file to load
    :param corpusid: corpus identifier passed to the collection
    :param itemid: item identifier passed to the collection
    :return: the populated ``AnnotationCollection``
    :raises KeyError: when a non-empty tier has a name other than the three
        known ones
    """
    collection = AnnotationCollection([], corpusid, itemid, SecondAnnotation)

    tiers = {'MAU': MAUS.phonetic,
             'ORT': MAUS.orthographic,
             'KAN': MAUS.canonical,
             }

    tg = TextGrid.load(tgfile)

    for tier in tg:
        # generate annotations for this tier
        last = None
        for start, end, label in tier.simple_transcript:
            if label == "":
                label = "#"
            ann = collection.add_annotation(tiers[tier.tier_name()], label, start, end)
            # Identity test: an annotation's own __eq__/__ne__ must not
            # decide whether a predecessor exists.
            if last is not None:
                last.set_next(ann)
            last = ann

    collection.link_children(tiers['ORT'], tiers['KAN'])
    collection.link_children(tiers['ORT'], tiers['MAU'])

    return collection
def load_5tier_grids_for_stories(stories, rootdir):
    """Load one TextGrid per story from per-story subdirectories.

    For each story, the first entry of ``rootdir`` whose name starts with
    the story name is taken as its directory, and the first file in it
    ending with "TextGrid" is parsed.

    :param stories: iterable of story names
    :param rootdir: directory holding one subdirectory per story
    :return: dict mapping each story name to its ``TextGrid``
    :raises IndexError: when no directory or no TextGrid file matches
    """
    grids = dict()
    for story in stories:
        storydir = os.path.join(
            rootdir,
            [sd for sd in os.listdir(rootdir) if sd.startswith(story)][0])
        storyfile = os.path.join(
            storydir,
            [sf for sf in os.listdir(storydir) if sf.endswith("TextGrid")][0])
        # ``with`` closes the handle right away instead of leaving it to the
        # garbage collector.
        with open(storyfile) as grid_file:
            grids[story] = TextGrid(grid_file.read())
    return grids
def loadOrGenerate(self):
    """Load the current file's TextGrid, or create a default one.

    When a '.TextGrid' file level exists it is loaded with ``fromFile``.
    Otherwise a new grid spanning the audio duration is created with a
    single "text" tier holding one interval labelled "text", and its path is
    registered as the '.TextGrid' file level.

    Afterwards a frames tier is generated unless the grid already holds a
    non-empty tier named in ``ALIGNMENT_TIER_NAMES``; an empty one is removed
    first.
    """
    # NOTE(review): the statement nesting below was reconstructed from a
    # whitespace-mangled source -- confirm against the original file.
    fname = self.app.Data.checkFileLevel('.TextGrid', shoulderror=False)
    if fname:
        self.TextGrid = self.fromFile(fname)
    else:
        minTime = 0.
        # reset() is presumably what populates Audio.duration -- TODO confirm
        if not hasattr(self.app.Audio, 'duration'):
            self.app.Audio.reset()
        maxTime = self.app.Audio.duration
        self.TextGrid = TextGridFile(maxTime=maxTime)
        # Placeholder tier covering the whole recording.
        sentenceTier = IntervalTier("text")
        sentenceTier.add(minTime, maxTime, "text")
        self.TextGrid.tiers.append(sentenceTier)
        fname = self.app.Data.unrelativize(
            self.app.Data.getCurrentFilename() + '.TextGrid')
        self.app.Data.setFileLevel('.TextGrid', fname)
    names = self.TextGrid.getNames()
    for i, n in enumerate(names):
        if n in ALIGNMENT_TIER_NAMES:
            if len(self.TextGrid[i]) == 0:
                # Drop the empty alignment tier; it is regenerated below.
                self.TextGrid.pop(i)
                break
            else:
                # A populated alignment tier already exists: nothing to do.
                return
    self.genFramesTier()
def get_annotation(monkey, include_noise=False):
    """Get the annotation for monkey

    Every ``*.TextGrid`` found under ``<BASEDIR>/<monkey>/textgrids`` that
    has a matching ``.wav`` in ``<BASEDIR>/<monkey>/audio`` is read, and each
    interval of its first tier becomes a ``Fragment`` whose times are
    relative to the start of the tier.

    :param monkey: name of the monkey's directory under ``BASEDIR``
    :param include_noise: do not exclude noise intervals (the intervals
        whose stripped mark is empty)
    :return dict from filename to list of Fragments
    """
    monkeydir = path.join(BASEDIR, monkey)
    annot = defaultdict(list)
    for tgfile in rglob(path.join(monkeydir, 'textgrids'), '*.TextGrid'):
        filename = path.splitext(path.basename(tgfile))[0]
        if not path.exists(path.join(monkeydir, 'audio', filename + '.wav')):
            # One pre-formatted argument prints the same text under both the
            # Python 2 print statement and the Python 3 print function.
            print('missing audio file: %s %s' % (monkey, filename + '.wav'))
            continue
        tg = TextGrid.read(tgfile)
        tier = tg.tiers[0]
        for interval in tier:
            mark = interval.mark.strip()
            if mark == '' and not include_noise:
                continue
            fragment = Fragment(
                filename,
                Interval(interval.start - tier.start,
                         interval.end - tier.start),
                mark)
            annot[filename].append(fragment)
    return annot
def load_grid(story, grid_dir="data/grids"):
    """Loads the TextGrid for the given [story] from the directory [grid_dir].

    The first file that starts with [story] will be loaded, so if there are
    multiple versions of a grid for a story, beware.

    Raises IndexError when no file in [grid_dir] starts with [story].
    """
    gridfile = [os.path.join(grid_dir, gf)
                for gf in os.listdir(grid_dir) if gf.startswith(story)][0]
    # ``with`` closes the handle right away instead of leaving it to the
    # garbage collector.
    with open(gridfile) as grid_file:
        return TextGrid(grid_file.read())
def loadOrGenerate(self):
    """Load the current file's TextGrid, or create a default one.

    When a '.TextGrid' file level exists it is loaded with ``fromFile``.
    Otherwise a new grid spanning the audio duration (1 second when the
    audio has no duration) is created and registered as the '.TextGrid'
    file level; unless both '.ult' and '.txt' file levels exist, it gets a
    "text" tier holding one interval labelled "text".

    Afterwards a frames tier is generated unless the grid already holds a
    non-empty tier named in ``ALIGNMENT_TIER_NAMES`` (whose name is then
    stored in ``frameTierName``); an empty one is removed first.
    """
    # NOTE(review): the statement nesting below was reconstructed from a
    # whitespace-mangled source -- confirm against the original file.
    fname = self.app.Data.checkFileLevel('.TextGrid', shoulderror=False)
    if fname:
        self.TextGrid = self.fromFile(fname)
    else:
        minTime = 0.
        if not hasattr(self.app.Audio, 'duration'):
            self.app.Audio.reset()
        try:
            maxTime = self.app.Audio.duration
        except AttributeError:
            # Only the missing attribute is expected here; a bare ``except``
            # would also swallow KeyboardInterrupt and unrelated bugs.
            warn(
                'Audio has no duration attribute after calling reset(), defaulting to 1 second'
            )
            maxTime = 1.
        self.TextGrid = TextGridFile(maxTime=maxTime)
        keys = self.app.Data.getFileLevel('all')
        if not ('.ult' in keys and '.txt' in keys):
            # Placeholder tier covering the whole recording.
            sentenceTier = IntervalTier("text")
            sentenceTier.add(minTime, maxTime, "text")
            self.TextGrid.append(sentenceTier)
        fname = self.app.Data.unrelativize(
            self.app.Data.getCurrentFilename() + '.TextGrid')
        self.app.Data.setFileLevel('.TextGrid', fname)
    names = self.TextGrid.getNames()
    for i, n in enumerate(names):
        if n in ALIGNMENT_TIER_NAMES:
            if len(self.TextGrid[i]) == 0:
                # Drop the empty alignment tier; it is regenerated below.
                self.TextGrid.pop(i)
                break
            else:
                # A populated alignment tier already exists: remember it.
                self.frameTierName = n
                return
    self.genFramesTier()
def read_textgrid(file, tier):
    """Return the intervals of tier number ``tier`` as ``Segment`` objects.

    Each segment is built from the interval's mark, start time and duration.
    Raises ``IOError`` when the file has too few tiers or when the selected
    tier has no ``intervals`` attribute.
    """
    grid = TextGrid()
    grid.read(file)
    if len(grid.tiers) <= tier:
        raise IOError('Texgrid file ' + file + ' doesn\'t have enough tiers to get tier: ' + str(tier))
    selected = grid.tiers[tier]
    if not hasattr(selected, 'intervals'):
        raise IOError('The selected tier: ' + str(tier) + ' is not and IntervalTier in file: ' + file)
    return [Segment(seg.mark, seg.minTime, seg.duration())
            for seg in selected.intervals]
def createNew(textgrid, tier_name, VERBOSE=False):
    """Build a TextGrid holding a cleaned copy of one tier.

    The first tier named ``tier_name`` is copied into a new tier called
    ``<tier_name>_clean``: intervals recognised by ``isPause`` get an empty
    mark, all others get their mark passed through ``fixString``.

    :param textgrid: source TextGrid
    :param tier_name: name of the tier to clean
    :param VERBOSE: when true, print the tier before and after cleaning
    :return: a new TextGrid containing only the cleaned tier
    :raises IndexError: when the TextGrid has no tier named ``tier_name``
    """
    tiers = textgrid.getList(tier_name)
    if not tiers:
        # Same exception type as the bare ``tiers[0]`` lookup, with a
        # message that names the missing tier.
        raise IndexError("No tier named %r in the TextGrid." % (tier_name,))
    tier = tiers[0]

    new_tier = IntervalTier(tier_name + '_clean')
    new_txtgrid = TextGrid()

    if VERBOSE:
        print("Old tier: %s" % tier)

    for interval in tier:
        mark = '' if isPause(interval.mark) else fixString(interval.mark)
        new_tier.add(interval.minTime, interval.maxTime, mark)

    new_txtgrid.append(new_tier)
    if VERBOSE:
        print("New tier: %s" % new_tier)
    return new_txtgrid
def read_tg_from_str(tg_str, round_digits=DEFAULT_TEXTGRID_PRECISION):
    """
    Read the tiers contained in the Praat-formatted string tg_str into a
    TextGrid object. Times are rounded to the specified precision.

    Both the long and the short text format are handled, as reported by
    ``parse_header``. Intervals whose start is not strictly before their end
    are dropped.

    Adapted from TextGrid.read()

    :param tg_str: content of a TextGrid file
    :param round_digits: precision passed to ``parse_line`` for every value
    :return: the populated TextGrid
    :raises ValueError: when the header does not declare a TextGrid
    """
    source = StringIO(tg_str)
    tg = TextGrid()

    file_type, short = parse_header(source)
    if file_type != "TextGrid":
        raise ValueError("The file could not be parsed as a TextGrid as it is "
                         "lacking a proper header.")

    def next_value():
        # Parse the next line of the source as a single value.
        return parse_line(source.readline(), short, round_digits)

    tg.minTime = next_value()
    tg.maxTime = next_value()
    source.readline()  # More header junk
    if short:
        m = int(source.readline().strip())  # Will be tg.n
    else:
        m = int(source.readline().strip().split()[2])  # Will be tg.n
        source.readline()

    for i in range(m):  # Loop over grids
        if not short:
            source.readline()
        if next_value() == "IntervalTier":
            inam = next_value()
            imin = next_value()
            imax = next_value()
            itie = IntervalTier(inam, imin, imax)
            itie.strict = tg.strict
            n = int(next_value())
            for j in range(n):
                if not short:
                    source.readline()  # Header junk
                jmin = next_value()
                jmax = next_value()
                jmrk = get_mark(source, short)
                if jmin < jmax:  # Non-null
                    itie.addInterval(Interval(jmin, jmax, jmrk))
            tg.append(itie)
        else:  # PointTier
            inam = next_value()
            imin = next_value()
            imax = next_value()
            itie = PointTier(inam)
            n = int(next_value())
            for j in range(n):
                # Only the long format has a "points [j]:" line before each
                # point; skipping it in short format would consume the
                # point's time.
                if not short:
                    source.readline()  # Header junk
                jtim = next_value()
                jmrk = get_mark(source, short)
                itie.addPoint(Point(jtim, jmrk))
            tg.append(itie)
    return tg
def _load_tier(
        grid: textgrid.TextGrid,
        tier: str = 'phones',
        clean: bool = True,
):
    """Return the single tier of ``grid`` whose name matches ``tier``.

    ``tier`` is a case-insensitive ``fnmatch`` pattern that must match
    exactly one tier name, otherwise ``IOError`` is raised. With ``clean``,
    every mark is stripped in place and marks found in ``SILENCE`` are
    replaced by a single space.
    """
    pattern = tier.lower()
    names = [candidate for candidate in grid.getNames()
             if fnmatch.fnmatch(candidate.lower(), pattern)]
    if len(names) != 1:
        available = ', '.join(grid.getNames())
        raise IOError(f"{len(names)} tiers match {tier!r} in {grid.name or grid}. Availabe tiers: {available}")
    selected = grid.getFirst(names[0])
    if not clean:
        return selected
    for item in selected:
        item.mark = item.mark.strip()
        if item.mark in SILENCE:
            item.mark = ' '
    return selected
def open_str_textgrid(textgrid_str: str) -> TextGrid:
    """Parse a TextGrid directly from a string.

    The textgrid library can only open a TextGrid from an actual file (not
    from a TextIOWrapper-like object), so the string is written to a
    temporary file which is then parsed by name.
    """
    with NamedTemporaryFile(mode="w") as scratch:
        scratch.write(textgrid_str)
        # Make sure the content is on disk before it is reopened by name.
        scratch.flush()
        parsed = TextGrid.fromFile(scratch.name)
    return parsed
def parse_discourse(self, path, types_only=False):
    '''
    Parse a TextGrid file for later importing.

    Intervals of the tier named 'words' feed the first annotation type and
    those of the tier named 'phones' feed the second; other tiers are
    ignored.

    Parameters
    ----------
    path : str
        Path to TextGrid file
    types_only : bool
        Passed through to ``_parse_annotations``

    Returns
    -------
    :class:`~polyglotdb.io.discoursedata.DiscourseData`
        Parsed data from the file
    '''
    tg = TextGrid()
    tg.read(path)
    if not self._is_valid(tg):
        raise TextGridError('This file cannot be parsed by the MFA parser.')

    name = os.path.splitext(os.path.basename(path))[0]
    speaker = None
    if self.speaker_parser is not None:
        speaker = self.speaker_parser.parse_path(path)

    for a in self.annotation_types:
        a.reset()
        a.speaker = speaker

    # Parse the tiers: tier name -> position in ``annotation_types``.
    slot_by_tier = {'words': 0, 'phones': 1}
    for ti in tg.tiers:
        slot = slot_by_tier.get(ti.name)
        if slot is None:
            continue
        self.annotation_types[slot].add(
            (x.mark.strip(), x.minTime, x.maxTime) for x in ti)

    pg_annotations = self._parse_annotations(types_only)
    data = DiscourseData(name, pg_annotations, self.hierarchy)
    for a in self.annotation_types:
        a.reset()
    data.wav_path = find_wav_path(path)
    return data
def guess_textgrid_format(path):
    """
    Given a directory, tries to guess what format the textgrids are in

    For a directory, every ``.textgrid`` file found below it is classified
    and the most frequent format wins (``None`` counts as "unrecognised").
    For a single ``.textgrid`` file, its own format is returned.

    Parameters
    ----------
    path : str
        the path of the directory containing the textgrids

    Returns
    -------
    str or None
        textgrid format or None if file is not textgrid and directory doesn't contain textgrids

    Raises
    ------
    TextGridError
        if a TextGrid found while walking a directory cannot be parsed
    """
    from .inspect import inspect_labbcat, inspect_mfa, inspect_fave

    def classify(tg, tg_path):
        # Return the first format whose parser accepts ``tg``, else None.
        # All parsers are built before any check, in the priority order
        # labbcat > mfa > fave.
        parsers = (('labbcat', inspect_labbcat(tg_path)),
                   ('mfa', inspect_mfa(tg_path)),
                   ('fave', inspect_fave(tg_path)))
        for format_name, parser in parsers:
            if parser._is_valid(tg):
                return format_name
        return None

    if os.path.isdir(path):
        counts = {'mfa': 0, 'labbcat': 0, 'fave': 0, None: 0}
        for root, subdirs, files in os.walk(path):
            for f in files:
                if not f.lower().endswith('.textgrid'):
                    continue
                tg_path = os.path.join(root, f)
                tg = TextGrid()
                try:
                    tg.read(tg_path)
                except ValueError as e:
                    raise TextGridError(
                        'The file {} could not be parsed: {}'.format(
                            tg_path, str(e)))
                counts[classify(tg, tg_path)] += 1
        # Without this guard an empty directory would report 'mfa', the
        # first key of ``counts``, instead of the documented None.
        if not any(counts.values()):
            return None
        return max(counts, key=counts.get)
    if path.lower().endswith('.textgrid'):
        tg = TextGrid()
        tg.read(path)
        return classify(tg, path)
    return None
def parse_discourse(self, path):
    '''
    Parse a TextGrid file for later importing.

    The n-th tier of the file feeds the n-th annotation type: interval
    tiers contribute (mark, start, end) triples, other tiers contribute
    (mark, time) pairs.

    Parameters
    ----------
    path : str
        Path to TextGrid file

    Returns
    -------
    :class:`~polyglotdb.io.discoursedata.DiscourseData`
        Parsed data from the file
    '''
    tg = TextGrid()
    tg.read(path)

    if len(tg.tiers) != len(self.annotation_types):
        raise TextGridError("The TextGrid ({}) does not have the same number of interval tiers as the number of annotation types specified.".format(path))

    name = os.path.splitext(os.path.basename(path))[0]
    speaker = None
    if self.speaker_parser is not None:
        speaker = self.speaker_parser.parse_path(path)

    for a in self.annotation_types:
        a.reset()
        a.speaker = speaker

    # Parse the tiers
    for i, ti in enumerate(tg.tiers):
        if isinstance(ti, IntervalTier):
            entries = ((x.mark.strip(), x.minTime, x.maxTime) for x in ti)
        else:
            entries = ((x.mark.strip(), x.time) for x in ti)
        self.annotation_types[i].add(entries)

    pg_annotations = self._parse_annotations()
    data = DiscourseData(name, pg_annotations, self.hierarchy)
    for a in self.annotation_types:
        a.reset()
    data.wav_path = find_wav_path(path)
    return data
def parseFile(path, fn, out_dir="/Users/elias/Desktop/TextGrids"):
    """Convert one annotation file into a TextGrid with words/phones tiers.

    The lines of ``path`` are parsed with ``getSAM``/``getMAU``/``getWords``
    and the result is written to ``<out_dir>/<name>.TextGrid``, where
    ``<name>`` is the part of ``fn`` before its first dot. Nothing is
    written when no segments are found or when ``getMaxTime`` returns -1.

    :param path: path of the file to read
    :param fn: file name used to derive the output name
    :param out_dir: directory receiving the TextGrid; defaults to the
        location that used to be hard-coded
    """
    filename = fn.split(".")[0]  # just name of file
    with open(path, "r") as f1:
        lines = f1.readlines()

    SAM = getSAM(lines)
    allSegs = getMAU(lines, SAM, filename)
    if allSegs is None:
        return

    segs = [getSegInfo(seg) for seg in allSegs]
    words = getWords(lines, allSegs, filename)
    maxtime = getMaxTime(allSegs)
    if maxtime == -1:
        return

    tg = TextGrid(maxTime=maxtime)
    wordtier = IntervalTier(name='words', maxTime=maxtime)
    phonetier = IntervalTier(name='phones', maxTime=maxtime)
    for interval in words:
        wordtier.add(*interval)
    for interval in segs:
        phonetier.add(*interval)
    tg.append(wordtier)
    tg.append(phonetier)

    outpath = "%s/%s.TextGrid" % (out_dir, filename)
    tg.write(outpath)
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus, dictionary,
                    frameshift=0.01):
    """Write one TextGrid per file from word and phone alignments.

    For every file of ``word_ctm`` a TextGrid with a "<speaker> - words" and
    a "<speaker> - phones" tier per speaker is written below
    ``out_directory``, in the subdirectory given by
    ``corpus.file_directory_mapping``. Consecutive silence phones are merged
    and overlaps between a silence and its neighbour are trimmed. Any
    exception raised for a file is caught; the tracebacks are collected in
    ``output_errors.txt`` inside ``out_directory``.

    NOTE(review): the statement nesting was reconstructed from a
    whitespace-mangled source -- confirm against the original file.
    """
    textgrid_write_errors = {}
    # NOTE(review): frameshift is converted but not used again in this block.
    frameshift = Decimal(str(frameshift))
    if not os.path.exists(out_directory):
        os.makedirs(out_directory, exist_ok=True)
    silences = {dictionary.optional_silence, dictionary.nonoptional_silence}
    for i, (filename, speaker_dict) in enumerate(sorted(word_ctm.items())):
        maxtime = corpus.get_wav_duration(filename)
        try:
            speaker_directory = os.path.join(
                out_directory, corpus.file_directory_mapping[filename])
            tg = TextGrid(maxTime=maxtime)
            for speaker in corpus.speaker_ordering[filename]:
                words = speaker_dict[speaker]
                word_tier_name = '{} - words'.format(speaker)
                phone_tier_name = '{} - phones'.format(speaker)
                word_tier = IntervalTier(name=word_tier_name, maxTime=maxtime)
                phone_tier = IntervalTier(name=phone_tier_name,
                                          maxTime=maxtime)
                for w in words:
                    word_tier.add(*w)
                # Each p is indexed as (p[0], p[1], p[2]); from its use below
                # this is (begin, end, label).
                for p in phone_ctm[filename][speaker]:
                    if len(phone_tier) > 0 and phone_tier[
                            -1].mark in silences and p[2] in silences:
                        # Silence following silence: extend the previous one.
                        phone_tier[-1].maxTime = p[1]
                    else:
                        if len(phone_tier) > 0 and p[2] in silences and p[
                                0] < phone_tier[-1].maxTime:
                            # Silence overlapping the previous phone: start
                            # it where that phone ends.
                            p = phone_tier[-1].maxTime, p[1], p[2]
                        elif len(phone_tier) > 0 and p[2] not in silences and p[0] < phone_tier[-1].maxTime and \
                                phone_tier[-1].mark in silences:
                            # Phone overlapping the previous silence: cut the
                            # silence short.
                            phone_tier[-1].maxTime = p[0]
                        phone_tier.add(*p)
                tg.append(word_tier)
                tg.append(phone_tier)
            tg.write(os.path.join(speaker_directory, filename + '.TextGrid'))
        except Exception as e:
            # Best effort: remember the traceback and go on with the next file.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            textgrid_write_errors[filename] = '\n'.join(
                traceback.format_exception(exc_type, exc_value, exc_traceback))
    if textgrid_write_errors:
        error_log = os.path.join(out_directory, 'output_errors.txt')
        with open(error_log, 'w', encoding='utf8') as f:
            f.write(
                'The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n'
            )
            for k, v in textgrid_write_errors.items():
                f.write('{}:\n'.format(k))
                f.write('{}\n\n'.format(v))
def get_word_set(corpus, include_bracketed=False):
    """Collect the words used by the corpus, including wav-less transcripts.

    Starts from ``corpus.word_set`` (updated in place) and adds the words of
    every file in ``corpus.transcriptions_without_wavs``: ``.lab`` files are
    read as plain text, everything else as a TextGrid whose interval tiers
    (except tiers named "notes") are scanned. Files that cannot be decoded
    or read are skipped and listed in a warning.

    When ``include_bracketed`` is false, the result is a list without the
    words for which ``check_bracketed`` is true.
    """
    word_set = corpus.word_set
    undecodable = []
    unreadable = {}

    for file_path in corpus.transcriptions_without_wavs:
        if file_path.endswith('.lab'):
            try:
                text = load_text(file_path)
            except UnicodeDecodeError:
                undecodable.append(file_path)
            else:
                word_set.update(parse_transcription(text))
            continue

        tg = TextGrid()
        try:
            tg.read(file_path)
        except Exception:
            # Keep the formatted traceback so the failure can be inspected.
            unreadable[file_path] = '\n'.join(
                traceback.format_exception(*sys.exc_info()))
            continue
        for ti in tg.tiers:
            if ti.name.lower() == 'notes' or not isinstance(ti, IntervalTier):
                continue
            for interval in ti:
                words = parse_transcription(interval.mark.lower().strip())
                if words:
                    word_set.update(words)

    if undecodable:
        print('WARNING: The following files were not able to be decoded using utf8:\n\n'
              '{}'.format('\n'.join(undecodable)))
    if unreadable:
        print('WARNING: The following TextGrid files were not able to be read:\n\n'
              '{}'.format('\n'.join(unreadable)))
    print('Generating transcriptions for the {} word types found in the corpus...'.format(len(word_set)))
    if include_bracketed:
        return word_set
    return [x for x in word_set if not check_bracketed(x)]
def parse_discourse(self, path, types_only = False):
    '''
    Parse a TextGrid file for later importing.

    Intervals of the tier named 'words' feed the first annotation type and
    those of the tier named 'phones' feed the second; other tiers are
    ignored.

    Parameters
    ----------
    path : str
        Path to TextGrid file
    types_only : bool
        Passed through to ``_parse_annotations``

    Returns
    -------
    :class:`~polyglotdb.io.discoursedata.DiscourseData`
        Parsed data from the file
    '''
    tg = TextGrid()
    tg.read(path)
    if not self._is_valid(tg):
        raise TextGridError('This file cannot be parsed by the MFA parser.')

    name = os.path.splitext(os.path.basename(path))[0]
    speaker = None
    if self.speaker_parser is not None:
        speaker = self.speaker_parser.parse_path(path)

    for a in self.annotation_types:
        a.reset()
        a.speaker = speaker

    # Parse the tiers: tier name -> position in ``annotation_types``.
    slot_by_tier = {'words': 0, 'phones': 1}
    for ti in tg.tiers:
        slot = slot_by_tier.get(ti.name)
        if slot is None:
            continue
        self.annotation_types[slot].add(
            (x.mark.strip(), x.minTime, x.maxTime) for x in ti)

    pg_annotations = self._parse_annotations(types_only)
    data = DiscourseData(name, pg_annotations, self.hierarchy)
    for a in self.annotation_types:
        a.reset()
    data.wav_path = find_wav_path(path)
    return data
def main():
    """Command-line entry point: clean one tier of a TextGrid file.

    The tier named on the command line is cleaned with ``createNew`` and the
    result is written to the target path. The target is only written when it
    already exists as a file; otherwise a message is printed.
    """
    argparser = ArgumentParser(
        description="Remove pauses from a specific tier of .TextGrid files.")
    argparser.add_argument('-r', '--r', nargs=2, metavar=('PATH', 'TIER_NAME'),
                           help='Path to .TextGrid file and name of tier.')
    argparser.add_argument('-t', '--t', nargs=1, metavar='target',
                           help='Path to target file for cleaned textgrid.')
    argparser.add_argument('-v', '--v', dest='isVerbose', action='store_const',
                           const=True, default=False, help='Make verbose.')
    args = argparser.parse_args()

    source_path, tier_name = args.r
    original = TextGrid.fromFile(source_path)
    clean_txtgrid = createNew(original, tier_name, args.isVerbose)

    target = args.t[0]
    print(target)
    if not op.isfile(target):
        print("Target file '%s' does not exist." % (target))
        return
    clean_txtgrid.write(target)
def compareAll(available, closeness=0.02):
    """Score every TextGrid shared by each pair of directories.

    For each entry of ``available``, elements 0 and 1 are directory prefixes
    (file names are appended to them directly). Every TextGrid file name
    present in both is loaded from each side and scored with
    ``smart_evaluate``. Individual scores and the per-pair average are
    printed.

    :param available: iterable of sequences whose first two elements are the
        directory prefixes to compare
    :param closeness: tolerance passed to ``smart_evaluate``
    :return: ``(results, names)``, the scores and the matching file names,
        accumulated over all pairs
    """
    results = []
    names = []
    # Loaded for parity with the alternative ``evaluate_skip_some`` scorer,
    # which takes it as an argument; ``smart_evaluate`` does not use it.
    dictionary = getDictionary(dirParent + '/' + dirParams + '/dictionaryDKMapped1.dict')

    for quad in available:
        first_files = getWithExtension(quad[0], 'TextGrid')
        # A set gives O(1) membership tests in the loop below.
        second_files = set(getWithExtension(quad[1], 'TextGrid'))

        total = 0
        amount = 0
        for file in first_files:
            if file not in second_files:
                continue
            first = TextGrid.fromFile(quad[0] + file)
            second = TextGrid.fromFile(quad[1] + file)
            result = smart_evaluate(first, second, closeness)
            results.append(result)
            names.append(file)
            print("-----------\n%s\n%s\n%sResult=%.4f%s\n-----------" % (quad[0] + file, quad[1] + file, bcolors.OKGREEN, result, bcolors.ENDC))
            total = total + result
            amount = amount + 1

        print("%s\nvs.\n%s" % (quad[0], quad[1]))
        if amount:
            print("%sAverage result=%.4f%s" % (bcolors.OKBLUE, (total / float(amount)), bcolors.ENDC))
        else:
            # No shared files: an average would divide by zero.
            print("No common TextGrid files to compare.")

    return results, names
def guess_textgrid_format(path):
    """
    Given a directory or a single file, tries to guess what format the
    textgrids are in

    For a directory, every ``.textgrid`` file below it is classified and the
    most frequent classification wins.  Ties go to the first of ``'mfa'``,
    ``'labbcat'``, ``'fave'``, ``None``.

    Parameters
    ----------
    path : str
        the path of the directory containing the textgrids, or of one textgrid

    Returns
    -------
    str or None
        textgrid format or None if file is not textgrid and directory doesn't contain textgrids

    Raises
    ------
    TextGridError
        If a textgrid found while scanning a directory cannot be parsed.
        For a single file the parser's own exception propagates unchanged.
    """
    is_directory = os.path.isdir(path)
    if is_directory:
        tg_paths = [
            os.path.join(root, f)
            for root, subdirs, files in os.walk(path)
            for f in files
            if f.lower().endswith('.textgrid')
        ]
    elif path.lower().endswith('.textgrid'):
        tg_paths = [path]
    else:
        tg_paths = []

    # Nothing to inspect.  Without this guard a directory without textgrids
    # used to be reported as 'mfa' (the first key of the all-zero tally).
    if not tg_paths:
        return None

    from .inspect import inspect_labbcat, inspect_mfa, inspect_fave
    # Checked in this order; the first parser that accepts the file wins.
    inspectors = (
        ('labbcat', inspect_labbcat),
        ('mfa', inspect_mfa),
        ('fave', inspect_fave),
    )

    def detect(tg_path):
        # Classify one textgrid file, returning a format name or None.
        tg = TextGrid()
        try:
            tg.read(tg_path)
        except ValueError as e:
            if not is_directory:
                raise
            raise TextGridError('The file {} could not be parsed: {}'.format(tg_path, str(e)))
        for format_name, inspect in inspectors:
            if inspect(tg_path)._is_valid(tg):
                return format_name
        return None

    if not is_directory:
        return detect(path)

    counts = {'mfa': 0, 'labbcat': 0, 'fave': 0, None: 0}
    for tg_path in tg_paths:
        counts[detect(tg_path)] += 1
    return max(counts, key=counts.get)
def readTextGrid(tgAddress, images):
    """
    Read a TextGrid and link its word and segment intervals to ``images``.

    Each tier whose name matches word(s) / phone(s) / segment(s) is walked
    interval by interval.  Every non-empty interval is stored through
    ``addIntervalToDB`` and then attached to the images whose time slice
    overlaps it (``assignMainObject`` / ``assignStartObject`` /
    ``assignEndObject``).  Segments are additionally appended to the word
    whose span covers them, and each word's segment sequence is finally
    saved with ``updateSegmentsInDB``.  Timing information is appended to
    ``log.txt`` in the current working directory.

    tgAddress: path of the TextGrid file to read.
    images: indexable sequence of image objects; each one is taken to cover
        an equal share of the TextGrid's duration.
    """
    global dbTimeInTG
    phonePattern = re.compile('(^[Pp]hone(s{0,1})$)|(^[Ss]egment(s{0,1})$)')
    wordPattern = re.compile('^[Ww]ord(s{0,1})$')
    start = time.time()
    tg = TextGrid()
    tg.read(tgAddress)
    duration = tg.maxTime - tg.minTime
    # Time span represented by a single image.
    imageLength = duration/len(images)
    words = []
    tiers = tg.tiers
    # The segment pass below looks words up in `words`, so the word tier has to
    # be handled first; flip the order when the first tier is not the word tier.
    # NOTE(review): this presumably assumes exactly two tiers - confirm.
    if not wordPattern.match(tg.getNames()[0]):
        tiers = reversed(tg.tiers)
    #This loop handles words and segments at once, because many of the operations are shared
    for tier in tiers:
        # imageCounter is the index of the last image examined for this tier;
        # wordCounter is the index of the word currently receiving segments.
        imageCounter = -1
        wordCounter = 0
        if not wordPattern.match(tier.name) and not phonePattern.match(tier.name):
            print ("Unexpected tier name: "+tier.name)
        else:
            if wordPattern.match(tier.name):
                tierType = 'Words'
            else:
                tierType = 'Segments'
            for interval in tier.intervals:
                mark = interval.mark
                simpleSpelling = getSimpleSpelling(mark)
                if len(mark)<1 and tierType=='Words':
                    continue
                #Add the new segment/word to the DB
                if len(mark)>0:
                    wordOrSeg = addIntervalToDB(mark,simpleSpelling,tierType) #the third arg tells whether it is a word or a segment
                else:
                    # Empty marks are skipped for both tier types.
                    continue
                #assign the segment/word to all of the images it covers:
                if imageCounter>0:
                    imageCounter -= 1 #So in the beginning of each interval we first check the last image we saw. It may need more than one seg.
                while True:
                    # if imageCounter%100==0:
                    #     print("annotating image: "+str(imageCounter))
                    imageCounter += 1
                    if imageCounter>=len(images):
                        break
                    image = images[imageCounter]
                    imageMin = imageCounter*imageLength
                    imageMax = (imageCounter+1)*imageLength
                    imageCenter = (imageMin+imageMax)/2
                    # Image ends before the interval starts: move on to the next image.
                    if imageMax<interval.minTime:
                        continue
                    #image center inside interval:
                    if imageCenter>interval.minTime and imageCenter<interval.maxTime:
                        assignMainObject(image,wordOrSeg,tierType)
                    #image has startSegment:
                    elif imageCenter>interval.maxTime and imageMin<interval.maxTime:
                        assignStartObject(image,wordOrSeg,tierType)
                    #image has endSegment:
                    elif imageCenter<interval.minTime and imageMax>interval.minTime:
                        assignEndObject(image,wordOrSeg,tierType)
                    #image occurs after the interval
                    else:
                        # Step back so the next interval re-examines this image.
                        imageCounter -= 1
                        break
                #Save it somewhere if it is a word:
                if tierType=='Words':
                    word = WordEntry("","",interval.maxTime,wordOrSeg.id)
                    words.append(word)
                #Now assign the segment to the word if it is a segment
                if tierType=='Segments':
                    #Increment the words until you reach a word that covers the current segment:
                    while words[wordCounter].maxTime<=interval.minTime:
                        wordCounter += 1
                    word = words[wordCounter]
                    id = wordOrSeg.id
                    # Whitespace-only marks are recorded with the placeholder "0".
                    if len(mark.strip())<1:
                        simpleSpelling = "0"
                        id = "0"
                    word.addSegment(str(id),simpleSpelling)
    #Now that we're done reading the TextGrid file, it's time to add the segment sequence for each word to the DB
    for word in words:
        word.updateSegmentsInDB()
    end = time.time()
    # Append the elapsed time and the module-level dbTimeInTG value to the log.
    file = open('log.txt', 'a')
    elapsed = str(end-start)
    file.write("textgrid: \t"+elapsed+'\n')
    file.write("dbInTG: \t"+str(dbTimeInTG)+'\n')
    file.close()
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus, dictionary, frameshift=0.01):
    """
    Write word and phone alignments out as TextGrid files.

    Without ``corpus.segments`` each entry of ``word_ctm`` becomes one
    TextGrid with a ``words`` and a ``phones`` tier.  With segments, one
    TextGrid per file is written containing a ``<speaker> - words`` and a
    ``<speaker> - phones`` tier for every speaker listed in
    ``corpus.speaker_ordering``; adjacent silences are merged and overlaps
    between a silence and its neighbour are trimmed.

    Failures for individual files do not abort the export.  Their tracebacks
    are collected and written to ``output_errors.txt`` in ``out_directory``.

    Parameters
    ----------
    word_ctm : dict
        File name -> {speaker: list of (begin, end, label) word intervals}
    phone_ctm : dict
        Same layout as ``word_ctm`` for phone intervals
    out_directory : str
        Root directory for the generated TextGrids
    corpus
        Provides ``segments``, ``get_wav_duration``,
        ``file_directory_mapping`` and ``speaker_ordering``
    dictionary
        Provides ``optional_silence`` and ``nonoptional_silence``
    frameshift : float
        Interval ends closer than this to the file duration are snapped to
        the duration (non-segment branch only)
    """
    textgrid_write_errors = {}
    frameshift = Decimal(str(frameshift))
    if not os.path.exists(out_directory):
        os.makedirs(out_directory, exist_ok=True)
    if not corpus.segments:
        for k, v in sorted(word_ctm.items()):
            maxtime = Decimal(str(corpus.get_wav_duration(k)))
            # Only the first speaker of the file is exported in this branch.
            speaker = list(v.keys())[0]
            v = list(v.values())[0]
            try:
                tg = TextGrid(maxTime=maxtime)
                wordtier = IntervalTier(name='words', maxTime=maxtime)
                phonetier = IntervalTier(name='phones', maxTime=maxtime)
                for interval in v:
                    if maxtime - interval[1] < frameshift:  # Fix rounding issues
                        interval[1] = maxtime
                    wordtier.add(*interval)
                for interval in phone_ctm[k][speaker]:
                    if maxtime - interval[1] < frameshift:
                        interval[1] = maxtime
                    phonetier.add(*interval)
                tg.append(wordtier)
                tg.append(phonetier)
                relative = corpus.file_directory_mapping[k]
                if relative:
                    speaker_directory = os.path.join(out_directory, relative)
                else:
                    speaker_directory = out_directory
                os.makedirs(speaker_directory, exist_ok=True)
                outpath = os.path.join(speaker_directory, k + '.TextGrid')
                tg.write(outpath)
            except Exception:
                # Best effort: remember the traceback and carry on with the next file.
                textgrid_write_errors[k] = '\n'.join(traceback.format_exception(*sys.exc_info()))
    else:
        silences = {dictionary.optional_silence, dictionary.nonoptional_silence}
        for filename, speaker_dict in sorted(word_ctm.items()):
            maxtime = corpus.get_wav_duration(filename)
            try:
                speaker_directory = os.path.join(out_directory, corpus.file_directory_mapping[filename])
                # The sub-directory has to exist before writing, exactly as in
                # the branch above; otherwise every file of a nested corpus
                # ends up in the error log.
                os.makedirs(speaker_directory, exist_ok=True)
                tg = TextGrid(maxTime=maxtime)
                for speaker in corpus.speaker_ordering[filename]:
                    words = speaker_dict[speaker]
                    word_tier_name = '{} - words'.format(speaker)
                    phone_tier_name = '{} - phones'.format(speaker)
                    word_tier = IntervalTier(name=word_tier_name, maxTime=maxtime)
                    phone_tier = IntervalTier(name=phone_tier_name, maxTime=maxtime)
                    for w in words:
                        word_tier.add(*w)
                    for p in phone_ctm[filename][speaker]:
                        if len(phone_tier) > 0 and phone_tier[-1].mark in silences and p[2] in silences:
                            # Two silences in a row: extend the previous one.
                            phone_tier[-1].maxTime = p[1]
                        else:
                            if len(phone_tier) > 0 and p[2] in silences and p[0] < phone_tier[-1].maxTime:
                                # Silence overlapping the previous phone: start it later.
                                p = phone_tier[-1].maxTime, p[1], p[2]
                            elif len(phone_tier) > 0 and p[2] not in silences and p[0] < phone_tier[-1].maxTime and \
                                    phone_tier[-1].mark in silences:
                                # Phone overlapping the previous silence: cut the silence short.
                                phone_tier[-1].maxTime = p[0]
                            phone_tier.add(*p)
                    tg.append(word_tier)
                    tg.append(phone_tier)
                tg.write(os.path.join(speaker_directory, filename + '.TextGrid'))
            except Exception:
                textgrid_write_errors[filename] = '\n'.join(traceback.format_exception(*sys.exc_info()))
    if textgrid_write_errors:
        error_log = os.path.join(out_directory, 'output_errors.txt')
        with open(error_log, 'w', encoding='utf8') as f:
            f.write('The following exceptions were encountered during the ouput of the alignments to TextGrids:\n\n')
            for k, v in textgrid_write_errors.items():
                f.write('{}:\n'.format(k))
                f.write('{}\n\n'.format(v))
# Defaults come from the module constants; the command line may override them.
tier_name = TIER_NAME
close_enough = CLOSE_ENOUGH / 1000
argparser = ArgumentParser(description="Alignment quality evaluation")
argparser.add_argument("-f", "--fudge", type=int, help="Fudge factor in milliseconds")
argparser.add_argument("-t", "--tier", help="Name of tier to use")
argparser.add_argument("OneGrid")
argparser.add_argument("TwoGrid")
args = argparser.parse_args()
# Compare against None: a plain truth test would silently discard an explicit
# "--fudge 0" and fall back to the default tolerance.
if args.fudge is not None:
    close_enough = args.fudge / 1000
if args.tier:
    tier_name = args.tier
# read in
first = boundaries(TextGrid.fromFile(args.OneGrid), tier_name)
secnd = boundaries(TextGrid.fromFile(args.TwoGrid), tier_name)
# count concordant and discordant boundaries
if len(first) != len(secnd):
    exit("Tiers lengths do not match.")
concordant = 0
discordant = 0
for (boundary1, boundary2) in zip(first, secnd):
    if boundary1.transition != boundary2.transition:
        exit("Tier labels do not match.")
    if is_close_enough(boundary1.time, boundary2.time, close_enough):
        concordant += 1
    else:
        discordant += 1
# print out
if concordant + discordant == 0:
    # Both tiers are empty: stop with a message instead of dividing by zero.
    exit("No boundaries to compare.")
agreement = concordant / (concordant + discordant)
def parse_discourse(self, path, types_only = False):
    '''
    Parse a TextGrid file for later importing.

    Only tiers named ``"<speaker> - <type>"`` are used.  If a wav file is
    found for the TextGrid and it has more than one channel, each speaker is
    also assigned a channel in order of first appearance.

    ``self.annotation_types`` is replaced by a temporary list while parsing
    and is always put back, even when parsing fails.

    Parameters
    ----------
    path : str
        Path to TextGrid file
    types_only : bool
        Passed through unchanged to ``self._parse_annotations``

    Returns
    -------
    :class:`~polyglotdb.io.discoursedata.DiscourseData`
        Parsed data from the file

    Raises
    ------
    TextGridError
        If the TextGrid is not valid for this parser
    '''
    tg = TextGrid()
    tg.read(path)
    if not self._is_valid(tg):
        raise TextGridError('The file "{}" cannot be parsed by the FAVE parser.'.format(path))
    name = os.path.splitext(os.path.split(path)[1])[0]

    # Split every usable tier name once; tiers that do not follow the
    # "<speaker> - <type>" pattern are ignored everywhere below.
    speaker_tiers = []
    for ti in tg.tiers:
        try:
            speaker, tier_type = ti.name.split(' - ')
        except ValueError:
            continue
        speaker_tiers.append((ti, speaker, tier_type))

    wav_path = find_wav_path(path)
    speaker_channel_mapping = {}
    if wav_path is not None:
        n_channels = get_n_channels(wav_path)
        if n_channels > 1:
            #Figure speaker-channel mapping
            # NOTE(review): the position is scaled by the channel count, so
            # with two channels only the first speaker lands on channel 0.
            # An earlier, unused tier count suggests the tier count may have
            # been the intended divisor - confirm before changing.
            cutoffs = [x/n_channels for x in range(1, n_channels)]
            for _, speaker, _ in speaker_tiers:
                if speaker in speaker_channel_mapping:
                    continue
                # Index of this speaker among the distinct speakers so far.
                ind = len(speaker_channel_mapping)
                speaker_channel_mapping[speaker] = next(
                    (i for i, c in enumerate(cutoffs) if ind / n_channels < c),
                    n_channels - 1)

    saved_annotation_types = self.annotation_types
    self.annotation_types = []
    try:
        #Parse the tiers
        for ti, speaker, tier_type in speaker_tiers:
            # A tier holding a single blank interval carries no data.
            if len(ti) == 1 and ti[0].mark.strip() == '':
                continue
            at = OrthographyTier(tier_type, tier_type)
            at.speaker = speaker
            at.add(((x.mark.strip(), x.minTime, x.maxTime) for x in ti))
            self.annotation_types.append(at)
        pg_annotations = self._parse_annotations(types_only)
        data = DiscourseData(name, pg_annotations, self.hierarchy)
    finally:
        # Never leave the parser holding the temporary tier list.
        self.annotation_types = saved_annotation_types

    data.speaker_channel_mapping = speaker_channel_mapping
    data.wav_path = wav_path
    return data
import wave
import sys,os
from textgrid import TextGrid
# Expects two command-line arguments: a TextGrid path and a wav path.
# Exits silently when either is missing.
if len(sys.argv)<3:
    sys.exit()
grid=os.path.abspath(sys.argv[1])
grid=TextGrid.fromFile(grid)
wavPath=os.path.abspath(sys.argv[2])
print grid
# From here on `grid` is the last tier of the TextGrid, not the whole file.
grid=grid[len(grid)-1]
counter=0
print grid
print "files loaded"
for interval in grid:
    # Start, end and label are recovered from the text between the
    # parentheses of the interval's string form.
    # NOTE(review): this breaks if the label itself contains "," or ")" - confirm.
    intervalStr=str(interval)
    start,end,mark=intervalStr[intervalStr.find("(")+1:intervalStr.find(")")].split(',')
    print start,end,mark
    start=float(start)
    end=float(end)
    mark=mark.split()[0]
    # Intervals labelled 'sil' or 'sp' are skipped.
    if mark!='sil' and mark!='sp':
        print "extracting :%s" %mark
        win= wave.open(wavPath, 'rb')
        t0,t1=start,end #t0, t1= 1.0, 2.0 # cut audio between one and two seconds
        # Convert the interval boundaries from seconds to frame offsets.
        s0, s1= int(t0*win.getframerate()), int(t1*win.getframerate())
        win.readframes(s0) # discard
wordintervals.append(x) elif i == 1: for x in ti: x.maxTime += cur_dur x.minTime += cur_dur phoneintervals.append(x) cur_dur += maxtime words = IntervalTier(name='words') for i in wordintervals: words.addInterval(i) phones = IntervalTier(name='phones') for i in phoneintervals: phones.addInterval(i) tg1 = TextGrid(maxTime = cur_dur) tg1.append(words) tg1.append(phones) tg1.write(chapteroutpath1, null = '') speaker_tier = IntervalTier(name=speaker) for i in range(len(groupedwavfiles)): if i == 1: speaker_tier.add(0.0, wavfiletimes[0], groupedlabtext[0]) else: speaker_tier.add(wavfiletimes[i-2], wavfiletimes[i-1], groupedlabtext[i-1]) tg2 = TextGrid(maxTime = duration) tg2.append(speaker_tier) tg2.write(chapteroutpath2, null = '')
def doCleanUp(sourceDirectory, fileName, tierName, targetDirectory):
    """
    Clean one tier of a TextGrid and save the result under the same file name.

    The directory arguments are joined to ``fileName`` by plain string
    concatenation, so each must already end with a path separator.

    sourceDirectory: directory prefix of the TextGrid to read
    fileName: name of the TextGrid file, used for both input and output
    tierName: tier handed to ``createNew``
    targetDirectory: directory prefix the cleaned TextGrid is written to
    """
    source_path = sourceDirectory + fileName
    cleaned = createNew(TextGrid.fromFile(source_path), tierName)
    cleaned.write(targetDirectory + fileName)
for subdir, dirs, files in os.walk(root): for file in unmatched: tmp = file[file.rfind('/')+1:] tmp = tmp[:-9]+'.flac' if tmp in files: print "yes" anFiles[file] = subdir+'/'+tmp ''' #splits wav files into acoustic feature files according to TextGrid content prog = 1 for key, value in anFiles.iteritems(): if len(value) > 0: print str(prog)+' /772' prog += 1 annot = TextGrid.load(key) origAudio = wave.open(value, 'r') frameRate = origAudio.getframerate() nChannels = origAudio.getnchannels() sampWidth = origAudio.getsampwidth() count = 0 #iterate over items/tiers #print key for i, tier in enumerate(annot): for (xmin, xmax, atype) in tier.simple_transcript: start = float(xmin) end = float(xmax)
def inspect_textgrid(path):
    """
    Build a :class:`~polyglotdb.io.parsers.textgrid.TextgridParser` by
    inspecting one TextGrid file, or every TextGrid below a directory

    The first TextGrid that has tiers decides the annotation type of each
    tier.  Later files only contribute their labels, tier by tier, to the
    types already chosen.

    Parameters
    ----------
    path : str
        Full path to TextGrid file, or to a directory of TextGrid files

    Returns
    -------
    :class:`~polyglotdb.io.parsers.textgrid.TextgridParser`
        Autodetected parser for the TextGrid file
    """
    trans_delimiters = [".", " ", ";", ","]

    if os.path.isdir(path):
        textgrids = [
            os.path.join(root, filename)
            for root, _subdirs, files in os.walk(path)
            for filename in files
            if filename.lower().endswith(".textgrid")
        ]
    else:
        textgrids = [path]

    def add_labels(annotation_type, tier):
        # Interval tiers expose minTime/maxTime; fall back to the single
        # time stamp of point tiers when those attributes are missing.
        try:
            annotation_type.add(((x.mark.strip(), x.minTime, x.maxTime) for x in tier), save=False)
        except AttributeError:
            annotation_type.add(((x.mark.strip(), x.time) for x in tier), save=False)

    def build_annotation_type(tier, tier_guesses):
        # Choose the annotation type object for one tier of the first file.
        if tier.name not in tier_guesses:
            placeholder = OrthographyTier("word", "word")
            placeholder.ignored = True
            return placeholder

        if tier_guesses[tier.name] == "segment":
            return SegmentTier(tier.name, tier_guesses[tier.name])

        labels = uniqueLabels(tier)
        cat = guess_type(labels, trans_delimiters)
        if cat == "transcription":
            transcription = TranscriptionTier(tier.name, tier_guesses[tier.name])
            transcription.trans_delimiter = guess_trans_delimiter(labels)
            return transcription
        if cat == "numeric":
            if isinstance(tier, IntervalTier):
                raise NotImplementedError
            return BreakIndexTier(tier.name, tier_guesses[tier.name])
        if cat == "orthography":
            if isinstance(tier, IntervalTier):
                return OrthographyTier(tier.name, tier_guesses[tier.name])
            return TextOrthographyTier(tier.name, tier_guesses[tier.name])
        if cat == "tobi":
            return TobiTier(tier.name, tier_guesses[tier.name])
        if cat == "grouping":
            return GroupingTier(tier.name, tier_guesses[tier.name])

        print(tier.name)
        print(cat)
        raise NotImplementedError

    anno_types = []
    for textgrid_path in textgrids:
        tg = TextGrid()
        tg.read(textgrid_path)

        if anno_types:
            # Types are already fixed: match tiers to them by position.
            for position, tier in enumerate(tg.tiers):
                known = anno_types[position]
                if known.ignored:
                    continue
                add_labels(known, tier)
            continue

        tier_guesses, hierarchy = guess_tiers(tg)
        for tier in tg.tiers:
            annotation_type = build_annotation_type(tier, tier_guesses)
            if not annotation_type.ignored:
                add_labels(annotation_type, tier)
            anno_types.append(annotation_type)

    return TextgridParser(anno_types, hierarchy)
def ctm_to_textgrid(word_ctm, phone_ctm, out_directory, corpus):
    """
    Write word and phone alignments out as TextGrid files.

    Without ``corpus.segments`` every utterance gets its own TextGrid with a
    ``words`` and a ``phones`` tier.  With segments, utterances are grouped
    by recording: each recording gets one TextGrid with a pair of
    ``<speaker> - words`` / ``<speaker> - phones`` tiers per speaker, and all
    interval times are shifted by the start of the utterance's segment.

    A ``ValueError`` while building or writing a TextGrid is reported on
    standard output and does not stop the export.

    word_ctm: utterance name -> iterable of (begin, end, label) word intervals
    phone_ctm: utterance name -> iterable of (begin, end, label) phone intervals
    out_directory: root directory for the generated TextGrids
    corpus: provides ``segments``, ``get_wav_duration``,
        ``speaker_directories`` and ``utt_speak_mapping``
    """
    if not os.path.exists(out_directory):
        os.makedirs(out_directory, exist_ok=True)

    def report_failure(name, error):
        print('Could not write textgrid for {}'.format(name))
        print(error)

    def get_or_create_tier(grid, tier_name, maxtime):
        # Reuse the tier if this recording already has one with that name.
        tier = grid.getFirst(tier_name)
        if tier is None:
            tier = IntervalTier(name = tier_name, maxTime = maxtime)
            grid.append(tier)
        return tier

    if not corpus.segments:
        for utterance, word_intervals in word_ctm.items():
            maxtime = corpus.get_wav_duration(utterance)
            try:
                grid = TextGrid(maxTime = maxtime)
                wordtier = IntervalTier(name = 'words', maxTime = maxtime)
                phonetier = IntervalTier(name = 'phones', maxTime = maxtime)
                for word in word_intervals:
                    wordtier.add(*word)
                for phone in phone_ctm[utterance]:
                    phonetier.add(*phone)
                grid.append(wordtier)
                grid.append(phonetier)

                speaker_directory = out_directory
                if corpus.speaker_directories:
                    speaker_directory = os.path.join(out_directory, corpus.utt_speak_mapping[utterance])
                os.makedirs(speaker_directory, exist_ok=True)
                grid.write(os.path.join(speaker_directory, utterance + '.TextGrid'))
            except ValueError as e:
                report_failure(utterance, e)
        return

    grids = {}
    for utterance, word_intervals in word_ctm.items():
        recording, begin, end = corpus.segments[utterance].split(' ')
        maxtime = corpus.get_wav_duration(utterance)
        if recording not in grids:
            grids[recording] = TextGrid(maxTime = maxtime)
        grid = grids[recording]
        offset = float(begin)

        speaker = corpus.utt_speak_mapping[utterance]
        wordtier = get_or_create_tier(grid, '{} - words'.format(speaker), maxtime)
        phonetier = get_or_create_tier(grid, '{} - phones'.format(speaker), maxtime)

        # Utterance-relative times become recording-relative times.
        for word in word_intervals:
            wordtier.add(word[0] + offset, word[1] + offset, word[2])
        for phone in phone_ctm[utterance]:
            phonetier.add(phone[0] + offset, phone[1] + offset, phone[2])

    for recording, grid in grids.items():
        try:
            grid.write(os.path.join(out_directory, recording + '.TextGrid'))
        except ValueError as e:
            report_failure(recording, e)
bit = 31 snd = snd / float((2.**bit)) duration = snd.shape[0] / float(sampFreq) # Returns in milliseconds return duration if __name__ == '__main__': for file in os.listdir(wav_dir): if file.endswith(".wav"): print("Processing", file, "...") file_id = file.split(".")[0] speaker = file_id[:-2] duration = getWavLength(file, wav_dir) vowel = file_id[-2:] word = "h" + vowel + "d" tg = TextGrid(maxTime=duration) # Make a textgrid else: continue # Get the timeframe for the vowel with open(time_data, 'r') as times: time_list = times.readlines()[6:] for line in time_list: line_split = line.split() #print(line_split[0], file_id) if line_split[0] in file_id: start = float(line_split[1])/1000 end = float(line_split[2])/1000 break else: continue
def __init__(self, directory, output_directory, speaker_characters = 0, num_jobs = 3):
    """
    Scan a corpus directory and build the utterance, speaker and wav mappings.

    Every ``.wav`` file below ``directory`` is paired with a transcription
    found by ``find_lab`` or, failing that, ``find_textgrid``.  With a
    TextGrid, every non-empty interval of every interval tier (except a tier
    named "notes") becomes one utterance.

    directory: root of the corpus; must exist, otherwise CorpusError is raised
    output_directory: where the ``logging`` and ``train`` directories are created
    speaker_characters: when greater than 0, the speaker is the first
        ``speaker_characters`` characters of the wav file name; otherwise it
        is the containing directory name (lab files) or the tier name
        (TextGrids)
    num_jobs: requested number of parallel jobs; adjusted below

    Raises SampleRateError when one speaker appears under more than one
    sample rate.
    """
    log_dir = os.path.join(output_directory, 'logging')
    os.makedirs(log_dir, exist_ok = True)
    self.log_file = os.path.join(log_dir, 'corpus.log')
    logging.basicConfig(filename = self.log_file, level = logging.INFO)
    if not os.path.exists(directory):
        raise(CorpusError('The directory \'{}\' does not exist.'.format(directory)))
    if num_jobs < 1:
        num_jobs = 1

    print('Setting up corpus information...')
    logging.info('Setting up corpus information...')
    self.directory = directory
    self.output_directory = os.path.join(output_directory, 'train')
    self.temp_directory = os.path.join(self.output_directory, 'temp')
    os.makedirs(self.temp_directory, exist_ok = True)
    self.num_jobs = num_jobs

    # Set up mapping dictionaries
    self.speak_utt_mapping = defaultdict(list)
    self.utt_speak_mapping = {}
    self.utt_wav_mapping = {}
    self.text_mapping = {}
    self.segments = {}
    self.feat_mapping = {}
    self.cmvn_mapping = {}
    self.ignored_utterances = []
    self.word_set = set()
    # When features already exist on disk, utterances without an entry in
    # them are ignored further down.
    feat_path = os.path.join(self.output_directory, 'feats.scp')
    if os.path.exists(feat_path):
        self.feat_mapping = load_scp(feat_path)

    if speaker_characters > 0:
        self.speaker_directories = False
    else:
        self.speaker_directories = True
    # sample rate -> set of speakers seen with that rate
    self.sample_rates = defaultdict(set)
    no_transcription_files = []
    unsupported_sample_rate = []
    for root, dirs, files in os.walk(self.directory, followlinks = True):
        for f in sorted(files):
            file_name, ext = os.path.splitext(f)
            if ext.lower() != '.wav':
                continue
            lab_name = find_lab(f, files)
            wav_path = os.path.join(root, f)
            sr = get_sample_rate(wav_path)
            if sr < 16000:
                unsupported_sample_rate.append(wav_path)
                continue
            if lab_name is not None:
                # One lab file means one utterance named after the wav file.
                utt_name = file_name
                if self.feat_mapping and utt_name not in self.feat_mapping:
                    self.ignored_utterances.append(utt_name)
                    continue
                lab_path = os.path.join(root, lab_name)
                self.text_mapping[utt_name] = load_text(lab_path)
                self.word_set.update(self.text_mapping[utt_name].split())
                if self.speaker_directories:
                    speaker_id = os.path.basename(root)
                else:
                    speaker_id = f[:speaker_characters]
                self.speak_utt_mapping[speaker_id].append(utt_name)
                self.utt_wav_mapping[utt_name] = wav_path
                self.sample_rates[get_sample_rate(wav_path)].add(speaker_id)
                self.utt_speak_mapping[utt_name] = speaker_id
            else:
                tg_name = find_textgrid(f, files)
                if tg_name is None:
                    no_transcription_files.append(wav_path)
                    continue
                tg_path = os.path.join(root, tg_name)
                tg = TextGrid()
                tg.read(tg_path)
                n_channels = get_n_channels(wav_path)
                num_tiers = len(tg.tiers)
                if n_channels == 2:
                    # Stereo files are split into two temporary channel files.
                    A_name = file_name + "_A"
                    B_name = file_name + "_B"
                    A_path, B_path = extract_temp_channels(wav_path, self.temp_directory)
                elif n_channels > 2:
                    raise(Exception('More than two channels'))
                if not self.speaker_directories:
                    speaker_name = f[:speaker_characters]
                for i, ti in enumerate(tg.tiers):
                    if ti.name.lower() == 'notes':
                        continue
                    if not isinstance(ti, IntervalTier):
                        continue
                    if self.speaker_directories:
                        speaker_name = ti.name
                    self.sample_rates[get_sample_rate(wav_path)].add(speaker_name)
                    for interval in ti:
                        label = interval.mark.lower().strip()
                        if label == '':
                            continue
                        begin, end = round(interval.minTime, 4), round(interval.maxTime, 4)
                        utt_name = '{}_{}_{}_{}'.format(speaker_name, file_name, begin, end)
                        utt_name = utt_name.replace('.','_')
                        if n_channels == 1:
                            if self.feat_mapping and utt_name not in self.feat_mapping:
                                self.ignored_utterances.append(utt_name)
                                continue
                            self.segments[utt_name] = '{} {} {}'.format(file_name, begin, end)
                            self.utt_wav_mapping[file_name] = wav_path
                        else:
                            # Tiers in the first half of the TextGrid go to
                            # channel A, the remaining tiers to channel B.
                            if i < num_tiers / 2:
                                utt_name += '_A'
                                if self.feat_mapping and utt_name not in self.feat_mapping:
                                    self.ignored_utterances.append(utt_name)
                                    continue
                                self.segments[utt_name] = '{} {} {}'.format(A_name, begin, end)
                                self.utt_wav_mapping[A_name] = A_path
                            else:
                                utt_name += '_B'
                                if self.feat_mapping and utt_name not in self.feat_mapping:
                                    self.ignored_utterances.append(utt_name)
                                    continue
                                self.segments[utt_name] = '{} {} {}'.format(B_name, begin, end)
                                self.utt_wav_mapping[B_name] = B_path
                        self.text_mapping[utt_name] = label
                        self.word_set.update(label.split())
                        self.utt_speak_mapping[utt_name] = speaker_name
                        self.speak_utt_mapping[speaker_name].append(utt_name)
    # Report everything that was skipped, on screen and in the log.
    if len(self.ignored_utterances) > 0:
        print('{} utterance(s) were ignored due to lack of features, please see {} for more information.'.format(len(self.ignored_utterances), self.log_file))
        logging.warning('The following utterances were ignored due to lack of features: {}. See relevant logs for more information'.format(', '.join(self.ignored_utterances)))
    if len(no_transcription_files) > 0:
        print('{} wav file(s) were ignored because neither a .lab file or a .TextGrid file could be found, please see {} for more information'.format(len(no_transcription_files), self.log_file))
        logging.warning('The following wav files were ignored due to lack of of a .lab or a .TextGrid file: {}.'.format(', '.join(no_transcription_files)))
    if len(unsupported_sample_rate) > 0:
        print('{} wav file(s) were ignored because they had a sample rate less than 16000, which is not currently supported, please see {} for more information'.format(len(unsupported_sample_rate), self.log_file))
        logging.warning('The following wav files were ignored due to a sample rate lower than 16000: {}.'.format(', '.join(unsupported_sample_rate)))
    # A speaker listed under more than one sample rate is an error.
    bad_speakers = []
    for speaker in self.speak_utt_mapping.keys():
        count = 0
        for k, v in self.sample_rates.items():
            if speaker in v:
                count += 1
        if count > 1:
            bad_speakers.append(speaker)
    if bad_speakers:
        msg = 'The following speakers had multiple speaking rates: {}. Please make sure that each speaker has a consistent sampling rate.'.format(', '.join(bad_speakers))
        logging.error(msg)
        raise(SampleRateError(msg))
    # No more jobs than speakers, but at least one job per sample rate.
    if len(self.speak_utt_mapping) < self.num_jobs:
        self.num_jobs = len(self.speak_utt_mapping)
    if self.num_jobs < len(self.sample_rates.keys()):
        self.num_jobs = len(self.sample_rates.keys())
        msg = 'The number of jobs was set to {}, due to the different sample rates in the dataset. If you would like to use fewer parallel jobs, please resample all wav files to the same sample rate.'.format(self.num_jobs)
        print(msg)
        logging.warning(msg)
    self.find_best_groupings()
def __init__(self, directory, output_directory, speaker_characters=0, num_jobs=3, debug=False,
             ignore_exceptions=False):
    """
    Scan a corpus directory and build the utterance, speaker and wav mappings.

    Every ``.wav`` file below ``directory`` is paired with a transcription
    found by ``find_lab`` or, failing that, ``find_textgrid``.  With a
    TextGrid, every interval of every interval tier (except a tier named
    "notes") whose text yields words becomes one utterance.  Files that
    cannot be used are collected in the ``*_errors`` / ``*_files`` lists
    instead of aborting the scan.

    directory: root of the corpus; CorpusError is raised when it does not
        exist or is not a directory
    output_directory: where the ``logging`` and ``corpus_data`` directories
        are created
    speaker_characters: 0 to take the speaker from the directory name (lab
        files) or the tier name (TextGrids); a positive int to take that many
        leading characters of the wav file name; ``'prosodylab'`` to take the
        second ``_``-separated field of the wav file name
    num_jobs: requested number of parallel jobs; adjusted at the end
    debug: stored on the instance
    ignore_exceptions: accepted for interface compatibility; not used here

    Raises SampleRateError when one speaker appears under more than one
    sample rate.
    """
    self.debug = debug
    log_dir = os.path.join(output_directory, 'logging')
    os.makedirs(log_dir, exist_ok=True)
    self.log_file = os.path.join(log_dir, 'corpus.log')
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    handler = logging.FileHandler(self.log_file, 'w', 'utf-8')
    # setFormatter is a method and must be called.  Assigning to it left the
    # handler on the default format and shadowed the method.
    handler.setFormatter(logging.Formatter('%(name)s %(message)s'))
    root_logger.addHandler(handler)
    if not os.path.exists(directory):
        raise CorpusError('The directory \'{}\' does not exist.'.format(directory))
    if not os.path.isdir(directory):
        raise CorpusError('The specified path for the corpus ({}) is not a directory.'.format(directory))
    if num_jobs < 1:
        num_jobs = 1

    print('Setting up corpus information...')
    root_logger.info('Setting up corpus information...')
    self.directory = directory
    self.output_directory = os.path.join(output_directory, 'corpus_data')
    self.temp_directory = os.path.join(self.output_directory, 'temp')
    os.makedirs(self.temp_directory, exist_ok=True)
    self.num_jobs = num_jobs

    # Set up mapping dictionaries
    self.speak_utt_mapping = defaultdict(list)
    self.utt_speak_mapping = {}
    self.utt_wav_mapping = {}
    self.text_mapping = {}
    self.word_counts = Counter()
    self.segments = {}
    self.feat_mapping = {}
    self.cmvn_mapping = {}
    self.ignored_utterances = []
    self.wav_files = []
    self.wav_durations = {}
    self.utterance_lengths = {}
    self.utterance_oovs = {}
    feat_path = os.path.join(self.output_directory, 'feats.scp')
    if os.path.exists(feat_path):
        self.feat_mapping = load_scp(feat_path)

    self.speaker_directories = speaker_characters == 0
    # sample rate -> set of speakers seen with that rate
    self.sample_rates = defaultdict(set)
    self.no_transcription_files = []
    self.decode_error_files = []
    self.unsupported_sample_rate = []
    self.wav_read_errors = []
    self.textgrid_read_errors = {}
    self.transcriptions_without_wavs = []
    self.file_directory_mapping = {}
    self.speaker_ordering = {}
    self.tg_count = 0
    self.lab_count = 0

    for root, dirs, files in os.walk(self.directory, followlinks=True):
        # Location of this directory relative to the corpus root.
        relative_dir = root.replace(self.directory, '').lstrip('/').lstrip('\\')
        for f in sorted(files):
            file_name, ext = os.path.splitext(f)
            if ext.lower() != '.wav':
                # Remember transcriptions that have no sound file next to them.
                if ext.lower() in ['.lab', '.textgrid']:
                    wav_path = find_wav(f, files)
                    if wav_path is None:
                        self.transcriptions_without_wavs.append(os.path.join(root, f))
                continue
            lab_name = find_lab(f, files)
            wav_path = os.path.join(root, f)
            try:
                sr = get_sample_rate(wav_path)
            except wave.Error:
                self.wav_read_errors.append(wav_path)
                continue
            if sr < 16000:
                # Recorded as an issue, but the file is still processed.
                self.unsupported_sample_rate.append(wav_path)

            if lab_name is not None:
                # Normalise the name before it is used as a key anywhere, so
                # that text_mapping agrees with the other mappings for file
                # names containing spaces.
                utt_name = file_name.strip().replace(' ', '_')
                if utt_name in self.utt_wav_mapping:
                    # Same file name in another directory: append a counter
                    # until the name is free.  The loop condition used to be
                    # inverted, so it never ran and the earlier utterance was
                    # silently overwritten.
                    ind = 0
                    fixed_utt_name = utt_name
                    while fixed_utt_name in self.utt_wav_mapping:
                        ind += 1
                        fixed_utt_name = utt_name + '_{}'.format(ind)
                    utt_name = fixed_utt_name
                if self.feat_mapping and utt_name not in self.feat_mapping:
                    self.ignored_utterances.append(utt_name)
                    continue
                lab_path = os.path.join(root, lab_name)
                try:
                    text = load_text(lab_path)
                except UnicodeDecodeError:
                    self.decode_error_files.append(lab_path)
                    continue
                words = parse_transcription(text)
                if not words:
                    continue
                self.word_counts.update(words)
                self.text_mapping[utt_name] = ' '.join(words)
                if self.speaker_directories:
                    speaker_name = os.path.basename(root)
                else:
                    if isinstance(speaker_characters, int):
                        speaker_name = f[:speaker_characters]
                    elif speaker_characters == 'prosodylab':
                        speaker_name = f.split('_')[1]
                speaker_name = speaker_name.strip().replace(' ', '_')
                self.speak_utt_mapping[speaker_name].append(utt_name)
                self.utt_wav_mapping[utt_name] = wav_path
                self.sample_rates[sr].add(speaker_name)
                self.utt_speak_mapping[utt_name] = speaker_name
                self.file_directory_mapping[utt_name] = relative_dir
                self.lab_count += 1
            else:
                tg_name = find_textgrid(f, files)
                if tg_name is None:
                    self.no_transcription_files.append(wav_path)
                    continue
                self.wav_files.append(file_name)
                self.wav_durations[file_name] = get_wav_duration(wav_path)
                tg_path = os.path.join(root, tg_name)
                tg = TextGrid()
                try:
                    tg.read(tg_path)
                except Exception:
                    # NOTE(review): the error is recorded but processing goes
                    # on with whatever tiers were read before the failure.
                    # Confirm whether the file should be skipped instead.
                    self.textgrid_read_errors[tg_path] = '\n'.join(
                        traceback.format_exception(*sys.exc_info()))

                n_channels = get_n_channels(wav_path)
                num_tiers = len(tg.tiers)
                if n_channels == 2:
                    # Stereo files are split into two temporary channel files.
                    A_name = file_name + "_A"
                    B_name = file_name + "_B"
                    A_path, B_path = extract_temp_channels(wav_path, self.temp_directory)
                elif n_channels > 2:
                    raise Exception('More than two channels')
                self.speaker_ordering[file_name] = []
                if not self.speaker_directories:
                    if isinstance(speaker_characters, int):
                        speaker_name = f[:speaker_characters]
                    elif speaker_characters == 'prosodylab':
                        speaker_name = f.split('_')[1]
                    speaker_name = speaker_name.strip().replace(' ', '_')
                    self.speaker_ordering[file_name].append(speaker_name)
                for i, ti in enumerate(tg.tiers):
                    if ti.name.lower() == 'notes':
                        continue
                    if not isinstance(ti, IntervalTier):
                        continue
                    if self.speaker_directories:
                        speaker_name = ti.name.strip().replace(' ', '_')
                        self.speaker_ordering[file_name].append(speaker_name)
                    self.sample_rates[sr].add(speaker_name)
                    for interval in ti:
                        text = interval.mark.lower().strip()
                        words = parse_transcription(text)
                        if not words:
                            continue
                        begin, end = round(interval.minTime, 4), round(interval.maxTime, 4)
                        utt_name = '{}_{}_{}_{}'.format(speaker_name, file_name, begin, end)
                        utt_name = utt_name.strip().replace(' ', '_').replace('.', '_')
                        if n_channels == 1:
                            if self.feat_mapping and utt_name not in self.feat_mapping:
                                self.ignored_utterances.append(utt_name)
                            self.segments[utt_name] = '{} {} {}'.format(file_name, begin, end)
                            self.utt_wav_mapping[file_name] = wav_path
                        else:
                            # Tiers in the first half of the TextGrid go to
                            # channel A, the remaining tiers to channel B.
                            if i < num_tiers / 2:
                                utt_name += '_A'
                                if self.feat_mapping and utt_name not in self.feat_mapping:
                                    self.ignored_utterances.append(utt_name)
                                self.segments[utt_name] = '{} {} {}'.format(A_name, begin, end)
                                self.utt_wav_mapping[A_name] = A_path
                            else:
                                utt_name += '_B'
                                if self.feat_mapping and utt_name not in self.feat_mapping:
                                    self.ignored_utterances.append(utt_name)
                                self.segments[utt_name] = '{} {} {}'.format(B_name, begin, end)
                                self.utt_wav_mapping[B_name] = B_path
                        self.text_mapping[utt_name] = ' '.join(words)
                        self.word_counts.update(words)
                        self.utt_speak_mapping[utt_name] = speaker_name
                        self.speak_utt_mapping[speaker_name].append(utt_name)
                if n_channels == 2:
                    self.file_directory_mapping[A_name] = relative_dir
                    self.file_directory_mapping[B_name] = relative_dir
                else:
                    self.file_directory_mapping[file_name] = relative_dir
                self.tg_count += 1

    self.issues_check = self.ignored_utterances or self.no_transcription_files or \
        self.textgrid_read_errors or self.unsupported_sample_rate or self.decode_error_files

    # A speaker listed under more than one sample rate is an error.
    bad_speakers = [
        speaker for speaker in self.speak_utt_mapping
        if sum(1 for speakers in self.sample_rates.values() if speaker in speakers) > 1
    ]
    if bad_speakers:
        msg = 'The following speakers had multiple speaking rates: {}. ' \
              'Please make sure that each speaker has a consistent sampling rate.'.format(', '.join(bad_speakers))
        root_logger.error(msg)
        raise SampleRateError(msg)

    # No more jobs than speakers, but at least one job per sample rate.
    if len(self.speak_utt_mapping) < self.num_jobs:
        self.num_jobs = len(self.speak_utt_mapping)
    if self.num_jobs < len(self.sample_rates.keys()):
        self.num_jobs = len(self.sample_rates.keys())
        msg = 'The number of jobs was set to {}, due to the different sample rates in the dataset. ' \
              'If you would like to use fewer parallel jobs, ' \
              'please resample all wav files to the same sample rate.'.format(self.num_jobs)
        print('WARNING: ' + msg)
        root_logger.warning(msg)
    self.find_best_groupings()
def norm(value, min, max):
    """Rescale ``value`` linearly so that ``min`` maps to 0 and ``max`` maps to 1.

    Raises ZeroDivisionError for numeric arguments when ``min == max``.
    """
    return (value - min)/(max - min)

def unnorm(norm_value, min, max):
    """Inverse of ``norm``: map a 0..1 value back onto the ``min``..``max`` range."""
    return norm_value * (max - min) + min

seg_ind = 0
# Process every TextGrid in data_dir, in sorted file-name order.
for f in sorted(os.listdir(data_dir)):
    if not f.endswith('.TextGrid'):
        continue
    print(f)
    # The matching audio file has the same stem with an '.adc.wav' suffix.
    wav_file = f.replace('.TextGrid', '.adc.wav')
    textgrid_path = os.path.join(data_dir, f)
    wav_path = os.path.join(data_dir, wav_file)
    tg = TextGrid()
    tg.read(textgrid_path)
    word_tier = tg.getFirst('words')
    segmentation_tier = IntervalTier('segments', 0, word_tier.maxTime)
    # Durations of all non-empty word intervals.
    durations = []
    for interval in word_tier:
        if interval.mark == '':
            continue
        durations.append(interval.maxTime - interval.minTime)
    # max()/min() raise ValueError when the tier has no labelled interval.
    max_duration = max(durations)
    min_duration = min(durations)
    min_thresh = 0.01
    max_thresh = 0.05