def maus_annotations(tgfile, corpusid, itemid):
    """Read annotations from a MAUS generated TextGrid file.

    Every tier in the TextGrid is walked in order and each of its
    (start, end, label) rows is added to a new AnnotationCollection.
    Rows with an empty label are stored with the label "#".  Within a
    tier, each annotation is chained to its successor via ``set_next``.
    Once all tiers are loaded, ``link_children`` is called with the
    orthographic tier type paired first with the canonical and then with
    the phonetic tier type.

    Parameters:
        tgfile   -- passed unchanged to ``TextGrid.load``
        corpusid -- passed unchanged to ``AnnotationCollection``
        itemid   -- passed unchanged to ``AnnotationCollection``

    Returns:
        The populated AnnotationCollection.

    Raises:
        KeyError if a tier's name is not one of 'MAU', 'ORT' or 'KAN'
        (only triggered for tiers that contain at least one row).
    """
    collection = AnnotationCollection([], corpusid, itemid, SecondAnnotation)

    # Map MAUS tier names onto the annotation types used by the collection.
    tiers = {
        'MAU': MAUS.phonetic,
        'ORT': MAUS.orthographic,
        'KAN': MAUS.canonical,
    }

    tg = TextGrid.load(tgfile)
    for tier in tg:
        # Generate annotations for this tier; the chain restarts per tier
        # so annotations are never linked across tier boundaries.
        last = None
        for start, end, label in tier.simple_transcript:
            if label == "":
                label = "#"
            ann = collection.add_annotation(
                tiers[tier.tier_name()], label, start, end)
            # Identity test: an annotation object with a custom __eq__ or
            # __ne__ must not be able to break the chaining.
            if last is not None:
                last.set_next(ann)
            last = ann

    collection.link_children(tiers['ORT'], tiers['KAN'])
    collection.link_children(tiers['ORT'], tiers['MAU'])

    return collection
for subdir, dirs, files in os.walk(root): for file in unmatched: tmp = file[file.rfind('/')+1:] tmp = tmp[:-9]+'.flac' if tmp in files: print "yes" anFiles[file] = subdir+'/'+tmp ''' #splits wav files into acoustic feature files according to TextGrid content prog = 1 for key, value in anFiles.iteritems(): if len(value) > 0: print str(prog)+' /772' prog += 1 annot = TextGrid.load(key) origAudio = wave.open(value, 'r') frameRate = origAudio.getframerate() nChannels = origAudio.getnchannels() sampWidth = origAudio.getsampwidth() count = 0 #iterate over items/tiers #print key for i, tier in enumerate(annot): for (xmin, xmax, atype) in tier.simple_transcript: start = float(xmin) end = float(xmax)