Esempio n. 1
0
def main():
	"""Score how similar one tier is across two .TextGrid files.

	Parses the command line, loads both TextGrid files, cleans the requested
	tier of each one with ``createNew`` and prints the ``smart_evaluate``
	score for the two cleaned tiers.

	Returns:
		0 on success, 1 when any of the input files does not exist.
	"""
	argparser = argparse.ArgumentParser(description='Assign a score based on similarity of two .TextGrid files.')
	# The three path options are indexed unconditionally below, so let argparse
	# reject a command line that omits them instead of crashing on None.
	argparser.add_argument('-first', '--first', nargs=2, metavar=('PATH', 'TIER_NAME'), required=True, help='Path to the first .TextGrid file.')
	argparser.add_argument('-second', '--second', nargs=2, metavar=('PATH', 'TIER_NAME'), required=True, help='Path to the second .TextGrid file.')
	argparser.add_argument('-d', '--d', nargs=1, required=True, help='Path to dictionary file.')
	argparser.add_argument('-close', '--close', nargs='?', default=500, const=500, help='Define how close, close is in milliseconds. Used for equality test.')

	args = argparser.parse_args()

	# Report every missing input, then stop: continuing would only fail later
	# with a less helpful error while loading the files.
	missing = False
	for label, path in (('First', args.first[0]), ('Second', args.second[0]), ('Dictionary', args.d[0])):
		if not op.isfile(path):
			print ("%s file '%s' does not exist." % (label, path))
			missing = True
	if missing:
		return 1

	first = TextGrid.fromFile(args.first[0])
	second = TextGrid.fromFile(args.second[0])

	first_clean = createNew(first, args.first[1])
	second_clean = createNew(second, args.second[1])

	# Only the disabled evaluate_skip_some variant below consumes the dictionary.
	dictionary = getDictionary(args.d[0])
	try:
		# The option is given in milliseconds; the evaluator receives seconds.
		closeness = int(args.close) / float(1000)
	except (TypeError, ValueError):
		print ("(-close or --close) argument should be integer.\nDefaulting to 500.")
		closeness = 0.5

#	result_skips = evaluate_skip_some(first_clean, second_clean, dictionary, closeness)
	result_smart = smart_evaluate(first_clean, second_clean, closeness)
	# Print the score that was actually computed; result_skips is never
	# assigned while the call above stays commented out.
	print ("Smart evaluation:\n\tR=%.4f " % result_smart)

	return 0
Esempio n. 2
0
 def fromFile(self, filename):
     """Load a TextGrid from `filename`, retrying with legacy encodings.

     The file is first handed to ``TextGridFile.fromFile`` as-is. If that
     raises ``TextGridError`` or ``UnicodeDecodeError``, the raw bytes are
     decoded with a short list of single-byte encodings, re-encoded as
     UTF-8 into a temporary file, and parsed again from there.

     Returns:
         The parsed TextGrid, or None when the textgrid library is not
         installed or the re-encoded copy still cannot be parsed.

     Raises:
         Re-raises the original parse error when no fallback encoding can
         decode the file.
     """
     if not LIBS_INSTALLED:
         error("can't load from file: textgrid lib not installed")
         return None
     try:
         return TextGridFile.fromFile(filename)
     except (TextGridError, UnicodeDecodeError) as e:
         error(e)
         # Context managers guarantee both handles are released on every
         # path, including the failure paths below.
         with open(filename, 'rb') as f:
             raw = f.read()
         text = None
         for encoding in ['Windows-1251', 'Windows-1252', 'ISO-8859-1']:
             try:
                 text = raw.decode(encoding)
                 break
             except UnicodeDecodeError as decode_err:
                 error(decode_err)
         if text is None:
             # Nothing could decode the bytes: surface the original error.
             raise
         with tempfile.NamedTemporaryFile() as tmp:
             tmp.write(text.encode('utf-8'))
             # Push the data to disk before the parser reopens it by name.
             tmp.flush()
             try:
                 return TextGridFile.fromFile(tmp.name)
             except TextGridError as parse_err:
                 error(parse_err)
                 return None
Esempio n. 3
0
def open_str_textgrid(textgrid_str: str) -> TextGrid:
    """Parse a TextGrid directly from a string.

    The textgrid library can only open a TextGrid from an actual file (not
    from a TextIOWrapper-like object), so the text is written to a temporary
    file and parsed from that file's path.
    """
    scratch = NamedTemporaryFile(mode="w")
    with scratch:
        scratch.write(textgrid_str)
        # Make sure the content is on disk before it is reopened by name.
        scratch.flush()
        parsed = TextGrid.fromFile(scratch.name)
    return parsed
def main():
	"""Clean one tier of a .TextGrid file with ``createNew`` and save it.

	Options: ``-r PATH TIER_NAME`` picks the input file and tier, ``-t``
	names the file to write to, ``-v`` switches verbose mode on. The result
	is only written when the target path already exists as a file;
	otherwise a message is printed instead.
	"""
	cli = ArgumentParser(description="Remove pauses from a specific tier of .TextGrid files.")
	cli.add_argument('-r', '--r', nargs=2, metavar=('PATH', 'TIER_NAME'), help='Path to .TextGrid file and name of tier.')
	cli.add_argument('-t', '--t', nargs=1, metavar='target', help='Path to target file for cleaned textgrid.')
	cli.add_argument('-v', '--v', dest='isVerbose', action='store_const', const=True, default=False, help='Make verbose.')
	options = cli.parse_args()

	grid = TextGrid.fromFile(options.r[0])
	cleaned = createNew(grid, options.r[1], options.isVerbose)

	destination = options.t[0]
	print (destination)
	if not op.isfile(destination):
		print ("Target file '%s' does not exist." % (destination))
		return
	cleaned.write(destination)
def compareAll(available, closeness=0.02):
	"""Score every same-named TextGrid pair found in each pair of directories.

	Args:
		available: iterable of sequences whose first two items are directory
			prefixes. File names are appended by plain string concatenation,
			so each prefix must already end with a path separator.
		closeness: tolerance handed through to ``smart_evaluate``.

	Returns:
		A ``(results, names)`` tuple: the score of every compared file pair
		and, in the same order, the file name it belongs to.
	"""
	results = []
	names = []
	# Only the disabled evaluate_skip_some variant below consumes the dictionary.
	dictionary = getDictionary(dirParent + '/' + dirParams + '/dictionaryDKMapped1.dict')
	for quad in available:
		list1 = getWithExtension(quad[0], 'TextGrid')
		# A set makes the per-file membership test constant time.
		list2 = set(getWithExtension(quad[1], 'TextGrid'))
		total = 0
		amount = 0
		for name in list1:
			if name not in list2:
				continue
			first = TextGrid.fromFile(quad[0]+name)
			second = TextGrid.fromFile(quad[1]+name)
#			result = evaluate_skip_some(first, second, dictionary, closeness)
			result = smart_evaluate(first, second, closeness)
			results.append(result)
			names.append(name)
			print ("-----------\n%s\n%s\n%sResult=%.4f%s\n-----------" % (quad[0]+name, quad[1]+name, bcolors.OKGREEN, result, bcolors.ENDC))
			total = total + result
			amount = amount + 1
		print ("%s\nvs.\n%s" % (quad[0], quad[1]))
		if amount:
			print ("%sAverage result=%.4f%s" % (bcolors.OKBLUE, (total / float(amount)), bcolors.ENDC))
		else:
			# No file name is shared by both directories: there is nothing to
			# average, and dividing by zero would abort the whole run.
			print ("%sNo common TextGrid files to compare.%s" % (bcolors.OKBLUE, bcolors.ENDC))
	return results, names
Esempio n. 6
0
    def process_files(self, inputs, outputs, verbose=False):
        """Check that every TextGrid in the input directory has a "phones" tier.

        Args:
            inputs: mapping whose "textgrids_path" entry is the directory that
                is searched for ``*.TextGrid`` files.
            outputs: mapping whose "durations_path" entry names the output
                location.
            verbose: not used by the code visible here.

        Raises:
            AssertionError: when a TextGrid file has no tier named "phones".
        """
        input_textgrids = Path(inputs["textgrids_path"])
        # NOTE(review): not used in the visible part of this method;
        # presumably consumed by code further down - confirm.
        output_durations_path = Path(outputs["durations_path"])

        for textgrid_file_path in input_textgrids.glob("*.TextGrid"):
            text_grid = TextGrid.fromFile(str(textgrid_file_path))

            # First tier called "phones", or None when the file has none.
            phones_intervals = next(
                (tier for tier in text_grid if tier.name == "phones"), None)

            # Check the search result, not the loop variable: the loop
            # variable is bound to the last tier whenever the file has any.
            assert phones_intervals is not None, f"Phones intervals not found in textgrid file {str(textgrid_file_path)}"
Esempio n. 7
0
 def fromFile(self, filename):
     """Load a TextGrid from `filename`, re-encoding it as UTF-8 if needed.

     The file is first handed to ``TextGridFile.fromFile`` as-is. If that
     raises ``TextGridError`` or ``UnicodeDecodeError``, the raw bytes go
     through ``util.decode_bytes``; a truthy result is written as UTF-8 to
     a temporary file and parsed again from there.

     Returns:
         The parsed TextGrid, or None when the textgrid library is not
         installed, the bytes cannot be decoded, or the re-encoded copy
         still cannot be parsed.
     """
     if not LIBS_INSTALLED:
         error("can't load from file: textgrid lib not installed")
         return None
     try:
         return TextGridFile.fromFile(filename)
     except (TextGridError, UnicodeDecodeError):
         # Context managers guarantee both handles are released on every
         # path, including the failure paths below.
         with open(filename, 'rb') as f:
             contents = util.decode_bytes(f.read())
         if not contents:
             error(
                 "can't load from file: unable to decode non-Unicode textgrid",
                 filename)
             return None
         with tempfile.NamedTemporaryFile() as tmp:
             tmp.write(contents.encode('utf-8'))
             # Push the data to disk before the parser reopens it by name.
             tmp.flush()
             try:
                 return TextGridFile.fromFile(tmp.name)
             except TextGridError as e:
                 error(e)
                 return None
Esempio n. 8
0
def main():
    """Cut audio files with sox according to their TextGrid alignments.

    Each row of the tab-separated index file supplies a recording id
    (column 0) and the path of the audio file passed to sox (column 1).
    The alignment ``<alignments>/<id>.TextGrid`` is loaded, cut points are
    derived from the first tier (around the leading/trailing empty-mark
    intervals and inside empty-mark intervals longer than ``--max-pause``),
    and ``sox ... trim`` is run with the resulting segment lengths to write
    ``<output>/<id>.wav``.

    A row that fails for any reason is reported and skipped.
    """
    # Imported locally so the block does not depend on a file-level import.
    import subprocess

    parser = argparse.ArgumentParser(description='.')
    parser.add_argument('index', type=str, help='')
    parser.add_argument('alignments', type=str, help='')
    parser.add_argument('output', type=str, help='')
    parser.add_argument('--max-padding', type=int, default=300, help='')
    parser.add_argument('--max-pause', type=int, default=300, help='')
    parser.add_argument('--rate', type=int, default=44100, help='')

    args = parser.parse_args()

    # Options are given in milliseconds; TextGrid times are in seconds.
    max_padding = args.max_padding / 1000
    max_pause = args.max_pause / 1000

    # newline="" is what the csv module expects from files it reads.
    with open(args.index, newline="") as fi:
        index = csv.reader(fi, delimiter="\t")
        for line in index:
            try:
                rid = line[0]
                tg = TextGrid.fromFile(
                    os.path.join(args.alignments, rid + ".TextGrid"))
                first = tg[0][0]
                last = tg[0][-1]
                assert first.mark == "", "ERROR: Alignment does not start with a silence"
                assert last.mark == "", "ERROR: Alignment does not end with a silence"
                start = max(0, first.maxTime - max_padding)
                end = min(last.minTime + max_padding, last.maxTime)
                cuts = []
                for interval in tg[0][1:-1]:
                    duration = interval.maxTime - interval.minTime
                    if interval.mark == "" and duration > max_pause:
                        # Keep half of the allowed pause on each side of the cut.
                        cuts.extend((
                            interval.minTime + max_pause / 2,
                            interval.maxTime - max_pause / 2,
                        ))
                cuts = [start] + cuts + [end]
                # First value is the initial offset, the rest are the gaps
                # between consecutive cut points.
                lengths = [cuts[0]
                           ] + [b - a for a, b in zip(cuts[:-1], cuts[1:])]
                print(rid, cuts)
                assert len(cuts) % 2 == 0, "ERROR: Odd number of cuts"
                output = os.path.join(args.output, rid + ".wav")
                # An argument list (no shell) keeps paths with spaces or shell
                # metacharacters from the index file intact and harmless.
                sox_cmd = ["sox", line[1], output, "rate", str(args.rate), "trim"]
                sox_cmd.extend(f"{s:.2f}" for s in lengths)
                print(" ".join(sox_cmd))
                subprocess.run(sox_cmd)
            except Exception as e:
                # Deliberate best effort: report the row and move on.
                print("TextGrid not found probably", e)
Esempio n. 9
0
    def add_textgrid(self,
                     annotator: Annotator,
                     tg_path: Union[str, Path],
                     selected_tiers: Optional[List[str]] = None,
                     use_tier_as_annotation: bool = False):
        """
        Load a TextGrid file and add its non-empty intervals to the Continuum.

        Parameters
        ----------
        annotator: Annotator
            Identifier of the annotator who produced the TextGrid.
        tg_path: `Path` or str
            Location of the TextGrid file.
        selected_tiers: optional list of str
            When given, tiers whose name is not in this list are skipped.
        use_tier_as_annotation: optional bool
            When True, each interval is annotated with the name of its tier
            instead of its own mark.
        """
        from textgrid import TextGrid, IntervalTier
        grid = TextGrid.fromFile(str(tg_path))
        for tier_name in grid.getNames():
            if selected_tiers is None or tier_name in selected_tiers:
                tier: IntervalTier = grid.getFirst(tier_name)
                for interval in tier:
                    # Intervals with an empty mark carry no annotation.
                    if interval.mark:
                        label = tier_name if use_tier_as_annotation else interval.mark
                        self.add(annotator,
                                 Segment(interval.minTime, interval.maxTime),
                                 label)
Esempio n. 10
0
    def genFramesTier(self):
        """Add a 'frames' point tier to the TextGrid, save it and reload it.

        One point is created per frame time reported by the Dicom module,
        labelled with the frame index. The TextGrid's maxTime is extended
        when the new tier is longer. When the current file has both '.ult'
        and '.txt' entries, the first line of the '.txt' file becomes a
        "sentence" interval tier that is moved to the front of the tier
        list. The TextGrid is then written to its '.TextGrid' path and read
        back from that file.
        """
        debug('generating frames tier for %s' %
              self.app.Data.getCurrentFilename())
        self.frameTierName = 'frames'
        times = self.app.Dicom.getFrameTimes()
        self.app.Data.setFileLevel("NumberOfFrames", len(times))
        try:
            maxTime = max(self.app.Audio.duration, times[-1])
        except AttributeError:
            # No audio duration available: the last frame time is the end.
            maxTime = times[-1]
        tier = PointTier('frames', maxTime=maxTime)
        for f, t in enumerate(times):
            tier.addPoint(Point(t, str(f)))
        if not self.TextGrid.maxTime or maxTime > self.TextGrid.maxTime:
            self.TextGrid.maxTime = maxTime
        self.TextGrid.append(tier)

        keys = self.app.Data.getFileLevel('all')
        if '.ult' in keys and '.txt' in keys:
            fname = self.app.Data.unrelativize(
                self.app.Data.getFileLevel('.txt'))
            # The context manager closes the file even if reading or
            # decoding raises.
            with open(fname, 'rb') as f:
                s = util.decode_bytes(f.read())
            if s:
                line = s.splitlines()[0]
                sentenceTier = IntervalTier("sentence")
                sentenceTier.add(0, self.app.Audio.duration, line)
                self.TextGrid.append(sentenceTier)
                # Rotate the freshly appended sentence tier to the front.
                self.TextGrid.tiers = [self.TextGrid.tiers[-1]
                                       ] + self.TextGrid.tiers[:-1]

        path = self.app.Data.unrelativize(
            self.app.Data.getFileLevel('.TextGrid'))
        self.TextGrid.write(path)
        self.TextGrid = TextGridFile.fromFile(path)
Esempio n. 11
0
from argparse import ArgumentParser
from pathlib import Path

import numpy as np
from scipy.io import wavfile
from textgrid import TextGrid
from tqdm.auto import tqdm

from .config import FLAGS

# Zero out the samples of every special phoneme in each recording of
# FLAGS.data_dir and write the result to --output-dir under the same name.
parser = ArgumentParser()

parser.add_argument('-o', '--output-dir', type=Path, required=True)
args = parser.parse_args()

# wavfile.write does not create missing directories, so make sure the
# destination exists before the first file is written.
args.output_dir.mkdir(parents=True, exist_ok=True)

files = sorted(FLAGS.data_dir.glob('*.TextGrid'))
for fn in tqdm(files):
    tg = TextGrid.fromFile(str(fn.resolve()))
    wav_fn = FLAGS.data_dir / f'{fn.stem}.wav'
    sr, y = wavfile.read(wav_fn)
    y = np.copy(y)
    # NOTE(review): tier index 1 is presumably the phone tier - confirm.
    for phone in tg[1]:
        if phone.mark in FLAGS.special_phonemes:
            # Convert the interval bounds from seconds to sample indices.
            first_sample = int(phone.minTime * sr)
            last_sample = int(phone.maxTime * sr)
            y[first_sample:last_sample] = 0
    out_file = args.output_dir / f'{fn.stem}.wav'
    wavfile.write(out_file, sr, y)
Esempio n. 12
0
def load_textgrid(path):
    """Read the TextGrid file at `path` and return the parsed object."""
    return TextGrid.fromFile(path)
Esempio n. 13
0
# Generate a pronunciation dictionary for WORDS into a temporary file and
# load it while that file still exists.
with tempfile.NamedTemporaryFile() as g2pfh:
    d_gen = PhonetisaurusDictionaryGenerator(g2p_model, WORDS, g2pfh.name)
    d_gen.generate()

    dictionary = Dictionary(g2pfh.name, dict_dir)

acoustic_model.validate(dictionary)

aligner = PretrainedAligner(corpus,
                            dictionary,
                            acoustic_model,
                            outdir,
                            temp_directory=corpse_dir_tmp)

check = aligner.test_utterance_transcriptions()

aligner.do_align()
aligner.export_textgrids()

grid = TextGrid.fromFile(os.path.join(outdir, 'in', '1.TextGrid'))

# Create a simple JSON output: one entry per interval of the first tier.
words = [{
    "word": X.mark,
    "start": float(X.minTime),
    "end": float(X.maxTime)
} for X in grid.tiers[0]]

# The context manager closes (and therefore flushes) the output file
# deterministically instead of leaving that to garbage collection.
with open(JSON_OUT, 'w') as json_fh:
    json.dump({"words": words}, json_fh)
Esempio n. 14
0
import wave
import sys,os
from textgrid import TextGrid

# Python 2 script (print statements). Usage: <script> GRID_PATH WAV_PATH
# Walks the last tier of the TextGrid and, for every interval whose mark is
# not 'sil' or 'sp', opens the wav file and skips to the interval's start.
# Exit silently when fewer than two arguments are given.
if len(sys.argv)<3:
    sys.exit()

# `grid` is first the absolute path, then the parsed TextGrid.
grid=os.path.abspath(sys.argv[1])
grid=TextGrid.fromFile(grid)
wavPath=os.path.abspath(sys.argv[2])

print grid
# From here on `grid` is the last tier of the TextGrid.
grid=grid[len(grid)-1]
# NOTE(review): counter is not used in the visible part of the script.
counter=0
print grid
print "files loaded"
for interval in grid: 
    # Recover start, end and mark by parsing the interval's string form:
    # the comma-separated text between the first "(" and the first ")".
    intervalStr=str(interval)
    start,end,mark=intervalStr[intervalStr.find("(")+1:intervalStr.find(")")].split(',')
    print start,end,mark
    start=float(start)
    end=float(end)
    # Keep only the first whitespace-separated token of the mark.
    mark=mark.split()[0]
    if mark!='sil' and mark!='sp':
        print "extracting :%s" %mark
        win= wave.open(wavPath, 'rb')
        t0,t1=start,end
        #t0, t1= 1.0, 2.0 # cut audio between one and two seconds
        # Convert the interval bounds from seconds to frame indices.
        s0, s1= int(t0*win.getframerate()), int(t1*win.getframerate())

        # Read and drop everything before the interval start.
        win.readframes(s0) # discard
Esempio n. 15
0
from textgrid import TextGrid
from midiutil import MIDIFile
# Turn every interval of the "phone" tier in test.TextGrid into a MIDI note
# and save the result as myfile.mid.
file = MIDIFile(deinterleave=False)
file.addTempo(0, 0, 60)
x = TextGrid.fromFile("test.TextGrid")
for z in x:
    if z.name == "phone":
        for l in z:
            # Every note uses the same arguments (0, 0, 69, ..., 127); only
            # the start time and duration come from the interval.
            file.addNote(0, 0, 69, l.minTime, l.duration(), 127)

# The context manager closes (and therefore flushes) the output file
# deterministically instead of leaving that to garbage collection.
with open("myfile.mid", "wb") as midi_out:
    file.writeFile(midi_out)
def doCleanUp(sourceDirectory, fileName, tierName, targetDirectory):
	"""Clean one tier of a TextGrid and save it under the same file name.

	The file is read from sourceDirectory, passed with tierName through
	``createNew`` and written to targetDirectory. Both directories are
	joined to fileName by plain string concatenation, so they must already
	end with a path separator.
	"""
	sourcePath = sourceDirectory + fileName
	cleaned = createNew(TextGrid.fromFile(sourcePath), tierName)
	cleaned.write(targetDirectory + fileName)
Esempio n. 17
0
 # Default tolerance, converted from milliseconds to seconds.
 close_enough = CLOSE_ENOUGH / 1000
 argparser = ArgumentParser(description="Alignment quality evaluation")
 argparser.add_argument("-f",
                        "--fudge",
                        type=int,
                        help="Fudge factor in milliseconds")
 argparser.add_argument("-t", "--tier", help="Name of tier to use")
 argparser.add_argument("OneGrid")
 argparser.add_argument("TwoGrid")
 args = argparser.parse_args()
 # Compare against None so that an explicit "-f 0" (exact match) is honoured
 # instead of being mistaken for "option not given".
 if args.fudge is not None:
     close_enough = args.fudge / 1000
 # NOTE(review): tier_name must already hold its default before this point;
 # it is only assigned here when -t/--tier is given - confirm.
 if args.tier:
     tier_name = args.tier
 # read in
 first = boundaries(TextGrid.fromFile(args.OneGrid), tier_name)
 secnd = boundaries(TextGrid.fromFile(args.TwoGrid), tier_name)
 # count concordant and discordant boundaries
 if len(first) != len(secnd):
     exit("Tiers lengths do not match.")
 concordant = 0
 discordant = 0
 for (boundary1, boundary2) in zip(first, secnd):
     if boundary1.transition != boundary2.transition:
         exit("Tier labels do not match.")
     if is_close_enough(boundary1.time, boundary2.time, close_enough):
         concordant += 1
     else:
         discordant += 1
 # Two empty tiers pass the length check above but leave nothing to score;
 # stop with a message rather than dividing by zero.
 total = concordant + discordant
 if total == 0:
     exit("No boundaries to compare.")
 # print out
 agreement = concordant / total
Esempio n. 18
0
 # Defaults; -t/--tier and -f/--fudge override them below.
 tier_name = TIER_NAME
 # Tolerance converted from milliseconds to seconds.
 close_enough = CLOSE_ENOUGH / 1000
 argparser = ArgumentParser(description="Alignment quality evaluation")
 argparser.add_argument("-f", "--fudge", type=int,
                        help="Fudge factor in milliseconds")
 argparser.add_argument("-t", "--tier",
                        help="Name of tier to use")
 argparser.add_argument("OneGrid")
 argparser.add_argument("TwoGrid")
 args = argparser.parse_args()
 # Compare against None so that an explicit "-f 0" (exact match) is honoured
 # instead of being mistaken for "option not given".
 if args.fudge is not None:
     close_enough = args.fudge / 1000
 if args.tier:
     tier_name = args.tier
 # read in
 first = boundaries(TextGrid.fromFile(args.OneGrid), tier_name)
 secnd = boundaries(TextGrid.fromFile(args.TwoGrid), tier_name)
 # count concordant and discordant boundaries
 if len(first) != len(secnd):
     exit("Tiers lengths do not match.")
 concordant = 0
 discordant = 0
 for (boundary1, boundary2) in zip(first, secnd):
     if boundary1.transition != boundary2.transition:
         exit("Tier labels do not match.")
     if is_close_enough(boundary1.time, boundary2.time, close_enough):
         concordant += 1
     else:
         discordant += 1
 # Two empty tiers pass the length check above but leave nothing to score;
 # stop with a message rather than dividing by zero.
 total = concordant + discordant
 if total == 0:
     exit("No boundaries to compare.")
 # print out
 agreement = concordant / total