def trainvad():
    """Driver to train a Voice Activation Detection (VAD) classifier.

    Command line: ``python <script> <input movie names> <output classifier>``.

    ``sys.argv[1]`` is a text file listing one pickled feature file per line.
    For each entry, the subtitle file is the entry name with up to two
    trailing dot-suffixes stripped and ``.srt`` appended.  The subtitles are
    parsed with ``getdialogs`` and converted to positive/negative time
    segments by ``getTrainingSegments``; the feature matrix is sliced per
    segment and each slice is flattened into one example.  Collection stops
    once at least 5000 positive examples exist (or the listing ends).  The
    examples are split with ``splitTrainEval``, an ``SGDSVM`` is trained on
    the training part and evaluated with ``evalSVM`` on the held-out part.

    Entries whose files cannot be read (``IOError``) are skipped.  If fewer
    than two arguments are given, a usage message is printed and the process
    exits via ``sys.exit()``.

    NOTE(review): nothing in this function writes the trained model to
    ``sys.argv[2]`` even though the usage text names an output classifier --
    confirm whether saving is expected to happen here.
    """
    # Validate the command line first so the usage message does not depend on
    # the project/third-party imports below being available.
    if len(sys.argv) < 3:
        print('Usage: python %s <input movie names> <output classifier>' % (
            sys.argv[0]))
        sys.exit()

    from nktrainutils import SGDSVM, splitTrainEval, evalSVM
    import numpy as np
    from cPickle import load

    def segfeats(feats, start, end):
        """Return the flattened feature columns for the span start..end."""
        # Times are scaled by 10 to obtain column indices -- presumably the
        # features have 10 columns per second; TODO confirm against getmel.
        lo, hi = int(start * 10), int(end * 10)
        return feats[:, lo:hi].transpose().flatten()

    def usable(vec):
        """Accept only non-empty feature vectors that contain no NaN."""
        return vec is not None and len(vec) > 0 and not np.isnan(vec).any()

    posfeats, negfeats = [], []
    with open(sys.argv[1]) as listing:
        for fname in listing:
            fname = fname.rstrip('\n')
            subfname = fname.rsplit('.', 2)[0] + '.srt'
            try:
                # read subtitles and get training segments
                with open(subfname) as subfile:
                    lines = [l.strip() for l in subfile if l.strip()]
                dialogs = getdialogs(lines)
                pos, neg = getTrainingSegments(dialogs, 100, 600, seglen=0.5)
                print('%s %d %d %d' % (fname, len(dialogs), len(pos), len(neg)))
                # read features
                # NOTE(review): unpickling executes arbitrary code; only list
                # feature files that come from a trusted source.
                with open(fname) as featfile:
                    feats = load(featfile)
                curpos = [segfeats(feats, a, b) for a, b in pos]
                curneg = [segfeats(feats, a, b) for a, b in neg]
                posfeats.extend(v for v in curpos if usable(v))
                negfeats.extend(v for v in curneg if usable(v))
                print('%d %d' % (len(posfeats), len(negfeats)))
                if len(posfeats) >= 5000:
                    break
            except IOError:
                # Missing/unreadable subtitle or feature file: skip this entry.
                continue

    # split into train and eval subsets and then train and run svm
    (trainpos, trainneg), (evalpos, evalneg) = splitTrainEval(
        posfeats, negfeats, -20)
    svm = SGDSVM()
    model, score = svm.train(
        trainpos + trainneg,
        [1] * len(trainpos) + [-1] * len(trainneg),
        ncv=0, n_iter=50)
    try:
        score = evalSVM(
            svm, model, evalpos + evalneg,
            [1] * len(evalpos) + [-1] * len(evalneg))
    except Exception:
        # Show the distinct training-vector shapes to help diagnose the
        # failure, then let the original exception propagate.
        print(set(v.shape for v in trainpos + trainneg))
        raise
def oldresyncmain():
    """Main driver for subtitle resyncing.

    Command line:
    ``python <script> <video or audio file> <subtitle file> <vad model>``.

    Loads the VAD model with ``SGDSVM.load``, computes the features of the
    input with ``getmel``, and for every integer offset in ``[0, 300)``
    classifies the flattened feature window that starts at that offset and is
    ``seglen`` (0.5) long.  Each score is printed as it is computed, and the
    labelled ``(text, start, start + 1)`` spans are finally passed to
    ``playpreview`` together with the input name stripped of ``'.mel'``.

    If fewer than three arguments are given, a usage message is printed and
    the process exits via ``sys.exit()``.

    NOTE(review): the subtitle file argument is required by the command line
    but is not used anywhere in this function.
    """
    # All three positional arguments are unpacked below, so require them all.
    # (The check used to be ``< 1``, which can never be true.)
    if len(sys.argv) < 4:
        print('Usage: python %s <video or audio file> <subtitle file> <vad model>' % (
            sys.argv[0]))
        sys.exit()

    from nktrainutils import SGDSVM

    fname, subfname, modelfname = sys.argv[1:4]
    model = SGDSVM.load(modelfname)
    allfeats = getmel(fname)

    def featfunc(a, b):
        """concats feats from the given times"""
        # Times are scaled by 10 to obtain column indices -- presumably the
        # features have 10 columns per second; TODO confirm against getmel.
        a, b = int(a * 10), int(b * 10)
        return allfeats[:, a:b].transpose().flatten()

    seglen = 0.5
    subels = []
    A, B = 0, 300
    classifier = SGDSVM()  # one instance is enough; the model is passed per call
    for start in range(A, B):
        feats = featfunc(start, start + seglen)
        cls = classifier.classify(model, [feats])[0]
        subels.append(('Cls: %0.4f' % (cls), start, start + 1))
        print('%s %s' % (start, cls))
        # Flush so progress is visible while the loop is still running.
        sys.stdout.flush()
    playpreview(fname.replace('.mel', ''), subels, A, B)
def trainvad():
    """Driver to train a Voice Activation Detection (VAD) classifier.

    Command line: ``python <script> <input movie names> <output classifier>``.

    ``sys.argv[1]`` is a text file listing one pickled feature file per line.
    For each entry, the subtitle file is the entry name with up to two
    trailing dot-suffixes stripped and ``.srt`` appended.  The subtitles are
    parsed with ``getdialogs`` and converted to positive/negative time
    segments by ``getTrainingSegments``; the feature matrix is sliced per
    segment and each slice is flattened into one example.  Collection stops
    once at least 5000 positive examples exist (or the listing ends).  The
    examples are split with ``splitTrainEval``, an ``SGDSVM`` is trained on
    the training part and evaluated with ``evalSVM`` on the held-out part.

    Entries whose files cannot be read (``IOError``) are skipped.  If fewer
    than two arguments are given, a usage message is printed and the process
    exits via ``sys.exit()``.

    NOTE(review): nothing in this function writes the trained model to
    ``sys.argv[2]`` even though the usage text names an output classifier --
    confirm whether saving is expected to happen here.
    """
    # Validate the command line first so the usage message does not depend on
    # the project/third-party imports below being available.
    if len(sys.argv) < 3:
        print("Usage: python %s <input movie names> <output classifier>" % (sys.argv[0]))
        sys.exit()

    from nktrainutils import SGDSVM, splitTrainEval, evalSVM
    import numpy as np
    from cPickle import load

    def segfeats(feats, start, end):
        """Return the flattened feature columns for the span start..end."""
        # Times are scaled by 10 to obtain column indices -- presumably the
        # features have 10 columns per second; TODO confirm against getmel.
        lo, hi = int(start * 10), int(end * 10)
        return feats[:, lo:hi].transpose().flatten()

    def usable(vec):
        """Accept only non-empty feature vectors that contain no NaN."""
        return vec is not None and len(vec) > 0 and not np.isnan(vec).any()

    posfeats, negfeats = [], []
    with open(sys.argv[1]) as listing:
        for fname in listing:
            fname = fname.rstrip("\n")
            subfname = fname.rsplit(".", 2)[0] + ".srt"
            try:
                # read subtitles and get training segments
                with open(subfname) as subfile:
                    lines = [l.strip() for l in subfile if l.strip()]
                dialogs = getdialogs(lines)
                pos, neg = getTrainingSegments(dialogs, 100, 600, seglen=0.5)
                print("%s %d %d %d" % (fname, len(dialogs), len(pos), len(neg)))
                # read features
                # NOTE(review): unpickling executes arbitrary code; only list
                # feature files that come from a trusted source.
                with open(fname) as featfile:
                    feats = load(featfile)
                curpos = [segfeats(feats, a, b) for a, b in pos]
                curneg = [segfeats(feats, a, b) for a, b in neg]
                posfeats.extend(v for v in curpos if usable(v))
                negfeats.extend(v for v in curneg if usable(v))
                print("%d %d" % (len(posfeats), len(negfeats)))
                if len(posfeats) >= 5000:
                    break
            except IOError:
                # Missing/unreadable subtitle or feature file: skip this entry.
                continue

    # split into train and eval subsets and then train and run svm
    (trainpos, trainneg), (evalpos, evalneg) = splitTrainEval(posfeats, negfeats, -20)
    svm = SGDSVM()
    model, score = svm.train(
        trainpos + trainneg,
        [1] * len(trainpos) + [-1] * len(trainneg),
        ncv=0,
        n_iter=50,
    )
    try:
        score = evalSVM(
            svm,
            model,
            evalpos + evalneg,
            [1] * len(evalpos) + [-1] * len(evalneg),
        )
    except Exception:
        # Show the distinct training-vector shapes to help diagnose the
        # failure, then let the original exception propagate.
        print(set(v.shape for v in trainpos + trainneg))
        raise
def oldresyncmain():
    """Main driver for subtitle resyncing.

    Command line:
    ``python <script> <video or audio file> <subtitle file> <vad model>``.

    Loads the VAD model with ``SGDSVM.load``, computes the features of the
    input with ``getmel``, and for every integer offset in ``[0, 300)``
    classifies the flattened feature window that starts at that offset and is
    ``seglen`` (0.5) long.  Each score is printed as it is computed, and the
    labelled ``(text, start, start + 1)`` spans are finally passed to
    ``playpreview`` together with the input name stripped of ``".mel"``.

    If fewer than three arguments are given, a usage message is printed and
    the process exits via ``sys.exit()``.

    NOTE(review): the subtitle file argument is required by the command line
    but is not used anywhere in this function.
    """
    # All three positional arguments are unpacked below, so require them all.
    # (The check used to be ``< 1``, which can never be true.)
    if len(sys.argv) < 4:
        print("Usage: python %s <video or audio file> <subtitle file> <vad model>" % (sys.argv[0]))
        sys.exit()

    from nktrainutils import SGDSVM

    fname, subfname, modelfname = sys.argv[1:4]
    model = SGDSVM.load(modelfname)
    allfeats = getmel(fname)

    def featfunc(a, b):
        """concats feats from the given times"""
        # Times are scaled by 10 to obtain column indices -- presumably the
        # features have 10 columns per second; TODO confirm against getmel.
        a, b = int(a * 10), int(b * 10)
        return allfeats[:, a:b].transpose().flatten()

    seglen = 0.5
    subels = []
    A, B = 0, 300
    classifier = SGDSVM()  # one instance is enough; the model is passed per call
    for start in range(A, B):
        feats = featfunc(start, start + seglen)
        cls = classifier.classify(model, [feats])[0]
        subels.append(("Cls: %0.4f" % (cls), start, start + 1))
        print("%s %s" % (start, cls))
        # Flush so progress is visible while the loop is still running.
        sys.stdout.flush()
    playpreview(fname.replace(".mel", ""), subels, A, B)
posi += 1 elif i == -1: ret.append(-negvals[negi]) negi += 1 elif i == 0: ret.append(0.0) return ret # testing main, uses SGDSVM from nktrainutils (which uses scikit-learn) if __name__ == '__main__': from nktrainutils import SGDSVM import numpy as np import numpy.random as npr from pprint import pprint svm = SGDSVM() size = (1000, 100) posm = 0.8 negm = -0.9 # train trainpos = npr.normal(posm, size=size) trainneg = npr.normal(negm, size=size) model, _ = svm.train(np.vstack((trainpos, trainneg)), [1] * len(trainpos) + [-1] * len(trainneg), ncv=0) # eval evalpos = npr.normal(posm, size=size) evalneg = npr.normal(negm, size=size) outs = svm.classify(model, np.vstack((evalpos, evalneg))) # fit params = easyfitsvm(outs)
elif i == -1: ret.append(-negvals[negi]) negi += 1 elif i == 0: ret.append(0.0) return ret # testing main, uses SGDSVM from nktrainutils (which uses scikit-learn) if __name__ == "__main__": from nktrainutils import SGDSVM import numpy as np import numpy.random as npr from pprint import pprint svm = SGDSVM() size = (1000, 100) posm = 0.8 negm = -0.9 # train trainpos = npr.normal(posm, size=size) trainneg = npr.normal(negm, size=size) model, _ = svm.train(np.vstack((trainpos, trainneg)), [1] * len(trainpos) + [-1] * len(trainneg), ncv=0) # eval evalpos = npr.normal(posm, size=size) evalneg = npr.normal(negm, size=size) outs = svm.classify(model, np.vstack((evalpos, evalneg))) # fit params = easyfitsvm(outs) # norm outs = sorted(outs[::40])