def trainvad():
    """Driver to train a Voice Activation Detection (VAD) classifier.

    Command line: ``python <script> <input movie names> <output classifier>``.

    ``sys.argv[1]`` names a text file with one pickled feature file per line.
    For each entry, the subtitle file (the entry's name with up to two
    trailing dot-extensions replaced by ``.srt``) is parsed with
    ``getdialogs`` and ``getTrainingSegments`` into positive and negative
    time segments, and the feature columns covering each segment are
    flattened into one vector.  Entries that raise ``IOError`` (e.g. a
    missing subtitle or feature file) are skipped.  Collection stops once at
    least 5000 positive vectors have been gathered.  The vectors are then
    split with ``splitTrainEval``, an ``SGDSVM`` is trained on the training
    part and scored on the evaluation part with ``evalSVM``.

    NOTE(review): ``sys.argv[2]`` (the output classifier) is required by the
    usage check but the trained model and score are neither written nor
    returned by this function -- presumably a save step is missing; confirm.

    Raises:
        Exception: whatever ``evalSVM`` raises is re-raised after the set of
            training-vector shapes has been printed as a diagnostic.
    """
    from nktrainutils import SGDSVM, splitTrainEval, evalSVM
    import numpy as np
    from cPickle import load

    if len(sys.argv) < 3:
        # Single-argument print(...) produces the same output on Python 2.
        print('Usage: python %s <input movie names> <output classifier>' % (sys.argv[0]))
        sys.exit()

    def segment(feats, start, end):
        """Flatten the feature columns lying between two times."""
        # Times are scaled by 10 to obtain column indices -- presumably the
        # features hold 10 columns per second; TODO confirm.
        lo, hi = int(start * 10), int(end * 10)
        return feats[:, lo:hi].transpose().flatten()

    def usable(vec):
        """True for a non-empty feature vector that contains no NaN."""
        return vec is not None and len(vec) > 0 and not np.isnan(vec).any()

    posfeats, negfeats = [], []
    # The list file is opened outside the try block, so failing to open it
    # still aborts the run instead of being silently skipped.
    with open(sys.argv[1]) as listing:
        for fname in listing:
            fname = fname.rstrip('\n')
            subfname = fname.rsplit('.', 2)[0] + '.srt'
            try:
                # read subtitles and get training segments
                with open(subfname) as subs:
                    lines = [l.strip() for l in subs if l.strip()]
                dialogs = getdialogs(lines)
                pos, neg = getTrainingSegments(dialogs, 100, 600, seglen=0.5)
                print('%s %s %s %s' % (fname, len(dialogs), len(pos), len(neg)))
                # read features; pickles must be read in binary mode.
                # NOTE: unpickling can execute arbitrary code -- only feed
                # this driver feature files from a trusted source.
                with open(fname, 'rb') as featfile:
                    feats = load(featfile)
                posfeats.extend(v for v in (segment(feats, a, b) for a, b in pos) if usable(v))
                negfeats.extend(v for v in (segment(feats, a, b) for a, b in neg) if usable(v))
                print('%s %s' % (len(posfeats), len(negfeats)))
                if len(posfeats) >= 5000:
                    break
            except IOError:
                # Best effort: skip entries whose files cannot be read.
                continue

    # split into train and eval subsets and then train and run svm
    (trainpos, trainneg), (evalpos, evalneg) = splitTrainEval(posfeats, negfeats, -20)
    svm = SGDSVM()
    trainlabels = [1] * len(trainpos) + [-1] * len(trainneg)
    model, score = svm.train(trainpos + trainneg, trainlabels, ncv=0, n_iter=50)
    try:
        evallabels = [1] * len(evalpos) + [-1] * len(evalneg)
        score = evalSVM(svm, model, evalpos + evalneg, evallabels)
    except Exception:
        # Show which vector shapes went into training before propagating.
        print(set(vec.shape for vec in trainpos + trainneg))
        raise
def trainvad():
    """Driver to train a Voice Activation Detection (VAD) classifier.

    Command line: ``python <script> <input movie names> <output classifier>``.

    ``sys.argv[1]`` names a text file with one pickled feature file per line.
    For each entry, the subtitle file (the entry's name with up to two
    trailing dot-extensions replaced by ``.srt``) is parsed with
    ``getdialogs`` and ``getTrainingSegments`` into positive and negative
    time segments, and the feature columns covering each segment are
    flattened into one vector.  Entries that raise ``IOError`` (e.g. a
    missing subtitle or feature file) are skipped.  Collection stops once at
    least 5000 positive vectors have been gathered.  The vectors are then
    split with ``splitTrainEval``, an ``SGDSVM`` is trained on the training
    part and scored on the evaluation part with ``evalSVM``.

    NOTE(review): ``sys.argv[2]`` (the output classifier) is required by the
    usage check but the trained model and score are neither written nor
    returned by this function -- presumably a save step is missing; confirm.

    Raises:
        Exception: whatever ``evalSVM`` raises is re-raised after the set of
            training-vector shapes has been printed as a diagnostic.
    """
    from nktrainutils import SGDSVM, splitTrainEval, evalSVM
    import numpy as np
    from cPickle import load

    if len(sys.argv) < 3:
        # Single-argument print(...) produces the same output on Python 2.
        print("Usage: python %s <input movie names> <output classifier>" % (sys.argv[0]))
        sys.exit()

    def segment(feats, start, end):
        """Flatten the feature columns lying between two times."""
        # Times are scaled by 10 to obtain column indices -- presumably the
        # features hold 10 columns per second; TODO confirm.
        lo, hi = int(start * 10), int(end * 10)
        return feats[:, lo:hi].transpose().flatten()

    def usable(vec):
        """True for a non-empty feature vector that contains no NaN."""
        return vec is not None and len(vec) > 0 and not np.isnan(vec).any()

    posfeats, negfeats = [], []
    # The list file is opened outside the try block, so failing to open it
    # still aborts the run instead of being silently skipped.
    with open(sys.argv[1]) as listing:
        for fname in listing:
            fname = fname.rstrip("\n")
            subfname = fname.rsplit(".", 2)[0] + ".srt"
            try:
                # read subtitles and get training segments
                with open(subfname) as subs:
                    lines = [l.strip() for l in subs if l.strip()]
                dialogs = getdialogs(lines)
                pos, neg = getTrainingSegments(dialogs, 100, 600, seglen=0.5)
                print("%s %s %s %s" % (fname, len(dialogs), len(pos), len(neg)))
                # read features; pickles must be read in binary mode.
                # NOTE: unpickling can execute arbitrary code -- only feed
                # this driver feature files from a trusted source.
                with open(fname, "rb") as featfile:
                    feats = load(featfile)
                posfeats.extend(v for v in (segment(feats, a, b) for a, b in pos) if usable(v))
                negfeats.extend(v for v in (segment(feats, a, b) for a, b in neg) if usable(v))
                print("%s %s" % (len(posfeats), len(negfeats)))
                if len(posfeats) >= 5000:
                    break
            except IOError:
                # Best effort: skip entries whose files cannot be read.
                continue

    # split into train and eval subsets and then train and run svm
    (trainpos, trainneg), (evalpos, evalneg) = splitTrainEval(posfeats, negfeats, -20)
    svm = SGDSVM()
    trainlabels = [1] * len(trainpos) + [-1] * len(trainneg)
    model, score = svm.train(trainpos + trainneg, trainlabels, ncv=0, n_iter=50)
    try:
        evallabels = [1] * len(evalpos) + [-1] * len(evalneg)
        score = evalSVM(svm, model, evalpos + evalneg, evallabels)
    except Exception:
        # Show which vector shapes went into training before propagating.
        print(set(vec.shape for vec in trainpos + trainneg))
        raise
# Self-test entry point: trains an SGDSVM (from nktrainutils, which uses
# scikit-learn) on synthetic Gaussian data and prints raw vs. normalized
# classifier outputs side by side.
if __name__ == '__main__':
    from nktrainutils import SGDSVM
    import numpy as np
    import numpy.random as npr
    from pprint import pprint

    classifier = SGDSVM()
    shape = (1000, 100)
    pos_mean, neg_mean = 0.8, -0.9

    # Training set: positives drawn around pos_mean, negatives around neg_mean.
    train_pos = npr.normal(pos_mean, size=shape)
    train_neg = npr.normal(neg_mean, size=shape)
    train_x = np.vstack((train_pos, train_neg))
    train_y = [1] * len(train_pos) + [-1] * len(train_neg)
    model, _ = classifier.train(train_x, train_y, ncv=0)

    # Evaluation set drawn the same way, then classified with the model.
    eval_pos = npr.normal(pos_mean, size=shape)
    eval_neg = npr.normal(neg_mean, size=shape)
    eval_x = np.vstack((eval_pos, eval_neg))
    scores = classifier.classify(model, eval_x)

    # Fit parameters on all outputs, then apply easynormsvm to every 40th
    # output (sorted).
    params = easyfitsvm(scores)
    sample = sorted(scores[::40])
    normed = easynormsvm(sample, params)

    # Show each sampled output next to its normalized value and the argsort
    # indices of both sequences.
    raw_order = np.argsort(sample)
    norm_order = np.argsort(normed)
    pprint(zip(sample, normed, raw_order, norm_order))
return ret # testing main, uses SGDSVM from nktrainutils (which uses scikit-learn) if __name__ == "__main__": from nktrainutils import SGDSVM import numpy as np import numpy.random as npr from pprint import pprint svm = SGDSVM() size = (1000, 100) posm = 0.8 negm = -0.9 # train trainpos = npr.normal(posm, size=size) trainneg = npr.normal(negm, size=size) model, _ = svm.train(np.vstack((trainpos, trainneg)), [1] * len(trainpos) + [-1] * len(trainneg), ncv=0) # eval evalpos = npr.normal(posm, size=size) evalneg = npr.normal(negm, size=size) outs = svm.classify(model, np.vstack((evalpos, evalneg))) # fit params = easyfitsvm(outs) # norm outs = sorted(outs[::40]) nouts = easynormsvm(outs, params) oldsort = np.argsort(outs) newsort = np.argsort(nouts) pprint(zip(outs, nouts, oldsort, newsort))