Esempio n. 1
0
def trainvad():
    """Driver to train a Voice Activation Detection (VAD) classifier.

    Command line::

        python <script> <input movie names> <output classifier>

    ``sys.argv[1]`` is a text file with one pickled feature file name per
    line.  For each entry the matching subtitle file is found by dropping
    up to the last two dot-separated suffixes of the name and appending
    ``.srt``.  Positive and negative segments come from
    ``getTrainingSegments``; the feature columns of every segment are
    flattened into one vector.  Collection stops once at least 5000
    positive vectors are gathered, then an ``SGDSVM`` is trained and
    evaluated on the split produced by ``splitTrainEval``.

    Entries whose subtitle or feature file cannot be read (``IOError``)
    are skipped silently.

    Exits via ``sys.exit()`` after printing usage when fewer than two
    arguments are given.
    """
    # Validate the command line first so the usage text is shown even when
    # the heavier project imports below are unavailable.
    if len(sys.argv) < 3:
        print('Usage: python %s <input movie names> <output classifier>' % (
            sys.argv[0]))
        sys.exit()
    # NOTE(review): sys.argv[2] (<output classifier>) is required above but
    # is never read in this function, so nothing is written out here.

    from nktrainutils import SGDSVM, splitTrainEval, evalSVM
    import numpy as np
    from cPickle import load

    def segment_features(feats, a, b):
        """Concatenate the feature columns between the given times."""
        # presumably a, b are seconds and feats holds 10 columns per
        # second -- TODO confirm against the feature extractor.
        lo, hi = int(a * 10), int(b * 10)
        return feats[:, lo:hi].transpose().flatten()

    def usable(vec):
        """Tell whether a vector is present, non-empty and NaN-free."""
        return vec is not None and len(vec) > 0 and not np.isnan(vec).any()

    posfeats, negfeats = [], []
    with open(sys.argv[1]) as listing:
        for fname in listing:
            fname = fname.rstrip('\n')
            subfname = fname.rsplit('.', 2)[0] + '.srt'
            try:
                # read subtitles and get training segments
                with open(subfname) as subfile:
                    lines = [line.strip() for line in subfile if line.strip()]
                dialogs = getdialogs(lines)
                pos, neg = getTrainingSegments(dialogs, 100, 600, seglen=0.5)
                print('%s %s %s %s' % (fname, len(dialogs), len(pos),
                                       len(neg)))
                # read features
                # SECURITY: unpickling runs arbitrary code embedded in the
                # file; only list feature files from a trusted source.
                with open(fname) as featfile:
                    feats = load(featfile)
                posfeats.extend(
                    vec for vec in (segment_features(feats, a, b)
                                    for a, b in pos)
                    if usable(vec))
                negfeats.extend(
                    vec for vec in (segment_features(feats, a, b)
                                    for a, b in neg)
                    if usable(vec))
                print('%s %s' % (len(posfeats), len(negfeats)))
                if len(posfeats) >= 5000:
                    break
            except IOError:
                # best effort: a movie without readable inputs is skipped
                continue
    # split into train and eval subsets and then train and run svm
    (trainpos, trainneg), (evalpos,
                           evalneg) = splitTrainEval(posfeats, negfeats, -20)
    svm = SGDSVM()
    model, score = svm.train(trainpos + trainneg,
                             [1] * len(trainpos) + [-1] * len(trainneg),
                             ncv=0,
                             n_iter=50)
    try:
        score = evalSVM(svm, model, evalpos + evalneg,
                        [1] * len(evalpos) + [-1] * len(evalneg))
    except Exception:
        # Show the distinct training-vector shapes to help diagnose a
        # failed evaluation, then propagate the original error.
        print(set(map(lambda x: x.shape, trainpos + trainneg)))
        raise
Esempio n. 2
0
def oldresyncmain():
    """Main driver for subtitle resyncing.

    Command line::

        python <script> <video or audio file> <subtitle file> <vad model>

    Loads the VAD model with ``SGDSVM.load``, obtains the features of the
    input via ``getmel`` and classifies one short window starting at each
    integer position in ``[0, 300)``.  Every score is printed and also
    collected as a ``('Cls: <score>', start, start + 1)`` entry; the
    entries are finally handed to ``playpreview`` together with the input
    name stripped of ``.mel``.

    Exits via ``sys.exit()`` after printing usage when fewer than three
    arguments are given.
    """
    # All three positional arguments are unpacked below, so anything short
    # of four argv entries must stop here with the usage text.
    if len(sys.argv) < 4:
        print('Usage: python %s <video or audio file> <subtitle file> <vad model>' % (
            sys.argv[0]))
        sys.exit()

    from nktrainutils import SGDSVM

    # NOTE(review): subfname is accepted but not used by this driver.
    fname, subfname, modelfname = sys.argv[1:4]
    model = SGDSVM.load(modelfname)
    allfeats = getmel(fname)

    def featfunc(a, b):
        """concats feats from the given times"""
        # presumably a, b are seconds and allfeats holds 10 columns per
        # second -- TODO confirm against getmel.
        a, b = int(a * 10), int(b * 10)
        return allfeats[:, a:b].transpose().flatten()

    seglen = 0.5
    subels = []
    A, B = 0, 300
    classifier = SGDSVM()  # one instance serves every window
    for start in range(A, B):
        feats = featfunc(start, start + seglen)
        cls = classifier.classify(model, [feats])[0]
        subels.append(('Cls: %0.4f' % (cls), start, start + 1))
        print('%s %s' % (start, cls))
    sys.stdout.flush()
    playpreview(fname.replace('.mel', ''), subels, A, B)
Esempio n. 3
0
def trainvad():
    """Driver to train a Voice Activation Detection (VAD) classifier.

    Command line::

        python <script> <input movie names> <output classifier>

    ``sys.argv[1]`` is a text file with one pickled feature file name per
    line.  For each entry the matching subtitle file is found by dropping
    up to the last two dot-separated suffixes of the name and appending
    ``.srt``.  Positive and negative segments come from
    ``getTrainingSegments``; the feature columns of every segment are
    flattened into one vector.  Collection stops once at least 5000
    positive vectors are gathered, then an ``SGDSVM`` is trained and
    evaluated on the split produced by ``splitTrainEval``.

    Entries whose subtitle or feature file cannot be read (``IOError``)
    are skipped silently.

    Exits via ``sys.exit()`` after printing usage when fewer than two
    arguments are given.
    """
    # Validate the command line first so the usage text is shown even when
    # the heavier project imports below are unavailable.
    if len(sys.argv) < 3:
        print("Usage: python %s <input movie names> <output classifier>" % (sys.argv[0]))
        sys.exit()
    # NOTE(review): sys.argv[2] (<output classifier>) is required above but
    # is never read in this function, so nothing is written out here.

    from nktrainutils import SGDSVM, splitTrainEval, evalSVM
    import numpy as np
    from cPickle import load

    def segment_features(feats, a, b):
        """Concatenate the feature columns between the given times."""
        # presumably a, b are seconds and feats holds 10 columns per
        # second -- TODO confirm against the feature extractor.
        lo, hi = int(a * 10), int(b * 10)
        return feats[:, lo:hi].transpose().flatten()

    def usable(vec):
        """Tell whether a vector is present, non-empty and NaN-free."""
        return vec is not None and len(vec) > 0 and not np.isnan(vec).any()

    posfeats, negfeats = [], []
    with open(sys.argv[1]) as listing:
        for fname in listing:
            fname = fname.rstrip("\n")
            subfname = fname.rsplit(".", 2)[0] + ".srt"
            try:
                # read subtitles and get training segments
                with open(subfname) as subfile:
                    lines = [line.strip() for line in subfile if line.strip()]
                dialogs = getdialogs(lines)
                pos, neg = getTrainingSegments(dialogs, 100, 600, seglen=0.5)
                print("%s %s %s %s" % (fname, len(dialogs), len(pos), len(neg)))
                # read features
                # SECURITY: unpickling runs arbitrary code embedded in the
                # file; only list feature files from a trusted source.
                with open(fname) as featfile:
                    feats = load(featfile)
                posfeats.extend(
                    vec for vec in (segment_features(feats, a, b) for a, b in pos) if usable(vec)
                )
                negfeats.extend(
                    vec for vec in (segment_features(feats, a, b) for a, b in neg) if usable(vec)
                )
                print("%s %s" % (len(posfeats), len(negfeats)))
                if len(posfeats) >= 5000:
                    break
            except IOError:
                # best effort: a movie without readable inputs is skipped
                continue
    # split into train and eval subsets and then train and run svm
    (trainpos, trainneg), (evalpos, evalneg) = splitTrainEval(posfeats, negfeats, -20)
    svm = SGDSVM()
    model, score = svm.train(trainpos + trainneg, [1] * len(trainpos) + [-1] * len(trainneg), ncv=0, n_iter=50)
    try:
        score = evalSVM(svm, model, evalpos + evalneg, [1] * len(evalpos) + [-1] * len(evalneg))
    except Exception:
        # Show the distinct training-vector shapes to help diagnose a
        # failed evaluation, then propagate the original error.
        print(set(map(lambda x: x.shape, trainpos + trainneg)))
        raise
Esempio n. 4
0
def oldresyncmain():
    """Main driver for subtitle resyncing.

    Command line::

        python <script> <video or audio file> <subtitle file> <vad model>

    Loads the VAD model with ``SGDSVM.load``, obtains the features of the
    input via ``getmel`` and classifies one short window starting at each
    integer position in ``[0, 300)``.  Every score is printed and also
    collected as a ``("Cls: <score>", start, start + 1)`` entry; the
    entries are finally handed to ``playpreview`` together with the input
    name stripped of ``.mel``.

    Exits via ``sys.exit()`` after printing usage when fewer than three
    arguments are given.
    """
    # All three positional arguments are unpacked below, so anything short
    # of four argv entries must stop here with the usage text.
    if len(sys.argv) < 4:
        print("Usage: python %s <video or audio file> <subtitle file> <vad model>" % (sys.argv[0]))
        sys.exit()

    from nktrainutils import SGDSVM

    # NOTE(review): subfname is accepted but not used by this driver.
    fname, subfname, modelfname = sys.argv[1:4]
    model = SGDSVM.load(modelfname)
    allfeats = getmel(fname)

    def featfunc(a, b):
        """concats feats from the given times"""
        # presumably a, b are seconds and allfeats holds 10 columns per
        # second -- TODO confirm against getmel.
        a, b = int(a * 10), int(b * 10)
        return allfeats[:, a:b].transpose().flatten()

    seglen = 0.5
    subels = []
    A, B = 0, 300
    classifier = SGDSVM()  # one instance serves every window
    for start in range(A, B):
        feats = featfunc(start, start + seglen)
        cls = classifier.classify(model, [feats])[0]
        subels.append(("Cls: %0.4f" % (cls), start, start + 1))
        print("%s %s" % (start, cls))
    sys.stdout.flush()
    playpreview(fname.replace(".mel", ""), subels, A, B)
Esempio n. 5
0
            posi += 1
        elif i == -1:
            ret.append(-negvals[negi])
            negi += 1
        elif i == 0:
            ret.append(0.0)
    return ret


# testing main, uses SGDSVM from nktrainutils (which uses scikit-learn)
if __name__ == '__main__':
    # Self-test: train an SGDSVM on two synthetic normally-distributed
    # classes, classify a fresh sample of each, and fit the raw outputs.
    from nktrainutils import SGDSVM
    import numpy as np
    import numpy.random as npr
    from pprint import pprint
    svm = SGDSVM()
    size = (1000, 100)
    # means of the positive and the negative class
    posm, negm = 0.8, -0.9
    # train: positives are drawn first, then negatives
    trainpos, trainneg = npr.normal(posm, size=size), npr.normal(negm,
                                                                 size=size)
    trainlabels = [1] * len(trainpos) + [-1] * len(trainneg)
    traindata = np.vstack((trainpos, trainneg))
    model, _ = svm.train(traindata, trainlabels, ncv=0)
    # eval: a second, independent draw from the same two distributions
    evalpos, evalneg = npr.normal(posm, size=size), npr.normal(negm,
                                                               size=size)
    evaldata = np.vstack((evalpos, evalneg))
    outs = svm.classify(model, evaldata)
    # fit
    params = easyfitsvm(outs)
Esempio n. 6
0
        elif i == -1:
            ret.append(-negvals[negi])
            negi += 1
        elif i == 0:
            ret.append(0.0)
    return ret


# testing main, uses SGDSVM from nktrainutils (which uses scikit-learn)
if __name__ == "__main__":
    # Self-test: train an SGDSVM on two synthetic normally-distributed
    # classes, classify a fresh sample of each, and fit the raw outputs.
    from nktrainutils import SGDSVM
    import numpy as np
    import numpy.random as npr
    from pprint import pprint

    svm = SGDSVM()
    size = (1000, 100)
    # means of the positive and the negative class
    posm, negm = 0.8, -0.9
    # train: positives are drawn first, then negatives
    trainpos, trainneg = npr.normal(posm, size=size), npr.normal(negm, size=size)
    trainlabels = [1] * len(trainpos) + [-1] * len(trainneg)
    traindata = np.vstack((trainpos, trainneg))
    model, _ = svm.train(traindata, trainlabels, ncv=0)
    # eval: a second, independent draw from the same two distributions
    evalpos, evalneg = npr.normal(posm, size=size), npr.normal(negm, size=size)
    evaldata = np.vstack((evalpos, evalneg))
    outs = svm.classify(model, evaldata)
    # fit
    params = easyfitsvm(outs)
    # norm: keep every 40th output, in ascending order
    outs = sorted(outs[::40])