def main():
    """Smoke-test SVM_fit / SVM_classify on a small hand-written data set.

    Command line: ``sys.argv[1]`` is the input directory and
    ``sys.argv[2]`` the number of samples per site.

    The feature vectors are loaded and split into train/test parts first,
    but X, Y and labels are then overwritten with a 2-D toy data set, so
    only the toy data is passed to SVM_fit.  The fitted parameters and the
    classification of three probe points are printed.
    """
    input_dir = sys.argv[1]
    num_samples_per_site = int(sys.argv[2])
    target_sites = get_target_sites(input_dir)

    # Map every site to its index in target_sites.
    labels = dict((site, idx) for idx, site in enumerate(target_sites))

    X, Y = load_feature_vectors(input_dir, num_samples_per_site, labels)
    # NOTE(review): with int operands this is floor division under
    # Python 2; it would need `//` under Python 3.
    test_size = (num_samples_per_site / 2) * len(target_sites)
    X, Y, testX, testY = select_test_set(X, Y, test_size)

    # Toy data: points on the diagonal, grouped around 0, 1 and 2.  The
    # last point (2.05, 2.05) sits inside the class-2 group but is
    # labelled 1.0.
    X = [[v, v] for v in (0.0, 0.01, 1.0, 1.1, 2.0, 2.1, 2.05)]
    Y = [0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 1.0]
    labels = dict((k, k) for k in range(3))

    thetas, bs, slacks = SVM_fit(X, Y, len(labels), 5.0)

    print("thetas")
    for k in range(3):
        print(thetas[k])
    print("bs")
    print(bs)
    print("slacks")
    print(slacks)

    # Probe points paired with the banner announcing the expected class.
    probes = (
        ("Should be class 0:", [0.05, 0.05]),
        ("Should be class 1:", [1.05, 1.05]),
        ("Should be class 2:", [1.98, 1.98]),
    )
    for banner, point in probes:
        print(banner)
        print(SVM_classify(point, thetas, bs))
def run(input_dir, num_samples_per_site, onevone=True):
    """Cross-validate linear and RBF classifiers for a fixed list of C values.

    Args:
        input_dir: directory passed to get_target_sites() and
            load_feature_vectors().
        num_samples_per_site: number of samples to load per site; coerced
            with int(), so a numeric string is accepted as well.
        onevone: forwarded unchanged to classify().
            NOTE(review): presumably selects one-vs-one rather than
            one-vs-all training -- confirm against classify().

    For every C value the function walks the subsets produced by
    select_cross_validation_subsets(X, Y, 6), scales each train/test pair
    with scale(), and calls classify() once with the default kernel
    (announced as "Linear kernel:") and once with 'rbf'.
    """
    # The arguments used to be overwritten from sys.argv here, which made
    # the function ignore whatever the caller passed in.  They are now
    # honoured; int() keeps callers that pass the raw argv string working.
    num_samples_per_site = int(num_samples_per_site)

    target_sites = get_target_sites(input_dir)

    # Map every site to its index in target_sites.
    labels = dict((site, idx) for idx, site in enumerate(target_sites))

    X, Y = load_feature_vectors(input_dir, num_samples_per_site, labels)

    iters = select_cross_validation_subsets(X, Y, 6)

    # C values to evaluate.
    crange = [1.0, 100.0]
    for c in crange:
        print("C=%f" % c)
        print("Linear kernel:")
        for d in iters:
            trainX, testX = scale(d["train"][0], d["test"][0])
            classify(trainX, d["train"][1], testX, d["test"][1], onevone, c)
        print("RBF kernel:")
        for d in iters:
            # Scaling is recomputed rather than reused from the loop above.
            trainX, testX = scale(d["train"][0], d["test"][0])
            classify(trainX, d["train"][1], testX, d["test"][1], onevone, c, 'rbf')
# Example #3
# 0
def main():
    """Project the feature vectors onto 50 PCA components and run one experiment.

    Command line:
        sys.argv[1] -- input directory
        sys.argv[2] -- number of samples per site
        sys.argv[3] -- experiment: "monb", "multiclass" or "anomdet"

    Any other experiment name still loads the data and fits the PCA, but
    runs nothing afterwards.
    """
    cmd = sys.argv[3]
    input_dir = sys.argv[1]
    num_samples_per_site = int(sys.argv[2])
    target_sites = get_target_sites(input_dir)

    # Map every site to its index in target_sites.
    labels = dict((site, idx) for idx, site in enumerate(target_sites))
    X, Y = load_feature_vectors(input_dir, num_samples_per_site, labels)
    # NOTE(review): with int operands this is floor division under
    # Python 2; it would need `//` under Python 3.
    test_size = (num_samples_per_site / 2) * len(target_sites)
    X, Y, testX, testY = select_test_set(X, Y, test_size)

    # Multiply every label by 1.0 (turns integer labels into floats).
    Y = [label * 1.0 for label in Y]
    testY = [label * 1.0 for label in testY]

    pca = PCA(n_components=50)

    print("Fitting X")
    pca.fit(X)

    print("Transforming X and testX")
    Xnew = pca.transform(X)
    testXnew = pca.transform(testX)

    # Drop the untransformed data before the experiment runs.
    del X, testX

    # One zero-argument callable per experiment; note that the argument
    # order differs between the three entry points.
    experiments = {
        "monb": lambda: multiclass_on_binary_svms(Xnew, Y, testXnew, testY),
        "multiclass": lambda: multiclass_svm(Xnew, testXnew, Y, testY, labels),
        "anomdet": lambda: anomaly_detection(labels, Xnew, Y, testXnew, testY),
    }
    experiment = experiments.get(cmd)
    if experiment is not None:
        experiment()
def main():
    """Run the experiment named by ``sys.argv[5]``.

    Command line:
        sys.argv[1] -- sinput_dir: source of the target sites and of Xs
        sys.argv[2] -- minput_dir: source of Xm / Ym
        sys.argv[3] -- nssamples: sample count used when loading sinput_dir
        sys.argv[4] -- nmsamples: sample count used when loading minput_dir
        sys.argv[5] -- cmd: one of the command strings handled below

    The "onevone-test" and "onevall-test" commands evaluate on the
    original Xm first and then again on the reduced features; every other
    command only uses the output of ``reduce(Xm, Xs, d, 15, 100)``.
    An unrecognised command loads the data and then does nothing.

    NOTE(review): ``reduce`` is called with five arguments, so it is
    presumably a project helper shadowing the builtin -- confirm.  The
    meaning of its d / nb / e arguments is not visible here.
    """
    cmd = sys.argv[5]
    sinput_dir = sys.argv[1]
    minput_dir = sys.argv[2]

    nssamples = int(sys.argv[3])
    nmsamples = int(sys.argv[4])

    target_sites = get_target_sites(sinput_dir)
    # Map every site to its index in target_sites.
    labels = dict((site, idx) for idx, site in enumerate(target_sites))

    Xs, Ys = load_feature_vectors(sinput_dir, nssamples, labels)
    Xm, Ym = load_feature_vectors(minput_dir, nmsamples, labels)

    # Settings that are the same in every branch that calls reduce().
    nb = 15
    e = 100
    # Size argument handed to the *-test entry points.
    # NOTE(review): with int operands this is floor division under
    # Python 2; it would need `//` under Python 3.
    test_size = nmsamples * len(labels) / 2

    if cmd in ("multiclass-crossvalidate", "multiclass-test"):
        Xm = reduce(Xm, Xs, 50, nb, e)
        if cmd == "multiclass-test":
            multiclass_svm_test(Xm, Ym, labels, 0.1, test_size)
        else:
            multiclass_svm_crossvalidate(Xm, Ym, labels, 0.1)
    elif cmd in ("onevone-test", "onevall-test"):
        # The two commands differ only in the boolean passed to monb_test.
        onevone = cmd == "onevone-test"
        print("Original dimensionality")
        monb_test(Xm, Ym, 1.0, onevone, test_size)
        print("Reduced dimensionality")
        Xm = reduce(Xm, Xs, 50, nb, e)
        monb_test(Xm, Ym, 1.0, onevone, test_size)
    elif cmd in ("onevone-crossvalidate", "onevall-crossvalidate"):
        onevone = cmd == "onevone-crossvalidate"
        # One-vs-one cross-validation uses d=110; one-vs-all uses d=50.
        d = 110 if onevone else 50
        Xm = reduce(Xm, Xs, d, nb, e)
        monb_crossvalidate(Xm, Ym, 1.0, onevone)
    elif cmd in ("anomdet-test", "anomdet-crossvalidate"):
        X = reduce(Xm, Xs, 50, nb, e)
        if cmd == "anomdet-test":
            anom_det_test(labels, X, Ym, 0.1, test_size)
        else:
            anom_det_crossvalidate(labels, X, Ym, 0.1)