Example 1
def mainTest(X_train, X_test, y_train, y_test, k):
    print("--Test 1--")

    M = 3

    # PCA Work
    print("\nTraining data:")
    comp_1 = pca.pca(X_train, M)
    X_train_t = pca.transform(X_train, comp_1)

    print("\nTesting data:")
    comp_2 = pca.pca(X_test, M)
    X_test_t = pca.transform(X_test, comp_2)

    # Print base results.
    print("\nBefore PCA - Dim ", len(X_train[0]))

    classifier = svm.train(X_train, y_train, k, C=None)
    info = svm.classify(classifier, X_test, return_sums=True)

    printResults(info[1], y_test, info[0])

    # Print transformed results.
    print("After PCA - Dim ", M)
    X_train = X_train_t
    X_test = X_test_t

    classifier = svm.train(X_train, y_train, k, C=None)
    info = svm.classify(classifier, X_test, return_sums=True)

    printResults(info[1], y_test, info[0])
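
# Note: mainTest fits PCA separately on the training and test sets (comp_1 vs
# comp_2), so the two projections need not share the same axes. A minimal sketch
# that projects both sets with components fit on the training data only, assuming
# the same pca API used above:
def pca_transform_consistent(X_train, X_test, M):
    comp = pca.pca(X_train, M)  # fit components on the training data alone
    return pca.transform(X_train, comp), pca.transform(X_test, comp)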
Example 2
def train_model(images, mask_list, k_size, probability):
    """
    Train model with list of images
    """
    logging.info('Calculating, normalizing feature vectors for %d image(s)',
                 len(images))
    vectors_list = [
        calculate_features(x.image, x.fov_mask, mask_list, k_size)
        for x in images
    ]
    truth_list = [x.truth for x in images]
    logging.info('Training model with %d image(s)', len(images))
    svm.train(vectors_list, truth_list,
              probability)  # Train SVM, lengthy process
Example 3
def main():
    m = 350
    random.seed(2)
    X = np.empty([m, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)

    #not separable
    y = np.empty([m, 1])
    for i in range(X.shape[0]):
        y[i] = func2(X[i, :])

    #plot data and decision surface
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    #train svm
    #change c to hard/soft margins
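    #(large c approximates a hard margin; small c allows more margin violations)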
    w, w0, support_vectors_idx = svm.train(X, y, c=99999, eps=0.1)

    #plot result
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
Example 4
def main():
    m=100
    X = np.empty([m,2])
    X[:,0] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X[:,1] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)

    # preprocessing.scale(X)

    #linearly separable
    y = np.empty([m,1])
    for i in range(m):
        y[i] = func(X[i,])

    #plot data and decision surface
    ax = pu.plot_data(X,y)
    pu.plot_surface(X,y, X[:, 0], X[:,1], disc_func=func, ax=ax)
    plt.show()

    #train svm

    w,w0, support_vectors_idx = svm.train(X,y,c=999999999999999, eps=10, type='gaussian')
    # w, w0, support_vectors_idx = svm.train(X, y, c=999999999999999, eps=10, type='polynomial')
    #plot result
    predicted_labels = svm.classify_all(X,w,w0)
    print("Accuracy: {}".format(svm.getAccuracy(y,predicted_labels)))


    ax = pu.plot_data(X,y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:,0], X[:,1], w,w0, ax=ax)
    plt.show()
Example 5
def main():

    dic = {'a':0, 'b':1, '?':-1}
    data = np.genfromtxt('Data/credits.data', skip_header=True, delimiter=',',
                         usecols=[0,1,2,7,10,15],converters={0: lambda s: dic[s]})

    use = [k for k in range(len(data)) if data[k][0] != -1 and (not math.isnan(data[k][1]))]
    data = data[use]

    X = np.empty([len(data),5])
    y = np.empty([len(data), 1])
    for i in range(len(data)):

        for j in range(len(data[i])-1):
            X[i,j] = data[i][j]
        y[i] = data[i][5]



    # preprocessing.scale(X[:,1])
    #train svm
    w,w0, support_vectors_idx = svm.train(X[:,range(1,5)],y,c=10, eps=1)

    #plot result
    predicted_labels = svm.classify_all(X[:,range(1,5)],w,w0)
    print("Accuracy: {}".format(svm.getAccuracy(y,predicted_labels)))
Example 6
def save_classifier():
    """
    Save the trained classifier as a pickle file.
    """
    clf = train(x_train_points, train_data.labels)
    with open('./data/classifier.pkl', 'wb') as f:
        pickle.dump(clf, f)
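
# A matching loader (a sketch; the path mirrors save_classifier above):
def load_classifier():
    with open('./data/classifier.pkl', 'rb') as f:
        return pickle.load(f)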
Example 7
def train(data, attributes, labels, k):

    trees = []
    for i in range(k):
        new_data = data
        sample = data.raw_data[np.random.choice(data.raw_data.shape[0],
                                                100,
                                                replace=True)]
        new_data.raw_data = sample
        numbers = np.random.randint(1, data.raw_data.shape[1] - 1, size=50)
        features = copy.deepcopy(attributes)
        for attr in attributes:
            if int(attr) not in numbers:
                del features[attr]
        new_data.attributes = features
        tree = dt.id3(new_data, features, labels)
        pruned = dt.pruning_tree(tree, 1)
        trees.append(pruned)
        err, depth = dt.report_error(new_data, pruned)

    transformed_data = np.zeros((data.raw_data.shape[0], k + 1))
    labels = []

    # label every row with each tree's prediction
    for row, test in enumerate(data.raw_data):
        transformed_data[row, 0] = test[0]
        for col, tree in enumerate(trees, 1):
            label = dt.predict(data, test, tree)
            transformed_data[row, col] = int(label)
            labels.append(int(label))

    labels.append(1)
    lbls = transformed_data[:, 0]
    w, a, lab = svm.train(transformed_data, lbls, k)
    return a, lab
Example 8
def main():
    data = pandas.read_csv("Data/car.data", sep=",", header=0, index_col=False)
    data = pandas.get_dummies(data)
    arr = data.as_matrix()
    use = [k for k in range(arr.shape[0]) if (arr[k, 0] == -1 or arr[k, 0] == 1)]
    arr = arr[use]
    X = arr[:, range(1, 22)]
    y = arr[:, 0]

    # normalize
    # X = preprocessing.scale(X)
    # shuffle
    p = np.random.permutation(len(X))

    X = X[p]
    y = y[p]

    # train svm
    w, w0, support_vectors_idx = svm.train(X, y, c=99, eps=0.00001)

    # get accuracy
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))
    #
    # evaluate performance
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=99, eps=0.00001)
    print(kfold)

    # evaluate performance with gaussian kernel function
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=99, eps=0.00001, type="gaussian")
    print(kfold)

    # evaluate performance with polynomial kernel function
    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=99, eps=0.00001, type="polynomial")
    print(kfold)
Example 9
def train(train_x, train_y):
    ''' this is a function to train all classifiers '''
    tree_start = time.time()
    tree_clf = tree.train(train_x, train_y)
    print('Decision Tree - Training Time: ', round(time.time() - tree_start,
                                                   3), 's')

    svm_start = time.time()
    svm_clf = svm.train(train_x, train_y)
    print('SVM - Training Time: ', round(time.time() - svm_start, 3), 's')

    knn_start = time.time()
    knn_clf = knn.train(train_x, train_y)
    print('k-NN - Training Time: ', round(time.time() - knn_start, 3), 's')

    nn_start = time.time()
    nn_clf = nn.train(train_x, train_y)
    print('Neural Network - Training Time: ', round(time.time() - nn_start, 3),
          's')

    boost_start = time.time()
    boost_clf = boost.train(train_x, train_y)
    print('Boosted Tree - Training Time: ', round(time.time() - boost_start,
                                                  3), 's')

    return [tree_clf, svm_clf, knn_clf, nn_clf, boost_clf]
Example 11
def train(folds_x, folds_y):
    ''' this is a function to train all classifiers '''
    tree_clf = tree.train(folds_x, folds_y)
    svm_clf = svm.train(folds_x, folds_y)
    knn_clf = knn.train(folds_x, folds_y)
    nn_clf = nn.train(folds_x, folds_y)
    boost_clf = boost.train(folds_x, folds_y)

    return [tree_clf, svm_clf, knn_clf, nn_clf, boost_clf]
Example 12
def main():
    print 'program start:', datetime.datetime.now()
    #Define our connection string
    conn_string = "host='52.74.79.13' dbname='sammy' user='******' password='******'"

    # print the connection string we will use to connect
    print "Connecting to database\n	->%s" % (conn_string)

    # get a connection, if a connect cannot be made an exception will be
    # raised here
    conn = psycopg2.connect(conn_string)

    # conn.cursor will return a cursor object, you can use this cursor to
    # perform queries
    cursor = conn.cursor()
    print "Connected!\n"

    cursor.execute(
        "select vi.vid, tf.*, vi.duration, tl.grade from train_features tf inner join \
	video_info vi on tf.video_id = vi.video_id \
	inner join train_label tl on tl.user_id = tf.user_id \
	 order by user_id, event_time;")

    # where tf.user_id in ('ff930d24cbdeb11e6dde8ceb0da5ac64', 'eee1df0fff33a37873990992bed20e82') \
    records = cursor.fetchall()
    print('fetch train data done, ', datetime.datetime.now())
    svm_trainset = createFeatures(records, True)

    cursor.execute(
        "select vi.vid, tf.*, vi.duration from test_features tf inner join \
	video_info vi on tf.video_id = vi.video_id \
	 order by user_id, event_time;")
    # where tf.user_id in ('a74fe6d4812fa93a1afa1a6a334ebdda', '4ab9d6eadf7510198f468d10fc29f689', '55654c092cd47b64ec9860f6a9cf3b40') \
    records = cursor.fetchall()
    print('fetch test data done, ', datetime.datetime.now())
    svm_testset = createFeatures(records, False)

    svm.train(svm_trainset['featureList'], svm_trainset['labelList'])
    svm.classify(svm_testset['featureList'], svm_testset['userList'])

    print('program finish', datetime.datetime.now())
Example 13
def main():
    data = load_data()
    train, test = split(data)
    best_c = svm.optimize_regularization(data)
    print()
    print("Best C: %.5f" % best_c)

    theta = svm.train(train, c=best_c)
    result = svm.testing(test, theta)

    print("Total error: %.5f" % result['er'])
    print("Precision: %.5f" % result['pre'])
    print("Recall: %.5f" % result['rec'])
    print("F1-metric: %.5f" % result['f1'])
Example 15
def main():
    # Get training and testing data
    data = sp.io.loadmat('../data/mnist2.mat')
    # training
    n, p = np.shape(data['xtrain'])

    w0 = np.zeros(p)
    T = 200
    l = 0.1

    for t in range(1, T + 1):
        w = svm.train(w0, data['xtrain'], data['ytrain'], t * n, l)
        print('Train Accuracy: {0}, Test Accuracy: {1}'.format(
            accuracy(data['xtrain'], data['ytrain'], w),
            accuracy(data['xtest'], data['ytest'], w)))
Example 16
def main():
	parser = argparse.ArgumentParser(description='Run SVM and Perceptron algorithms of Adult Data Set.')
	parser.add_argument('Training_filename', help='Training file')
	parser.add_argument('Test_filepath', help = 'Test File')
	args = parser.parse_args()
	dev = args.Training_filename
	test = args.Test_filepath

	print "Loading the data files...\n"
	X,Y = matrixbuild(args.Training_filename)
	DX,DY = matrixbuild(dev)
	TX, TY = matrixbuild(test)

	print "Training for the perceptron.\n"
	perc_weights = perceptron1.gradienttrain(X,Y,100)

	print "\n\nChecking accuracy on the test set.\n"
	perc_accuracy = perceptron1.classify(TX, TY, perc_weights)
	
	print ("The accuracy of the perceptron on the test set was %s%%\n" % perc_accuracy)
	

	#-------------Run the SVM algorithm------------#

	# find_c(dev_matrix,dev_classes, runs_each, learn)
	print "Finding best c from the dev set...\n"
	c, c_accuracy, c_list = svm.find_c(DX, DY, 20, 0.5)

	# train(data_matrix, real_classes, runs, learn, cost)

	print ("\n\nTraining for the SVM with C = %f\n" % c)
	acc, svm_weights, b = svm.train(X, Y, 100, 0.5, c, "train")
	
	# classify(test_matrix, test_class, weights, b)

	print "\n\nChecking accuracy on the test set.\n"
	svm_accuracy = svm.classify(TX,TY,svm_weights, b)

	print ("The accuracy of the SVM on the test set was %s%%" % svm_accuracy)

	plt.plot(c_list, c_accuracy)
	plt.xlabel("Cost value")
	plt.ylabel("Accuracy")
	plt.title("C vs. Accuracy")
	plt.show()
Example 17
def main():
    data = pandas.read_csv('Data/credits.data', sep=',', header=0, index_col=False)
    data = pandas.get_dummies(data)
    arr = data.as_matrix()
    X = arr[:,range(0,6) + range(7,47)]
    y = arr[:,6]

    # shuffle
    p = np.random.permutation(len(X))
    X = X[p]
    y = y[p]

    #train svm
    # w,w0, support_vectors_idx = svm.train(X[:,[0,1,2,3,4,5,6,7]],y,c=999, eps=0.000001)
    w, w0, support_vectors_idx = svm.train(X, y, c=99999, eps=0.000000001)
    #plot result
    predicted_labels = svm.classify_all(X,w,w0)
    print("Accuracy: {}".format(svm.getAccuracy(y,predicted_labels)))

    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=99, eps=0.00001)
    print (kfold)
Example 19
def test(thrsh, k, i, da):

    print(i, end=" ")

    # Train and predict values.
    classifier = svm.train(da[0], da[2], k, threshold=thrsh)
    info = svm.classify(classifier, da[1], return_sums=True)

    y_pred = info[0]
    sums = info[1]

    # Print percentage success
    percent = 1 - np.mean(y_pred != da[3].T)
    if (percent > .5):
        print(colored("{:.2f}\t".format(percent), 'green'), end=" ")
    elif (percent > .01):
        print(colored("{:.2f}\t".format(percent), 'blue'), end=" ")
    else:
        print(colored("{:.2f}\t".format(percent), 'red'), end=" ")
    if i % 4 == 0:
        print()

    return percent, y_pred, sums
Example 20
def main():
    m = 150
    random.seed(2)
    X = np.empty([m, 2])
    X[:, 0] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)
    X[:, 1] = np.matrix((random.sample(range(-10000, 10000), m))) / float(1000)

    X = preprocessing.scale(X)  # scale returns a new array; keep the result

    #linearly separable
    y = np.empty([m, 1])
    for i in range(m):
        y[i] = func(X[i, ])

    # shuffle
    p = np.random.permutation(len(X))
    X = X[p]
    y = y[p]

    #plot data and decision surface
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    #train svm
    w, w0, support_vectors_idx = svm.train(X, y, c=9999, eps=0.000001)

    #plot result
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=999999999, eps=0.000001)
    print(kfold)

    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
Example 21
def train(model,case_type,number=1):
    average_accuracy=0
    test_accuracy=list()
    if model=='svm':
        import svm
        x_train,y_train,x_test,y_test=svm.loadText(case_type)
        for _ in range(int(number)):
            test_accuracy.append(svm.train(x_train,y_train,x_test,y_test)) 
            average_accuracy=average_accuracy+test_accuracy[_]
    elif model=='cnn':
        import cnn
        train_data, test_data, train_label, test_label, vocab = cnn.get_data(case_type,mode='sequence')
        for _ in range(int(number)):
            test_accuracy.append(cnn.train_model(case_type,train_data, test_data, train_label, test_label, vocab))
            average_accuracy=average_accuracy+test_accuracy[_]
    elif model=='lstm':
        import lstm
        train_data, test_data, train_label, test_label, vocab = lstm.get_data(case_type,mode='sequence')
        for _ in range(int(number)):
            test_accuracy.append(lstm.train_model(case_type,train_data, test_data, train_label, test_label, vocab))
            average_accuracy=average_accuracy+test_accuracy[_]
    elif model=='keras_text_cnn':
        import keras_text_cnn as text_cnn
        train_data, test_data, train_label, test_label, vocab = text_cnn.get_data(case_type,mode='sequence')
        for _ in range(int(number)):
            test_accuracy.append(text_cnn.train_model(case_type,train_data, test_data, train_label, test_label, vocab))
            average_accuracy=average_accuracy+test_accuracy[_]
    average_accuracy=average_accuracy/int(number)
    print("aveage accuract:" +str(average_accuracy))
    with open(file='D:/judgement_prediction/judgement_prediction/'+case_type+'/information.txt', mode="a",encoding='utf-8') as target_file:
        target_file.write(case_type)
        for i in range(int(number)):
            target_file.write(str(test_accuracy[i])+' ')
        target_file.write(',average:'+str(average_accuracy)+'\n')
Example 22
import numpy as np
import svm
import kernel as k

# Test AND gate
clsfyr = svm.train([[1, 1], [1, -1], [-1, 1], [-1, -1]], [1, -1, -1, -1],
                   k.linear)
# expected: [-1, -1, -1, 1, 1, -1, -1, -1] (AND is true only for [1, 1])
print("classified: " + str(
    svm.classify(clsfyr, [[-1, -1], [1, -1], [-1, 1], [1, 1], [1, 1], [1, -1],
                          [-1, -1], [-1, 1]])))
print("\n\n\n\n\n")

X = np.array([[1.0, 0.0], [2.0, 0.0], [3.0, 0.0], [-1.0, 0.0], [-2.0, 0.0],
              [-3.0, 0.0]])
y = np.array([[1.0], [1.0], [1.0], [-1.0], [-1.0], [-1.0]])
clsfyr = svm.train(X, y, k.linear)
print("classified: " + str(svm.classify(clsfyr, X)))
Example 23
	return numpy.array(features)

def kim_tfidf_ngrams(filename):
	return uni_features, bi_features

def many_sentiment(filename):
	return sentiment.get_sentiment_counts(filename)

if __name__ == "__main__":
	train_file = '/home/ak/Courses/cs73/project/dataset/small_train.txt'
	kim = kim_pos(train_file) # 5 features
	zhang = zhang_pos(train_file) # 7 features
	sent = many_sentiment(train_file) # 2 features

	X_train = numpy.hstack((kim, zhang, sent))
	t_train = svm.compile_targets(train_file)

	model = svm.train(X_train, t_train)

	test_file = '/home/ak/Courses/cs73/project/dataset/small_test.txt'
	kim = kim_pos(test_file) # 5 features
	zhang = zhang_pos(test_file) # 7 features
	sent = many_sentiment(test_file) # 2 features

	X_test = numpy.hstack((kim, zhang, sent))
	t_test = svm.compile_targets(test_file)

	y_pred = svm.test(model, X_test)
	metrics.run_classification_metrics(t_test, y_pred)
Example 24
def main(args):
    w2v_model = encode.train(args.log_name, min_count=1)
    svm.train(args.log_name, args.normal_traces, w2v_model)
Example 25
    cnn_test = cnndata['vin_testing']
    cnn_test_extracted = [cnn_test[vin] for vin in tstset]

    cnn_recordTest = cnndata['record_testing']
    cnn_rTdata = np.asarray(map(lambda x: x['data'], cnn_recordTest))
    cnn_rt_length = len(cnn_rTdata)
    cnn_rT_data = cnn_rTdata.reshape(cnn_rt_length, 576)
    cnn_rT_label = np.asarray(map(lambda x: x['label'],
                                  cnn_recordTest)).reshape(cnn_rt_length, 2)
    #print "training set"
    #print trset
    #print "testing set"
    #print tstset

    svm_tst = svm.train(svm_tr_set_feature, svm_tr_set_label,
                        svm_tst_set_feature, svm_tst_set_label, modelFolder,
                        svm_th)
    cnn_tst = cnn.train(cnn_train, cnn_test, cnn_rT_data, cnn_rT_label,
                        modelFolder, cnn_th)
    svm_th = max(svm_th, svm_tst)
    cnn_th = max(cnn_th, cnn_tst)
    print "=========testing phase========="
    s = svm.classify(svm_tst_set_feature, modelFolder)
    c = cnn.classify("trained/" + modelFolder + "/cnnmodel.ckpt",
                     cnn_test_extracted)
    #print "svm prediction: "
    #print s
    #print "cnn prediction"
    #print c

    compound = zip(s, c)
Example 26
        x[ix] = oldval - h
        fxmh = f(x)
        x[ix] = oldval
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (abs(grad_numerical) + abs(grad_analytic))
        print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error))
# Now check the gradient again with the regularization term included
loss, grad = svm.svm_loss_naive(w,x_dev,y_dev,0.0)
f = lambda w:svm.svm_loss_naive(w,x_dev,y_dev,0.0)[0]
grad_numerical = grad_check_sparse(f,w,grad)

# Test the model
svm = LinearSVM()    # create the object; its weight matrix W is still empty
tic = time.time()
loss_hist = svm.train(x_train, y_train, learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)    # the svm object now holds W
toc = time.time()
print('that took %fs' % (toc -tic))

plt.plot(loss_hist)
plt.xlabel('iteration number')
plt.ylabel('loss value')
plt.show()
# After training, save the parameters and use them for prediction; compute the accuracy
y_train_pred = svm.predict(x_train)
print('training accuracy: %f'%(np.mean(y_train==y_train_pred)))
y_val_pred = svm.predict(x_val)
print('validation accuracy: %f'%(np.mean(y_val==y_val_pred)))
# A dataset is usually split into training, development (validation), and test sets.
# Besides validating training results, the validation set is used for hyperparameter
# tuning: try each parameter combination, train an SVM, test it on the validation
# set, and keep the model with the highest validation accuracy.
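# A minimal sketch of that selection loop, assuming the LinearSVM train/predict
# API used above and a held-out split x_val, y_val:
best_val, best_svm = -1, None
for lr in [1e-7, 5e-7]:
    for reg in [2.5e4, 5e4]:
        clf = LinearSVM()
        clf.train(x_train, y_train, learning_rate=lr, reg=reg, num_iters=1500)
        val_acc = np.mean(y_val == clf.predict(x_val))
        if val_acc > best_val:
            best_val, best_svm = val_acc, clf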
Example 27
def bootstrapping(B, X, y, C):

    accuracy = np.zeros(B)
    precision = np.zeros(B)
    recall = np.zeros(B)
    specificity = np.zeros(B)

    n, d = X.shape
    for b in range(B):
        train_samples = list(np.random.randint(0, n, n))
        test_samples = list(set(range(n)) - set(train_samples))

        # train the model
        theta = svm.train(X[train_samples], y[train_samples], C)

        testSet = X[test_samples]
        testLabels = y[test_samples]
        n2, d2 = testSet.shape

        tp = 0
        tn = 0
        fp = 0
        fn = 0

        for j in xrange(n2):
            # extract the test point and test label
            test_point = testSet[j, :].T
            test_label = testLabels[j]
            # count if the test was good or not

            # test the model
            testResult = svm.test(theta, test_point)

            if testResult == 1 and test_label == 1:
                tp += 1
            if testResult == 1 and test_label == -1:
                fp += 1
            if testResult == -1 and test_label == 1:
                fn += 1
            if testResult == -1 and test_label == -1:
                tn += 1

        #print 'tp, tn, fp, fn'
        #print tp, tn, fp, fn
        #print ''

        try:
            accuracy[b] = float(tp + tn) / float(fn + fp + tp + tn)
        except ZeroDivisionError:
            accuracy[b] = 0.0

        try:
            recall[b] = float(tp) / float(tp + fn)
        except ZeroDivisionError:
            recall[b] = 0.0

        try:
            precision[b] = float(tp) / float(tp + fp)
        except ZeroDivisionError:
            precision[b] = 0.0

        try:
            specificity[b] = float(tn) / float(tn + fp)
        except ZeroDivisionError:
            specificity[b] = 0.0

        error = np.ones(B)
        error -= accuracy

    return accuracy, error, recall, precision, specificity

Example 28
		word = email[i]
		count= int(email[i+1])
		arr.append(count)
		i+=2
	x[itr] = arr
	if(len(arr)>max_len):
		max_len=len(arr)
	
	line = f.readline()
	total-=1
	if(total==0):
		break
	
f.close()

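# libsvm-style call below: the option string '-t 0' selects a linear kernel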
trained = svm.train(lebel,x,'-t 0')



label_test ={}
x_test ={}
ftest = open('../data/test','r')

line = ftest.readline()
itr =0
max_len =0
while(line):
	itr=itr+1
	email = line.split(' ')
	if(email[1]=='ham'):
		label_test[itr] =-1
Example 29
        train_label_1 = y_1[0:s1]
        train_data_2 = x_2[0:s2]
        train_label_2 = y_2[0:s2]

        test_data_1 = x_1[s1:]
        test_label_1 = y_1[s1:]
        test_data_2 = x_2[s2:]
        test_label_2 = y_2[s2:]

        #generate training data
        x = np.concatenate((train_data_1,train_data_2), axis=0)
        y = np.concatenate((train_label_1,train_label_2), axis=0)


        #training a model
        svm_rbf = svm.train(x,y,"rbf")
        svm_linear = svm.train(x,y,"linear")
        w,mean = linear.train(x,y)

        #generate test data
        test_data  = np.concatenate((test_data_1,test_data_2), axis=0)
        test_label = np.concatenate((test_label_1,test_label_2), axis=0)

        #prediction
        svm_rbf_label = svm.test(test_data,svm_rbf)
        linear_label = linear.test(test_data,w,mean)
        svm_linear_label = svm.test(test_data,svm_linear)

        #get result
        svm_rbf_error = error_rate(test_label,svm_rbf_label)
        linear_error = error_rate(test_label,linear_label)
Example 30
grad_numerical=grad_check_sparse(f,w,grad)

tic=time.time()
loss_naive,grad_naive=svm.svm_loss_naive(w,x_dev,y_dev,0.00001)
toc=time.time()
print('naive loss: %e computed in %f s' % (loss_naive,toc-tic))

tic=time.time()
loss_vectorized,grad_vectorized=svm.svm_loss_vectorized(w,x_dev,y_dev,0.00001)
toc=time.time()
print('vectorized loss: %e computed in %f s' % (loss_vectorized,toc-tic))
print('difference: %f ' % (loss_naive-loss_vectorized))

svm=LinearSVM()
tic=time.time()
loss_hist=svm.train(x_train,y_train,learning_rate=1e-7, reg=5e4,num_iters=1500,verbose=True)
toc=time.time()
print('that took %f  s' % (toc-tic))

y_train_pred=svm.predict(x_train)
print('training accuracy: %f ' % (np.mean(y_train==y_train_pred)))
y_val_pred=svm.predict(x_val)
print('validation accuracy : %f '% (np.mean(y_val==y_val_pred)))

learning_rates=[1.4e-7,1.5e-7,1.6e-7]
regularization_strengths=[(1+i*0.1)*1e4 for i in range(-3,3)]+[(2+0.1*i)*1e4 for i in range(-3,3)]
results={}
best_val=-1
best_svm=None
for learning in learning_rates:
    for regularization in regularization_strengths:
Example 31
        globals.test_feature_vec[1].extend([subject] * len(histograms))
    
print('Time:', timer, '\n', file = globals.file)

# Print
print('Done!\n')

# Print
print('Training Support Vector Machine Model\n')

# Train SVM Model
print('Training %s SVM Models\n' % arguments.descriptor, file = globals.file)

# SVM Model 
SVM = svm.train(gama = 0.001,
                descriptor_name = arguments.descriptor,
                model_name = 'SVM')

# Print
print('Done!\n')

# Print
print('Testing Support Vector Machine Model\n')

# Test SVM Model
print('Testing %s SVM Model\n' % arguments.descriptor, file = globals.file)

# SVM Model 
SVM_predict = svm.test(model = SVM,
                       descriptor_name = arguments.descriptor,
                       model_name = 'SVM')
Example 32

data = main()
train_x, train_y, test_x, test_y = split(data, 0.4)

c = nw.get_c(train_x, train_y)
w1, w2 = nw.train(train_x, train_y, c)
pnw, rnw = nw.test(test_x, test_y, w1, w2)
enw = 2 * pnw * rnw / (pnw + rnw)

print("nw:")
print("\tF1 %.3f " % enw)
print("\tprecision %.3f, recall %.3f" % (pnw, rnw))

c = svm.get_c(train_x, train_y)
tsvm = svm.train(train_x, train_y, c)
psvm, rsvm = svm.test(test_x, test_y, tsvm)
esvm = 2 * psvm * rsvm / (psvm + rsvm)

print("svm:")
print("\tF1 %.3f " % esvm)
print("\tprecision %.3f, recall %.3f" % (psvm, rsvm))

tp = perceptrone.train(train_x, train_y)
pp, rp = perceptrone.test(test_x, test_y, tp)
ep = 2 * pp * rp / (pp + rp)

print("lp:")
print("\tF1 %.3f " % ep)
print("\tprecision %.3f, recall %.3f" % (pp, rp))
Example 33
	train_y = [1.0 if x[1] == 'M' else -1.0 for x in temp_data[:b]]
	test_x = [numpy.array([float(i) for i in x[2:]]) for x in temp_data[b:]]
	test_y = [1.0 if x[1] == 'M' else -1.0 for x in temp_data[b:]]
	return train_x, train_y, test_x, test_y

def main():
	f = open('wdbc.data')
	lines = f.readlines()
	data = [x for x in lines]
	return data

data = main()
train_x, train_y, test_x, test_y = split(data, 0.4)

c = svm.get_c(train_x, train_y)
tsvm = svm.train(train_x, train_y, c)
psvm, rsvm = svm.test(test_x, test_y, tsvm)
esvm = 2 * psvm * rsvm / (psvm + rsvm)

print("svm:")
print("\tF1 %.3f " %esvm)
print("\tprecision %.3f, recall %.3f" %(psvm, rsvm))

tp = perceptrone.train(train_x, train_y)
pp, rp = perceptrone.test(test_x, test_y, tp) 
ep = 2 * pp * rp / (pp + rp)

print("lp:")
print("\tF1 %.3f " %ep)
print("\tprecision %.3f, recall %.3f" %(pp, rp))
Example 34
def cross_validation(X, y, foldcount, C):

    accuracy = np.zeros(foldcount)
    precision = np.zeros(foldcount)
    recall = np.zeros(foldcount)
    specificity = np.zeros(foldcount)
    n, d = X.shape

    # extract k folds from the data
    split = cross_validation_split(y, foldcount)

    # running k fold x validation
    for j in xrange(foldcount):

        # breaking up the folds into train and test
        trainInd = []
        testInd = split[j]
        for i in xrange(foldcount):
            if j == i:
                continue
            trainInd += split[i]

        # construct the training and testing sets

        trainSet = X[trainInd]
        trainLabels = y[trainInd]

        testSet = X[testInd]
        testLabels = y[testInd]

        # train the model
        theta = svm.train(trainSet, trainLabels, C)

        n = len(testInd)
        # Matt is terrible

        # getting information on the statistical results
        tp = 0
        tn = 0
        fp = 0
        fn = 0
        for i in xrange(n):
            # extract the test point and test label
            test_point = testSet[i]
            test_label = testLabels[i]
            # count if the test was good or not

            # test the model
            testResult = svm.test(theta, test_point)

            if testResult == 1 and test_label == 1:
                tp += 1
            if testResult == 1 and test_label == -1:
                fp += 1
            if testResult == -1 and test_label == 1:
                fn += 1
            if testResult == -1 and test_label == -1:
                tn += 1

        # making sure there are no zero denominators
        # probably unnecessary but just in case
        #print 'tp, tn, fp, fn'
        #print tp, tn, fp, fn
        #print ''

        try:
            accuracy[j] = float(tp + tn) / float(fn + fp + tp + tn)
        except ZeroDivisionError:
            accuracy[j] = 0.0

        try:
            recall[j] = float(tp) / float(tp + fn)
        except ZeroDivisionError:
            recall[j] = 0.0

        try:
            precision[j] = float(tp) / float(tp + fp)
        except ZeroDivisionError:
            precision[j] = 0.0

        try:
            specificity[j] = float(tn) / float(tn + fp)
        except ZeroDivisionError:
            specificity[j] = 0.0

        error = np.ones(foldcount)
        error -= accuracy

    return accuracy, error, recall, precision, specificity
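
# Example usage (a sketch): 10-fold cross-validation at a fixed C, assuming a
# feature matrix X and +/-1 labels y are already loaded:
accuracy, error, recall, precision, specificity = cross_validation(X, y, 10, C=1.0)
print('mean accuracy: %f, mean recall: %f' % (accuracy.mean(), recall.mean()))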
Example 35
for tr, tst in kf:
    cv += 1
    print "cross validation fold %d" % (cv)
    trvin = vinlist[tr]
    tstvin = vinlist[tst]

    svmtrain = filter(lambda x: x['vin'] in trvin, svmdata)
    svmtest = filter(lambda x: x['vin'] in tstvin, svmdata)
    cnntrain = {}
    cnntest = {}
    for k in cnndata.keys():
        if (k in trvin):
            cnntrain[k] = cnndata[k]
        if (k in tstvin):
            cnntest[k] = cnndata[k]
    svm.train(svmtrain)
    cnn.train(cnntrain)

    svmclassify = svm.classify(svmtest)
    svmres = svmclassify['detail']
    svmacc = svmclassify['accuracy']
    cnnclassify = cnn.classify(cnntest)
    cnnres = cnnclassify['detail']
    cnnacc = cnnclassify['accuracy']
    print "standalone classifier accuracy: svm -- %f , cnn -- %f" % (svmacc,
                                                                     cnnacc)

    pred = {}
    for each in svmres:
        vin = each['vin']
        svm_proba = each['proba_predicted']
Example 36
def main():
    train_file = '/home/ak/Courses/cs73/project/dataset/small_train.txt'
    test_file = '/home/ak/Courses/cs73/project/dataset/small_test.txt'

    sent_included = False
    train_feats = []
    test_feats = []
    if 'k' in sys.argv:
        kim_train, kim_test = kim_features(train_file, test_file)
        train_feats.append(kim_train)
        test_feats.append(kim_test)
        if not sent_included:
            train_feats.append(many_sentiment(train_file))
            test_feats.append(many_sentiment(test_file))
            sent_included = True
    if 'o' in sys.argv:
        train_feats.append(omahony_features(train_file))
        test_feats.append(omahony_features(test_file))
        if not sent_included:
            train_feats.append(many_sentiment(train_file))
            test_feats.append(many_sentiment(test_file))
            sent_included = True
    if 'l' in sys.argv:
        train_feats.append(liu_features(train_file))
        test_feats.append(liu_features(test_file))
        if not sent_included:
            train_feats.append(many_sentiment(train_file))
            test_feats.append(many_sentiment(test_file))
            sent_included = True
    if 'z' in sys.argv:
        train_feats.append(zhang_features(train_file))
        test_feats.append(zhang_features(test_file))
        if not sent_included:
            train_feats.append(many_sentiment(train_file))
            test_feats.append(many_sentiment(test_file))
            sent_included = True
    if 't' in sys.argv:
        tfidf_train, tfidf_test = tfidf_ngrams(train_file,
                                               test_file,
                                               with_lsi=False)
        train_feats.append(tfidf_train)
        test_feats.append(tfidf_test)
    if 's' in sys.argv:
        train_feats.append(many_sentiment(train_file))
        test_feats.append(many_sentiment(test_file))
    if 'tl' in sys.argv:
        tfidf_train, tfidf_test = tfidf_ngrams(train_file,
                                               test_file,
                                               with_lsi=True)
        train_feats.append(tfidf_train)
        test_feats.append(tfidf_test)
    if 'bp' in sys.argv:
        train_feats.append(kim_pos(train_file))
        test_feats.append(kim_pos(test_file))

    X_train = None
    X_test = None
    if len(train_feats) > 1:
        X_train = scipy.sparse.hstack(train_feats)
        X_test = scipy.sparse.hstack(test_feats)
    else:
        X_train = train_feats[0]
        X_test = test_feats[0]

    svm.normalize(X_train)
    svm.normalize(X_test)

    # Classification
    # SV
    t_train_thresh = svm.compile_targets(train_file)
    t_test_thresh = svm.compile_targets(test_file)

    clf = ExtraTreesClassifier()
    X_new = clf.fit(X_train.toarray(), t_train_thresh).transform(X_train)
    if clf.feature_importances_.shape[0] < 500:
        for i in xrange(clf.feature_importances_.shape[0]):
            print i, clf.feature_importances_[i]
    '''bsvm = SVC(kernel="linear")
	selector = RFECV(bsvm, step=10)
	selector.fit(X_train, t_train_thresh)
	print selector.support_
	print selector.ranking_
	raw_input()'''

    class_model = None
    y_pred = None
    if 'rf' not in sys.argv:
        class_model = svm.train(X_train, t_train_thresh)
        y_pred = svm.test(class_model, X_test)
    else:
        class_model = rfc.train(X_train.todense(), t_train_thresh)
        y_pred = rfc.test(class_model, X_test.todense())
    metrics.run_classification_metrics(t_test_thresh, y_pred)
    print

    # Regression
    # SVR
    t_train = svr.compile_targets(train_file)
    t_test = svr.compile_targets(test_file)
    if 'rf' not in sys.argv:
        reg_model = svr.train(X_train, t_train)
        y_pred = svr.test(reg_model, X_test)
    else:
        reg_model = rfr.train(X_train.todense(), t_train)
        y_pred = rfr.test(reg_model, X_test.todense())

    #for i in xrange(X_test.shape[0]):
    #	print y_pred[i], t_train[i]
    metrics.run_regression_metrics(t_test, y_pred)

    show_regression(y_pred, t_test)