def test_svc_invalid_break_ties_param(SVCClass):
    """Check that predict rejects break_ties=True with the 'ovo' decision shape.

    The estimator is fitted with ``break_ties=True`` and
    ``decision_function_shape='ovo'``; calling ``predict`` is then expected to
    raise a ValueError whose message matches "break_ties must be False".

    Args:
        SVCClass: estimator class to instantiate (supplied by the test
            parametrization, which is outside this block).
    """
    X, y = make_blobs(random_state=42)

    svm = SVCClass(kernel="linear", decision_function_shape='ovo',
                   break_ties=True, random_state=42).fit(X, y)

    with pytest.raises(ValueError, match="break_ties must be False"):
        # Predict on the feature matrix rather than the label vector, so the
        # test does not depend on the error firing before input validation.
        svm.predict(X)
	def testLinear(self):
		## 加载数据
		dataArr, labelArr = self.loadDataSet('data/dataset2svm/testSet.txt')
		svm = SVMLib()
		## 训练一个线性分类器
		ws, b = svm.fit(dataArr, labelArr, 0.6, 0.001, 40)
		print ws
		dataMat = mat(dataArr)
		## 前半部分计算值为分类结果,后面为实际结果
		## SVM分类器是个二元分类器,其结果为-1或1
		## 因此训练时,训练集的值也为-1或1
		print '-----------------'
		print svm.predict(dataMat[0], ws, b), labelArr[0]
	def testMultiLinear(self):
		## 加载数据
		dataArr, labelArr = self.loadMultiDataSet('data/dataset2svm/horseColicTest.txt')

		svm = SVMLib()
		## 训练一个线性分类器
		ws, b = svm.fit(dataArr, labelArr, 0.6, 0.001, 40)
		print ws
		dataMat = mat(dataArr)
		## 前半部分计算值为分类结果,后面为实际结果
		## SVM分类器是个二元分类器,其结果为-1或1
		## 因此训练时,训练集的值也为-1或1
		print '-----------------'
		## 根据SVM判断第4个数据的分类,大于0为1,小于0为-1
		print svm.predict(dataMat[3], ws, b), labelArr[3]
Beispiel #4
0
def leave_one_out_cv(gram_matrix, labels, alg = 'SVM'):
    """
    Leave-one-out cross-validation on a precomputed Gram matrix.

    For every fold the train/train and test/train sub-blocks of
    ``gram_matrix`` are sliced out, a fresh classifier is fitted on the
    former and evaluated on the latter.  The mean and standard deviation of
    the per-fold scores are printed.

    NOTE: uses the legacy ``sklearn.cross_validation`` module, so this code
    targets scikit-learn releases that still ship it.

    Args:
        gram_matrix: square matrix indexed by sample along both axes.
        labels: label array that supports indexing with an index array.
        alg: 'SVM' (SVC with kernel='precomputed') or 'kNN'
            (KNeighborsClassifier with default settings).

    Returns:
        (preds, scores): the held-out predictions in fold order and the list
        of per-fold scores.

    Raises:
        ValueError: if ``alg`` is neither 'SVM' nor 'kNN'.  Previously this
            surfaced as an unbound-variable error inside the loop.
    """
    if alg not in ('SVM', 'kNN'):
        raise ValueError("alg must be 'SVM' or 'kNN', got %r" % (alg,))

    scores = []
    preds = []
    loo = sklearn.cross_validation.LeaveOneOut(len(labels))
    for train_index, test_index in loo:
        X_train, X_test = gram_matrix[train_index][:,train_index], gram_matrix[test_index][:, train_index]
        y_train, y_test = labels[train_index], labels[test_index]
        # A new estimator per fold so no state leaks between folds.
        if alg == 'SVM':
            model = sklearn.svm.SVC(kernel = 'precomputed')
        else:
            model = sklearn.neighbors.KNeighborsClassifier()
        model.fit(X_train, y_train)
        preds += model.predict(X_test).tolist()
        scores.append(model.score(X_test, y_test))

    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print("Mean accuracy: %f" %(np.mean(scores)))
    print("Stdv: %f" %(np.std(scores)))

    return preds, scores
Beispiel #5
0
def k_fold_cv(gram_matrix, labels, folds = 10, alg = 'SVM', shuffle = True):
    """
    K-fold cross-validation on a precomputed Gram matrix.

    For every fold the train/train and test/train sub-blocks of
    ``gram_matrix`` are sliced out, a fresh classifier is fitted on the
    former and evaluated on the latter.  The mean and standard deviation of
    the per-fold scores are printed.

    NOTE: uses the legacy ``sklearn.cross_validation`` module, so this code
    targets scikit-learn releases that still ship it.

    Args:
        gram_matrix: square matrix indexed by sample along both axes.
        labels: label array that supports indexing with an index array.
        folds: number of folds passed to KFold.
        alg: 'SVM' (SVC with kernel='precomputed') or 'kNN'
            (KNeighborsClassifier with default settings).
        shuffle: forwarded to KFold; the random_state is drawn from
            ``random.randint(0, 100)`` on every call.

    Returns:
        (preds, scores): the held-out predictions in fold order and the list
        of per-fold scores.

    Raises:
        ValueError: if ``alg`` is neither 'SVM' nor 'kNN'.  Previously this
            surfaced as an unbound-variable error inside the loop.
    """
    # The leftover pdb.set_trace() debugging breakpoint was removed: it
    # blocked every non-interactive call.
    if alg not in ('SVM', 'kNN'):
        raise ValueError("alg must be 'SVM' or 'kNN', got %r" % (alg,))

    scores = []
    preds = []
    splitter = sklearn.cross_validation.KFold(len(labels), folds, shuffle = shuffle, random_state = random.randint(0,100))
    for train_index, test_index in splitter:
        X_train, X_test = gram_matrix[train_index][:,train_index], gram_matrix[test_index][:, train_index]
        y_train, y_test = labels[train_index], labels[test_index]
        # A new estimator per fold so no state leaks between folds.
        if alg == 'SVM':
            model = sklearn.svm.SVC(kernel = 'precomputed')
        else:
            model = sklearn.neighbors.KNeighborsClassifier()
        model.fit(X_train, y_train)
        preds += model.predict(X_test).tolist()
        scores.append(model.score(X_test, y_test))

    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print("Mean accuracy: %f" %(np.mean(scores)))
    print("Stdv: %f" %(np.std(scores)))

    return preds, scores
Beispiel #6
0
def run_model(train_data, train_labels, test_data, test_labels):
    '''
    Fit a default ``SVC`` on the training data and predict labels for the test data.

    The predictions are scored with ``performance_metrics.get_perform_metrics``
    and the resulting mapping is tagged with the vector and algorithm names.

    Args:
        train_data: Training features, passed straight to ``SVC.fit``.
        train_labels: Training set labels.
        test_data: Features to predict labels for.
        test_labels: True labels for ``test_data``, used only for scoring.
    Returns:
        pred_labels: The labels predicted by the fitted classifier.
        perform_results: The metrics mapping with 'vector' and 'alg' entries added.
    '''

    # Train a support vector classifier with default settings.
    classifier = SVC()
    classifier.fit(train_data, train_labels)

    # Score the trained model against the held-out set.
    pred_labels = classifier.predict(test_data)
    perform_results = performance_metrics.get_perform_metrics(test_labels, pred_labels)

    # Attach the run's identifying information to the metrics.
    for key, value in (('vector', 'Bag_of_Words'),
                       ('alg', 'Support_Vector_Machine')):
        perform_results[key] = value

    return pred_labels, perform_results
def svm_iterkernel(train_data, train_labels, test_data, test_labels, op_name_dir):
	"""Fit an SVC per kernel, report overall and per-label accuracy, return predictions.

	Only the 'linear' kernel is currently iterated.  For each kernel the
	elapsed time and rounded accuracy percentages are printed and, when an
	output path is given, appended to that file.

	Args:
		train_data, train_labels: data used to fit each SVC.
		test_data, test_labels: data used for prediction and accuracy;
			``test_labels`` must support elementwise comparison and boolean
			indexing.
		op_name_dir: path of a file to append results to, or 'None'/'none'
			(or None) to skip file output.

	Returns:
		dict mapping kernel name to the prediction array for ``test_data``.
	"""
	label_set=np.unique(train_labels)

	# The original test `op_name_dir != ('None' or 'none')` only compared
	# against 'None', so passing 'none' tried to open a file named "none".
	write_log = op_name_dir is not None and op_name_dir not in ('None', 'none')
	fo = open(op_name_dir,'a') if write_log else None

	predict_list={}
	try:
		for kernel in ['linear']: #, 'poly', 'rbf']:
			t0=time.time()
			svm = SVC(C=1., kernel=kernel, cache_size=10240)
			svm.fit(train_data, train_labels)
			prediction=svm.predict(test_data)
			predict_list[kernel]=prediction
			pred_acc_tot =(float(np.sum(prediction == test_labels)))/len(test_labels)
			# Measure once so the console and the file report the same time.
			elapsed = time.time() - t0
			acc_text = str(round(pred_acc_tot*100))
			# One pre-formatted string prints identically on Python 2 and 3.
			print('%s %s %s' % (elapsed, ',kernel = '+kernel, ',pred acc = '+acc_text))
			if fo is not None:
				fo.write('time='+str(elapsed)+'sec,kernel='+kernel+',pred acc='+acc_text+'\n')
			for lab_unq in label_set:
				# Fraction of samples with this true label that were predicted correctly.
				hits=(prediction == lab_unq) & (test_labels == lab_unq)
				pred_acc=float(hits.sum())/(len(test_labels[test_labels == lab_unq]))
				line = 'pred_'+str(lab_unq)+','+str(round(pred_acc*100))
				print(line)
				if fo is not None:
					fo.write(line+'\n')
	finally:
		# Close the log even if fitting or prediction raises.
		if fo is not None:
			fo.close()

	return predict_list
Beispiel #8
0
def get_error(svm, X, y):
    """Return the fraction of samples that ``svm`` misclassifies.

    Each row ``X[i]`` is predicted individually and the first element of the
    prediction is compared with ``y[i]``.
    """
    total = y.shape[0]
    mistakes = sum(1 for idx in range(total)
                   if y[idx] != svm.predict(X[idx])[0])
    return mistakes*1. / total
Beispiel #9
0
def plotSVM(svm,n,title):
    """Draw the decision regions of ``svm`` in cell ``n`` of a 2x2 subplot grid.

    Uses the module-level mesh ``xx``/``yy`` for the background and the
    module-level ``X``/``Y`` for the scattered points.
    """
    plt.subplot(2,2,n)
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    regions = svm.predict(mesh_points).reshape(xx.shape)

    plt.contourf(xx, yy, regions, cmap=plt.cm.Paired, alpha=0.8)
    plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
    plt.title(title)
def test_accuracy(svm,x,y):
    """Count the validation samples that ``svm`` misclassifies.

    Despite the name, the return value is the number of misclassified
    samples (0 means every sample was predicted correctly), not a ratio.

    Args:
        svm: classifier whose ``predict(x)`` result can be indexed per sample.
        x: validation inputs, passed to ``svm.predict`` in one call.
        y: true labels; flattened with ``ravel()`` before comparison.

    Returns:
        int: number of positions where prediction and label differ.
    """
    hypothesis = svm.predict(x)
    flat_y = y.ravel()
    # enumerate replaces the Python-2-only xrange index loop.
    return sum(1 for i, actual in enumerate(flat_y)
               if not (hypothesis[i] == actual))
def increment_svm(svm, L_ids, baseline_accuracy):
    """Choose the best augmentation of the labeled set from the SVM's extreme predictions.

    Relies on the module-level ``X`` (feature rows), ``y`` (labels) and
    ``instance_ids`` (all candidate ids).

    Steps:
      1. Split the ids into labeled (``L_ids``) and unlabeled (the rest).
      2. Rank the unlabeled rows by ``svm.decision_function`` and keep the 500
         lowest and 500 highest.  With fewer than 1000 unlabeled rows the two
         slices overlap.
      3. Label those rows with ``svm.predict``.
      4. Draw two stratified splits (10% augment / 90% unused) of them; for
         each, append the augment part to the labeled set and score a
         LinearSVC with 5-fold cross-validation.
      5. Return the augmented id set with the highest mean CV accuracy.

    Args:
        svm: fitted classifier exposing ``decision_function`` and ``predict``.
        L_ids: ids (row indices into ``X`` / ``y``) of the labeled instances.
        baseline_accuracy: accepted for interface compatibility; not used.

    Returns:
        (best_L_prime_ids, best_accuracy)
    """
    L = X[L_ids]
    y_l = y[L_ids]

    U_ids = np.array(list((set(instance_ids) - set(L_ids))))
    U = X[U_ids]

    ordered_indices = np.argsort(svm.decision_function(U))
    smallest_indices = ordered_indices[:500]
    largest_indices = ordered_indices[-500:]

    high_confidence_unlabeled = scipy.sparse.vstack([U[smallest_indices], U[largest_indices]])
    high_confidence_ids = np.concatenate([U_ids[smallest_indices], U_ids[largest_indices]])
    high_confidence_predicted_labels = svm.predict(high_confidence_unlabeled)

    splits = sklearn.cross_validation.StratifiedShuffleSplit(high_confidence_predicted_labels, n_iter=2, test_size=0.9)

    saved_L_prime_ids = []
    saved_cv_accuracies = []

    # Only the "augment" side of each split is used; the original also sliced
    # out the other side and several label/id arrays that were never read.
    for augment_indices, _unused_indices in splits:

        augment = high_confidence_unlabeled[augment_indices]
        augment_ids = high_confidence_ids[augment_indices]
        augment_labels = high_confidence_predicted_labels[augment_indices]

        L_prime = scipy.sparse.vstack([L, augment])
        y_l_prime = np.concatenate([y_l, augment_labels])

        saved_L_prime_ids.append(np.concatenate([L_ids, augment_ids]))

        svm_prime = sklearn.svm.LinearSVC(penalty='l2', C=10, dual=False)
        accuracy = sklearn.cross_validation.cross_val_score(svm_prime, L_prime, y_l_prime, cv=5, n_jobs=7).mean()

        saved_cv_accuracies.append(accuracy)

    best_index = np.argmax(saved_cv_accuracies)
    best_L_prime_ids = saved_L_prime_ids[best_index]
    best_accuracy = saved_cv_accuracies[best_index]

    return best_L_prime_ids, best_accuracy
def predict_embedded_attributes_labels(data_mat, svms):
    """
    Predict class labels for every feature vector (=row) in data_mat with each SVM.

    Entries of ``svms`` that are None are skipped, leaving their row as zeros.

    @return: Matrix with one row per SVM and one column per feature vector,
             so each column holds the class labels for one feature vector.
    """
    num_attributes = len(svms)
    num_examples = data_mat.shape[0]
    A = np.zeros(shape=(num_attributes, num_examples))
    log.d("Classifying {} examples...".format(num_examples))
    for att_idx, svm in enumerate(svms):
        log.update_progress(att_idx + 1, num_attributes)
        if svm is None:
            continue
        predicted = svm.predict(data_mat)
        if sklearn.__version__ != '0.14.1':
            # the return format of predict was changed in 0.15...
            predicted = predicted.T
        A[att_idx] = predicted
    print("")
    return A
def test_svc_ovr_tie_breaking(SVCClass):
    """Check that predict follows the decision function only when break_ties is on.

    With ``break_ties=False`` the predictions on a dense grid must differ
    somewhere from the argmax of the OVR decision values; with
    ``break_ties=True`` they must agree everywhere.
    Related issue: https://github.com/scikit-learn/scikit-learn/issues/8277
    """
    X, y = make_blobs(random_state=27)

    xs = np.linspace(X[:, 0].min(), X[:, 0].max(), 1000)
    ys = np.linspace(X[:, 1].min(), X[:, 1].max(), 1000)
    xx, yy = np.meshgrid(xs, ys)

    for break_ties in (False, True):
        svm = SVCClass(kernel="linear", decision_function_shape='ovr',
                       break_ties=break_ties, random_state=42).fit(X, y)
        pred = svm.predict(np.c_[xx.ravel(), yy.ravel()])
        dv = svm.decision_function(np.c_[xx.ravel(), yy.ravel()])
        if break_ties:
            assert np.all(pred == np.argmax(dv, axis=1))
        else:
            assert not np.all(pred == np.argmax(dv, axis=1))
def hw1q18():
    """Homework 1, question 18: RBF-kernel SVC for "class 0 vs rest" over several C values.

    Reads the module-level X_train, Y_train, X_test and Y_test and uses the
    module-level helper plot_01.  For each C the support-vector count, the
    predicted class counts on the test set and the number of test
    misclassifications are printed, and one three-panel figure is created.
    All figures are shown together at the end.
    """
    print "----------------------------------------"
    print "         Homework 1 Question 18         "
    print "----------------------------------------"

    # Binary targets: 1 where the original label is 0, otherwise 0.
    Y_train_0 = (Y_train == 0).astype(int)
    Y_test_0 = (Y_test == 0).astype(int)

    print "in the training set:"
    print "n(+) =", np.count_nonzero(Y_train_0 == 1), "n(-) =", np.count_nonzero(Y_train_0 == 0)

    print "in the test set:"
    print "n(+) =", np.count_nonzero(Y_test_0 == 1), "n(-) =", np.count_nonzero(Y_test_0 == 0)

    for C in (0.001, 0.01, 0.1, 1, 10):
        svm = sklearn.svm.SVC(C=C, kernel="rbf", gamma=100, tol=1e-7, shrinking=True, verbose=False)
        svm.fit(X_train, Y_train_0)

        print "----------------------------------------"
        print "C =", C

        support = svm.support_
        # coef and b are extracted but not used further in this function.
        coef = svm.dual_coef_[0]
        b = svm.intercept_[0]

        print "nSV =", len(support)
        Y_predict = svm.predict(X_test)

        print "in the prediction:"
        print "n(+) =", np.count_nonzero(Y_predict == 1), "n(-) =", np.count_nonzero(Y_predict == 0)

        # E_out is reported as a count of misclassified test samples, not a rate.
        print "E_out =", np.count_nonzero(Y_test_0 != Y_predict)
        print

        # One figure per C: training data, test predictions, support vectors.
        fig = plt.figure()
        plt.suptitle("C =" + str(C))
        plt.subplot(311)
        plt.title("Training data: green +, red -")
        plot_01(X_train, Y_train_0)
        plt.tick_params(axis="x", labelbottom="off")

        plt.subplot(312)
        plt.title("Prediction on test data: green +, red -")
        plot_01(X_test, Y_predict)
        plt.tick_params(axis="x", labelbottom="off")

        plt.subplot(313)
        plt.title("Support vectors: blue")
        # All training points in red first, then the support vectors over them in blue.
        plt.plot(X_train[:, 0], X_train[:, 1], "r.")
        plt.plot(X_train[support, 0], X_train[support, 1], "b.")

    plt.show()
Beispiel #15
0
def testSVM(svm,zero,one):
    """Print how many samples of two labelled groups ``svm`` classifies correctly.

    Args:
        svm: classifier; ``svm.predict(d)[0]`` is taken as the predicted label.
        zero: iterable of samples whose expected label is 0.
        one: iterable of samples whose expected label is 1.
    """
    numcorrect = 0
    numwrong = 0
    for correct,testing in ((0,zero),(1,one)):
        for d in testing:
            # The leftover pdb.set_trace() was removed: it stopped execution
            # on every single sample.
            r = svm.predict(d)[0]
            if(r==correct):
                numcorrect += 1
            else:
                numwrong += 1
    # One pre-formatted string prints identically on Python 2 and 3.
    print("Correct %s" % (numcorrect,))
    print("Wrong %s" % (numwrong,))
    def runSVM(self):
        """
        Runs the SVM on 5 different splits of cross validation data
        """
        for train, test in self.kf:
            svm = self.models["SVM"]

            train_set, train_labels = self.getCurrFoldTrainData(train)
            test_set, test_labels = self.getCurrFoldTestData(test)
            svm.fit(train_set, train_labels)

            preds = svm.predict(test_set)
            acc = self.getAccuracy(test_labels, preds)
            print "(SVM) Percent correct is", acc
Beispiel #17
0
def test_svm(svm, testing_dict, name):
    """Print correct/wrong counts and the accuracy percentage of ``svm``.

    ``testing_dict`` maps each expected label to an iterable of samples; every
    sample is predicted on its own and the first element of the prediction is
    compared with the expected label.  ``name`` prefixes each printed line.
    """
    outcomes = [svm.predict(sample)[0] == expected
                for expected, samples in testing_dict.items()
                for sample in samples]
    num_correct = sum(1 for hit in outcomes if hit)
    num_wrong = len(outcomes) - num_correct

    print("\n{1} - Correct:{0}".format(num_correct, name), end="")
    print("\n{1} - Wrong:{0}".format(num_wrong, name), end="")
    accuracy = float(num_correct)/(num_correct+num_wrong)*100
    print("\n{1} - Accuracy:{0:.2f}%".format(round(accuracy,2), name), end="")
Beispiel #18
0
def plotSVM(svm, n, title):
    """Draw the decision regions of ``svm`` in cell ``n`` of a 2x2 subplot grid.

    The scattered points are the module-level ``training_0/1/2`` lists from
    index ``plot_num`` onwards, coloured green, red and blue respectively.
    The background uses the module-level mesh ``xx``/``yy``.
    """
    points = training_0[plot_num:] + training_1[plot_num:] + training_2[plot_num:]
    X = np.array(points)

    greens = ["g" for _ in training_2d_0][plot_num:]
    reds = ["r" for _ in training_2d_1][plot_num:]
    blues = ["b" for _ in training_2d_2][plot_num:]
    colors = np.array(greens + reds + blues)

    plt.subplot(2, 2, n)
    regions = svm.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.contourf(xx, yy, regions, cmap = plt.cm.Paired, alpha = 0.8)
    plt.scatter(X[:, 0], X[:, 1], c = colors, cmap = plt.cm.Paired)
    plt.title(title)
Beispiel #19
0
    def repare():
        """Re-predict and store the rating of every unchecked, incorrect row in ``help``.

        Selects rows with ``checked IS NULL AND correct = 0``, predicts a new
        rating for each with the SVM returned by ``Helper.train_svm()`` and
        writes it back with ``checked = 1, correct = 2``.  Row columns are
        read positionally: 0 = user id, 1 = movie id, 2 = rating.
        All updates are committed together at the end.
        """
        conn = pymysql.connect(host='localhost', port=3306, user='******', passwd='', db='small_rekomendacyjny')
        try:
            cur = conn.cursor()
            cur.execute("SELECT * FROM help WHERE checked is NULL AND correct = 0")
            svm = Helper.train_svm()
            for row in cur:
                print(row)
                vector = [Helper.prepare_vector(mov_id=row[1], us_id=row[0], rat=float(row[2]))]
                repared = float(svm.predict(vector)[0])

                new_curr = conn.cursor()
                print('stara: ' + str(float(row[2])) + ' poprawiona: ' + str(repared))
                # The original statement had a stray comma before WHERE
                # ("correct = 2, where"), which is a SQL syntax error.
                new_curr.execute(
                    "UPDATE help SET rating=%s, checked = 1, correct = 2 WHERE user_id = %s AND movie_id = %s",
                    [repared, row[0], row[1]])
            conn.commit()
        finally:
            # Release the connection even if prediction or an update fails.
            conn.close()
Beispiel #20
0
def hog_svm_logo_cls_rm():
    """Classify each test image with the saved HOG+SVM model and remove detected logos.

    Images classified as 1 are passed to ``logo_clean``; all others are simply
    opened for display.
    """
    # tlogos, imgs = load_delogo_dataset("tests", (373, 54))  # load data from the image dir
    tlogos, imgs = load_delogo_dataset("cls_rm_tests", (373, 54))  # load data from the image dir
    hog = get_hog()
    svm = joblib.load("save/logo_svm_cls.pkl")
    pngp = "ilogor.png"
    for tlogo, img in zip(tlogos, imgs):
        features = hog_extractor(hog, tlogo)  # HOG feature extraction
        label = svm.predict([features])  # SVM classifier
        print("svm 分类:", label)
        if label[0] != 1:
            Image.open(img).show()
            print("没有logo水印,pass")
            continue
        print("识别到logo水印,消去水印...")
        logo_clean(img, pngp, savp="xout")  # remove the core watermark
    print("over!")
Beispiel #21
0
def trainTest():
    """Train three classifiers on the 2010 TAC data and evaluate them on 2011.

    Fits Gaussian Naive Bayes, a linear-kernel SVC and logistic regression,
    then prints each accuracy plus the logistic-regression confusion matrix
    and writes the same report to the module-level file handle ``fh``.
    """

    data2010, labels2010 = read_tac('2010')
    data2011, labels2011 = read_tac("2011")

    #classifiers
    gnb = naive_bayes.GaussianNB()
    # Named ``svc`` rather than ``svm``: assigning to ``svm`` made it a local
    # name, so ``svm.SVC`` raised UnboundLocalError before the module was read.
    svc = svm.SVC(kernel = "linear")
    logReg = linear_model.LogisticRegression()

    gnb.fit(data2010, labels2010)
    svc.fit(data2010, labels2010)
    logReg.fit(data2010, labels2010)

    gnbPrediction = gnb.predict(data2011)
    svmPrediction = svc.predict(data2011)
    logRegPrediction = logReg.predict(data2011)

    gnbAccuracy = accuracy(labels2011, gnbPrediction)
    svmAccuracy = accuracy(labels2011, svmPrediction)
    logRegAccuracy = accuracy(labels2011, logRegPrediction)

    confusionMatrix = metrics.confusion_matrix(labels2011, logRegPrediction)

    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print("Results:")
    print("Gaussian Naive Bayes: ")
    print(gnbAccuracy)
    print("Support Vector Machine: ")
    print(svmAccuracy)
    print("Logistic Regression: ")
    print(logRegAccuracy)
    print(confusionMatrix)

    # str() keeps the writes valid when accuracy() returns a number; it is a
    # no-op if it already returns text.
    fh.write("Results:" + "\n")
    fh.write("Gaussian Naive Bayes: "  + "\n")
    fh.write(str(gnbAccuracy) + "\n")
    fh.write("Support Vector Machine: "  + "\n")
    fh.write(str(svmAccuracy) + "\n")
    fh.write("Logistic Regression: "  + "\n")
    fh.write(str(logRegAccuracy) + "\n")
    for i in confusionMatrix:
        fh.write(str(i))
        fh.write("\n")
    fh.write("-------------------------------------------------\n")
    fh.write("\n\n")
Beispiel #22
0
def test1 ():
    """Compare SVM453X with scikit-learn's linear SVC on a six-point toy problem.

    Prints the parameters learned by both models and the fraction of training
    points on which their predictions agree.
    """
    # Toy problem: three points per class.
    X = np.array([ [1,1], [2,1], [1,2], [2,3], [1,4], [2,4] ])
    y = np.array([-1,-1,-1,1,1,1])

    # Model under test.
    custom = SVM453X()
    custom.fit(X, y)
    print(custom.w, custom.b)

    # Reference model; C=1e15 approximates a hard margin.
    reference = sklearn.svm.SVC(kernel='linear', C=1e15)
    reference.fit(X, y)
    print(reference.coef_, reference.intercept_)

    agreement = custom.predict(X) == reference.predict(X)
    acc = np.mean(agreement)
    print("Acc={}".format(acc))
Beispiel #23
0
def main():
    """Train an OpenCV linear C-SVC on HOG features and report test accuracy.

    Loads train/test images with their labels, resizes them, maps the string
    labels to integers, extracts HOG features, trains the SVM and passes the
    integer predictions with the true labels to ``countAccuracy``.
    """

    hog = cv2.HOGDescriptor()

    #GET IMAGES AND THEIR LABEL
    print("Loading pictures...")
    train_img, train_labels = get_images("train", "train_labels.csv")
    test_img, test_labels = get_images("test", "test_labels.csv")
    print("Loaded...")
    #RESIZE
    print("Resizing images...")
    train_img = resize_images(train_img)
    test_img = resize_images(test_img)
    print("Resized...")
    #MAP LABEL TO INT
    train_labels = list(map(strToNumberLabels, train_labels))
    test_labels = list(map(strToNumberLabels, test_labels))
    #EXTRACT FEATURES
    print("Before hog extraction")
    features_train = hog_compute(hog, train_img)
    features_test = hog_compute(hog, test_img)

    print("passed hog extraction")
    #
    trainingDataMat = np.array(features_train)
    labelsMat = np.array(train_labels)

    # Configure the model completely before training.  The original trained
    # once with the default C, then set C=100 and trained again, so the first
    # (expensive) training run was thrown away.
    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setKernel(cv2.ml.SVM_LINEAR)
    svm.setTermCriteria((cv2.TERM_CRITERIA_COUNT, 100, 1.e-10))
    svm.setC(100)
    #svm.setGamma(0.1)

    print("Training model...")
    svm.train(trainingDataMat, cv2.ml.ROW_SAMPLE, labelsMat)

    sample_data = np.array(features_test, np.float32)
    response = svm.predict(sample_data)
    # response[1] holds one row per sample; its first entry is the label.
    final = [int(y[0]) for y in response[1]]
    countAccuracy(final, test_labels)
def hw1q16():
    """Homework 1, question 16: degree-2 polynomial SVC, one "class vs rest" model per even digit.

    Reads the module-level X_train and Y_train and uses the module-level
    helper plot_01.  For each class the sum of absolute dual coefficients,
    the intercept and the number of training misclassifications are printed,
    and one three-panel figure is created.  All figures are shown together
    at the end.
    """
    print "----------------------------------------"
    print "         Homework 1 Question 16         "
    print "----------------------------------------"

    # polynomial kernel: (coef0 + gamma * x1.T * x2) ** degree

    for idx in (0, 2, 4, 6, 8):
        svm = sklearn.svm.SVC(
            C=0.01, kernel="poly", degree=2, gamma=1, coef0=1, tol=1e-4, shrinking=True, verbose=False
        )

        # Binary targets: 1 where the label equals idx, otherwise 0.
        Y_train_i = (Y_train == idx).astype(int)

        svm.fit(X_train, Y_train_i)
        Y_predict_i = svm.predict(X_train)

        support = svm.support_
        coef = svm.dual_coef_[0]
        b = svm.intercept_[0]
        # E_in is reported as a count of misclassified training samples, not a rate.
        E_in = np.count_nonzero(Y_train_i != Y_predict_i)

        print "For class %d:" % (idx)
        print "sum(alpha) =", np.sum(np.abs(coef))
        print "b =", b
        print "E_in =", E_in

        # One figure per class: training data, predictions, support vectors.
        fig = plt.figure()
        # plt.suptitle('%d vs rest' % (idx))
        plt.subplot(311)
        plt.title("Training data: green +, red -")
        plot_01(X_train, Y_train_i)
        plt.tick_params(axis="x", labelbottom="off")

        plt.subplot(312)
        plt.title("Prediction: green +, red -")
        plot_01(X_train, Y_predict_i)
        plt.tick_params(axis="x", labelbottom="off")

        plt.subplot(313)
        plt.title("Support vectors: blue")
        # All training points in red first, then the support vectors over them in blue.
        plt.plot(X_train[:, 0], X_train[:, 1], "r.")
        plt.plot(X_train[support, 0], X_train[support, 1], "b.")

    plt.show()
Beispiel #25
0
def OpenCvSVM(X,y):
    """Train an OpenCV linear C-SVC on a standardized 80/20 split and predict the test part.

    Args:
        X: feature rows, convertible to a float32 array.
        y: integer class labels, one per row of ``X``.

    Returns:
        (y_pred, y_test): float array of predicted labels for the test split
        and the int32 array of true test labels.
    """
    X_train,X_test,y_train,y_test=train_test_split(X, y, test_size=0.2,random_state=1)
    X_train=np.array(X_train,dtype=np.float32)
    X_test=np.array(X_test,dtype=np.float32)
    # The training labels get the same int32 cast as the test labels;
    # previously only y_test was converted before being handed to OpenCV.
    y_train=np.array(y_train,dtype=np.int32)
    y_test=np.array(y_test,dtype=np.int32)
    sc=StandardScaler()
    sc.fit(X_train)
    # Re-assert float32 after scaling in case the scaler returns float64.
    X_train=np.asarray(sc.transform(X_train),dtype=np.float32)
    X_test=np.asarray(sc.transform(X_test),dtype=np.float32)
    svm = cv2.ml.SVM_create()
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setKernel(cv2.ml.SVM_LINEAR)
    svm.train(X_train, cv2.ml.ROW_SAMPLE, y_train)
    y_predict = svm.predict(X_test)
    # y_predict[1] holds one row per sample; its first entry is the label.
    y_pred=np.array([row[0] for row in y_predict[1]],dtype=float)
    return y_pred,y_test
Beispiel #26
0
 def classify(self,X):
     """Return a {label: weight} dict for ``X`` using the SVM of its spatial bin.

     The last two entries of ``X`` are a position that ``pos_to_xybin`` maps
     to a bin; the remaining entries are the features.  If the bin's entry
     cannot predict, the entry itself is used as the guess (presumably bins
     may store a fixed label instead of a classifier; confirm against the
     code that fills ``self.svms``).

     Returns:
         A dict giving weight 1 to the guessed label and 0 to the other, or
         .5 to both when the guess matches neither label.
     """
     x,y = X[-2:]
     xbin,ybin = pos_to_xybin(x,y)

     entry = self.svms[xbin][ybin]
     try:
         guess = entry.predict(np.ravel(X[:-2]))
     except Exception:
         # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
         # are no longer swallowed; the best-effort fallback is unchanged.
         guess = entry
     if guess == self.labels[0]:
         return {self.labels[0]: 1,
                 self.labels[1]: 0}
     elif guess == self.labels[1]:
         return {self.labels[0]: 0,
                 self.labels[1]: 1}
     else:
         return {self.labels[0]: .5,
                 self.labels[1]: .5}
Beispiel #27
0
def find_better_hour(thetime, theday, theweather, thenbhd, svm_pred, kde_pred):
    """Find the hour between 8 and 17 with the highest KDE score among SVM-flagged hours.

    Uses the module-level ``process``, ``svm`` and ``kde``.  An hour is a
    candidate when ``svm.predict`` returns -1 for it; among candidates the
    one with the largest ``kde.score_samples`` value above -100 wins.

    Args:
        thetime: the currently selected hour, compared with the best hour.
        theday, theweather, thenbhd: forwarded to ``process`` for every hour.
        svm_pred, kde_pred: accepted for interface compatibility; not used.

    Returns:
        -1 if no hour qualifies, the best hour unchanged if it equals
        ``thetime``, otherwise the best hour formatted as 'H:00'.
    """
    best_hour = -1
    best_kde = -100

    for hour in range(8, 18):
        proc = process(theday, hour, theweather, thenbhd)
        this_svm = svm.predict(proc)[0]
        this_kde = kde.score_samples(proc)[0]

        if this_svm == -1 and this_kde > best_kde:
            best_hour = hour
            best_kde = this_kde

    if best_hour == -1 or best_hour == thetime:
        return best_hour
    # Format the winning hour.  The original used the loop variable ``hour``,
    # which is always 17 once the loop has finished.
    return str(best_hour) + ':00'
Beispiel #28
0
def svm_test():
    """Fit a linear SVC on a four-point toy set, print its predictions and intercept, and plot both.

    Uses the module-level helper ``plot_01`` for the two panels: the true
    labels on top and the predicted labels below.
    """
    X_train = np.array([[0, 0], [1, 0], [0, 2], [-2, 0]])
    Y_train = np.array([1, 1, 0, 0])
    svm = sklearn.svm.SVC(C=100000, kernel='linear', shrinking=False, verbose=False)
    svm.fit(X_train, Y_train)
    Y_predict = svm.predict(X_train)
    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print(Y_predict)
    b = svm.intercept_[0]
    print(b)

    plt.figure()
    plt.suptitle('svm test')
    plt.subplot(211)
    plot_01(X_train, Y_train)
    plt.subplot(212)
    plot_01(X_train, Y_predict)
    # Overlay the predicted classes as circles: red for 0, green for 1.
    plt.plot(X_train[Y_predict == 0, 0], X_train[Y_predict == 0, 1], 'ro')
    plt.plot(X_train[Y_predict == 1, 0], X_train[Y_predict == 1, 1], 'go')
    plt.show()
Beispiel #29
0
def hw1q16():
    """Homework 1, question 16: degree-2 polynomial SVC, one "class vs rest" model per even digit.

    Reads the module-level X_train and Y_train and uses the module-level
    helper plot_01.  For each class the sum of absolute dual coefficients,
    the intercept and the number of training misclassifications are printed,
    and one three-panel figure is created.  All figures are shown together
    at the end.
    """
    print '----------------------------------------'
    print '         Homework 1 Question 16         '
    print '----------------------------------------'

    # polynomial kernel: (coef0 + gamma * x1.T * x2) ** degree

    for idx in (0, 2, 4, 6, 8):
        svm = sklearn.svm.SVC(C=0.01, kernel='poly', degree=2, gamma=1, coef0=1, tol=1e-4, shrinking=True, verbose=False)

        # Binary targets: 1 where the label equals idx, otherwise 0.
        Y_train_i = (Y_train == idx).astype(int)

        svm.fit(X_train, Y_train_i)
        Y_predict_i = svm.predict(X_train)

        support = svm.support_
        coef = svm.dual_coef_[0]
        b = svm.intercept_[0]
        # E_in is reported as a count of misclassified training samples, not a rate.
        E_in = np.count_nonzero(Y_train_i != Y_predict_i)

        print 'For class %d:' % (idx)        
        print 'sum(alpha) =', np.sum(np.abs(coef))
        print 'b =', b
        print 'E_in =', E_in

        # One figure per class: training data, predictions, support vectors.
        fig = plt.figure()
        # plt.suptitle('%d vs rest' % (idx))
        plt.subplot(311)
        plt.title('Training data: green +, red -')
        plot_01(X_train, Y_train_i)
        plt.tick_params(axis='x', labelbottom='off')
        
        plt.subplot(312)
        plt.title('Prediction: green +, red -')
        plot_01(X_train, Y_predict_i)
        plt.tick_params(axis='x', labelbottom='off')

        plt.subplot(313)
        plt.title('Support vectors: blue')
        # All training points in red first, then the support vectors over them in blue.
        plt.plot(X_train[:, 0], X_train[:, 1], 'r.')
        plt.plot(X_train[support, 0], X_train[support, 1], 'b.')

    plt.show()
Beispiel #30
0
def hw1q19():
    """Homework 1, question 19: RBF-kernel SVC for "class 0 vs rest" over several gamma values.

    Reads the module-level X_train, Y_train, X_test and Y_test.  For each
    gamma the predicted class counts on the test set and the number of test
    misclassifications are printed.
    """
    # Every print takes one pre-formatted string, so the output is the same
    # on Python 2 and Python 3.
    print('----------------------------------------')
    print('         Homework 1 Question 19         ')
    print('----------------------------------------')

    # Binary targets: 1 where the original label is 0, otherwise 0.
    Y_train_0 = (Y_train == 0).astype(int)
    Y_test_0 = (Y_test == 0).astype(int)

    for gamma in (1, 10, 100, 1000, 10000):
        svm = sklearn.svm.SVC(C=0.1, kernel='rbf', gamma=gamma, tol=1e-7, shrinking=True, verbose=False)
        svm.fit(X_train, Y_train_0)
        print('----------------------------------------')
        print('gamma = %s' % (gamma,))
        Y_predict_0 = svm.predict(X_test)
        print('in the prediction:')
        print('n(+) = %s n(-) = %s' % (np.count_nonzero(Y_predict_0 == 1), np.count_nonzero(Y_predict_0 == 0)))

        # E_out is reported as a count of misclassified test samples, not a rate.
        print('E_out = %s' % (np.count_nonzero(Y_test_0 != Y_predict_0),))
        print('')
def hw1q19():
    """Homework 1, question 19: RBF-kernel SVC for "class 0 vs rest" over several gamma values.

    Reads the module-level X_train, Y_train, X_test and Y_test.  For each
    gamma the predicted class counts on the test set and the number of test
    misclassifications are printed.
    """
    # Every print takes one pre-formatted string, so the output is the same
    # on Python 2 and Python 3.
    print("----------------------------------------")
    print("         Homework 1 Question 19         ")
    print("----------------------------------------")

    # Binary targets: 1 where the original label is 0, otherwise 0.
    Y_train_0 = (Y_train == 0).astype(int)
    Y_test_0 = (Y_test == 0).astype(int)

    for gamma in (1, 10, 100, 1000, 10000):
        svm = sklearn.svm.SVC(C=0.1, kernel="rbf", gamma=gamma, tol=1e-7, shrinking=True, verbose=False)
        svm.fit(X_train, Y_train_0)
        print("----------------------------------------")
        print("gamma = %s" % (gamma,))
        Y_predict_0 = svm.predict(X_test)
        print("in the prediction:")
        print("n(+) = %s n(-) = %s" % (np.count_nonzero(Y_predict_0 == 1), np.count_nonzero(Y_predict_0 == 0)))

        # E_out is reported as a count of misclassified test samples, not a rate.
        print("E_out = %s" % (np.count_nonzero(Y_test_0 != Y_predict_0),))
        print("")
Beispiel #32
0
def plot_decision_boundary(X, y, clf, test_ind = None, resolution = 0.02):
    '''
    Plot the decision boundary of ``clf`` together with the samples of each class.

    Args:
        X: 2D array, size [batch, features], features = 2
        y: class label per row of X; at most three distinct classes can be
           drawn because only three markers are defined.
        clf: fitted classifier whose ``predict`` accepts an [n, 2] array.
        test_ind: optional indices into X of the test samples; those points
           are additionally circled.
        resolution: step of the mesh on which ``clf`` is evaluated.
    '''

    markers = ('s', 'x', 'v') # markers for plot
    colors = ('red', 'green', 'blue', 'gray')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    x1min, x1max = X[:, 0].min(), X[:, 0].max()
    x2min, x2max = X[:, 1].min(), X[:, 1].max()

    xx, yy = np.meshgrid(np.arange(x1min, x1max, resolution), np.arange(x2min, x2max, resolution))
    grid_point = np.c_[xx.ravel(), yy.ravel()] # [samples, 2]

    # Use the classifier passed in.  The original ignored ``clf`` and called a
    # module-level ``svm`` instead.
    z = clf.predict(grid_point).reshape(xx.shape)
    plt.contour(xx, yy, z, alpha = 0.4, cmap = cmap)
    plt.xlim(x1min, x1max)
    plt.ylim(x2min, x2max)

    # plot data points
    for idx, c1 in enumerate(classes): # for class 1, 2, 3
        plt.scatter(
                x = X[y == c1, 0], # data points of each class separately
                y = X[y == c1, 1],
                c = cmap(idx), # use index of class to get from cmap
                alpha = 0.4,
                edgecolor = 'black',
                marker = markers[idx],
                )
    # highlight test samples
    # ``is not None`` instead of truthiness: an index array has no single
    # truth value.  The points are taken from X via test_ind; the original
    # referenced an undefined ``x_test``.
    if test_ind is not None:
       x_test = X[test_ind]
       plt.scatter(
                x = x_test[:, 0],
                y = x_test[:, 1],
                facecolors = 'none', # hollow markers (replaces the invalid c = '')
                alpha = 1.0,  # opacity of the marker
                marker = 'o',
                edgecolor = 'black',
                linewidths = 2,
                s = 55 # size of marker
                )
Beispiel #33
0
 def repare_one(svc, movie_id, user_id, rating):
     """Blend one rating with the SVM's prediction and store it in ``help``.

     The stored value is the mean of the predicted and the given rating,
     rounded to the nearest 0.5 (``round(pred + rating) / 2.0``).  The row is
     also marked ``checked = 1, correct = 2`` and the change is committed.

     Args:
         svc: fitted model whose ``predict`` accepts a list with one vector.
         movie_id, user_id: identify the row to update.
         rating: the rating currently given by the user.
     """
     conn = pymysql.connect(host='localhost', port=3306, user='******', passwd='', db='small_rekomendacyjny')
     try:
         vector = [Helper.prepare_vector(mov_id=movie_id, us_id=user_id, rat=rating)]
         print(vector)
         repared = svc.predict(vector)
         print(repared)
         repared = float(repared[0])
         new_one = round(repared + rating) / 2.0
         new_curr = conn.cursor()
         print('stara: ' + str(rating) + ' poprawiona: ' + str(new_one))
         new_curr.execute(
             "UPDATE help SET rating=%s, checked = 1, correct = 2 WHERE user_id = %s AND movie_id = %s",
             [new_one, user_id, movie_id])
         conn.commit()
     finally:
         # The connection used to be left open on every call.
         conn.close()
Beispiel #34
0
def trainTestSvm(data,labels,lblvec,B,L,T,split = 0.8,initializer = np.zeros,use_bias = True,kernel = None):
    """Train a pegasos SVM on two label groups and report accuracy on a held-out split.

    Args:
        data, labels, lblvec: forwarded to ``getData``, which returns the
            samples of the two groups to separate.
        B, L, T: forwarded to ``svm.train(B, L, T)``; L and T are also passed
            to ``svm.kernelTrain`` (presumably batch size, regularisation and
            iteration count; confirm against pegasos_solver).
        split: fraction of the shuffled data used for training.
        initializer: callable taking a shape, used for the initial weights
            and bias.
        use_bias: when False, None is passed as the bias.
        kernel: when given, the kernelised train/predict methods are used.

    Returns:
        The accuracy (tp + tn) / number of test predictions.
    """

    #sectioning data into only two labels and shuffling them
    data1,data2 = getData(data,labels,lblvec)
    label1,label2 = np.ones([data1.shape[0],1])*1,np.ones([data2.shape[0],1])*-1
    data12 = np.concatenate((data1,data2))
    label12 = np.concatenate((label1,label2))
    perm = np.random.permutation(data12.shape[0])
    data12 = data12[perm]
    label12 = label12[perm]

    #split into training and testing datasets
    sp = int(split * data12.shape[0])
    train,trainlbl,test,testlbl = data12[:sp],label12[:sp],data12[sp:],label12[sp:]

    #create and train the svm on the training set
    svm = pegasos_solver()
    bias = initializer([1]) if use_bias else None
    svm.init(train,trainlbl,initializer([1,data12.shape[1]]),bias)

    #train and test the svm either with primal subgradient descent or mercer kernels
    if kernel is None:
        svm.train(B,L,T)
        tres = svm.predict(test)
    else:
        svm.kernelTrain(L,T,kernel)
        tres = svm.predictKernel(kernel,test)

    #confusion counts: a positive prediction is > 0, a positive label is > 0
    tp,fp,tn,fn = 0,0,0,0
    for i,j in zip(tres,testlbl):
        if i > 0:
            if j > 0:
                tp += 1
            else:
                fp += 1
        else:
            if j < 0:
                tn += 1
            else:
                fn += 1

    accuracy = (tp + tn)/(tres.shape[0])
    # A test split holding a single class used to crash here with
    # ZeroDivisionError; the undefined rate is now reported as nan.
    tpr = tp/(tp + fn) if (tp + fn) else float('nan')
    fpr = fp/(tn + fp) if (tn + fp) else float('nan')
    print("Accuracy",accuracy,"TPR",tpr,"FPR",fpr)
    return accuracy
def svm_test():
    """Fit a linear SVC on a four-point toy set, print its predictions and intercept, and plot both.

    Uses the module-level helper ``plot_01`` for the two panels: the true
    labels on top and the predicted labels below.
    """
    X_train = np.array([[0, 0], [1, 0], [0, 2], [-2, 0]])
    Y_train = np.array([1, 1, 0, 0])
    svm = sklearn.svm.SVC(C=100000, kernel="linear", shrinking=False, verbose=False)
    svm.fit(X_train, Y_train)
    Y_predict = svm.predict(X_train)
    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print(Y_predict)
    b = svm.intercept_[0]
    print(b)

    plt.figure()
    plt.suptitle("svm test")
    plt.subplot(211)
    plot_01(X_train, Y_train)
    plt.subplot(212)
    plot_01(X_train, Y_predict)
    # Overlay the predicted classes as circles: red for 0, green for 1.
    plt.plot(X_train[Y_predict == 0, 0], X_train[Y_predict == 0, 1], "ro")
    plt.plot(X_train[Y_predict == 1, 0], X_train[Y_predict == 1, 1], "go")
    plt.show()
Beispiel #36
0
    def predict(self, svm, test):
        """
        Return the SVM's prediction for a landmark encoding.
        NOTE: The SVM must already be trained.

        Input
        -----
        svm: SVM object used for detection.
        test: Landmark array to be classified.

        Output
        ------
        Whatever ``svm.predict(test)`` returns, unchanged.

        TODO: This can be the same for both cv2 and sklearn
        """
        return svm.predict(test)
def plot_decision_boundary(svm,X,border_size):
    """Colour the decision regions of ``svm`` over the bounding box of ``X``.

    The classifier is evaluated point by point on a 100x100 grid spanning the
    range of the first two columns of ``X``.

    Args:
        svm: classifier whose ``predict([x0, x1])`` yields one value per call.
        X: 2D array; columns 0 and 1 define the plotted area.
        border_size: margin added around the data range on every side.
    """
    min_x0 = X[:,0].min()
    max_x0 = X[:,0].max()
    min_x1 = X[:,1].min()
    max_x1 = X[:,1].max()
    num_samples = 100
    x0 = np.linspace(min_x0,max_x0,num_samples)
    x1 = np.linspace(min_x1,max_x1,num_samples)
    xx0,xx1 = np.meshgrid(x0,x1)
    hypotheses = np.empty(xx0.shape).ravel()
    # enumerate/zip replaces the Python-2-only xrange index loop and no
    # longer rebinds the x0/x1 axis arrays inside the loop.
    for i, (p0, p1) in enumerate(zip(xx0.ravel(), xx1.ravel())):
        hypotheses[i] = svm.predict([p0,p1])
    hypotheses = np.reshape(hypotheses, (num_samples,num_samples) )
    plt.contourf(xx0,xx1,hypotheses, cmap=plt.cm.Paired, alpha=0.8)
    plt.axis([min_x0 - border_size,max_x0 + border_size,min_x1 - border_size,max_x1 + border_size])
Beispiel #38
0
def LinearSVC(X_train, y_train, X_test, y_test):
    """Train a linear SVM and report its test predictions and a CV score.

    Returns a pair ``(y_pred, score)``: the predictions for ``X_test`` and the
    mean of ``cross_val_score`` evaluated on ``(X_test, y_test)``.
    """
    from sklearn import svm as sk_svm
    from sklearn.model_selection import cross_val_score

    # Every hyper-parameter is spelled out explicitly.
    settings = {
        'C': 1.0,
        'class_weight': None,
        'dual': True,
        'fit_intercept': True,
        'intercept_scaling': 1,
        'loss': 'squared_hinge',
        'max_iter': 1000,
        'multi_class': 'ovr',
        'penalty': 'l2',
        'random_state': None,
        'tol': 0.0001,
        'verbose': 0,
    }
    model = sk_svm.LinearSVC(**settings).fit(X_train, y_train)

    predictions = model.predict(X_test)
    mean_cv_score = cross_val_score(model, X_test, y_test).mean()
    return predictions, mean_cv_score
Beispiel #39
0
def results():
    """Render the results page, with predictions when a form was posted.

    For a POST request the submitted 'time', 'weekday', 'weather' and 'nbhd'
    form fields are run through ``process``, scored by the ``svm`` and ``kde``
    models and handed to the template together with the suggested
    alternatives. Any other request renders the bare template.
    """
    if request.method != 'POST':
        return render_template('results.html')

    form = request.form
    thetime = form['time']
    theday = form['weekday']
    theweather = form['weather']
    thenbhd = form['nbhd']

    processed = process(theday, thetime, theweather, thenbhd)

    # Per-weather offset added on top of the KDE score.
    weather_scale = {
        'Clear or Partly Cloudy': 0,
        'Fog/Smog/Smoke': 5.25,
        'Snowing': 4.79,
        'Raining': 1.2
    }

    svm_pred = svm.predict(processed)[0]
    kde_pred = kde.score_samples(processed)[0] + weather_scale[theweather]

    # The three finders all take the same arguments.
    finder_args = (thetime, theday, theweather, thenbhd, svm_pred, kde_pred)
    better_day = find_better_day(*finder_args)
    better_hour = find_better_hour(*finder_args)
    better_nbhd = find_better_nbhd(*finder_args)

    return render_template("results.html",
                           result=form,
                           svm_pred=svm_pred,
                           kde_pred=kde_pred,
                           processed=processed,
                           better_day=better_day,
                           better_hour=better_hour,
                           better_nbhd=better_nbhd)
Beispiel #40
0
    def scores_ovo_student(self, X):
        '''
        Tally one-vs-one votes for every sample.

        Each binary SVM in ``self.binary_svm`` is keyed by a label pair
        ``(l1, l2)``; a truthy prediction is a vote for ``l1``, anything else
        a vote for ``l2``. The labels are used directly as indices into the
        score vector.

        Arguments:
            X: Features to predict.

        Returns:
            scores: a numpy ndarray with one row of vote counts per sample.
        '''
        all_votes = []
        for sample in X:
            votes = np.zeros(len(self.labels))
            for pair, classifier in self.binary_svm.items():
                first, second = pair
                winner = first if classifier.predict([sample]) else second
                votes[winner] += 1
            all_votes.append(votes)
        return np.array(all_votes)
def predict(feature):
    manmade_test = os.getcwd() + "/Images/manmade_test/"
    natural_test = os.getcwd() + "/Images/natural_test/"
    index, responses = calc_features_and_labels(manmade_test, natural_test,
                                                feature)

    results_knn = []
    results_svm = []
    for feat in index:
        results_knn.append(knn.predict([feat])[0])
        results_svm.append(svm.predict([feat])[0])

    target_names = ['manmade', 'natural']
    print 'KNN Classifier'
    print classification_report(responses,
                                results_knn,
                                target_names=target_names)
    print 'SVM Classifier'
    print classification_report(responses,
                                results_svm,
                                target_names=target_names)
Beispiel #42
0
def main(args):
    """Train an SVM and a random forest, then print both accuracies.

    The models are fitted on the feature files under ``args.feature_dir`` and
    evaluated on features generated from ``args.raw_data_file``. In both
    matrices column 0 holds the label and the remaining columns the features.
    """
    eval_matrix = extract_features.generate_features(
        pandas.read_csv(args.raw_data_file)).values
    eval_labels, eval_inputs = eval_matrix[:, 0], eval_matrix[:, 1:]

    train_matrix = mkf.load_files(args.feature_dir)
    train_labels, train_inputs = train_matrix[:, 0], train_matrix[:, 1:]

    svm = mkf.SVM(500)
    forest = RandomForestClassifier(max_depth=3)
    for model in (svm, forest):
        model.fit(train_inputs, train_labels)

    svm_res, forest_res = [model.predict(eval_inputs)
                           for model in (svm, forest)]

    print("Accuracy of svm: {}".format(
        accuracy_score(svm_res, eval_labels)))
    print("Accuracy of random forest: {}".format(
        accuracy_score(forest_res, eval_labels)))
class predict:
    """Script-style class whose body trains and pickles an SVC when defined.

    All the work happens while the class body executes: the CSV is loaded, a
    ``diff`` column (``endtime - starttime``) is derived, five columns are
    dropped, the rest is split into train and test sets, and an SVC is fitted
    and dumped to ``svm.pkl``. Every name assigned below ends up as a class
    attribute.
    """
    input_file = pd.read_csv('sample30DataAllInt.csv')
    dataframe = pd.DataFrame(input_file)
    dataframe['diff'] = dataframe['endtime'] - dataframe['starttime']
    # Drop the unused columns in a single call; 'Status' is the target.
    dataframe = dataframe.drop(
        ['station', 'starttime', 'endtime', 'Status', 'channel'], axis=1)
    print(dataframe)

    y = input_file['Status']
    X_train, X_test, y_train, y_test = train_test_split(
        dataframe, y, random_state=1)

    # On the right-hand side ``svm`` is still the outer name (presumably the
    # sklearn ``svm`` module -- confirm against the file's imports); after
    # this line the class attribute ``svm`` is the fitted classifier.
    svm = svm.SVC().fit(X_train, y_train)
    y_predict = svm.predict(X_test)
    joblib.dump(svm, 'svm.pkl')
    print(y_predict)

    s = [[460, 34567, 876545678765, 60]]
    # NOTE(review): the accuracy is computed but its value is discarded.
    accuracy_score(y_test, y_predict)
def hw1q20():
    print "----------------------------------------"
    print "         Homework 1 Question 20         "
    print "----------------------------------------"

    Y_train_0 = (Y_train == 0).astype(int)

    C = 0.1
    m = len(Y_train_0)
    gammas = [1, 10, 100, 1000, 10000]
    counts = [0] * len(gammas)

    for nrun in range(10):
        print "run", nrun

        # generate a random order of m indices
        arr = np.arange(m)
        np.random.shuffle(arr)

        # pick 1000 for cross validation
        X_curval_0 = X_train[arr[:1000]]
        Y_curval_0 = Y_train_0[arr[:1000]]
        X_curtrain_0 = X_train[arr[1000:]]
        Y_curtrain_0 = Y_train_0[arr[1000:]]

        E_vals = [0.0] * len(gammas)
        for i in range(len(gammas)):
            gamma = gammas[i]

            svm = sklearn.svm.SVC(C=C, kernel="rbf", gamma=gamma, tol=1e-3, shrinking=True, verbose=False)
            svm.fit(X_curtrain_0, Y_curtrain_0)
            Y_curpredict_0 = svm.predict(X_curval_0)
            E_val = np.count_nonzero(Y_curval_0 != Y_curpredict_0)

            E_vals[i] = E_val

        counts[np.argmin(E_vals)] += 1

    for i in range(len(gammas)):
        print "gamma", gammas[i], "got picked", counts[i], "times"
Beispiel #45
0
def hw1q20():
    print '----------------------------------------'
    print '         Homework 1 Question 20         '
    print '----------------------------------------'

    Y_train_0 = (Y_train == 0).astype(int)

    C = 0.1
    m = len(Y_train_0)
    gammas = [1, 10, 100, 1000, 10000]
    counts = [0] * len(gammas)

    for nrun in range(10):
        print 'run', nrun

        # generate a random order of m indices
        arr = np.arange(m)
        np.random.shuffle(arr)

        # pick 1000 for cross validation
        X_curval_0 = X_train[arr[:1000]]
        Y_curval_0 = Y_train_0[arr[:1000]]
        X_curtrain_0 = X_train[arr[1000:]]
        Y_curtrain_0 = Y_train_0[arr[1000:]]

        E_vals = [0.0] * len(gammas)
        for i in range(len(gammas)):
            gamma = gammas[i]

            svm = sklearn.svm.SVC(C=C, kernel='rbf', gamma=gamma, tol=1e-3, shrinking=True, verbose=False)
            svm.fit(X_curtrain_0, Y_curtrain_0)
            Y_curpredict_0 = svm.predict(X_curval_0)
            E_val = np.count_nonzero(Y_curval_0 != Y_curpredict_0)

            E_vals[i] = E_val

        counts[np.argmin(E_vals)] += 1

    for i in range(len(gammas)):
        print 'gamma', gammas[i], 'got picked', counts[i], 'times'
def test(path):
    """Classify every frame of a video and print the distinct predictions.

    Each frame is resized to 250x250, converted to grayscale and passed to
    ``cv2.PCACompute``; the flattened second return value is the feature
    vector. All feature vectors are then classified in one call to the
    module-level ``svm``.

    Arguments:
        path: video source handed to ``cv2.VideoCapture``.
    """
    cap = cv2.VideoCapture(path)
    testing = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was returned: end of the stream (or unreadable
                # source). Without this check the loop never terminates.
                break
            res = cv2.resize(frame, (250, 250))

            gray_image = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
            xarr = np.squeeze(np.array(gray_image).astype(np.float32))
            m, v = cv2.PCACompute(xarr)
            testing.append(np.array(v).ravel())
    finally:
        # Always hand the capture device/file back, even on an error.
        cap.release()

    logos = svm.predict(testing)
    for i in set(logos):
        print(i)
def test(path):
    """Print the distinct labels the SVM predicts over a video's frames.

    Every frame is resized to 250x250, turned into a grayscale float32
    matrix and passed to ``cv2.PCACompute``; the flattened second return
    value becomes that frame's feature vector. The module-level ``svm``
    classifies all vectors at once.

    Arguments:
        path: video source handed to ``cv2.VideoCapture``.
    """
    cap = cv2.VideoCapture(path)
    testing = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Stream exhausted (or unreadable): stop instead of passing
                # an empty frame to cv2.resize.
                break
            res = cv2.resize(frame, (250, 250))

            gray_image = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
            xarr = np.squeeze(np.array(gray_image).astype(np.float32))
            m, v = cv2.PCACompute(xarr)
            testing.append(np.array(v).ravel())
    finally:
        # Release the capture whether or not frame processing succeeded.
        cap.release()

    logos = svm.predict(testing)
    for i in set(logos):
        print(i)
Beispiel #48
0
def rbfSVM():
    """Tune, train and evaluate an RBF-kernel SVC on synthetic XOR data.

    200 random 2-D points are labelled +1/-1 by the XOR of their coordinate
    signs, split 70/30 into train and test sets, and a grid search over C and
    gamma picks the parameters for the final classifier. The decision regions
    are plotted and the test predictions are passed to ``evaluateSVM``.
    """
    # generate fake data
    np.random.seed(0)
    X_xor = np.random.randn(200, 2)
    opposite_signs = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
    y_xor = np.where(opposite_signs, 1, -1)

    # 70% training and 30% test
    X_train, X_test, y_train, y_test = train_test_split(
        X_xor, y_xor, test_size=0.3, random_state=109)

    # find best C and gamma parms
    search_space = {
        'gamma': np.logspace(-9, 3, 13),
        'C': np.logspace(-2, 10, 13),
    }
    splitter = StratifiedShuffleSplit(n_splits=5, test_size=0.2,
                                      random_state=42)
    grid = GridSearchCV(SVC(), param_grid=search_space, cv=splitter)
    grid.fit(X_train, y_train)

    print("The best parameters are %s with a score of %0.2f"
          % (grid.best_params_, grid.best_score_))

    # create classifier from the winning parameters
    best = grid.best_params_
    svm = SVC(kernel='rbf', random_state=0, gamma=best['gamma'], C=best['C'])
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    plot_decision_regions(X_train, X_test, y_train, y_pred, classifier=svm)

    # evaluate accuracy
    evaluateSVM(y_test, y_pred)
Beispiel #49
0
def predict(df, hate=10, threshold=0.6):
    '''Predict which comments in ``df["tweet"]`` are hateful.

    Optional parameters:
    - hate: maximum number of hateful comments returned in detail.
    - threshold: minimum "proba" value for a comment to count as hateful.

    Returns a list of 4 elements, in this order:
    - first_hate: up to ``hate`` hateful comments ("tweet" and "proba") as a
      list of dicts, highest "proba" first
    - count_hate: number of comments predicted 1 with proba >= threshold
    - count_comments: total number of predictions made
    - percentage: count_hate / count_comments as a percentage, rounded to
      two decimals '''
    test = pd.DataFrame(df)
    test_tweet = clean_tweets(test["tweet"])
    test["clean_tweet"] = test_tweet

    x_test_vec = vectorizer.transform(test_tweet)

    y_pred_svm = svm.predict(x_test_vec)
    test["prediction"] = y_pred_svm

    # NOTE(review): "proba" is taken from the FIRST predict_proba column while
    # the filter below keeps prediction == 1 -- confirm that column 0 really
    # is the hate class for this model.
    test["proba"] = [row[0] for row in svm.predict_proba(x_test_vec)]
    ranked = test.sort_values(by=["proba"], ascending=False)

    is_hateful = (ranked["prediction"] == 1) & (ranked["proba"] >= threshold)
    hateful_comments = ranked[is_hateful]

    count_hate = len(hateful_comments)
    count_comments = len(y_pred_svm)
    percentage = round(count_hate / count_comments * 100, 2)
    first_hate = hateful_comments[["tweet", "proba"]][:hate]

    return [
        first_hate.to_dict('records'), count_hate, count_comments, percentage
    ]
Beispiel #50
0
def main(argv):
    """Train and evaluate an SVM on each of ten data splits.

    The reader loads labels and conversation data, the conversations are
    tokenized against the bad-word list, and ``get_splits`` produces the
    per-split training and test material. For every split an SVM is built,
    the test tokens are transformed with ``tfidf_vect`` and the predictions
    are handed to the evaluator, which finally reports the average.
    """
    reader.read_labels()
    reader.read_data_files()
    reader.read_other_data_file()

    convo_ids = reader.get_all_convos()
    harrass_convos = reader.get_harrassment_convos()
    bad_words = set(reader.read_bad_words())

    labels, tokens, true_class = text_mining.tokenize_words(
        convo_ids, reader.conversation_text, reader.conversation_labels,
        bad_words)

    splits = get_splits(convo_ids, harrass_convos, labels, tokens, true_class)
    (train_labels, train_tokens, test_labels, test_tokens,
     test_true_class, train_true_labels) = splits

    for fold in range(10):
        model = get_svm(train_tokens[fold], train_true_labels[fold])
        predicted = model.predict(tfidf_vect.transform(test_tokens[fold]))
        evaluator.evalute(test_true_class[fold], predicted)
    evaluator.average()
def test_and_print_svm_regression(test_x, test_t, svm):

	test_size = len(test_t) 

	predictions = svm.predict(test_x)

	rounded_predictions = [round(pred, 0) for pred in predictions]

	num_correct = 0
	value = 0
	for i in range(test_size):
		temp = rounded_predictions[i]
		if temp > 5.0:
			temp = 5.0
		if temp < 1.0:
			temp = 1.0
		value += abs(temp - test_t[i])
		if rounded_predictions[i] == test_t[i]:
			num_correct += 1

	# print value/test_size #avg error
	print "Average  Error: " + str(value/test_size) #avg error
	print "Accuracy: " + str(float(num_correct)/test_size) #accuracy
def score(x, y, svm, interval):
    """Return the SVM's accuracy on the tail of the data.

    Predictions are made one row at a time for ``x[interval:]`` and compared
    against ``resultY[dataSplit:]``. The score is 0.0 when every positive was
    found but not a single negative was recognised; otherwise it is
    (true positives + true negatives) / (all samples compared).

    NOTE(review): the comparison reads the module-level ``dataSplit`` and
    ``resultY`` rather than the ``interval`` and ``y`` arguments -- confirm
    that callers always pass matching values.
    """
    guesses = [int(svm.predict([x[i]])) for i in range(interval, len(x))]

    tp = tn = 0.0
    totalp = totaln = 0.0
    for i in range(dataSplit, len(resultY)):
        actual = resultY[i]
        guess = guesses[i - dataSplit]
        if actual == 1:
            totalp += 1
            if guess == 1:
                tp += 1
        else:
            totaln += 1
            if actual == guess:
                tn += 1

    if tn == 0.0 and tp == totalp:
        return 0.0
    return float((tp + tn) / (totaln + totalp))
def predict(filename):
    """Print the Bayes and SVM predictions next to the actual result per row.

    The CSV's first row is skipped as a header. In every other row column 0
    is team A, column 1 team B, column 2 the date, column 3 the actual result
    and columns 4+ the attributes. A row's attributes are concatenated with
    those of the opposing team's row for the same date before being handed
    to the module-level ``bayes`` and ``svm`` models.

    Rows with no opposing row are skipped. A failing prediction is reported
    on stderr and does not stop the remaining rows.
    """
    import sys

    with open(filename, "rt") as data_file:
        reader = csv.reader(data_file)
        next(reader, None)  # skip the header; tolerate an empty file
        results = list(reader)

    for result in results:
        teamA, teamB, date = result[0], result[1], result[2]

        # Find the opponent's row for the same date (the last match wins).
        # Starting from None keeps a previous row's match from leaking in.
        teamB_data = None
        for row2 in results:
            if row2[0] == teamB and row2[2] == date:
                teamB_data = row2
        if teamB_data is None:
            continue

        data = result[4:] + teamB_data[4:]
        try:
            print("bayes: " +
                  bayes.predict({'attributes': dict(enumerate(data))}))
            print("svm: " +
                  svm.predict({'attributes': dict(enumerate(data))}))
            print("actual: " + result[3])
        except Exception as exc:
            # Best effort: report and move on, but let KeyboardInterrupt and
            # SystemExit propagate (a bare except swallowed those too).
            sys.stderr.write("prediction skipped for %s vs %s: %r\n"
                             % (teamA, teamB, exc))
Beispiel #54
0
def testSVM(title, svm, zero, one, two):
    numcorrect = 0
    numwrong = 0
    for correct, testing in ((0, zero),(1, one), (2, two)):
        for d in testing:
            r = svm.predict(d)[0]
            if(r == correct):
                numcorrect += 1
            else:
                numwrong += 1
    
    print title
    print "Correct", numcorrect
    print "Wrong", numwrong
    print numcorrect * 100 / (numcorrect + numwrong), '%', "\n"

    if write_results:
        f = open('results.txt', 'a')
        f.write(title + "\n")
        f.write("Correct: " + str(numcorrect) + "\n")
        f.write("Wrong: " + str(numwrong) + "\n")
        f.write(str(numcorrect * 100 / (numcorrect + numwrong)) + '%' + "\n\n")
        f.close()
Beispiel #55
0
def get_hard_negatives(svm, negative_set):
    """Collect descriptors of negative sub-windows that the SVM labels "pos".

    For every image a random 10% of its sub-windows is cut out and described
    with ``train_image``. All descriptors are classified in one batch and the
    ones predicted as "pos" are returned.
    """
    descriptors = []
    print("Getting hard features")
    for image_no, image in enumerate(negative_set):
        print(image_no, image.shape)

        subWindowsIndexes = getSubWindows(image)
        percentIndexes = 0.1
        sampled = random.sample(
            subWindowsIndexes, int(len(subWindowsIndexes) * percentIndexes))

        # These two results are not consumed below; the calls are kept
        # unchanged.
        gradientImage = computeCenteredGradient(
            image, (image.shape[0], image.shape[1]))
        integralHistogram = getIntegralHistogram(gradientImage)

        print(len(sampled))
        for window in sampled:
            top_left, bottom_right = window[0], window[1]
            # Cut the sub-window out of the image and describe it.
            sub_image = np.array(image[top_left[0]:bottom_right[0],
                                       top_left[1]:bottom_right[1]])
            descriptors.append(train_image(sub_image))

    print("Testing on ", len(descriptors), "negative inputs")
    svm_result = svm.predict(descriptors)
    print("Result size:", len(svm_result))

    hard_negatives = [descriptors[k]
                      for k, verdict in enumerate(svm_result)
                      if verdict == "pos"]
    return hard_negatives
    def runEnsemble(self):
        """
        Predicts the target label for a feature vector by combining and 
        weighting the predictions of the individual classifiers
        """
        for train, test in self.kf:
            # Extract models
            knn = self.models["KNN"]
            kmeans = self.models["KMEANS"]
            svm = self.models["SVM"]
            gmm = self.models["GMM"]
            
            # Set up training and test data
            train_set, train_labels = self.getCurrFoldTrainData(train)
            test_set, test_labels = self.getCurrFoldTestData(test)
            
            if increase:
                train_set, train_labels=self.subsetData(train_set, train_labels)
            
            # Fit the models
            knn.fit(train_set, train_labels)
            kmeans.fit(train_set, train_labels)
            svm.fit(train_set, train_labels)
            gmm.fit(train_set, train_labels)

            # Generate predictions by weighting each model using accuracies 
            # created from earlier runs
            knn_pred = knn.predict(test_set)
            kmeans_pred = kmeans.predict(test_set)
            svm_pred = svm.predict(test_set)
            gmm_pred = gmm.predict(test_set)
            
            preds = self.weightPredictions(knn_pred, kmeans_pred, \
                                           svm_pred, gmm_pred)
            acc = self.getAccuracy(test_labels, preds)
            print "(ENSEMBLE) Percent correct is", acc
def run(attrib_idx):
    """Fit a linear SVM on stored latents and save its first weight vector.

    Loads the dict stored in ``principal_directions/wspace_att_<idx>.npy``,
    uses the rows of ``results['dlatents']`` as inputs and the argmax of
    ``results[attrib_idx]`` along axis 1 as targets, and writes ``coef_[0]``
    of the fitted ``LinearSVC`` to ``principal_directions/direction_<idx>``
    (``np.save`` appends the ``.npy`` extension).

    Arguments:
        attrib_idx: integer attribute index; selects both the input file and
            the key of the target scores inside it.
    """
    # The file holds a pickled dict, so allow_pickle must be requested
    # explicitly (numpy >= 1.16.3 refuses object arrays by default).
    # Unpickling can execute arbitrary code: only load files you produced.
    results = np.load("principal_directions/wspace_att_%d.npy" % attrib_idx,
                      allow_pickle=True).item()

    # All samples are kept; the index list is where pruning would plug in.
    pruned_indices = list(range(results['latents'].shape[0]))

    # Fit SVM to the remaining samples.
    svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
    space = 'dlatents'
    svm_inputs = results[space][pruned_indices]

    svm = sklearn.svm.LinearSVC(C=1.0, dual=False, max_iter=10000)
    svm.fit(svm_inputs, svm_targets)

    w = svm.coef_[0]

    np.save("principal_directions/direction_%d" % attrib_idx, w)
Beispiel #58
0
# Train a random forest on an 80/20 split of df_data / df_labels and report
# its accuracy on the held-out part.

# Split the data and labels into training and test sets
X_train, X_test, y_train, y_test = train_test_split(df_data,
                                                    df_labels,
                                                    test_size=0.2,
                                                    random_state=1377)
print('ya')

# Selecting parameters (grid for the decision-tree search, currently unused)
#param = {'criterion':['gini','entropy'], 'splitter':['best','random'], 'min_samples_split':[2,5,10,15], 'min_weight_fraction_leaf':[0,.5], 'min_impurity_decrease':[0,2,3,5]}

# Make the model
#clf = GridSearchCV(tree.DecisionTreeClassifier(random_state=420), param)
clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=420)

# Train the model and report how long fitting took
start = time()
clf.fit(X_train, y_train)
print('yeet with a time of:', time() - start, 'seconds')

# Test the model
y_pred = clf.predict(X_test)

# Evaluation
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Saving the model (disabled)
#pkl_filename = "DecisionTree.pkl"
#with open(pkl_filename, 'wb') as file:
#    pickle.dump(clf, file)
#print('Model saved')
# see http://cs.nyu.edu/~rostami/presentations/L1_vs_L2.pdf for some more info.
"""
Note : "Choosing between scaling and standardizing is a confusing choice, 
you have to dive deeper in your data and learner that you are going to use to 
reach the decision. For starters, you can try both the methods and check cross 
validation score for making a choice." 
"""

#==============================================================================
# Support Vector Machine
#==============================================================================
# Build an RBF-kernel SVC with every constructor argument written out.
# NOTE: this rebinds the name ``svm`` -- from here on it refers to the
# classifier instance, no longer to whatever provided ``svm.SVC``
# (presumably the sklearn ``svm`` module; confirm against the imports).
svm = svm.SVC(
    C=1.0,  # let's look at all the param's I should be thinking about.
    cache_size=10,
    class_weight=None,
    coef0=0.0,
    decision_function_shape=None,
    degree=3,
    gamma=100,
    kernel='rbf',
    max_iter=-1,
    probability=True,
    random_state=None,
    shrinking=True,
    tol=0.001,
    verbose=1)

# Fit on the scaled training features; the targets are flattened to 1-D.
svm.fit(X_train_scale, Y_train.values.ravel())
# Report the accuracy on the scaled test features, followed by a 60-dash rule.
print "Accuracy_score of SVM with standardized data = ", \
accuracy_score(Y_test,svm.predict(X_test_scale)), '-'*60
Beispiel #60
0
plt.show()
#%%
svm = sklearn.svm.SVC(C = 1.0, gamma = numpy.power(10.0, -4.0))#C = numpy.power(10.0, 1.0)

print numpy.mean(sklearn.cross_validation.cross_val_score(svm, Xs4[:10000], labels4[:10000]))
svm.fit(Xs4[:10000], labels4[:10000])
#%%
import sklearn.linear_model

sgdsvm = sklearn.linear_model.SGDClassifier()

#print numpy.mean(sklearn.cross_validation.cross_val_score(svm, Xs3[idxs], labels3[idxs]))
idxs = range(numpy.random.randint(0, len(Xs3), 10000)
sgdsvm.fit(Xs3[idxs], labels3[idxs])
#%%
out = svm.predict(Xs3[0:10000]) - labels3[0:10000]
print numpy.count_nonzero(out)
#%%

for i, (index, row) in enumerate(df.iterrows()):#df.loc[87:88]
    im = skimage.io.imread(row['f'], as_grey = True)

    im = im[:(im.shape[0] / 8) * 8, :(im.shape[1] / 8) * 8]
    hogs = hog.run(im)

    hogs -= mean
    hogs /= std

    labels = gmm.predict(hogs.reshape((-1, hogs.shape[-1]))).reshape((hogs.shape[0], hogs.shape[1]))

    boxes = labels2boxes(labels, 20, b = 15, padding_mode = 'reflect')