def CPLELearningWrapper(X_train, y_train, X_test):
    """Fit a CPLE semi-supervised model and predict labels for ``X_test``.

    The rows of ``X_test`` are appended to ``X_train`` and given the label
    ``-1`` before fitting, so the model is trained on both sets at once and
    is then asked to predict the ``X_test`` rows.

    Args:
        X_train: Feature rows with known labels; must be concatenable with
            ``X_test`` by ``np.concatenate``.
        y_train: Labels for ``X_train`` (anything ``np.array`` accepts).
        X_test: Feature rows to predict.

    Returns:
        The result of ``CPLELearningModel.predict(X_test)``.
    """
    # Imports stay function-local, as in the original implementation.
    from frameworks.CPLELearning import CPLELearningModel
    # Use the public import path: ``sklearn.linear_model.stochastic_gradient``
    # was a private module and is not available in newer scikit-learn
    # releases, while ``sklearn.linear_model`` exports SGDClassifier in both
    # old and new versions.
    from sklearn.linear_model import SGDClassifier

    # NOTE(review): newer scikit-learn spells this loss 'log_loss'; 'log' is
    # kept so behaviour is unchanged on the version this file targets.
    clf = SGDClassifier(loss='log', penalty='l1')
    ssmodel = CPLELearningModel(clf)

    # One -1 label per test row; presumably the "unlabeled" marker expected
    # by CPLELearningModel -- confirm against that class.
    n_test = len(X_test)
    newlabels = np.concatenate((np.array(y_train), -np.ones(n_test)))

    ssmodel.fit(np.concatenate((X_train, X_test)), newlabels)
    return ssmodel.predict(X_test)
lbl = "S3VM (Gieseke et al. 2012):" model = scikitTSVM.SKTSVM(kernel=kernel) elif i == 2: lbl = "CPLE(pessimistic) SVM:" model = CPLELearningModel(sklearn.svm.SVC(kernel=kernel, probability=True)) elif i == 3: lbl = "CPLE(optimistic) SVM:" CPLELearningModel.pessimistic = False model = CPLELearningModel(sklearn.svm.SVC(kernel=kernel, probability=True)) model.fit(Xs, ys.astype(int)) print "" print lbl print "Model training time: ", round(time.time()-t1, 3) # predict, and evaluate pred = model.predict(Xs) acc = np.mean(pred==ytrue) print "accuracy:", round(acc, 3) # plot probabilities [minx, maxx] = [np.min(Xs[:, 0]), np.max(Xs[:, 0])] [miny, maxy] = [np.min(Xs[:, 1]), np.max(Xs[:, 1])] gridsize = 100 xx = np.linspace(minx, maxx, gridsize) yy = np.linspace(miny, maxy, gridsize).T xx, yy = np.meshgrid(xx, yy) Xfull = np.c_[xx.ravel(), yy.ravel()] probas = model.predict_proba(Xfull) plt.imshow(probas[:, 1].reshape((gridsize, gridsize)), extent=(minx, maxx, miny, maxy), origin='lower')
# Script fragment: report dataset sizes, fit a CPLE-wrapped SVM and a
# LabelSpreading model on the same training data, and record the CPLE
# results for run index `i`.
# NOTE(review): `len_train`, `y_labelled`, `y_minusone`, `train_data`,
# `X_extra`, `y_extra`, `sd`, `i`, `error_rate_svm` and `logLik_svm` are
# defined outside this fragment.
print 'No. of training data:', len_train
# Final training labels: `y_labelled` followed by `y_minusone` along axis 0.
# presumably `y_minusone` holds -1 for the unlabeled rows -- TODO confirm.
train_labels = np.concatenate((y_labelled, y_minusone), axis = 0)
len_labels = len(train_labels)
print 'No. of training labels:', len_labels
# Print the number of test samples.
print 'No. of test data:', len(y_extra)
################################################################################
# CPLE wrapped around an RBF-kernel SVC with probability estimates enabled.
################################################################################
lbl = "CPLE(pessimistic) SVM:"
print lbl
model = CPLELearningModel(svm.SVC(kernel="rbf", probability=True), predict_from_probabilities=True, max_iter = 5000 )
model.fit(train_data, train_labels)
y_predict = model.predict(X_extra)
accuracy = accuracy_score(y_extra, y_predict)
print accuracy
# Error rate is the complement of the accuracy on the held-out set.
error_rate_svm[i] = 1 - accuracy
# NOTE(review): despite the name, this stores the NEGATED sum of normal
# log-densities of `y_extra` centred at `y_predict` with scale `sd`,
# i.e. a negative log-likelihood.
logLik_svm[i] = -np.sum( stats.norm.logpdf(y_extra, loc=y_predict, scale=sd) )
print 'CPLE Error Rate:', error_rate_svm[i], logLik_svm[i]
################################################################################
# Semi-supervised classifier with a KNN kernel.
# NOTE(review): the printed label says "Label Propagation" but the estimator
# constructed below is LabelSpreading.
################################################################################
lbl = "Label Propagation(KNN):"
print lbl
knn_model = label_propagation.LabelSpreading(kernel='knn', alpha=0.0001, max_iter=3000)
knn_model.fit(train_data, train_labels)
# Overwrites the CPLE predictions held in `y_predict`.
y_predict = knn_model.predict(X_extra)