# Semi-supervised demo: hide most labels, then compare a purely supervised
# model against self-learning and CPLE wrappers around several base models.

# Start with every point unlabeled; -1 denotes an unlabeled point.
ys = np.array([-1] * len(ytrue))

# Reveal the true label for a class-balanced random subset (labeled_N // 2
# points from each of the classes 0 and 1).
# random.sample needs an integer count and a real sequence: "/" yields a float
# on Python 3 and a NumPy array is not accepted as a population there, so use
# floor division and convert the index arrays to plain lists.
random_labeled_points = (
    random.sample(np.where(ytrue == 0)[0].tolist(), labeled_N // 2)
    + random.sample(np.where(ytrue == 1)[0].tolist(), labeled_N // 2)
)
ys[random_labeled_points] = ytrue[random_labeled_points]

# supervised score
# basemodel = WQDA()  # weighted Quadratic Discriminant Analysis
# NOTE(review): newer scikit-learn releases spell this loss "log_loss" --
# confirm against the installed version before changing it.
basemodel = SGDClassifier(loss='log', penalty='l1')  # scikit logistic regression
basemodel.fit(X[random_labeled_points, :], ys[random_labeled_points])
print("supervised log.reg. score", basemodel.score(X, ytrue))

# fast (but naive, unsafe) self learning framework
ssmodel = SelfLearningModel(basemodel)
ssmodel.fit(X, ys)
print("self-learning log.reg. score", ssmodel.score(X, ytrue))

# semi-supervised score (base model has to be able to take weighted samples)
ssmodel = CPLELearningModel(basemodel)
ssmodel.fit(X, ys)
print("CPLE semi-supervised log.reg. score", ssmodel.score(X, ytrue))

# semi-supervised score, WQDA model (weighted Quadratic Discriminant Analysis)
ssmodel = CPLELearningModel(WQDA(), predict_from_probabilities=True)
ssmodel.fit(X, ys)
print("CPLE semi-supervised WQDA score", ssmodel.score(X, ytrue))

# semi-supervised score, RBF SVM model
ssmodel = CPLELearningModel(
    sklearn.svm.SVC(kernel="rbf", probability=True),
    predict_from_probabilities=True,
)  # RBF SVM
# Semi-supervised demo: hide most labels, then compare a purely supervised
# model against self-learning and CPLE wrappers around several base models.

# Start with every point unlabeled; -1 denotes an unlabeled point.
ys = np.array([-1] * len(ytrue))

# Reveal the true label for a class-balanced random subset (labeled_N // 2
# points from each of the classes 0 and 1).
# Floor division keeps the sample size an integer on Python 3 as well, and
# .tolist() hands random.sample a real sequence (it rejects NumPy arrays on
# Python 3).
random_labeled_points = (
    random.sample(np.where(ytrue == 0)[0].tolist(), labeled_N // 2)
    + random.sample(np.where(ytrue == 1)[0].tolist(), labeled_N // 2)
)
ys[random_labeled_points] = ytrue[random_labeled_points]

# supervised score
# basemodel = WQDA()  # weighted Quadratic Discriminant Analysis
# NOTE(review): newer scikit-learn releases spell this loss "log_loss" --
# confirm against the installed version before changing it.
basemodel = SGDClassifier(loss="log", penalty="l1")  # scikit logistic regression
basemodel.fit(X[random_labeled_points, :], ys[random_labeled_points])
# print() calls replace the Python 2 print statements, which are a syntax
# error on Python 3.
print("supervised log.reg. score", basemodel.score(X, ytrue))

# fast (but naive, unsafe) self learning framework
ssmodel = SelfLearningModel(basemodel)
ssmodel.fit(X, ys)
print("self-learning log.reg. score", ssmodel.score(X, ytrue))

# semi-supervised score (base model has to be able to take weighted samples)
ssmodel = CPLELearningModel(basemodel)
ssmodel.fit(X, ys)
print("CPLE semi-supervised log.reg. score", ssmodel.score(X, ytrue))

# semi-supervised score, WQDA model (weighted Quadratic Discriminant Analysis)
ssmodel = CPLELearningModel(WQDA(), predict_from_probabilities=True)
ssmodel.fit(X, ys)
print("CPLE semi-supervised WQDA score", ssmodel.score(X, ytrue))

# semi-supervised score, RBF SVM model
ssmodel = CPLELearningModel(
    sklearn.svm.SVC(kernel="rbf", probability=True),
    predict_from_probabilities=True,
)  # RBF SVM
ssmodel.fit(X, ys)
print("CPLE semi-supervised RBF SVM score", ssmodel.score(X, ytrue))
# NOTE(review): this excerpt starts mid-script. `kernel` and `Xsupervised` are
# read below before the assignments visible further down, so they are
# presumably also set earlier, outside this view -- confirm.

# Labels of the points that are not marked -1 (i.e. the labeled subset).
ysupervised = ys[ys != -1]

# Base model: SVC trained on the labeled subset only.
lbl = "Base model SVM(kernel=rbf):"
print(lbl)
basemodel = sklearn.svm.SVC(kernel=kernel, probability=True)
basemodel.fit(Xsupervised, ysupervised)
evaluate(basemodel, X, ys, ytrue, lbl)

# Disabled alternative base model (SGD classifier):
# basemodel = SGDClassifier(loss='hinge', penalty='l1', tol=1e-3, max_iter=1000) # scikit logistic regression
# basemodel.fit(X[random_labeled_points, :], ys[random_labeled_points])
# print ("supervised log.reg. score", basemodel.score(X, ytrue))

# fast (but naive, unsafe) self learning framework
# NOTE(review): the wrapped base model here is the SVC above, so the
# "log.reg." wording in the printed label looks stale -- confirm.
ssmodel = SelfLearningModel(basemodel)
ssmodel.fit(X, ys)
self_learning_score = ssmodel.score(X, ytrue)
print("self-learning log.reg. score", self_learning_score)

# Purely supervised SVM, again fitted on the labeled subset only.
kernel = "rbf"
labeled_mask = ys != -1
Xsupervised = X[labeled_mask, :]
ysupervised = ys[labeled_mask]
lbl = "Purely supervised SVM:"
print(lbl)
model = sklearn.svm.SVC(kernel=kernel, probability=True)
model.fit(Xsupervised, ysupervised)
evaluate(model, X, ys, ytrue, lbl)

# S3VM model (the excerpt ends right after construction).
lbl = "S3VM (Gieseke et al. 2012):"
print(lbl)
model = scikitTSVM.SKTSVM(kernel=kernel)
# Collect one accuracy per method for this round into the *_active lists.
# Results are recorded whenever a score was produced: the guards test
# "is not None" rather than truthiness, because a plain "if acc:" would also
# silently drop a legitimate accuracy of exactly 0.0.

# supervised score: SGD classifier (hinge loss, L1 penalty) fitted on the
# labeled points only
basemodel = SGDClassifier(loss='hinge', penalty='l1', tol=1e-3, max_iter=1000)
basemodel.fit(X[random_labeled_points, :], ys[random_labeled_points])
acc = basemodel.score(X, ytrue)
if acc is not None:
    sgd_active.append(acc)

# self-learning wrapper around an SVC, fitted on all points
# (ys marks unlabeled points with -1)
kernel = "rbf"
svm_model = sklearn.svm.SVC(kernel=kernel, probability=True)
ssmodel = SelfLearningModel(svm_model)
ssmodel.fit(X, ys)
acc = ssmodel.score(X, ytrue)
if acc is not None:
    self_learning_active.append(acc)

# purely supervised SVM on the labeled subset
Xsupervised = X[ys != -1, :]
ysupervised = ys[ys != -1]
lbl = "Purely supervised SVM:"
model = sklearn.svm.SVC(kernel=kernel, probability=True)
model.fit(Xsupervised, ysupervised)
acc = evaluate(model, X, ys, ytrue, lbl)
print("SVM Accuracy:{}".format(acc))
if acc is not None:
    svm_active.append(acc)

lbl = "S3VM (Gieseke et al. 2012):"
# NOTE(review): this excerpt is flattened and starts mid-script. The per-trial
# statements presumably sit inside a loop over i whose header is outside this
# view, and the summary prints presumably follow that loop -- restore the
# surrounding indentation when splicing this back in.

# --- per-trial bookkeeping: supervised base model ---
# Score once and reuse the value instead of running the model twice per trial.
trial_super_acc = basemodel.score(X_test, y_test)
sum_super += trial_super_acc
super_acc[i] = trial_super_acc
# 1.96 * sqrt(p * (1 - p) / n): half-width of the 95% normal-approximation
# interval for an accuracy p measured on n test points.
sum_super_err += 1.96 * np.sqrt(super_acc[i] * (1 - super_acc[i]) / X_test.shape[0])

# fast (but naive, unsafe) self learning framework
ssmodel = SelfLearningModel(basemodel)
ssmodel.fit(X_model, ys)
# print("self-learning log.reg. score", ssmodel.score(X_test, y_test))

# if j == 2:
#     # Plot the ssmodel
#     evaluate_and_plot(ssmodel, X_model, ys, ytrue, "Self-Learning", subplot = 2, block=True)

# --- per-trial bookkeeping: self-learning model (scored once, as above) ---
trial_semi_acc = ssmodel.score(X_test, y_test)
sum_semi += trial_semi_acc
semi_acc[i] = trial_semi_acc
sum_semi_err += 1.96 * np.sqrt(semi_acc[i] * (1 - semi_acc[i]) / X_test.shape[0])

# if j == 2:
#     # Save the figure
#     plt.savefig(('comparisons_' + str(j) + '_' + str(i) + '.png'))

# --- summary over num_trials trials ---
# print() calls replace the Python 2 print statements, which are a syntax
# error on Python 3; the printed text is unchanged.
print("average supervised accuracy: ", sum_super / num_trials)
sup_accs[cnt] = sum_super / num_trials
# print("standard deviation of supervised accuracy: ", np.std(super_acc, ddof=1))
# NOTE(review): the value printed below is the mean 95% interval half-width
# accumulated above, not a plain standard error -- confirm the label is intended.
print("standard error of supervised: ", sum_super_err / num_trials)
print("average semi-supervised accuracy: ", sum_semi / num_trials)
semi_accs[cnt] = sum_semi / num_trials
# Record one accuracy per method for this round in the *_active lists.

# Supervised score: SGD classifier (hinge loss, L1 penalty) trained on the
# labeled points only, scored against the true labels of every point.
basemodel = SGDClassifier(loss="hinge", penalty="l1", tol=1e-3, max_iter=1000)
basemodel.fit(X[random_labeled_points, :], ys[random_labeled_points])
sgd_score = basemodel.score(X, ytrue)
sgd_active.append(sgd_score)

# Self-learning wrapper around an SVC, fitted on all points
# (ys marks unlabeled points with -1).
kernel = "rbf"
svm_model = sklearn.svm.SVC(kernel=kernel, probability=True)
ssmodel = SelfLearningModel(svm_model)
ssmodel.fit(X, ys)
self_learning_score = ssmodel.score(X, ytrue)
self_learning_active.append(self_learning_score)

# Purely supervised SVM: fit on the labeled subset only.
labeled_mask = ys != -1
Xsupervised = X[labeled_mask, :]
ysupervised = ys[labeled_mask]
lbl = "Purely supervised SVM:"
model = sklearn.svm.SVC(kernel=kernel, probability=True)
model.fit(Xsupervised, ysupervised)
acc = evaluate(model, X, ys, ytrue, lbl)
svm_active.append(acc)

# S3VM: fitted on all points, labeled and unlabeled.
lbl = "S3VM (Gieseke et al. 2012):"
model = scikitTSVM.SKTSVM(kernel=kernel)
model.fit(X, ys)
acc = evaluate(model, X, ys, ytrue, lbl)
s3vm_active.append(acc)