# Build a partially labelled target vector: -1 marks an unlabelled row.
# Rows [0, N) may receive label 0 and rows [N, 2N) may receive label 1.
# (`supevised_data_points` is spelled as it is defined elsewhere in the file.)
ys = np.array([-1] * (2 * N))

# Floor division keeps the count an integer under true division as well.
# The count is capped at N because sampling without replacement cannot draw
# more rows than one half of the data holds.
labels_per_class = min(supevised_data_points // 2, N)

# Sample without replacement so that exactly `labels_per_class` distinct rows
# are labelled per class; repeated randint draws could pick a row twice.
ys[np.random.choice(N, size=labels_per_class, replace=False)] = 0
ys[N + np.random.choice(N, size=labels_per_class, replace=False)] = 1

# The purely supervised baseline only sees the labelled rows.
labelled_mask = ys != -1
Xsupervised = Xs[labelled_mask, :]
ysupervised = ys[labelled_mask]

# compare models
lbl = "Purely supervised QDA:"
print(lbl)
model = WQDA()
model.fit(Xsupervised, ysupervised)
evaluate_and_plot(model, Xs, ys, ytrue, lbl, 1)

lbl = "SelfLearning QDA:"
print(lbl)
model = SelfLearningModel(WQDA())
model.fit(Xs, ys)
evaluate_and_plot(model, Xs, ys, ytrue, lbl, 2)

lbl = "CPLE(pessimistic) QDA:"
print(lbl)
model = CPLELearningModel(WQDA(), predict_from_probabilities=True)
model.fit(Xs, ys)
evaluate_and_plot(model, Xs, ys, ytrue, lbl, 3)

lbl = "CPLE(optimistic) QDA:"
# Rows that carry a label (anything other than -1) feed the supervised baseline.
labelled_mask = ys != -1
Xsupervised = Xs[labelled_mask, :]
ysupervised = ys[labelled_mask]

plt.figure()
cols = [np.array([1, 0, 0]), np.array([0, 1, 0])]  # colors

# loop through and compare methods, one subplot of a 2x2 grid per method
for i in range(4):
    plt.subplot(2, 2, i + 1)

    t1 = time.time()
    # train model
    if i == 0:
        lbl = "Purely supervised QDA:"
        model = WQDA()
        model.fit(Xsupervised, ysupervised)
    else:
        if i == 1:
            lbl = "SelfLearning QDA:"
            model = SelfLearningModel(WQDA())
        elif i == 2:
            lbl = "CPLE(pessimistic) QDA:"
            model = CPLELearningModel(WQDA(), predict_from_probabilities=True)
        elif i == 3:
            lbl = "CPLE(optimistic) QDA:"
            # Ask for the optimistic variant on this instance only. Assigning
            # CPLELearningModel.pessimistic on the class changes shared state
            # for every model created afterwards.
            # NOTE(review): assumes the constructor accepts `pessimistic`;
            # confirm against the CPLELearningModel definition.
            model = CPLELearningModel(
                WQDA(), pessimistic=False, predict_from_probabilities=True
            )
        # The semi-supervised models train on all rows, labelled or not.
        model.fit(Xs, ys)

    # Function-call prints with %-formatting emit the same text as the former
    # print statements and parse on both Python 2 and Python 3.
    print("")
    print(lbl)
    print("Model training time:  %s" % round(time.time() - t1, 3))
# Split the data into test and train data: the trailing `test_pcnt` fraction
# of the rows is held out, the leading rows are used for training.
test_pcnt = 0.15
split_at = int(len(X) * (1 - test_pcnt))  # computed once, shared by X and y
X_train, X_test = X[:split_at, :], X[split_at:, :]
y_train, y_test = y[:split_at], y[split_at:]
ytrue = y_train
print("%s %s" % (X_train.shape, y_train.shape))

# Just supervised score
basemodel = WQDA()  # weighted Quadratic Discriminant Analysis
basemodel.fit(X_train, ytrue)
# Score once and reuse the value instead of re-scoring the test set per use.
wqda_score = basemodel.score(X_test, y_test)
print("full labeled wqda score %s" % wqda_score)
# 1.96 * sqrt(p * (1 - p) / n): the printed label says "standard error", but
# the value is 1.96 standard errors of the score p over n test rows.
print("standard error of wqda %s" % (
    1.96 * np.sqrt(wqda_score * (1 - wqda_score) / X_test.shape[0])))

# Just supervised score
basemodel = SGDClassifier(loss='log', penalty='l1')  # scikit logistic regression
basemodel.fit(X_train, ytrue)
logreg_score = basemodel.score(X_test, y_test)
print("full labeled log.reg. score %s" % logreg_score)
print("standard error of log reg %s" % (
    1.96 * np.sqrt(logreg_score * (1 - logreg_score) / X_test.shape[0])))
heart = fetch_mldata("heart")
X = heart.data
# Work on a copy of the targets and recode the -1 class as 0, because -1 is
# reserved below for "unlabeled".
ytrue = np.copy(heart.target)
ytrue[ytrue == -1] = 0

# label a few points
labeled_N = 30
ys = np.array([-1] * len(ytrue))  # -1 denotes unlabeled point
# Floor division: random.sample needs an integer sample size.
per_class = labeled_N // 2
# random.sample requires a real sequence (ndarrays are rejected on
# Python 3.11+), so the candidate row indices are converted to lists first.
random_labeled_points = (
    random.sample(np.where(ytrue == 0)[0].tolist(), per_class)
    + random.sample(np.where(ytrue == 1)[0].tolist(), per_class)
)
ys[random_labeled_points] = ytrue[random_labeled_points]

# supervised score
basemodel = WQDA()  # weighted Quadratic Discriminant Analysis
basemodel.fit(X[random_labeled_points, :], ys[random_labeled_points])

# fast (but naive, unsafe) self learning framework
ssmodel = SelfLearningModel(basemodel)
# Fit once and keep the return value; the model is not trained a second time
# just to print what fit() returned.
fitted = ssmodel.fit(X, ys)
print("this is the fitted thing", fitted)
y_score = ssmodel.predict(heart.data)
# The message says "log.reg." although the wrapped base model here is WQDA;
# the text is left unchanged.
print("self-learning log.reg. score", ssmodel.score(X, ytrue))

fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(2):
    pass  # NOTE(review): the loop body lies beyond the visible chunk