# Build the label vector: -1 marks an unlabeled point.  There are 2*N
# samples; the first N are assumed class 0 and the last N class 1
# (TODO confirm against the layout of Xs -- not visible here).
ys = np.array([-1] * (2 * N))

# Label supevised_data_points/2 samples from each class.  '//' keeps the
# count an int under Python 3 as well, and sampling WITHOUT replacement
# guarantees exactly the requested number of labeled points (the original
# randint loop could draw the same index twice and label fewer).
ys[np.random.choice(N, supevised_data_points // 2, replace=False)] = 0
ys[np.random.choice(np.arange(N, 2 * N), supevised_data_points // 2, replace=False)] = 1

# Keep only the labeled subset for purely supervised training.
Xsupervised = Xs[ys != -1, :]
ysupervised = ys[ys != -1]

# compare models

lbl = "Purely supervised QDA:"
print lbl
model = WQDA()
model.fit(Xsupervised, ysupervised)
evaluate_and_plot(model, Xs, ys, ytrue, lbl, 1)

lbl = "SelfLearning QDA:"
print lbl
model = SelfLearningModel(WQDA())
model.fit(Xs, ys)
evaluate_and_plot(model, Xs, ys, ytrue, lbl, 2)

lbl = "CPLE(pessimistic) QDA:"
print lbl
model = CPLELearningModel(WQDA(), predict_from_probabilities=True)
model.fit(Xs, ys)
evaluate_and_plot(model, Xs, ys, ytrue, lbl, 3)

# NOTE(review): this label is dead -- the loop below reassigns lbl before
# every use; presumably a leftover from the straight-line version above.
lbl = "CPLE(optimistic) QDA:"
# Restrict to the labeled points (ys == -1 marks unlabeled samples).
Xsupervised = Xs[ys!=-1, :]
ysupervised = ys[ys!=-1]

plt.figure()
cols = [np.array([1,0,0]),np.array([0,1,0])] # per-class plot colors (red, green)

# Loop over the four methods, training each in its own 2x2 subplot slot
# and reporting wall-clock training time.
for i in range(4):
    plt.subplot(2,2,i+1)
    
    t1=time.time()
    # train model
    if i == 0:
        # Purely supervised baseline trains on the labeled subset only.
        lbl= "Purely supervised QDA:"
        model = WQDA()
        model.fit(Xsupervised, ysupervised)
    else:
        # Semi-supervised methods train on the full (X, partial-label) set.
        if i == 1:
            lbl= "SelfLearning QDA:"
            model = SelfLearningModel(WQDA())
        if i == 2:
            lbl= "CPLE(pessimistic) QDA:"
            model = CPLELearningModel(WQDA(), predict_from_probabilities=True)
        elif i == 3:
            lbl= "CPLE(optimistic) QDA:"
            # Class-level switch: flips ALL CPLELearningModel instances
            # created from here on to optimistic mode, not just this one.
            CPLELearningModel.pessimistic = False
            model = CPLELearningModel(WQDA(), predict_from_probabilities=True)
        model.fit(Xs, ys)
    print ""
    print lbl
    print "Model training time: ", round(time.time()-t1, 3)
    # NOTE(review): this chunk appears truncated here -- the original loop
    # body likely continued with evaluation/plotting after the timing line.
# ---- Ejemplo n.º 3 (example 3) ----
# Snippet-aggregator separator; the stray "0" that followed it was an
# extraction artifact.  Commented out so the file stays parseable.
    #   Split the data into test and train data
    test_pcnt = 0.15
    X_train = X[:int(len(X) * (1 - test_pcnt)), :]
    X_test = X[int(len(X) * (1 - test_pcnt)):, :]
    y_train = y[:int(len(X) * (1 - test_pcnt))]
    y_test = y[int(len(X) * (1 - test_pcnt)):]

    ytrue = y_train

    print X_train.shape, y_train.shape

    # Just supervised score
    basemodel = WQDA()  # weighted Quadratic Discriminant Analysis
    #basemodel = SGDClassifier(loss='log', penalty='l1') # scikit logistic regression
    basemodel.fit(X_train, ytrue)
    print "full labeled wqda score", basemodel.score(X_test, y_test)
    print "standard error of wqda", 1.96 * np.sqrt(
        basemodel.score(X_test, y_test) *
        (1 - basemodel.score(X_test, y_test)) / X_test.shape[0])

    # Just supervised score
    #basemodel = WQDA() # weighted Quadratic Discriminant Analysis
    basemodel = SGDClassifier(loss='log',
                              penalty='l1')  # scikit logistic regression
    basemodel.fit(X_train, ytrue)
    print "full labeled log.reg. score", basemodel.score(X_test, y_test)
    print "standard error of log reg", 1.96 * np.sqrt(
        basemodel.score(X_test, y_test) *
        (1 - basemodel.score(X_test, y_test)) / X_test.shape[0])
# ---- Ejemplo n.º 4 (example 4) ----
# Snippet-aggregator separator; the stray "0" that followed it was an
# extraction artifact.  Commented out so the file stays parseable.
# Load the "heart" data set and binarize its targets from {-1, 1} to {0, 1}.
# NOTE(review): fetch_mldata was removed from scikit-learn (mldata.org is
# gone); migrating to fetch_openml would change this call -- confirm the
# sklearn version this file targets before touching it.
heart = fetch_mldata("heart")
X = heart.data
ytrue = np.copy(heart.target)
ytrue[ytrue==-1]=0

# Label a few points, balanced: half from each class.
labeled_N = 30
ys = np.array([-1]*len(ytrue)) # -1 denotes unlabeled point
# '//' keeps the sample size an int under Python 3, and list() is required
# because Python 3's random.sample rejects bare numpy arrays (they are not
# registered as sequences); neither change affects Python 2 behavior.
random_labeled_points = random.sample(list(np.where(ytrue == 0)[0]), labeled_N//2)+\
                        random.sample(list(np.where(ytrue == 1)[0]), labeled_N//2)
ys[random_labeled_points] = ytrue[random_labeled_points]

# Supervised baseline trained on the labeled subset only.
basemodel = WQDA() # weighted Quadratic Discriminant Analysis
#basemodel = SGDClassifier(loss='log', penalty='l1') # scikit logistic regression
basemodel.fit(X[random_labeled_points, :], ys[random_labeled_points])
#print "supervised log.reg. score", basemodel.score(X, ytrue)

# Fast (but naive, unsafe) self-learning framework.
ssmodel = SelfLearningModel(basemodel)
# Fit once and print the returned estimator -- the original fit the model a
# second time just to print fit()'s return value, doubling the training cost.
fitted = ssmodel.fit(X, ys)
print("this is the fitted thing", fitted)
y_score = ssmodel.predict(heart.data)
#print "heart.target", heart.target
#print "this is the prediction", y_score
print("self-learning log.reg. score", ssmodel.score(X, ytrue))

# Per-class ROC bookkeeping, populated by the (truncated) loop below.
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(2):