Example #2
def curve_per_subject(subject, data_path, test_labels):
    d = load_train_data(data_path, subject)
    x, y_10m = d['x'], d['y']
    n_train_examples = x.shape[0]
    n_timesteps = x.shape[-1]
    print('n_preictal', np.sum(y_10m))
    print('n_interictal', np.sum(y_10m - 1))

    x, y = reshape_data(x, y_10m)
    data_scaler = StandardScaler()
    x = data_scaler.fit_transform(x)

    lda = LDA()
    lda.fit(x, y)

    pred_1m = lda.predict_proba(x)[:, 1]
    pred_10m = np.reshape(pred_1m, (n_train_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)
    fpr, tpr, threshold = roc_curve(y_10m, pred_10m)
    c = np.sqrt((1 - tpr) ** 2 + fpr ** 2)
    opt_threshold = threshold[np.where(c == np.min(c))[0]][-1]
    print(opt_threshold)

    # ------- TEST ---------------

    d = load_test_data(data_path, subject)
    x_test, id = d['x'], d['id']
    n_test_examples = x_test.shape[0]
    n_timesteps = x_test.shape[3]
    x_test = reshape_data(x_test)
    x_test = data_scaler.transform(x_test)

    pred_1m = lda.predict_proba(x_test)[:, 1]
    pred_10m = np.reshape(pred_1m, (n_test_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)

    y_pred = np.zeros_like(test_labels)
    y_pred[np.where(pred_10m >= opt_threshold)] = 1
    cm = confusion_matrix(test_labels, y_pred)
    print(print_cm(cm, labels=['interictal', 'preictal']))
    sn = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
    sp = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
    print(sn, sp)

    sn, sp = [], []
    t_list = np.arange(0.0, 1.0, 0.01)
    for t in t_list:
        y_pred = np.zeros_like(test_labels)
        y_pred[np.where(pred_10m >= t)] = 1
        cm = confusion_matrix(test_labels, y_pred)
        sn_t = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
        sp_t = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
        sn.append(sn_t)
        sp.append(sp_t)

    return t_list, sn, sp
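# The threshold selection above picks the ROC operating point closest to the
# ideal corner (fpr = 0, tpr = 1). A minimal, self-contained sketch of that
# step on made-up scores (the toy arrays are illustrative, not project data):
import numpy as np
from sklearn.metrics import roc_curve

y_true = np.array([0, 0, 1, 0, 1, 1, 0, 1])
scores = np.array([0.1, 0.35, 0.8, 0.4, 0.65, 0.9, 0.5, 0.7])
fpr, tpr, thresholds = roc_curve(y_true, scores)
dist = np.sqrt((1 - tpr) ** 2 + fpr ** 2)   # distance to the (0, 1) corner
opt_threshold = thresholds[np.argmin(dist)]
print(opt_threshold)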
def score(train_X, train_y):
    X_train, X_valid, y_train, y_valid = train_test_split(train_X, train_y, test_size=0.01, random_state=10)

    clf = LDA()
    clf.fit(X_train, y_train)
    y_pred = clf.predict_proba(X_valid)
    return log_loss(y_valid, y_pred)
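# A hedged usage sketch for score() above on synthetic data; the imports and
# array sizes are assumptions, not part of the original module (which already
# has LDA, train_test_split and log_loss in scope).
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss

rng = np.random.RandomState(0)
train_X = rng.randn(2000, 10)            # 2000 samples, 10 features (made up)
train_y = rng.randint(0, 2, size=2000)   # binary labels
print(score(train_X, train_y))           # log loss on the 1% hold-out split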
Example #4
def train_predict(X, y, Xt, yt=[], c=1):
    if c == 1:
        #clf=xgb_classifier(num_round=45,eta=0.1,min_child_weight=5,depth=10, subsample=0.5,col=1)
        clf = xgb_classifier(num_round=45, eta=0.1, min_child_weight=20, depth=20, subsample=0.1, col=0.7)
        #clf=xgb_classifier(num_round=300,eta=0.01,min_child_weight=20,depth=8, subsample=0.1,col=0.7)
        return clf.train_predict(X, y, Xt, yt)
    elif c == 2:
        clf = LDA()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
    elif c == 3:
        clf = LogisticRegression()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
Example #6
def LDAClassify_Proba(enrollment_id, trainData, trainLabel, testData):
    clf = LDA(solver='lsqr')
    #clf = LDA()
    clf.fit(trainData, ravel(trainLabel))
    testLabel = clf.predict_proba(testData)[:,1]
    saveResult(enrollment_id, testLabel, 'Proba_sklearn_LDA.csv')
    return testLabel
def train_predict(X, y, Xt, yt=[], c=1):
    if c == 1:
        #clf=xgb_classifier(num_round=45,eta=0.1,min_child_weight=5,depth=10, subsample=0.5,col=1)
        #clf=xgb_classifier(num_round=55,eta=0.1,min_child_weight=20,depth=20, subsample=0.1,col=0.7)
        clf = xgb_classifier(num_round=500, eta=0.01, min_child_weight=20, depth=10, subsample=0.1, col=0.7)
        #clf=xgb_classifier(num_round=500,eta=0.01,min_child_weight=20,depth=10, subsample=0.1,col=0.7) # First digit touch - 0.966262479533 #BothStartLoadPhase-0.969428966329
        #clf=xgb_classifier(num_round=500,eta=0.01,min_child_weight=20,depth=10, subsample=0.1,col=0.7)  # HandStart - 0.930538668081
        return clf.train_predict(X, y, Xt, yt)
    elif c == 2:
        clf = LDA()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
    elif c == 3:
        clf = LogisticRegression()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
Example #8
def __call__(self, x, y, inputs, labels):
    classes = numpy.unique(labels)
    if len(classes) == 1:
        if y == classes[0]:
            return 1
        else:
            return -1
    lda = LDA().fit(inputs, labels)
    prob = lda.predict_proba([x])[0][lda.classes_.tolist().index(y)]
    return 2 * prob - 1
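# Self-contained sketch of the 2 * prob - 1 mapping used by __call__ above:
# certainty for the queried label y maps to +1, certainty against it to -1.
# The toy data and the LDA import are illustrative assumptions.
import numpy
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

inputs = numpy.array([[0.0], [0.2], [0.4], [1.0], [1.2], [1.4]])
labels = numpy.array([0, 0, 0, 1, 1, 1])
x, y = [1.3], 1
lda = LDA().fit(inputs, labels)
prob = lda.predict_proba([x])[0][lda.classes_.tolist().index(y)]
print(2 * prob - 1)   # close to +1: confident that x belongs to class 1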
Example #9
def train_predict(X, y, Xt, yt=[], c=1):
    if c == 1:
        #clf=xgb_classifier(num_round=45,eta=0.1,min_child_weight=5,depth=10, subsample=0.5,col=1)
        #clf=xgb_classifier(num_round=55,eta=0.1,min_child_weight=20,depth=20, subsample=0.1,col=0.7)
        clf = xgb_classifier(num_round=500,
                             eta=0.01,
                             min_child_weight=20,
                             depth=10,
                             subsample=0.1,
                             col=0.7)
        #clf=xgb_classifier(num_round=500,eta=0.01,min_child_weight=20,depth=10, subsample=0.1,col=0.7) # First digit touch - 0.966262479533 #BothStartLoadPhase-0.969428966329
        #clf=xgb_classifier(num_round=500,eta=0.01,min_child_weight=20,depth=10, subsample=0.1,col=0.7)  # HandStart - 0.930538668081
        return clf.train_predict(X, y, Xt, yt)
    elif c == 2:
        clf = LDA()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
    elif c == 3:
        clf = LogisticRegression()
        clf.fit(X, y)
        preds = clf.predict_proba(Xt)[:, 1]
        return preds
def lda_model(x_train, y_train, x_test, y_test):
    global get_test

    print "LDA model learning..."

    start_time = time.time()
    #LDA assumes common variance matrix among classes, while QDA doesn't
    clf = LDA()
    #clf = QDA()
    clf.fit(x_train, y_train)

    learning_time = time.time() - start_time
    print "training time is: {:.5f} seconds.".format(learning_time)
    '''
    #use LDA to do dimensionality reduction, reduce to n_class-1 dimensions
    x_t = clf.transform(x_train)
    print x_train.shape
    print x_t.shape
    print x_train[:3]
    print x_t[:3]
    '''

    print "Model Prediction..."
    #y_predict = clf.predict(x_test)

    start_time = time.time()
    #get probability prediction
    y_prob = clf.predict_proba(x_test)

    prediction_time = time.time() - start_time
    print "prediction time is: {:.5f} seconds.".format(prediction_time)

    if get_test == True:
        #the data is from real test set
        #output to file
        output_result(y_prob)
    else:
        #the test set is split from the train set, compute the loss function value
        encoder = LabelEncoder()
        #encode string label 'Class_1', 'Class_2',... to [0,1,...,8]
        y_true = encoder.fit_transform(y_test)
        #the class labels in the encoder are consistent with the class labels in the classifier
        assert (encoder.classes_ == clf.classes_).all()
        #compute the value for loss function
        score = logloss_mc(y_true, y_prob)
        print(
            " -- Multiclass logloss on validation set: {:.5f}.".format(score))
# print (pca.explained_variance_ratio_)
#---------------------End of Snippet

#---------------------Snippet 3, PCA followed by a linear SVM classification

# Using PCA
pca_clf = svm.SVC(probability=True)
pca_clf.fit(PCA_train_data, train_labels)
pca_pResults = pca_clf.predict(PCA_test_data)
pca_pResults_prob = pca_clf.predict_proba(PCA_test_data)

print(pca_pResults)
# Using LDA
lda_pResults = lda_clf.predict(test_data)
lda_pResults_prob = lda_clf.predict_proba(test_data)

#---------------------Check the order of the classes, make sure we use it correctly
print('the class order of lda is: ', lda_clf.classes_)
print('the class order of pca is: ', pca_clf.classes_)
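# The class-order check matters because column i of predict_proba corresponds
# to classes_[i]. A small, self-contained illustration (toy data and the LDA
# import are assumptions, not part of the original script):
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

X_demo = np.array([[0.0], [0.1], [0.2], [1.0], [1.1], [1.2]])
y_demo = np.array(['neg', 'neg', 'neg', 'pos', 'pos', 'pos'])
clf_demo = LDA().fit(X_demo, y_demo)
print(clf_demo.classes_)                            # e.g. ['neg' 'pos']
pos_col = list(clf_demo.classes_).index('pos')
print(clf_demo.predict_proba(X_demo)[:, pos_col])   # P('pos') for each row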

#---------------------Training of DBN
'''
dbn = DBN(
	[train_data.shape[1], 30, 30,2],
	learn_rates = 0.35,
	learn_rate_decays = 1,
	epochs = 30,
	verbose = 1,
	dropouts = 0.04,
	)
'''
# print "-----------"
# print X[2]
# print "-----------"

# model = LogisticRegression(penalty='l2', dual=True, tol=0.0001, 
#                          C=1, fit_intercept=True, intercept_scaling=1.0, 
#                          class_weight=None, random_state=None)

model = LDA()


print "Trying to construct a LDA classifier"

print "Wrote the model for the LDA classifier"
# print "20 Fold CV Score: ", np.mean(cross_validation.cross_val_score(model, X, y, cv=20, scoring='roc_auc'))

print "Retrain on all training data, predicting test labels...\n"
# X_dense=X.todense()


model.fit(X,y)
print "Model Fitted"
# X_test_dense=X_test.todense()
result = model.predict_proba(X_test)[:,1]
print "Model Predicted"
output = pd.DataFrame( data={"id":test["id"], "sentiment":result} )

print "Model outputted"
# Use pandas to write the comma-separated output file
output.to_csv(os.path.join(os.path.dirname(__file__), 'data', 'Bag_of_Words_model_lda.csv'), index=False, quoting=3)
print "Wrote results to Bag_of_Words_model.csv"	
Example #13
    gmmModels[cls].fit(classFeatures)

# ====================== test GMM ===========================
gmmScores = np.zeros((testData.shape[0], numClasses))
for cls in range(numClasses):
    gmmScores[:, cls] = gmmModels[cls].score(testFeatures)

# =========================== KNN model =======================
knnModel = KNeighborsClassifier(n_neighbors=5)
knnModel.fit(trainFeatures, truth)
knnScores = knnModel.predict_proba(testFeatures)

# =========================== FDA model =======================
fdaModel = LDA()
fdaModel.fit(trainFeatures, truth)
fdaScores = fdaModel.predict_proba(testFeatures)

# ======================== Build hybrid =======================
scores = np.log(gmmScores) + np.log(knnScores) + np.log(fdaScores)
targetScores = np.amax(scores, axis = 1)
targetClass = np.argmax(scores, axis = 1)

outFile = 'output.csv'
with open(outFile, 'w') as f:
    f.write('ISIN, Risk_Stripe\n')
    for i in range(testData.shape[0]):
        line = 'ISIN{0},Stripe {1}\n'
        line = line.format(int(testData[i, 0]), int(targetClass[i]))
        f.write(line)
f.close()
Example #14
    X_train = data_preprocess_train(X_train, subject)
    X_test = data_preprocess_test(X_test, subject)
    for i in range(6):
        y_train = y[:, i]
        print('Train subject %d, class %s' % (subject, cols[i]))
        lr1.fit(X_train[::subsample, :], y_train[::subsample])
        lr2.fit(X_train[::subsample2, :], y_train[::subsample2])
        lr3.fit(X_train[::subsample3, :], y_train[::subsample3])
        lr4.fit(X_train[::subsample, :], y_train[::subsample])
        lr5.fit(X_train[::subsample2, :], y_train[::subsample2])
        lr6.fit(X_train[::subsample3, :], y_train[::subsample3])
        pred1[:, i] = lr1.predict_proba(X_test)[:, 1]
        pred2[:, i] = lr2.predict_proba(X_test)[:, 1]
        pred3[:, i] = lr3.predict_proba(X_test)[:, 1]
        pred4[:, i] = lr4.predict_proba(X_test)[:, 1]
        pred5[:, i] = lr5.predict_proba(X_test)[:, 1]
        pred6[:, i] = lr6.predict_proba(X_test)[:, 1]
        pred[:, i] = (pred1[:, i] + pred2[:, i] + pred3[:, i] + pred4[:, i] +
                      pred5[:, i] + pred6[:, i]) / 6.0

    pred_tot.append(pred)

# submission file
submission_file = 'vali1_new_sub.csv'
# create pandas object for submission
submission = pd.DataFrame(index=np.concatenate(ids_tot),
                          columns=cols,
                          data=np.concatenate(pred_tot))

# write file
    pred2 = np.empty((X_test.shape[0],6))
    pred3 = np.empty((X_test.shape[0],6))
    pred4 = np.empty((X_test.shape[0],6))

    pred = np.empty((X_test.shape[0],6))
    
    X_train=data_preprocess_train(X_train)
    X_test=data_preprocess_test(X_test)
    for i in range(6):
        y_train= y[:,i]
        print('Train subject %d, class %s' % (subject, cols_alt[i]))
        lr1.fit(X_train[::subsample,:],y_train[::subsample])
        lr2.fit(X_train[::subsample,:],y_train[::subsample])
        lr3.fit(X_train[::subsample2,:],y_train[::subsample2])
        lr4.fit(X_train[::subsample2,:],y_train[::subsample2])
        pred1[:,i] = lr1.predict_proba(X_test)[:,1]
        pred2[:,i] = lr2.predict_proba(X_test)[:,1]
        pred3[:,i] = lr3.predict_proba(X_test)[:,1]
        pred4[:,i] = lr4.predict_proba(X_test)[:,1]
        pred[:,i]=(pred1[:,i]+pred2[:,i]+pred3[:,i]+pred4[:,i])/4
        #pred[:,i]=(pred1[:,i]+pred2[:,i])/2

    #predictions = pred[:, 0:6]
    pred_tot.append(pred)


# create pandas object for submission
submission = pd.DataFrame(index=np.concatenate(ids_tot),
                          columns=cols,
                          data=np.concatenate(pred_tot))
Example #16
y=df['2015h']
X=df.drop(['2015h'],axis=1)

#build new train and test sets
train,test=train_test_split(df,train_size=.9)
y_train=train['2015h']
x_train=train.drop('2015h',axis=1)
y_test=test['2015h']
x_test=test.drop('2015h',axis=1)

#LDA Accuracy
lda_classifier = LDA(n_components=2)
lda_x_axis = lda_classifier.fit(x_train, y_train).transform(x_train)
lda_classifier.score(x_test, y_test, sample_weight=None)
#Get AUC for test
proba=pd.DataFrame(lda_classifier.predict_proba(x_test))[1]
false_positive_rate, true_positive_rate, thresholds = skrc(y_test,proba)
auc(false_positive_rate, true_positive_rate)

#Decision Tree Accuracy
dt = DecisionTreeClassifier(class_weight='balanced')
dt.fit(x_train,y_train)
dt.score(x_test,y_test)
proba=pd.DataFrame(dt.predict_proba(x_test))[1]
false_positive_rate, true_positive_rate, thresholds = skrc(y_test,proba)
auc(false_positive_rate, true_positive_rate)

#Random Forest Accuracy (okay baseline)
rf = RandomForestClassifier(class_weight='balanced')
rf.fit(x_train,y_train)
rf.score(x_test,y_test)
    plt.show()

    # return the training and testing scores on each parameter value
    return train_scores, test_scores


########FOCUSING ON LOGISTIC REGRESSION AND LDA TEST DATA########################
from sklearn.linear_model import LogisticRegressionCV

logregCV = LogisticRegressionCV(cv= 10, solver = 'lbfgs', penalty = 'l2').fit(train_standardized, target)
logCV_acc = logregCV.scores_
y_pred = logregCV.predict_proba(test_standardized)


ldaC = LDA().fit(train_standardized, target)
y_pred = ldaC.predict_proba(test_standardized)

ad_fit = ad(n_estimators = 10).fit(train_standardized, target)
y_pred = ad_fit.predict_proba(test_standardized)


rf_fit = rf(random_state=99).fit(train_standardized, target)

splitSizes = list(range(1,10,1))
train_scores, test_scores = calc_params(train_standardized, target, rf_fit, splitSizes, 'min_samples_leaf', 5, metric = 'accuracy')
pd.DataFrame(np.array([test_scores, splitSizes]).T, columns = ['Test Recall', 'Minimum Split Size'])

nEst = range(1, 51, 10)
train_scores, test_scores = calc_params(train_standardized, target, rf_fit, nEst, 'n_estimators', 5, metric = 'accuracy')
pd.DataFrame(np.array([test_scores, nEst]).T, columns = ['Test Recall', 'Number of Estimators'])
Example #18
    X_test = data_preprocess_test(X_test, subject)
    for i in range(6):
        y_train = y[:, i]
        print(('Train subject %d, class %s' % (subject, cols[i])))
        if cl == 1:
            lr1.fit(X_train[::subsample, :], y_train[::subsample])
            pred[:, i] = lr1.predict_proba(X_test)[:, 1]
        if cl == 2:
            lr2.fit(X_train[::subsample2, :], y_train[::subsample2])
            pred[:, i] = lr2.predict_proba(X_test)[:, 1]
        if cl == 3:
            lr3.fit(X_train[::subsample3, :], y_train[::subsample3])
            pred[:, i] = lr3.predict_proba(X_test)[:, 1]
        if cl == 4:
            lr4.fit(X_train[::subsample, :], y_train[::subsample])
            pred[:, i] = lr4.predict_proba(X_test)[:, 1]
        if cl == 5:
            lr5.fit(X_train[::subsample2, :], y_train[::subsample2])
            pred[:, i] = lr5.predict_proba(X_test)[:, 1]
        if cl == 6:
            lr6.fit(X_train[::subsample3, :], y_train[::subsample3])
            pred[:, i] = lr6.predict_proba(X_test)[:, 1]
        #pred[:,i]=(pred1[:,i]+pred2[:,i]+pred3[:,i]+pred4[:,i]+pred5[:,i]+pred6[:,i])/6.0

    pred_tot.append(pred)

# submission file
submission_file = 'cv/try_sub%d_clf%d_trode%d.csv' % (subx, cl, electrode)
# create pandas object for submission
submission = pd.DataFrame(index=np.concatenate(ids_tot),
                          columns=cols,
Example #19
        redundant = corr[c][corr[c].abs() > coefficient].index - pd.Index([c]) - add
        remove = remove.union(redundant)
    
    print("For correlation coefficient = ", coefficient)
    #print(remove)
    #print(add)

    train_data = pd.DataFrame(data=train_data_g, columns = df.columns)[df.columns- remove].values
    test_data = pd.DataFrame(data=test_data_g, columns = df.columns)[df.columns- remove].values
    print("num of featurs = ", train_data.shape[1])

    clf = LDA();

    # This gets the time in ipython shell.
    print("\n\nModelling time:")
    %time clf.fit(train_data, train_labels)
    print("Modelling time ends\n\n")

    print("\n\nprediction time starts:")
    %time predicted_labels = clf.predict(test_data)
    print("prediction time ends:\n\n")
    #print(classification_report(test_labels, clf.predict(test_data)))
    print(classification_report(test_labels, predicted_labels))

    print("num of featurs = ", train_data.shape[1])
    y_true = test_labels;
    y_pred_proba = clf.predict_proba(test_data);
    fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba[:, 1])
    roc_auc = auc(fpr, tpr)
    print("ROC AUC =", roc_auc)
    angle = np.arctan(u[1] / u[0])
    angle = 180 * angle / np.pi  # convert to degrees
    # filled gaussian at 2 standard deviation
    ell = mpl.patches.Ellipse(mean,
                              2 * v[0]**0.5,
                              2 * v[1]**0.5,
                              180 + angle,
                              color=color)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)


xx, yy = np.meshgrid(np.linspace(4, 8.5, 200), np.linspace(1.5, 4.5, 200))
X_grid = np.c_[xx.ravel(), yy.ravel()]
zz_lda = lda.predict_proba(X_grid)[:, 1].reshape(xx.shape)
zz_qda = qda.predict_proba(X_grid)[:, 1].reshape(xx.shape)

pl.figure()
splot = pl.subplot(1, 2, 1)
pl.contourf(xx, yy, zz_lda > 0.5, alpha=0.5)
pl.scatter(X[y == 0, 0], X[y == 0, 1], c='b', label=target_names[0])
pl.scatter(X[y == 1, 0], X[y == 1, 1], c='r', label=target_names[1])
pl.contour(xx, yy, zz_lda, [0.5], linewidths=2., colors='k')
plot_ellipse(splot, lda.means_[0], lda.covariance_, 'b')
plot_ellipse(splot, lda.means_[1], lda.covariance_, 'r')
pl.legend()
pl.axis('tight')
pl.title('Linear Discriminant Analysis')

splot = pl.subplot(1, 2, 2)
Example #21
model_scores=pd.DataFrame({'model':model,'score':score})

for i in model_scores.index:
    if model_scores.score[i]==max(model_scores.score):
        print(model_scores.score[i])
        print(model_scores.model[i])

#using LDA model without feature selection to predict probabilities, look at confusion matrix
#and plot ROC curve Accuracy= .949571
X2=X[best[0:23]]
X_test2=X_test[best[0:23]]
lda= LDA(n_components=2)
lda_x_axis = lda.fit(X2, y).transform(X2)
lda.score(X_test2, y_test, sample_weight=None)

y_pred=lda.predict_proba(X_test2)
proba=pd.DataFrame(y_pred)[1]
proba.mean()

#play with the prediction threshold to see the false negative/positive trade-off
y_pred2=[]
for i in proba:
    if i>.0553:
        y_pred2.append(1)
    else:
        y_pred2.append(0)

#(true negative) (false positive)
#(false negative) (true positive)
#(786)(207)
#(13)(45)
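# A hedged sketch of how the confusion-matrix layout described above can be
# read off with sklearn, assuming y_test and y_pred2 are defined as in the
# snippet (0/1 labels):
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, y_pred2)
tn, fp, fn, tp = cm.ravel()
print(tn, fp)   # (true negative) (false positive)
print(fn, tp)   # (false negative) (true positive)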
# 1. Linear Discriminant Analysis
from sklearn.lda import LDA  # loads the library

score_train = np.array([])
score_test = np.array([])

for train_index, test_index in kf:
    CVTrainFeats, CVTestFeats = TrainFeats[train_index], TrainFeats[test_index]
    CVTrainLabels, CVTestLabels = TrainLabels[train_index], TrainLabels[
        test_index]

    model = LDA()
    model.fit(CVTrainFeats, CVTrainLabels)
    score_train = np.append(
        score_train,
        metrics.log_loss(CVTrainLabels, model.predict_proba(CVTrainFeats)))
    score_test = np.append(
        score_test,
        metrics.log_loss(CVTestLabels, model.predict_proba(CVTestFeats)))
    score = metrics.log_loss(TestLabels, model.predict_proba(TestFeats))

# To make sure we're not overfitting
print(("Average CV Training Log loss: %.2f" % np.mean(score_train)))
print(("Average CV Testing Log loss: %.2f" % np.mean(score_test)))
print(("Testing Log loss: %.2f" % score))

print(
    metrics.confusion_matrix(CVTestLabels,
                             model.predict(CVTestFeats),
                             labels=[1, 0]))
Example #23
h = dl.fit(X_train, y_train, batch_size=512, nb_epoch=20, show_accuracy=True, 
               validation_data=(X_test, y_test), 
               callbacks = [
                   EarlyStopping(verbose=True, patience=6, monitor='val_loss'),
                   ModelCheckpoint('./BIGDATASLACNet-weights.h5', monitor='val_loss', verbose=True, save_best_only=True)
               ], 
               sample_weight=weights[:n_train])

y_dl = dl.predict(X_, verbose=True).ravel()

from sklearn.lda import LDA
lda = LDA()
lda.fit(X_train, y_train)
# lda.fit(X_[selection], y_[selection])
yld = lda.predict_proba(X_)
yld = yld[:, 1]


DNN_kin = Likelihood2D(np.linspace(-4, 6.2, 6), np.linspace(0, 1, 50))
DNN_kin.fit((np.log(mass_nsj_likelihood + 1e-6)[signal == 1], y_dl[signal == 1]), (np.log(mass_nsj_likelihood + 1e-6)[signal == 0], y_dl[signal == 0]), weights=(weights[signal == 1], weights[signal == 0]))
likelihood2 = DNN_kin.predict((np.log(mass_nsj_likelihood + 1e-6), y_dl))

add_curve(r'Deep Net', 'orange', calculate_roc(signal, y_dl, weights=weights, bins=1000000), discs)
add_curve(r'Deep Net + $(m_{\mathrm{jet}}, \tau_{21})$', 'black', calculate_roc(signal, likelihood2, weights=weights, bins=1000000), discs)
add_curve(r'FLD', 'green', calculate_roc(signal[selection], yld[selection], weights=weights[selection], bins=1000000), discs)
fg = ROC_plotter(discs, title=r"$W' \rightarrow WZ$ vs. QCD Tagging comparison -- match $s \longrightarrow b$." + 
	'\n' + r'Jet $p_T\in[200, 1000]$ $\mathrm{GeV},\vert\eta\vert<2$')
fg.savefig(PLOT_DIR % 'dl-roc.pdf')

# -- small windows..
    rf = RandomForestClassifier(n_estimators=150, n_jobs=-1, criterion="entropy", random_state=1)
    lr = LogisticRegression()
    
    X_train, scaler = compute_features(X_train)
    X_test = compute_features(X_test, scaler)   #pass the learned mean and std to normalized test data
    
    y = np.concatenate(y,axis=0)
    scores = np.empty((X_test.shape[0],6))
    
    downsample = 40
    for i in range(6):
        print('Train subject %d, class %s' % (subject, cols[i]))
        rf.fit(X_train[::downsample,:], y[::downsample,i])
        lda.fit(X_train[::downsample,:], y[::downsample,i])
        lr.fit(X_train[::downsample,:], y[::downsample,i])
       
        scores[:,i] = (rf.predict_proba(X_test)[:,1] + 
                        lda.predict_proba(X_test)[:,1] + 
                        lr.predict_proba(X_test)[:,1])/3.0

    scores_tot.append(scores)
    idx_tot.append(np.concatenate(idx))
    

# create pandas object for submission
submission = pd.DataFrame(index=np.concatenate(idx_tot),
                          columns=cols,
                          data=np.concatenate(scores_tot))

# write file
submission.to_csv(submission_file,index_label='id',float_format='%.3f')
Example #25
  X_testKaggle.loc[index,'carrier']=0
  
 if X_testKaggle.loc[index,'color'] in goodcolors:
  X_testKaggle.loc[index,'color']=1
 elif X_testKaggle.loc[index,'color'] in badcolors:
  X_testKaggle.loc[index,'color']=-1
 else :
  X_testKaggle.loc[index,'color']=0
  
 if X_testKaggle.loc[index,'storage'] in storage:
  X_testKaggle.loc[index,'storage']=storage.get(X_testKaggle.loc[index,'storage'])
 else :
  X_testKaggle.loc[index,'storage']=1
   
 if X_testKaggle.loc[index,'productline'] in iPadlines:
  X_testKaggle.loc[index,'productline']=iPadlines.get(X_testKaggle.loc[index,'productline']) 
 elif X_testKaggle.loc[index,'productline'] in iPadminilines:
  X_testKaggle.loc[index,'iPadmini']=iPadminilines.get(X_testKaggle.loc[index,'productline'])  
  X_testKaggle.loc[index,'productline']=0 
 elif X_testKaggle.loc[index,'productline'] in iPadAirlines:
  X_testKaggle.loc[index,'iPadAir']=iPadAirlines.get(X_testKaggle.loc[index,'productline'])  
  X_testKaggle.loc[index,'productline']=0
 elif X_testKaggle.loc[index,'productline']=="Unknown":
  X_testKaggle.loc[index,'productline']=0
  
result=pd.DataFrame(testKaggle, columns=['UniqueID','Probability1'])
y_pred=clf.predict_proba(X_testKaggle)
result['Probability1']=y_pred

np.savetxt("/home/reddowan/Documents/Kaggle edx MIT/resultLDA.csv",result,delimiter=",",fmt='%9f')
# Spatial filtering train
print('Filtering train data ...')
trainFeats = np.empty([K.shape[0], m*2])
for i in range(0,K.shape[0]):
    aux = np.dot( np.dot(W.T, K[i,:,:]), W )
    trainFeats[i,:] = ( np.diag(aux) ) / np.trace(aux)

# Spatial filtering test
print('Filtering test data ...')
testFeats = np.empty([Ktest.shape[0], m*2])
for i in range(0,Ktest.shape[0]):
    aux = np.dot( np.dot(W.T, Ktest[i,:,:]), W )
    testFeats[i,:] = ( np.diag(aux) ) / np.trace(aux)


# Classification
print('Classification ...')
clf = LDA()
clf.fit(trainFeats, labels)
predictedProb = clf.predict_proba(testFeats)
predictedProb = predictedProb[:,1]

# Generate submission
submission = {'ID' : testIDs, 
              'probability' : predictedProb }
submission = pd.DataFrame(submission)
submission.to_csv(outputFile, index = 0, float_format='%11.6f')



    ### Testing 
    testing_data = data[testing_idx, :]
    testing_label = label[testing_idx]

    # Declare the random forest
    #crf = RandomForestClassifier(n_estimators=100, n_jobs=n_jobs)
    #crf = AdaBoostClassifier(n_estimators=100)
    #crf = LinearSVC()
    crf = LDA()

    # Train the classifier
    crf.fit(training_data, training_label)

    # Test the classifier
    pred_labels = crf.predict(testing_data)
    pred_probs = crf.predict_proba(testing_data)
    #pred_probs = crf.decision_function(testing_data)

    # Compute the confusion matrix
    cm = confusion_matrix(testing_label, pred_labels)
    # Compute the sensitivity and specificity
    sens = float(cm[1, 1]) / float(cm[1, 1] + cm[1, 0])
    spec = float(cm[0, 0]) / float(cm[0, 0] + cm[0, 1])
    sens_fold.append(sens)
    spec_fold.append(spec)

    # Compute the roc curve
    roc_exp = roc_curve(testing_label, pred_probs[:, 1])
    auc_exp = roc_auc_score(testing_label, pred_probs[:, 1])
    #roc_exp = roc_curve(testing_label, pred_probs)
    #auc_exp = roc_auc_score(testing_label, pred_probs)
   
    X_train=data_preprocess_train(X_train,subject)
    X_test=data_preprocess_test(X_test,subject)
    for i in range(6):
        y_train= y[:,i]
        print('Train subject %d, class %s' % (subject, cols[i]))
        lr1.fit(X_train[::subsample,:],y_train[::subsample])
        lr2.fit(X_train[::subsample2,:],y_train[::subsample2])
        lr3.fit(X_train[::subsample3,:],y_train[::subsample3])
        lr4.fit(X_train[::subsample,:],y_train[::subsample])
        lr5.fit(X_train[::subsample2,:],y_train[::subsample2])
        lr6.fit(X_train[::subsample3,:],y_train[::subsample3])
        pred1[:,i] = lr1.predict_proba(X_test)[:,1]
        pred2[:,i] = lr2.predict_proba(X_test)[:,1]
        pred3[:,i] = lr3.predict_proba(X_test)[:,1]
        pred4[:,i] = lr4.predict_proba(X_test)[:,1]
        pred5[:,i] = lr5.predict_proba(X_test)[:,1]
        pred6[:,i] = lr6.predict_proba(X_test)[:,1]
        pred[:,i]=(pred1[:,i]+pred2[:,i]+pred3[:,i]+pred4[:,i]+pred5[:,i]+pred6[:,i])/6.0

    pred_tot.append(pred)

# submission file
submission_file = 'vali30_new_sub.csv'
# create pandas object for submission
submission = pd.DataFrame(index=np.concatenate(ids_tot),
                          columns=cols,
                          data=np.concatenate(pred_tot))

# write file
submission.to_csv(submission_file,index_label='id',float_format='%.3f')
Example #29
		for i in range(1,len(tokens)):
			ind = int(tokens[i].split(':')[0])
			val = float(tokens[i].split(':')[1])
			f[ind - 1] = val
		features[dataset].append(f)

classes = set(labels['train'])
if useBinary:
	prediction=[]
	proba=[]
	#train classifier
	for c in classes:
		lda = LDA(ldasolver)
		lda.fit(features['train'],np.array(labels['train'])==c)
		#test classifier
		p = np.array(lda.predict_proba(features['test']))
		proba.append(p[:,1])
	proba=np.transpose(np.array(proba))
	prediction=np.argmax(proba,axis=1)+1
else:
	#train classifier
	lda = LDA(ldasolver)
	lda.fit(features['train'],labels['train'])

	#test classifier
	prediction = lda.predict(features['test'])
	proba = lda.predict_proba(features['test'])
	print('Accuracy %.2f%%' % lda.score(features['test'],labels['test']))

#output data
file = open(outputFile,'w')
Example #30
completeness, contamination = completeness_contamination(predictions, y_test)

print("completeness", completeness)
print("contamination", contamination)

#------------------------------------------------------------
# Compute the decision boundary
clf = classifiers[1]
xlim = (0.7, 1.35)
ylim = (-0.15, 0.4)

xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 71),
                     np.linspace(ylim[0], ylim[1], 81))

Z = clf.predict_proba(np.c_[yy.ravel(), xx.ravel()])
Z = Z[:, 1].reshape(xx.shape)

#----------------------------------------------------------------------
# plot the results
fig = plt.figure(figsize=(5, 2.5))
fig.subplots_adjust(bottom=0.15, top=0.95, hspace=0.0,
                    left=0.1, right=0.95, wspace=0.2)

# left plot: data and decision boundary
ax = fig.add_subplot(121)
im = ax.scatter(X[-N_plot:, 1], X[-N_plot:, 0], c=y[-N_plot:],
                s=4, lw=0, cmap=plt.cm.binary, zorder=2)
im.set_clim(-0.5, 1)

im = ax.imshow(Z, origin='lower', aspect='auto',
kf = cross_validation.KFold(len(TrainLabels), n_folds=5, shuffle = False, random_state = 123)

## ----------------------------------------------------------------------------
# 1. Linear Discriminant Analysis
from sklearn.lda import LDA     # loads the library

score_train = np.array([])
score_test = np.array([])

for train_index, test_index in kf:
    CVTrainFeats, CVTestFeats = TrainFeats[train_index], TrainFeats[test_index]
    CVTrainLabels, CVTestLabels = TrainLabels[train_index], TrainLabels[test_index]

    model = LDA()
    model.fit(CVTrainFeats, CVTrainLabels)
    score_train = np.append(score_train,metrics.log_loss(CVTrainLabels, model.predict_proba(CVTrainFeats)))
    score_test = np.append(score_test,metrics.log_loss(CVTestLabels, model.predict_proba(CVTestFeats)))
    score = metrics.log_loss(TestLabels, model.predict_proba(TestFeats))

# To make sure we're not overfitting
print("Average CV Training Log loss: %.2f" % np.mean(score_train))
print("Average CV Testing Log loss: %.2f" % np.mean(score_test))
print("Testing Log loss: %.2f" % score)


print(metrics.confusion_matrix(CVTestLabels, model.predict(CVTestFeats), labels=[1, 0]))

        
## ----------------------------------------------------------------------------
# 2. Logistic Regression
from sklearn.linear_model import LogisticRegression
Example #32
    pred = np.empty((X_test.shape[0],6))
    
    X_train=data_preprocess_train(X_train)
    X_test=data_preprocess_test(X_test)
    for i in range(6):
        y_train= y[:,i]
        print('Train subject %d, class %s' % (subject, cols[i]))
        
        # Fit models
        lda.fit(X_train,y_train)
        rf.fit(X_train, y_train)
        lr2.fit(X_train,y_train)
        
        # Grab predictions
        pred1[:,i] = lda.predict_proba(X_test)[:,1]
        pred2[:,i] = rf.predict_proba(X_test)[:,1]
        pred3[:,i] = lr2.predict_proba(X_test)[:,1]
        
        # Ensemble!
        pred[:,i]=(pred1[:,i] + pred2[:,i] + pred3[:,i])/3

    pred_tot.append(pred)

# submission file
#lda_file = 'lda.csv'
lda_file = 'lda_rf.csv'

# create pandas object for submission

lda = pd.DataFrame(index=np.concatenate(ids_tot),
def plot_ellipse(splot, mean, cov, color):
    v, w = linalg.eigh(cov)
    u = w[0] / linalg.norm(w[0])
    angle = np.arctan(u[1]/u[0])
    angle = 180 * angle / np.pi # convert to degrees
    # filled gaussian at 2 standard deviation
    ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
                                            180 + angle, color=color)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)

xx, yy = np.meshgrid(np.linspace(4, 8.5, 200), np.linspace(1.5, 4.5, 200))
X_grid = np.c_[xx.ravel(), yy.ravel()]
zz_lda = lda.predict_proba(X_grid)[:,1].reshape(xx.shape)
zz_qda = qda.predict_proba(X_grid)[:,1].reshape(xx.shape)

pl.figure()
splot = pl.subplot(1, 2, 1)
pl.contourf(xx, yy, zz_lda > 0.5, alpha=0.5)
pl.scatter(X[y==0,0], X[y==0,1], c='b', label=target_names[0])
pl.scatter(X[y==1,0], X[y==1,1], c='r', label=target_names[1])
pl.contour(xx, yy, zz_lda, [0.5], linewidths=2., colors='k')
plot_ellipse(splot, lda.means_[0], lda.covariance_, 'b')
plot_ellipse(splot, lda.means_[1], lda.covariance_, 'r')
pl.legend()
pl.axis('tight')
pl.title('Linear Discriminant Analysis')

splot = pl.subplot(1, 2, 2)
  X_test = pd.concat(test)
  all_ids.append(np.concatenate(idx))
  X_test = X_test.drop(['id'], axis=1)
  X_test = np.asarray(X_test.astype(float))


  current_prediction_lda = np.empty((X_test.shape[0], 6)) # number of test samples X number of labels
  current_prediction_lr = np.empty((X_test.shape[0], 6)) # number of test samples X number of labels
  current_prediction_qda = np.empty((X_test.shape[0], 6)) # number of test samples X number of labels
  X_test = data_preprocess_test(X_test)

  for i in range(6):
    print('testing subject_id=', subject_id)
    current_prediction_lr[:,i] = lr.predict_proba(X_test)[:,1]
    current_prediction_qda[:,i] = qda.predict_proba(X_test)[:,1]
    current_prediction_lda[:,i] = lda.predict_proba(X_test)[:,1]

  	# print 'predicted:',current_prediction[:,i]

  all_predictions_lda.append(current_prediction_lda)
  all_predictions_qda.append(current_prediction_qda)
  all_predictions_lr.append(current_prediction_lr)

  all_predictions_avg.append( (current_prediction_lda+current_prediction_qda+current_prediction_lr)/3 )

print('testing complete')


print('ids ', np.concatenate(all_ids).shape)
print('predictions ', np.concatenate(all_predictions_avg).shape)
Example #35
    rnn_pred_proba = np.max(rnn_pre, axis=1)
    rnn_pred_proba[1000:] = 0
    #print ('RNN AUC: ',str(metrics.roc_auc_score(y_test,rnn_pre)))
    print('RNN ACC: ', str(acc))
    print(
        'RNN Recall for each class: ',
        str(metrics.recall_score(y_test, rnn_pred, pos_label=1, average=None)))
    print('RNN F1-score for each class: ',
          str(metrics.f1_score(y_test, rnn_pred, average=None)))
    print('RNN Precision for each class: ',
          str(metrics.precision_score(y_test, rnn_pred, average=None)))
    metrics.confusion_matrix(y_test, rnn_pred)
    ########################## LDA prediction ############
    clf = LDA()
    clf.fit(x_train, y_train)
    lda_pre = clf.predict_proba(x_test)
    lda_pred = np.argmax(lda_pre, axis=1)

    print('lda ACC: ', str(metrics.accuracy_score(y_test, lda_pred)))
    print('lda Recall for each class: ',
          str(metrics.recall_score(y_test, lda_pred, average=None)))
    print('lda F1-score for each class: ',
          str(metrics.f1_score(y_test, lda_pred, average=None)))
    print('lda Precision for each class: ',
          str(metrics.precision_score(y_test, lda_pred, average=None)))
    metrics.confusion_matrix(y_test, lda_pred)

    #################### Naive Bayes prediction ################
    gnb = GaussianNB()
    gnb.fit(x_train, y_train)
    Bayes_pre = gnb.predict_proba(x_test)
Example #36
    pred = np.empty((X_test.shape[0], 6))

    X_train = data_preprocess_train(X_train)
    X_test = data_preprocess_test(X_test)
    for i in range(6):
        y_train = y[:, i]
        print('Train subject %d, class %s' % (subject, cols[i]))

        # Fit models
        lda.fit(X_train, y_train)
        rf.fit(X_train, y_train)
        lr2.fit(X_train, y_train)

        # Grab predictions
        pred1[:, i] = lda.predict_proba(X_test)[:, 1]
        pred2[:, i] = rf.predict_proba(X_test)[:, 1]
        pred3[:, i] = lr2.predict_proba(X_test)[:, 1]

        # Ensemble!
        pred[:, i] = (pred1[:, i] + pred2[:, i] + pred3[:, i]) / 3

    pred_tot.append(pred)

# submission file
#lda_file = 'lda.csv'
lda_file = 'lda_rf.csv'

# create pandas object for submission

lda = pd.DataFrame(index=np.concatenate(ids_tot),
Example #37
    X_test=data_preprocess_test(X_test,subject)
    for i in range(6):
        y_train= y[:,i]
        print('Train subject %d, class %s' % (subject, cols[i]))
        if cl==1:
            lr1.fit(X_train[::subsample,:],y_train[::subsample])
            pred[:,i] = lr1.predict_proba(X_test)[:,1]
        if cl==2:
            lr2.fit(X_train[::subsample2,:],y_train[::subsample2])
            pred[:,i] = lr2.predict_proba(X_test)[:,1]
        if cl==3:
            lr3.fit(X_train[::subsample3,:],y_train[::subsample3])
            pred[:,i] = lr3.predict_proba(X_test)[:,1]
        if cl==4:
            lr4.fit(X_train[::subsample,:],y_train[::subsample])
            pred[:,i] = lr4.predict_proba(X_test)[:,1]
        if cl==5:
            lr5.fit(X_train[::subsample2,:],y_train[::subsample2])
            pred[:,i] = lr5.predict_proba(X_test)[:,1]
        if cl==6:
            lr6.fit(X_train[::subsample3,:],y_train[::subsample3])
            pred[:,i] = lr6.predict_proba(X_test)[:,1]
        #pred[:,i]=(pred1[:,i]+pred2[:,i]+pred3[:,i]+pred4[:,i]+pred5[:,i]+pred6[:,i])/6.0

    pred_tot.append(pred)

# submission file
submission_file = 'cv/try_sub%d_clf%d_trode%d.csv'%(subx,cl,electrode)
# create pandas object for submission
submission = pd.DataFrame(index=np.concatenate(ids_tot),
                          columns=cols,
Example #38
for cls in range(numClasses):
    classData = trainData[truth==cls, :]
    col_mean = scipy.stats.mode(classData, axis=0)
    ids = np.where(np.isnan(classData))
    classData[ids] = np.take(col_mean, ids[1])
    trainData[truth==cls, :] = classData

col_mean = scipy.stats.mode(trainData, axis=0)
ids = np.where(np.isnan(testData))
testData[ids] = np.take(col_mean, ids[1])

trainFeatures = np.hstack((trainData[:, 1:19], trainData[:, 20:]))
testFeatures = testData[:, 1:]

# =========================== FDA model =======================
model = LDA()
model.fit(trainFeatures, truth)

scores = model.predict_proba(testFeatures)
targetScores = np.amax(scores, axis = 1)
targetClass = np.argmax(scores, axis = 1)

outFile = 'output.csv'
with open(outFile, 'w') as f:
    f.write('ISIN, Risk_Stripe\n')
    for i in range(testData.shape[0]):
        line = 'ISIN{0},Stripe {1}\n'
        line = line.format(int(testData[i, 0]), int(targetClass[i]))
        f.write(line)
f.close()
Example #39
"""

print('../Output/gda_results_p'+str(numParticles)+'_L'+str(Lambda)+'_m'+str(multiplier)+'_tjets.txt')
f = open('../Output/gda_results_p'+str(numParticles)+'_L'+str(Lambda)+'_m'+str(multiplier)+'_tjets.txt','w')

testError = 0
truthFraction = 0
testHSError = 0
testPUError = 0
truthpt = 0
totalpt = 0
HSpt = 0
totalPUpt = 0
totalPUptretained = 0
for i in range(len(test_data)):
    predictFalse, predictTrue = clf.predict_proba(test_data[i])[0]

    pt = test_data[i][ptIndex]
    y = test_truth[i]
    sign = 1 if predictTrue > 0.5 else -1
    prediction = predictTrue * sign
    margin = prediction*y
    
    if y>0:
        truthFraction +=1
        truthpt+=pt
    if y<0:
        totalPUpt+=pt
        if prediction>0: totalPUptretained+=pt
    if prediction>0:
        totalpt+=pt
Example #40
completeness, contamination = completeness_contamination(predictions, y_test)

print("completeness", completeness)
print("contamination", contamination)

#------------------------------------------------------------
# Compute the decision boundary
clf = classifiers[1]
xlim = (0.7, 1.35)
ylim = (-0.15, 0.4)

xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 71),
                     np.linspace(ylim[0], ylim[1], 81))

Z = clf.predict_proba(np.c_[yy.ravel(), xx.ravel()])
Z = Z[:, 1].reshape(xx.shape)

#----------------------------------------------------------------------
# plot the results
fig = plt.figure(figsize=(5, 2.5))
fig.subplots_adjust(bottom=0.15,
                    top=0.95,
                    hspace=0.0,
                    left=0.1,
                    right=0.95,
                    wspace=0.2)

# left plot: data and decision boundary
ax = fig.add_subplot(121)
im = ax.scatter(X[-N_plot:, 1],
Example #41
    #scores3 = np.empty((X_test.shape[0],6))
    #scores4 = np.empty((X_test.shape[0],6))
    #scores5 = np.empty((X_test.shape[0],6))

    downsample = 20
    # test SVM for 2 first subjects
    if subject in subjects:
        for i in range(6):
            print('Train subject %d, class %s' % (subject, cols[i]))
            #rf.fit(X_train[::downsample,:], y[::downsample,i])
            lda.fit(X_train[::, :], y[::, i])
            #lr.fit(X_train[::downsample,:], y[::downsample,i])
            #clf.fit(X_train[::downsample,:], y[::downsample,i])

            #scores1[:,i] = rf.predict_proba(X_test)[:,1]
            scores2[:, i] = lda.predict_proba(X_test)[:, 1]
            #scores3[:,i] = lr.predict_proba(X_test)[:,1]
            #scores4[:,i] = clf.predict_proba(X_test)[:,1]
            #scores5[:,i] = clf.predict(X_test)[:,1]

    #scores_tot1.append(scores1)
    scores_tot2.append(scores2)
    #scores_tot3.append(scores3)
    #scores_tot4.append(scores4)
    #scores_tot5.append(scores4)
    idx_tot.append(np.concatenate(idx))

#%%########### submission file ################################################
submission_file = 'models/model2_ds0_low2_band1_test1.csv'
# create pandas object for submission
submission = pd.DataFrame(index=np.concatenate(idx_tot),
Example #43
def predict(train_X, train_y, test_X):
    clf = LDA()
    clf.fit(train_X, train_y)
    return clf.predict_proba(test_X)
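# A minimal usage sketch for predict() above; the synthetic arrays and the
# LinearDiscriminantAnalysis import are assumptions (the original file imports
# its own LDA).
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

rng = np.random.RandomState(0)
train_X = rng.randn(200, 5)
train_y = rng.randint(0, 2, size=200)
test_X = rng.randn(20, 5)
proba = predict(train_X, train_y, test_X)
print(proba.shape)   # (20, 2): one probability column per class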