def test_cross_val_generator_with_indices():
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    # explicitly passing indices value is deprecated
    loo = cval.LeaveOneOut(4)
    lpo = cval.LeavePOut(4, 2)
    kf = cval.KFold(4, 2)
    skf = cval.StratifiedKFold(y, 2)
    lolo = cval.LeaveOneLabelOut(labels)
    lopo = cval.LeavePLabelOut(labels, 2)
    ps = cval.PredefinedSplit([1, 1, 2, 2])
    ss = cval.ShuffleSplit(2)
    for cv in [loo, lpo, kf, skf, lolo, lopo, ss, ps]:
        for train, test in cv:
            assert_not_equal(np.asarray(train).dtype.kind, 'b')
            assert_not_equal(np.asarray(test).dtype.kind, 'b')
            X[train], X[test]
            y[train], y[test]
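A note on API drift: these generators come from the old sklearn.cross_validation module, deprecated in 0.18 and removed in 0.20. A minimal sketch of the same loop against the modern sklearn.model_selection API (an assumption of scikit-learn >= 0.20; the label-based splitters became group-based ones):

import numpy as np
from sklearn.model_selection import (KFold, LeaveOneGroupOut, LeaveOneOut,
                                     LeavePGroupsOut, LeavePOut,
                                     PredefinedSplit, ShuffleSplit,
                                     StratifiedKFold)

X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
y = np.array([1, 1, 2, 2])
groups = np.array([1, 2, 3, 4])  # formerly called "labels"

# Splitters are now built without the data; split() yields integer index arrays.
cvs = [LeaveOneOut(), LeavePOut(2), KFold(2), StratifiedKFold(2),
       LeaveOneGroupOut(), LeavePGroupsOut(2), ShuffleSplit(2),
       PredefinedSplit([1, 1, 2, 2])]
for cv in cvs:
    for train, test in cv.split(X, y, groups):
        assert np.asarray(train).dtype.kind != 'b'
        X[train], X[test]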
Example #2
def test_cross_val_generator_with_default_indices():
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = cval.LeaveOneOut(4)
    lpo = cval.LeavePOut(4, 2)
    kf = cval.KFold(4, 2)
    skf = cval.StratifiedKFold(y, 2)
    lolo = cval.LeaveOneLabelOut(labels)
    lopo = cval.LeavePLabelOut(labels, 2)
    b = cval.Bootstrap(2)  # only in index mode
    ss = cval.ShuffleSplit(2)
    ps = cval.PredefinedSplit([1, 1, 2, 2])
    for cv in [loo, lpo, kf, skf, lolo, lopo, b, ss, ps]:
        for train, test in cv:
            assert_not_equal(np.asarray(train).dtype.kind, 'b')
            assert_not_equal(np.asarray(test).dtype.kind, 'b')
            X[train], X[test]
            y[train], y[test]
Example #3
def _cross_val_score_loo_r0(lm, X, y):
    """
	mean_square_error metric is used from sklearn.metric.

	Return 
	--------
	The mean squared error values are returned. 
	"""

    if len(y.shape) == 1:
        y = np.array([y]).T

    kf = cross_validation.LeaveOneOut(y.shape[0])
    score_l = list()
    for tr, te in kf:
        lm.fit(X[tr, :], y[tr, :])
        yp = lm.predict(X[te, :])
        score_l.append(metrics.mean_squared_error(y[te, :], yp))

    return score_l
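A hypothetical usage sketch for the helper above (toy data invented here; assumes a scikit-learn old enough to still ship sklearn.cross_validation, imported as in the function's module):

import numpy as np
from sklearn import cross_validation, metrics
from sklearn.linear_model import LinearRegression

X_toy = np.arange(10.0).reshape(-1, 1)
y_toy = 3.0 * X_toy.ravel() + 2.0   # noiseless line
scores = _cross_val_score_loo_r0(LinearRegression(), X_toy, y_toy)
print(np.mean(scores))              # ~0 for this noiseless toy problem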
Example #4
 def LOOCV(self, labels, values, details=False):
     '''
     Performs leave-one-out cross-validation.
     Uses the input labels and values and cross-validates over every sample.
     RETURNS: array of length num_samples of cross-validation scores;
     if details is True, also returns detailedResults, an array of length
     num_samples containing the predicted classes.
     '''
     num_samples = values.shape[0]
     scores = np.zeros(num_samples)
     detailedResults = np.zeros(num_samples)
     # get training and testing set, train on training set, score on test set
     for train, test in cross_validation.LeaveOneOut(num_samples):
         values_test = values[test]
         label_test = labels[test]
         self.Train(labels[train], values[train], fout=None)
         scores[test] = self.classifier.score(values_test, label_test)
         if details:
             detailedResults[test] = self.Predict(values_test)
     if details:
         return scores, detailedResults
     return scores
Example #5
    def do_LOOCV(all_x, all_y):
        loo = cross_validation.LeaveOneOut(len(all_x))
        tot_cnt = np.zeros(num_cls)
        hit_cnt = np.zeros(num_cls)

        cancer_prob = []
        labels = []
        cnt = 0
        for train_index, test_index in tqdm(loo):
            train_val_x, test_x = all_x[train_index], all_x[test_index]
            train_val_y, test_y = all_y[train_index], all_y[test_index]

            train_val_x, train_val_y = shuffle(train_val_x,
                                               train_val_y,
                                               random_state=RANDOM_STATE)
            n_trn = len(train_val_x)
            n_dev = int(n_trn * VAL_RATIO)
            n_trn = n_trn - n_dev
            train_x = train_val_x[0:n_trn]
            train_y = train_val_y[0:n_trn]
            val_x = train_val_x[n_trn:]
            val_y = train_val_y[n_trn:]

            prob = classifier_LOOCV(train_x,
                                    train_y,
                                    val_x,
                                    val_y,
                                    test_x,
                                    test_y,
                                    method_clf=method_clf,
                                    verbose=verbose,
                                    num_cls=num_cls)

            for i in range(0, num_cls):
                if test_y[0] == i:
                    tot_cnt[i] += 1
                    if np.argmax(prob) == i:
                        hit_cnt[i] += 1

        return (tot_cnt, hit_cnt)
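do_LOOCV returns per-class counts rather than a score; a short follow-up sketch that turns them into accuracies (assumes the same all_x/all_y in scope; the guard avoids division by zero for classes that never occur):

tot_cnt, hit_cnt = do_LOOCV(all_x, all_y)
per_class_acc = hit_cnt / np.maximum(tot_cnt, 1)   # hit rate per class
overall_acc = hit_cnt.sum() / tot_cnt.sum()        # plain LOO accuracy
print(per_class_acc, overall_acc)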
Example #6
def LR_training_python(lrf, Y, verboseoutput):
    Y = Y.reshape((len(Y), ))
    loo = cross_validation.LeaveOneOut(len(Y))
    mae2 = 0
    errors2 = []
    for train_idx, test_idx in loo:
        f_train, f_test = lrf[train_idx], lrf[test_idx]
        Y_train, Y_test = Y[train_idx], Y[test_idx]
        if not np.any(np.isnan(f_train)) and np.all(np.isfinite(f_train)):
            r2 = linearRegression(f_train, Y_train)
            y2 = r2.predict(f_test)
            errors2.append(np.abs(Y_test - y2))
            if verboseoutput:
                print(Y_test[0], y2[0])
        else:
            print('nan or infinite')

    mae2 = np.mean(errors2)
    var2 = np.sqrt(np.var(errors2))
    mre2 = mae2 / Y.mean()
    return mae2, var2, mre2
Example #7
def validation(X,Y):
    repeats = 20
    metric_list = [] 
    parameters = {'estimator__n_estimators':np.arange(5,40)}
        
    for i in np.arange(repeats):
        # split train test
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, Y, test_size=0.2,random_state=i)   
        loocv = cross_validation.LeaveOneOut(n=len(X_train))
        kfold = cross_validation.KFold(n=len(X_train), n_folds=6)  # defined but unused; the search below uses LOO
        cv = GridSearchCV(clf, param_grid=parameters, n_jobs=-1, cv=loocv)
        cv.fit(X_train, y_train)
        tuned_model = cv.best_estimator_
        model = tuned_model.fit(X_train,y_train)
        y_score = model.predict_proba(X_test)
        roc_auc = get_roc(y_test, y_score, y_train)
        roc_df = pd.DataFrame(roc_auc, index=['auc'])
        roc_df = pd.melt(roc_df, value_name='AUC',var_name='classes')
        roc_df['repeat'] = np.repeat(i,len(roc_df))
        metric_list.append(roc_df)
        print('Trained %i times' % i)
    return pd.concat(metric_list,axis=0)
Example #8
 def GetOptimalBandwidth(self,datalabel,bandlims,numbands):
     '''Optimize the bandwidth using leave-one-out cross-validation.  
     Essentially, for a single bandwidth, a PDF is made with all 
     points except one, and the unused point is tested against the model. 
     This is done many times, and an average error is computed.  This is
     done for each bandwidth, and the bandwidth with the lowest average
     error is returned.
     Example follows that at jakevdp.github.io/PythonDataScienceHandbook.
     Args
         datalabel: string
             string describing which datalabel in the dataframe to find
             the bandwidth for
         bandlims: array (length 2)
             limits to search for the optimal bandwidth in
         numbands: int
             number of bandwidths to test between the limits
     '''
     if bandlims[1] < 0 or bandlims[0] < 0:
         print("Bandwidth must be greater than zero")
         return
     bandwidths = np.linspace(bandlims[0],bandlims[1],numbands)
     data = self.df[datalabel]
     if isinstance(self.df[datalabel][0],np.ndarray):
         print("WERE IN HERE")
         data_arr = []
         for i in range(len(self.df[datalabel])):
             data_arr = data_arr + list(self.df[datalabel][i])
         data=np.array(data_arr)
     if len(data)>500:
         print("This may take some time depending on your data length.")
         print("numbands > 10 with len(data)>500 starts to take a bit")
     grid = sgs.GridSearchCV(skn.KernelDensity(kernel='gaussian'),
                         {'bandwidth': bandwidths},
                         cv=cv.LeaveOneOut(len(data)),
                         verbose=1)
     grid.fit(data[:,None])
     thebandwidth = grid.best_params_['bandwidth']
     return thebandwidth
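For reference, the same leave-one-out bandwidth search as a self-contained sketch against the modern API (assumes scikit-learn >= 0.20; GridSearchCV scores each candidate bandwidth by the log-likelihood of the held-out point, as in the jakevdp example the docstring cites):

import numpy as np
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
x = rng.normal(size=100)            # toy 1-D sample
bandwidths = np.linspace(0.1, 1.0, 30)
grid = GridSearchCV(KernelDensity(kernel='gaussian'),
                    {'bandwidth': bandwidths},
                    cv=LeaveOneOut())
grid.fit(x[:, None])                # KernelDensity expects a 2-D array
print(grid.best_params_['bandwidth'])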
Example #9
    def DoLOOCV(all_x, all_y):
        loo = cross_validation.LeaveOneOut(len(all_x))
        acc = []
        for train_index, test_index in loo:
            train_val_x, test_x = all_x[train_index], all_x[test_index]
            train_val_y, test_y = all_y[train_index], all_y[test_index]

            train_val_x, train_val_y = shuffle(train_val_x,
                                               train_val_y,
                                               random_state=RANDOM_STATE)
            n_trn = len(train_val_x)
            n_dev = int(n_trn * VAL_RATIO)
            n_trn = n_trn - n_dev
            train_x = train_val_x[0:n_trn]
            train_y = train_val_y[0:n_trn]
            val_x = train_val_x[n_trn:]
            val_y = train_val_y[n_trn:]

            is_correct = ClassifierLoocv(train_x, train_y, val_x, val_y,
                                         test_x, test_y)
            acc.append(is_correct)

        return acc
Example #10
def test_cross_val_predict():
    boston = load_boston()
    X, y = boston.data, boston.target
    cv = cval.KFold(len(boston.target))

    est = Ridge()

    # Naive loop (should be same as cross_val_predict):
    preds2 = np.zeros_like(y)
    for train, test in cv:
        est.fit(X[train], y[train])
        preds2[test] = est.predict(X[test])

    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_array_almost_equal(preds, preds2)

    preds = cval.cross_val_predict(est, X, y)
    assert_equal(len(preds), len(y))

    cv = cval.LeaveOneOut(len(y))
    preds = cval.cross_val_predict(est, X, y, cv=cv)
    assert_equal(len(preds), len(y))

    Xsp = X.copy()
    Xsp *= (Xsp > np.median(Xsp))
    Xsp = coo_matrix(Xsp)
    preds = cval.cross_val_predict(est, Xsp, y)
    assert_equal(len(preds), len(y))

    preds = cval.cross_val_predict(KMeans(), X)
    assert_equal(len(preds), len(y))

    def bad_cv():
        for i in range(4):
            yield np.array([0, 1, 2, 3]), np.array([4, 5, 6, 7, 8])

    assert_raises(ValueError, cval.cross_val_predict, est, X, y, cv=bad_cv())
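The same round trip under the modern API, as a sketch with synthetic data (load_boston was removed from recent scikit-learn, so make_regression stands in for it here):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import LeaveOneOut, cross_val_predict

X, y = make_regression(n_samples=50, n_features=4, random_state=0)
preds = cross_val_predict(Ridge(), X, y, cv=LeaveOneOut())
assert len(preds) == len(y)   # one out-of-sample prediction per sample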
Example #11
def runOneOut():
    num_folds = 2
    num_instances = len(X)
    num_trees = 100
    loocv = cross_validation.LeaveOneOut(n=num_instances)
    model = LogisticRegression()
    #results = cross_validation.cross_val_score(model, X, Y.ravel(), cv=loocv,n_jobs=-1)
    #print("LogisticRegression Accuracy: %.3f%% (%.3f%%)") % (results.mean() * 100.0, results.std() * 100.0)

    model = DecisionTreeClassifier(max_depth=5)
    #results = cross_validation.cross_val_score(model, X, Y.ravel(), cv=loocv,n_jobs=-1)
    #print("Decision Tree Accuracy: %.3f%% (%.3f%%)") % (results.mean() * 100.0, results.std() * 100.0)

    #model = GradientBoostingClassifier(learning_rate=0.005,n_estimators=num_trees, random_state=seed,max_depth=5, min_samples_split=1600,min_samples_leaf=50, subsample=0.8)
    #results = cross_validation.cross_val_score(model, X, Y.ravel(), cv=loocv,n_jobs=-1)

    model = XGBClassifier()
    results = cross_validation.cross_val_score(model,
                                               X,
                                               Y.ravel(),
                                               cv=loocv,
                                               n_jobs=-1)
    print("Xboost Accuracy: %.3f%% (%.3f%%)") % (results.mean() * 100.0,
                                                 results.std() * 100.0)
Example #12
def use_grid_search_svm(traindata_features, traindata_labels, nfolds):
    X = traindata_features[:, 0:2]
    numobs = X.shape[0]

    hyperparam_grid = [{
        'C': np.logspace(-12, 12, 100, base=2),
        'gamma': np.logspace(-12, 12, 100, base=2)
    }]
    y = np.array(traindata_labels)

    #1. first shuffle the data
    inds = list(range(numobs))
    shuffle(inds)
    X = X[inds, :]
    y = y[inds]

    cv = cross_validation.LeaveOneOut(numobs)
    grid_searcher = grid_search.GridSearchCV(svm.SVC(kernel='rbf'),
                                             hyperparam_grid,
                                             cv=cv)
    grid_searcher.fit(X, y)

    print(grid_searcher.best_score_)
    return grid_searcher.best_estimator_
Example #13
def use_grid_search_LogisticRegression(traindata_features, traindata_labels,
                                       nfolds):
    print("in")
    X = traindata_features
    numobs = X.shape[0]

    hyperparam_grid = [{'C': np.logspace(-12, 12, 10, base=2)}]
    y = np.array(traindata_labels)
    print("in 1")
    # 1. first shuffle the data
    inds = list(range(numobs))
    shuffle(inds)
    X = X[inds, :]
    y = y[inds]

    print("in 2")
    cv = cross_validation.LeaveOneOut(numobs)
    grid_searcher = grid_search.GridSearchCV(linear_model.LogisticRegression(),
                                             hyperparam_grid,
                                             cv=cv)
    grid_searcher.fit(X, y)
    print("in 3")
    print(grid_searcher.best_score_)
    return grid_searcher.best_estimator_
Example #14
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets, cross_validation

data = datasets.load_svmlight_file("a9a")
X = data[0]
print(X.shape)
y = data[1]

loo = cross_validation.LeaveOneOut(X.shape[0])
errors = []
# Train and find best k
# k = [1, 2, 5, 10, 20]
# for idx, k in enumerate(k):
#     score = 0
#     i = 0
#     for train_index, test_index in loo:
#         i += 1
#         X_train, X_test = X[train_index], X[test_index]
#         y_train, y_test = y[train_index], y[test_index]
#         clf = KNeighborsClassifier(k)
#         clf.fit(X_train, y_train)
#         score += clf.score(X_test, y_test)
#         if i % 1000 == 0:
#             print "Progress - i = " + str(i) + " k = " + str(k)
#     sc = score/float(X.shape[0])
#     print "Finale score for k = " + str(k) + " " + str(sc)
#     errors.append(1-sc)

clf = KNeighborsClassifier(20)
clf.fit(X, y)
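For reference, the commented-out manual loop is equivalent to a cross_val_score call; a sketch reusing the X, y, and loo defined above (leave-one-out over the tens of thousands of a9a rows is very slow either way):

for k in [1, 2, 5, 10, 20]:
    scores = cross_validation.cross_val_score(KNeighborsClassifier(k), X, y, cv=loo)
    print("Final score for k = " + str(k) + " " + str(scores.mean()))
    errors.append(1 - scores.mean())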
Example #15
for i in inds:
    t = read('store/tmp/%d_err.txt'%i)
    if t:
        m,s = t.split('elapsed')[0].split(' ')[-1].split(':')
        y.append(float(m)*60+float(s))
        f = [float(v) for v in t.split('Features: ')[-1].split('\n')[0].split(' ')]
        p = []
        print(f, y[-1])
        # build all pairwise products plus the raw features
        # (loop variables renamed so the outer index i is not shadowed)
        for j in range(len(f)):
            for k in range(j):
                p.append(f[j]*f[k])
            p.append(f[j])
        # ...which are then discarded in favor of a hand-picked subset
        p = [f[0], f[3], f[0]*f[3]]
        x.append(p)

"""loo = cross_validation.LeaveOneOut(len(y))
regr = linear_model.LinearRegression()
scores = cross_validation.cross_val_score(regr, x, y, scoring='neg_mean_squared_error', cv=loo,)
print(10**((-scores.mean())**.5))"""
model = linear_model.LinearRegression()
model.fit(x, y)
r_sq = model.score(x, y)

loo = cross_validation.LeaveOneOut(len(y))
scores = cross_validation.cross_val_score(model, x, y, scoring='neg_mean_squared_error', cv=loo,)
print(((-scores.mean())**.5))

print('coefficient of determination:', r_sq)
print('intercept:', model.intercept_)
print('slope:', model.coef_)
B_enabled = True
# IRIS (arff) - load datasets
data, meta = arff.loadarff(open("datasets/iris.arff", "r"))
y_train = np.array(data['class'])
X_train = np.array([list(x) for x in data[meta._attrnames[0:-1]]])
X_train = X_train.toarray() if sps.issparse(
    X_train) else X_train  # avoid sparse data
class_names = np.unique(y_train)
# IRIS (arff) - cross validation example
clf = WisardClassifier(nobits=16,
                       bleaching=B_enabled,
                       notics=256,
                       mapping='linear',
                       debug=True,
                       default_bleaching=3)
kf = cross_validation.LeaveOneOut(len(y_train))  # one fold per training sample, not per class
predicted = cross_validation.cross_val_score(clf,
                                             X_train,
                                             y_train,
                                             cv=kf,
                                             n_jobs=1)
print("Accuracy Avg: %.2f" % predicted.mean())

# IRIS (libsvm) - load datasets
X_train, y_train = load_svmlight_file(open("datasets/iris.libsvm", "r"))
class_names = np.unique(y_train)
X_train = X_train.toarray() if sps.issparse(
    X_train) else X_train  # avoid sparse data
# IRIS - cross validation example (with fixed seed)
clf = WisardClassifier(nobits=16,
                       notics=1024,
Example #17
plot(np.arange(1, kMax), scoreK)
xlabel('k')
ylabel('accuracy')
title('k-NN 5-fold cross validation accuracy vs k')
grid(True)
savefig('q1b.png')
print('q1b done')

#### q1a : leave one out accuracy

#take a random n size sample from the dataset
n = 10000
sampleIndices = np.random.randint(N, size=n)
X = X[sampleIndices, :]
y = y[sampleIndices]
loo = cross_validation.LeaveOneOut(n)

scoreK = []
for k in range(1, kMax):
    neigh = KNeighborsClassifier(n_neighbors=k)
    scores = cross_validation.cross_val_score(neigh, X, y, cv=loo)
    scoreK.append(np.mean(scores))

print(scoreK)
plot(np.arange(1, kMax), scoreK)
xlabel('k')
ylabel('accuracy')
title('k-NN leave one out accuracy vs k')
grid(True)
savefig('q1a.png')
print('q1a done')
Example #18
    chistogram.append(chist(imc))
    labels.append(fname[:-len('xx.jpg')])

print('Finished computing features.')

haralick = np.array(haralick)
chistogram = np.array(chistogram)
labels = np.array(labels)

haralick_plus_chist = np.hstack([chistogram, haralick])

clf = Pipeline([('preproc', StandardScaler()),
                ('classifier', LogisticRegression())])

from sklearn import cross_validation
cv = cross_validation.LeaveOneOut(len(images))
scores = cross_validation.cross_val_score(clf, haralick, labels, cv=cv)
print(
    'Accuracy (Leave-one-out) with Logistic Regression [haralick features]: {:.1%}'
    .format(scores.mean()))

scores = cross_validation.cross_val_score(clf, chistogram, labels, cv=cv)
print(
    'Accuracy (Leave-one-out) with Logistic Regression [color histograms]: {:.1%}'
    .format(scores.mean()))

scores = cross_validation.cross_val_score(clf,
                                          haralick_plus_chist,
                                          labels,
                                          cv=cv)
print(
Example #19
def RF_Model(Scaled_Input_Data, Output_Data):
    T0 = time.time()
    n = len(Scaled_Input_Data)
    RFModel = RandomForestRegressor()
    RFModel.fit(Scaled_Input_Data, Output_Data)
    RF_Time = time.time() - T0
    print('The computational time of Random Forest Regression for ', n, ' examples is: ', RF_Time)
    # scoring="mean_absolute_error" returns negated MAE values in this sklearn era
    MAEs_RF = cross_validation.cross_val_score(RFModel, Scaled_Input_Data, Output_Data, cv=cross_validation.LeaveOneOut(n), scoring="mean_absolute_error")
    MeanMAE_RF = np.mean(list(MAEs_RF))
    print('The average MAE of Random Forest Regression for ', n, ' examples is: ', (-1*MeanMAE_RF))
    return(MeanMAE_RF, RFModel)
Example #20
words = [k for k, v in filtered_word_dict.items()]

wiki_requests_data = np.zeros((len(binary_wiki_requests),len(words)))
wiki_requests_target = np.zeros(len(binary_wiki_requests))

request_count = 0
for request in binary_wiki_requests:
    for word in request['words']:
        if word in words:
            idx = words.index(word)
            wiki_requests_data[request_count][idx] += 1
    wiki_requests_target[request_count] = request['class']
    request_count += 1

loo = cross_validation.LeaveOneOut(len(binary_wiki_requests))
print(len(loo))
print(len(binary_wiki_requests))

accuracy = 0.0
count = 0

for train_index, test_index in loo:
    X_train, X_test = wiki_requests_data[train_index], wiki_requests_data[test_index]
    y_train, y_test = wiki_requests_target[train_index], wiki_requests_target[test_index]

    clf = svm.LinearSVC()
    clf.fit(X_train,y_train)

    prediction = clf.predict(X_test)
    
Example #21
 def setup_indices(self, train_data, test_data):
     self.indices = skl_cross_validation.LeaveOneOut(len(test_data))
Example #22
def region_CV_fits_and_errors(X,
                              Y,
                              P_X,
                              P_Y,
                              P_Y_dag,
                              err_fun,
                              Omega=None,
                              rel_type=2):
    n_inj = X.shape[1]
    outer_sets = cross_validation.LeaveOneOut(n_inj)
    err_reg = np.zeros((len(outer_sets), ))
    err_homog = np.zeros((len(outer_sets), ))
    rel_err_reg = np.zeros((len(outer_sets), ))
    rel_err_homog = np.zeros((len(outer_sets), ))
    GOF_reg = np.zeros((len(outer_sets), ))
    GOF_homog = np.zeros((len(outer_sets), ))
    for i, (train, test) in enumerate(outer_sets):
        # compare models in outer sets only, same as eventual test errors in the
        # nested cross-validation procedure
        X_train = X[:, train]
        X_test = X[:, test]
        Y_train = Y[:, train]
        Y_test = Y[:, test]
        if Omega is not None:
            Omega_train = Omega[:, train]
            Omega_test = Omega[:, test]
            # W = fit_linear_model_proj(X_train, Y_train, P_Y_dag, P_X,
            #                           Omega_train)
            W = fit_linear_model(P_X.dot(X_train), P_Y.dot(Y_train))
        else:
            W = fit_linear_model(P_X.dot(X_train), P_Y.dot(Y_train))
        # W = fit_linear_model(P_X.dot(X_train), P_Y.dot(Y_train))
        Y_pred = W.dot(P_X.dot(X_test))
        Y_pred_homog = P_Y_dag.dot(Y_pred)
        Y_test_reg = P_Y.dot(Y_test)
        resid_reg = Y_pred - Y_test_reg  # regional matrix
        resid_homog = Y_pred_homog - Y_test  # voxel-homogeneous matrix
        err_reg[i] = err_fun(resid_reg)
        if Omega is not None:
            err_homog[i] = err_fun(proj_Omega(resid_homog, Omega_test))
        else:
            err_homog[i] = err_fun(resid_homog)
        if rel_type == 1:
            rel_err_reg[i] = err_reg[i] / err_fun(Y_test_reg)
            rel_err_homog[i] = err_homog[i] / err_fun(Y_test)
            GOF_reg[i] = err_fun(W.dot(P_X.dot(X_train))-P_Y.dot(Y_train))/\
              err_fun(P_Y.dot(Y_train))
            GOF_homog[i] = err_fun(P_Y_dag.dot(W.dot(P_X.dot(X_train)))-Y_train)/\
              err_fun(Y_train)
        elif rel_type == 2:
            rel_err_reg[i] = \
              2*err_reg[i] / (err_fun(Y_test_reg) + err_fun(Y_pred))
            GOF_reg[i] = 2*err_fun(W.dot(P_X.dot(X_train))-P_Y.dot(Y_train))/\
              (err_fun(P_Y.dot(Y_train))+err_fun(W.dot(P_X.dot(X_train))))
            if (Omega is not None) and proj_errors:  # proj_errors: flag assumed defined at module scope
                rel_err_homog[i] = 2*err_homog[i] / \
                  (err_fun(proj_Omega(Y_test, Omega_test))+
                   err_fun(proj_Omega(Y_pred_homog, Omega_test)))
                GOF_homog[i] = \
                  2*err_fun(P_Y_dag.dot(W.dot(P_X.dot(X_train)))-Y_train) / \
                  (err_fun(proj_Omega(Y_train, Omega_train)) +
                   err_fun(proj_Omega(P_Y_dag.dot(W.dot(P_X.dot(X_train))),
                                      Omega_train)))
            else:
                rel_err_homog[i] = 2*err_homog[i]/\
                  (err_fun(Y_test) + err_fun(Y_pred_homog))
                GOF_homog[i] = \
                  2*err_fun(P_Y_dag.dot(W.dot(P_X.dot(X_train)))-Y_train)/\
                  (err_fun(Y_train) +
                   err_fun(P_Y_dag.dot(W.dot(P_X.dot(X_train)))))
        #if i == 2:
        #    import pdb
        #    pdb.set_trace()
    return (err_reg, err_homog, rel_err_reg, rel_err_homog, GOF_reg, GOF_homog)
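Note that LeaveOneOut here splits injections, i.e. columns of X and Y, not rows. The modern splitter takes data rather than a count, so the equivalent column-wise split would read (a sketch; old CV objects support len(), modern ones use get_n_splits()):

import numpy as np
from sklearn.model_selection import LeaveOneOut

n_inj = X.shape[1]
for train, test in LeaveOneOut().split(np.arange(n_inj)):
    X_train, X_test = X[:, train], X[:, test]
    Y_train, Y_test = Y[:, train], Y[:, test]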
Example #23
def compute_acc_conf(x,
                     y,
                     confounds,
                     verbose=False,
                     balanced=True,
                     loo=False,
                     optimize=True,
                     C=.01):
    encoder = preprocessing.LabelEncoder()
    encoder.fit(y)

    # remove intra matrix mean and var
    #x = ts.normalize_data(x)
    #cv = cross_validation.KFold(len(y),n_folds=10)
    if loo:
        cv = cross_validation.LeaveOneOut(len(y))
    else:
        cv = StratifiedKFold(y=encoder.transform(y), n_folds=10)

    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    all_tpr = []

    total_test_score = []
    y_pred = []
    #clf_array = []
    bc_all = []

    prec = []
    recall = []

    if len(np.unique(y)) == 1:
        print('Unique class: 100%', np.sum(encoder.transform(y) == 0) / len(y))
        return (1., 0., len(y))

    for i, (train, test) in enumerate(cv):

        select_x = x.copy()

        #betacluster = bc.BetaCluster(crm.transform(confounds[train,:],select_x[train,:]),encoder.transform(y[train]),100,k_feature=200)
        #bc_all.append(betacluster)

        if balanced:
            clf = SVC(kernel='linear', class_weight='auto', C=C)
        else:
            clf = SVC(kernel='linear', C=C)

        if len(confounds) == 0:
            xtrain = select_x[train, :]
            xtest = select_x[test, :]
        else:
            crm = ConfoundsRm(confounds[train, :], select_x[train, :])
            xtrain = crm.transform(confounds[train, :], select_x[train, :])
            xtest = crm.transform(confounds[test, :], select_x[test, :])

        ytrain = encoder.transform(y[train])
        ytest = encoder.transform(y[test])

        #clf.probability = True
        if optimize:
            clf, score = plib.grid_search(clf,
                                          xtrain,
                                          ytrain,
                                          n_folds=10,
                                          verbose=verbose)

        clf.fit(xtrain, ytrain)
        total_test_score.append(clf.score(xtest, ytest))
        #clf_array.append(clf)

        prec.append(metrics.precision_score(ytest, clf.predict(xtest)))
        recall.append(metrics.recall_score(ytest, clf.predict(xtest)))

        if loo:
            y_pred.append(clf.predict(xtest))
        if verbose:
            print('nSupport: ', clf.n_support_)
            print "Train:", clf.score(xtrain, ytrain)
            print "Test :", clf.score(xtest, ytest)
            print "Prediction :", clf.predict(xtest)
            print "Real Labels:", ytest
            print('Precision:', prec[-1], 'Recall:', recall[-1])

    if loo:
        total_std_test_score = estimate_std(
            metrics.accuracy_score(encoder.transform(y), np.array(y_pred)),
            len(y))
        print('Mean:', np.mean(total_test_score), 'Std:', total_std_test_score,
              'AvgPrecision:', np.mean(prec), 'AvgRecall:', np.mean(recall))
        return (np.mean(total_test_score), total_std_test_score, len(y))
    else:
        print('Mean:', np.mean(total_test_score), 'Std:',
              np.std(total_test_score), 'AvgPrecision:', np.mean(prec),
              'AvgRecall:', np.mean(recall))
        return (np.mean(total_test_score), np.std(total_test_score))
Example #24
def First_Model_SVR(Scaled_Input_Data, Output_Data):
    T0 = time.time()
    n = len(Scaled_Input_Data)
    Grid_Dict = {"C": [1e-2, 1e-1,1e0, 1e1, 1e2],"gamma": np.logspace(-4, 2, 6)}
    svr_Tuned = GridSearchCV(SVR(kernel='rbf', gamma=0.1, tol = 0.005), cv=5,param_grid=Grid_Dict, scoring="mean_absolute_error")
    svr_Tuned.fit(Scaled_Input_Data, Output_Data)
    SVR_MSE = SVR(kernel='rbf', C=svr_Tuned.best_params_['C'], gamma=svr_Tuned.best_params_['gamma'], tol = 0.01)
    SVR_Time = time.time() - T0
    print('The computational time of Radial based Support Vector Regression for ', n, ' examples is: ', SVR_Time)
    # scoring="mean_absolute_error" returns negated MAE values in this sklearn era
    MAEs_SVR = cross_validation.cross_val_score(SVR_MSE, Scaled_Input_Data, Output_Data, cv=cross_validation.LeaveOneOut(n), scoring="mean_absolute_error")
    MeanMAE_SVR = np.mean(list(MAEs_SVR))
    print('The average MAE of Radial based Support Vector Regression for ', n, ' examples is: ', (-1*MeanMAE_SVR))
    return(MeanMAE_SVR, svr_Tuned)
Example #25
def tenFoldCV_onChicagoCrimeData(features=['corina'],
                                 CVmethod='10Fold',
                                 P=10,
                                 NUM_ITER=20,
                                 SHUFFLE=True):
    """
    Use different years' data to train the NB model.
    """
    YEARS = range(2003, 2014)

    Y = []
    C = []
    FL = []
    GL = []
    T = []
    for year in YEARS:
        W = generate_transition_SocialLag(year, lehd_type=0)
        Yhat = retrieve_crime_count(year - 1, ['total'])
        y = retrieve_crime_count(year, ['total'])
        c = generate_corina_features()
        popul = c[1][:, 0].reshape((77, 1))

        # crime count is normalized by the total population as crime rate
        # here we use the crime count per 10 thousand residents
        y = np.divide(y, popul) * 10000
        Yhat = np.divide(Yhat, popul) * 10000

        W2 = generate_geographical_SpatialLag_ca()

        f1 = np.dot(W, Yhat)
        f2 = np.dot(W2, Yhat)

        FL.append(f1)
        GL.append(f2)
        Y.append(y)
        T.append(Yhat)
        C.append(c[1])

    Y = np.concatenate(Y, axis=0)
    columnName = ['intercept']
    f = np.ones(Y.shape)
    if 'corina' in features:
        C = np.concatenate(C, axis=0)
        f = np.concatenate((f, C), axis=1)
        columnName += c[0]
    if 'sociallag' in features:
        FL = np.concatenate(FL, axis=0)
        f = np.concatenate((f, FL), axis=1)
        columnName += ['sociallag']
    if 'spatiallag' in features:
        GL = np.concatenate(GL, axis=0)
        f = np.concatenate((f, GL), axis=1)
        columnName += ['spatiallag']
    if 'temporallag' in features:
        T = np.concatenate(T, axis=0)
        f = np.concatenate((f, T), axis=1)
        columnName += ['temporallag']

    if SHUFFLE:
        f, Y = shuffle(f, Y)

    if CVmethod == '10Fold':
        splt = cross_validation.KFold(n=f.shape[0], n_folds=10, shuffle=True)
    elif CVmethod == 'leaveOneOut':
        splt = cross_validation.LeaveOneOut(n=f.shape[0])
    elif CVmethod == 'leavePOut':
        splt = cross_validation.LeavePOut(n=f.shape[0], p=P)

    mae1 = []
    mae2 = []
    mre1 = []
    mre2 = []
    sd_mae1 = []
    sd_mae2 = []
    sd_mre1 = []
    sd_mre2 = []
    med_mae1 = []
    med_mae2 = []
    med_mre1 = []
    med_mre2 = []
    cnt = 0

    if CVmethod == 'leaveOneOut':
        y_gnd = []
        y_lr = []

    for train_idx, test_idx in splt:
        cnt += 1
        if cnt > NUM_ITER:
            break
        f_train, f_test = f[train_idx, :], f[test_idx, :]
        Y_train, Y_test = Y[train_idx, :], Y[test_idx, :]

        # write file for invoking NB regression in R
        np.savetxt("Y_train.csv", Y_train, delimiter=",")
        np.savetxt("Y_test.csv", Y_test, delimiter=",")
        pd.DataFrame(f_train, columns=columnName).to_csv("f_train.csv",
                                                         sep=",",
                                                         index=False)
        pd.DataFrame(f_test, columns=columnName).to_csv("f_test.csv",
                                                        sep=",",
                                                        index=False)

        # NB regression
        nbres = subprocess.check_output(['Rscript',
                                         'nbr_eval_kfold.R']).split(" ")
        y1 = np.array([float(e) for e in nbres])
        y1 = y1.reshape((y1.shape[0], 1))
        a = np.abs(Y_test - y1)

        mae1.append(np.mean(a))
        sd_mae1.append(np.std(a))
        med_mae1 += a.tolist()
        r = a / Y_test
        mre1.append(np.mean(r))
        sd_mre1.append(np.std(r))
        med_mre1 += r.tolist()

        # Linear regression
        r2 = linearRegression(f_train, Y_train)
        y2 = r2.predict(f_test)
        y2 = y2.reshape((y2.shape[0], 1))
        ae = np.abs(Y_test - y2)
        mae2.append(np.mean(ae))
        sd_mae2.append(np.std(ae))
        med_mae2 += ae.tolist()
        re = ae / Y_test
        mre2.append(np.mean(re))
        sd_mre2.append(np.std(re))
        med_mre2 += re.tolist()

        if CVmethod == 'leaveOneOut':
            y_gnd.append(Y_test)
            y_lr.append(y2)

    if CVmethod == 'leaveOneOut':
        print(np.mean(mae1), np.median(mae1), np.mean(mre1), np.median(mre1),
              np.mean(mae2), np.median(mae2), np.mean(mre2), np.median(mre2))
        return y_gnd, y_lr
    else:
        print(np.mean(mae1), np.mean(sd_mae1), np.median(med_mae1),
              np.mean(mre1), np.mean(sd_mre1), np.median(med_mre1),
              np.mean(mae2), np.mean(sd_mae2), np.median(med_mae2),
              np.mean(mre2), np.mean(sd_mre2), np.median(med_mre2))

    return mae1, mae2
Example #26
# exercise 7.1.2

from pylab import *
from scipy.io import loadmat
from sklearn.neighbors import KNeighborsClassifier
from sklearn import cross_validation

# requires data from exercise 4.1.1
from ex4_1_1 import *

# Maximum number of neighbors
L=40

CV = cross_validation.LeaveOneOut(N)
errors = np.zeros((N,L))
for i, (train_index, test_index) in enumerate(CV):
    print('Crossvalidation fold: {0}/{1}'.format(i+1,N))

    # extract training and test set for current CV fold
    X_train = X[train_index,:]
    y_train = y[train_index,:]
    X_test = X[test_index,:]
    y_test = y[test_index,:]

    # Fit classifier and classify the test points (consider 1 to 40 neighbors)
    for l in range(1,L+1):
        knclassifier = KNeighborsClassifier(n_neighbors=l)
        knclassifier.fit(X_train, ravel(y_train))
        y_est = knclassifier.predict(X_test)
        errors[i,l-1] = np.sum(y_est[0]!=y_test[0,0])
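One way to finish the exercise (a sketch using the errors matrix and N defined above): aggregate the per-fold 0/1 errors into an error rate for each k and pick the minimizer.

error_rate = errors.sum(axis=0) / N      # fraction of misclassified LOO folds per k
best_k = int(error_rate.argmin()) + 1    # column l-1 holds the k = l result
print('Best k: {0} (error rate {1:.3f})'.format(best_k, error_rate.min()))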
Example #27
valid_param_vals = []
print "Checking for errors for classifier %s"  % (classifier_to_print)
print "Params: %s" % (classifier_varying_params)
for param_val in classifier_varying_params[key]:
    try:
        alt_classifier = base.clone(classifier)
        alt_classifier = alt_classifier.set_params(**{key:param_val})
        alt_classifier.fit(population, training_labels)
        valid_param_vals.append(param_val)
    except ValueError as e:
        info_fh.write("The parameter %s:%s for SVM %s for antigen %s errored: '%s'\n" % (key, param_val, classifier_to_print, antigen_type, e))
if len(valid_param_vals) > 0:
    try:
        print "Running grid for classifier %s"  % (classifier_to_print)
        print "Params: %s" % (classifier_varying_params)
        cv = cross_validation.LeaveOneOut(len(training_labels))
        grid = grid_search.GridSearchCV(classifier, {key:valid_param_vals}, cv=cv, refit=False, verbose=3)
        t0 = time.time()
        grid.fit(population, training_labels)
        t1 = time.time()
        time_logging_fh.write('Fitting grid with SVM %s for antigen %s took %f seconds\n' % (classifier_to_print, antigen_type, t1-t0))
        print "The best parameters for antigen %s, SVM %s are:"  % (antigen_type, classifier_to_print)
        print "%s with a score of %f" % (grid.best_params_, grid.best_score_)
        print "Grid scores:"
        for thing in grid.grid_scores_:
            print thing
        info_fh.write("The best parameters for SVM %s for antigen %s are %s with a score of %f\n" % (classifier_to_print, antigen_type, grid.best_params_, grid.best_score_))
        info_fh.write("Grid scores:\n")
        info_fh.write("%s\n" % (grid.grid_scores_))
        plt.figure()
        plt.errorbar(
Example #28
import pandas
from sklearn import cross_validation
from sklearn.linear_model import LogisticRegression
url = "https://goo.gl/vhm1eU"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
num_folds = 10
num_instances = len(X)
loocv = cross_validation.LeaveOneOut(n=num_instances)
model = LogisticRegression()
results = cross_validation.cross_val_score(model, X, Y, cv=loocv)
print("Accuracy: %.3f%% (%.3f%%)") % (results.mean()*100.0, results.std()*100.0)# -*- coding: utf-8 -*-

Example #29
def benchmark(df):

    predictors = [
        "LinearRegression", "Lasso", "AdaBoostRegressor",
        "RandomForestRegressor", "DecisionTreeRegressor"
    ]
    predictorsMapper = {
        'LinearRegression': linear_model.LinearRegression(),
        'Lasso': linear_model.Lasso(alpha=0.1, max_iter=1000),
        'AdaBoostRegressor': ensemble.AdaBoostRegressor(),
        'RandomForestRegressor': ensemble.RandomForestRegressor(),
        'DecisionTreeRegressor': tree.DecisionTreeRegressor()
    }

    #Separate data by operators
    sumData = df[(df.Operator == 1)]
    sumTarget = sumData.Time
    sumData = sumData.drop(sumData.columns[[1, 3]], axis=1)

    subData = df[(df.Operator == 2)]
    subTarget = subData.Time
    subData = subData.drop(subData.columns[[1, 3]], axis=1)

    mulData = df[(df.Operator == 3)]
    mulTarget = mulData.Time
    mulData = mulData.drop(mulData.columns[[1, 3]], axis=1)

    divData = df[(df.Operator == 4)]
    divTarget = divData.Time
    divData = divData.drop(divData.columns[[1, 3]], axis=1)

    sumLoo = cross_validation.LeaveOneOut(len(sumTarget))
    subLoo = cross_validation.LeaveOneOut(len(subTarget))
    mulLoo = cross_validation.LeaveOneOut(len(mulTarget))
    divLoo = cross_validation.LeaveOneOut(len(divTarget))

    for p in predictors:
        print("Benchmarking " + p + "...")
        scoreTotal = 0
        sumRegr = predictorsMapper.get(p, False)
        subRegr = predictorsMapper.get(p, False)
        mulRegr = predictorsMapper.get(p, False)
        divRegr = predictorsMapper.get(p, False)
        scoreSum = abs(
            cross_validation.cross_val_score(sumRegr,
                                             sumData,
                                             sumTarget,
                                             scoring='mean_squared_error',
                                             cv=sumLoo).mean())
        scoreSub = abs(
            cross_validation.cross_val_score(subRegr,
                                             subData,
                                             subTarget,
                                             scoring='mean_squared_error',
                                             cv=subLoo).mean())
        scoreMul = abs(
            cross_validation.cross_val_score(mulRegr,
                                             mulData,
                                             mulTarget,
                                             scoring='mean_squared_error',
                                             cv=mulLoo).mean())
        scoreDiv = abs(
            cross_validation.cross_val_score(divRegr,
                                             divData,
                                             divTarget,
                                             scoring='mean_squared_error',
                                             cv=divLoo).mean())
        scoreTotal = scoreSum + scoreSub + scoreMul + scoreDiv
        print("Mean Squared Error (by operator):")
        print("\tSum regressor: " + str(scoreSum))
        print("\tSubstraction regressor: " + str(scoreSub))
        print("\tMultiplication regressor: " + str(scoreMul))
        print("\tDivision regressor: " + str(scoreDiv))
        print("\tTotal: " + str(scoreTotal))
Example #30
def Second_Model_KRR(Scaled_Input_Data, Output_Data):
    T0 = time.time()
    n = len(Scaled_Input_Data)
    Grid_Dict = {"alpha": [1e0, 1e-1, 1e-2],"gamma": np.logspace(-2, 1, 3)}
    krr_Tuned = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1), cv=5 ,param_grid=Grid_Dict, scoring="mean_absolute_error")
    krr_Tuned.fit(Scaled_Input_Data, Output_Data)
    KRR_MSE = KernelRidge(kernel='rbf', alpha=krr_Tuned.best_params_['alpha'], gamma=krr_Tuned.best_params_['gamma'])
    KRR_Time = time.time() - T0
    print('The computational time of Kernel Ridge Regression for ', n, ' examples is: ', KRR_Time)
    # scoring="mean_absolute_error" returns negated MAE values in this sklearn era
    MAEs_KRR = cross_validation.cross_val_score(KRR_MSE, Scaled_Input_Data, Output_Data, cv=cross_validation.LeaveOneOut(n), scoring="mean_absolute_error")
    MeanMAE_KRR = np.mean(list(MAEs_KRR))
    print('The average MAE of Kernel Ridge Regression for ', n, ' examples is: ', (-1*MeanMAE_KRR))
    return(MeanMAE_KRR, krr_Tuned)