def nestedCVSVM(features, labels, classifier_name, normal=None, plot=True, rbf=False):
    """Nested leave-one-out cross-validation of an SVM, plotting the outer ROC.

    For each outer leave-one-out fold, every candidate ``C`` is scored by the
    ROC AUC of the decision values pooled over an inner leave-one-out loop on
    the outer training samples.  The best ``C`` is refitted on the whole outer
    training set and used to score the held-out sample.  The pooled outer
    decision values are turned into a ROC curve and handed to ``plotROC``.

    Parameters
    ----------
    features : array
        Sample matrix, indexed with integer index arrays.
    labels : array
        Class labels in the same order as ``features``.
    classifier_name, plot
        Forwarded unchanged to ``plotROC``.
    normal : {None, "log", "scaled"}
        ``"log"`` applies ``numpy.log``; ``"scaled"`` applies a
        ``StandardScaler`` fitted on all samples; any other value leaves the
        features untouched.
    rbf : bool
        When equal to ``True`` an RBF ``svm.SVC`` is used, otherwise an
        L1-penalised ``svm.LinearSVC``.

    Returns
    -------
    numpy.ndarray
        The ``C`` value selected in each outer fold.
    """
    use_rbf = rbf == True  # noqa: E712 - equality (not truthiness) kept on purpose

    def make_model(c_value):
        # Build the estimator requested by the ``rbf`` flag.
        if use_rbf:
            return svm.SVC(kernel='rbf', C=c_value)
        return svm.LinearSVC(penalty="l1", dual=False, C=c_value)

    n_samples = len(labels)
    outer_pool = numpy.zeros((n_samples, 2))  # columns: true label, decision value
    Cs = numpy.zeros(n_samples)

    if normal == "log":
        normal_features = numpy.log(features)
    elif normal == "scaled":
        normal_features = preprocessing.StandardScaler().fit_transform(features)
    else:
        normal_features = features

    # Candidate grid: 0.01 .. 0.99 in steps of 0.01, then 1 .. 100 in steps of 1.
    candidate_cs = numpy.concatenate(
        (numpy.arange(0.01, 1.0, 0.01), numpy.arange(1, 101, 1)))

    for i, (train_outer, test_outer) in enumerate(LeaveOneOut(n_samples)):
        x_outer = normal_features[train_outer]
        y_outer = labels[train_outer]
        n_inner = len(y_outer)

        # Inner loop: which C gives the best pooled AUC on the training part?
        best_auc = 0
        best_c = 0
        for candidate in candidate_cs:
            c_value = float(candidate)
            inner_pool = numpy.zeros((n_inner, 2))
            for j, (train_inner, test_inner) in enumerate(LeaveOneOut(n_inner)):
                model = make_model(c_value)
                model.fit(x_outer[train_inner], y_outer[train_inner])
                decision = model.decision_function(x_outer[test_inner])
                inner_pool[j, 0] = y_outer[test_inner]
                inner_pool[j, 1] = decision[0]
            fpr, tpr, _ = roc_curve(inner_pool[:, 0], inner_pool[:, 1])
            inner_auc = auc(fpr, tpr)
            if inner_auc > best_auc:
                best_auc = inner_auc
                best_c = c_value

        print("C chosen for " + str(i) + ": " + str(best_c))
        Cs[i] = best_c

        # Refit with the winning C and score the single held-out sample.
        final_model = make_model(best_c)
        final_model.fit(x_outer, y_outer)
        outer_decision = final_model.decision_function(normal_features[test_outer])
        outer_pool[i, 0] = labels[test_outer]
        outer_pool[i, 1] = outer_decision[0]

    fpr, tpr, _ = roc_curve(outer_pool[:, 0], outer_pool[:, 1])
    roc_auc = auc(fpr, tpr)
    plotROC(fpr, tpr, roc_auc, classifier_name, plot)
    return Cs
def _leave_chunk_out(size, n_chunks):
    """Return ``(train, test)`` index pairs leaving out one contiguous chunk each.

    ``range(size)`` is cut into ``n_chunks`` consecutive pieces with
    ``np.array_split``; each piece is used once as the test set and all
    remaining indices form the matching training set.
    """
    all_idx = np.array(range(size))
    return [(np.setdiff1d(all_idx, test), test)
            for test in np.array_split(all_idx, n_chunks)]


def split_data(size, cv_model_name):
    """Build the cross-validation splitter selected by ``cv_model_name``.

    Parameters
    ----------
    size : int
        Number of samples to split.
    cv_model_name : str
        One of ``'loo'``, ``'kfold'`` (10 folds), ``'lpo'`` (p = 2),
        ``'LeaveOneClipOutForEachSubject'``, ``'LeaveOneClipOutForAllSubject'``
        or ``'LeaveOneSubjOut'``.

    Returns
    -------
    A ``sklearn.cross_validation`` iterator for the first three names, or a
    list of ``(train_indices, test_indices)`` array pairs for the last three.

    Raises
    ------
    ValueError
        If ``cv_model_name`` is not one of the names above (previously this
        surfaced as an ``UnboundLocalError`` at the ``return``).
    """
    # http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.LeaveOneOut.html#sklearn.cross_validation.LeaveOneOut
    if cv_model_name == 'loo':
        from sklearn.cross_validation import LeaveOneOut
        return LeaveOneOut(size)

    # http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.KFold.html#sklearn.cross_validation.KFold
    if cv_model_name == 'kfold':
        from sklearn.cross_validation import KFold
        return KFold(size, 10)

    # http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.LeavePOut.html#sklearn.cross_validation.LeavePOut
    if cv_model_name == 'lpo':
        from sklearn.cross_validation import LeavePOut
        return LeavePOut(size, 2)

    # The following three schemes leave out contiguous index chunks.

    # Model *for each subject*: leave one clip out (all its sub-segments).
    # Original author's note: 18.
    if cv_model_name == 'LeaveOneClipOutForEachSubject':
        return _leave_chunk_out(size, NUM_CLIPS)

    # EASIEST: model *over all subjects*, leave one clip out (all its
    # sub-segments).  Original author's note: 468.
    if cv_model_name == 'LeaveOneClipOutForAllSubject':
        return _leave_chunk_out(
            size, NUM_CLIPS * len(dictionaries.SUBJECTS_IDS))

    # Model *over all subjects*, leave one subject out.  Original author's
    # note: 26, assuming the number of clips is 18.
    if cv_model_name == 'LeaveOneSubjOut':
        return _leave_chunk_out(size, len(dictionaries.SUBJECTS_IDS))

    raise ValueError(
        "unknown cross-validation scheme: {!r}".format(cv_model_name))
def fit(self):
    """Leave-one-out evaluation of a grid-searched SVC on ``Datasetnew.csv``.

    The CSV is read without a header, NaNs are replaced via
    ``np.nan_to_num``, column 0 is taken as the label and columns 1..64 as
    features (normalised with ``preprocessing.normalize``).  For every
    leave-one-out fold a fresh ``GridSearchCV`` over ``parameter_candidates``
    is fitted and the single held-out prediction is tallied.  The three
    tallies are printed; nothing is returned.
    """
    frame = pd.read_csv('Datasetnew.csv', header=None)
    dataset = np.nan_to_num(np.asarray(frame))
    y = dataset[:, 0]
    X = preprocessing.normalize(dataset[:, 1:65])

    hits_pos = 0   # predicted 1, truth 1
    hits_neg = 0   # predicted 0, truth 0
    misses = 0     # everything else

    for train_idx, test_idx in LeaveOneOut(len(y)):
        held_out = y[test_idx]
        search = GridSearchCV(estimator=SVC(),
                              param_grid=parameter_candidates,
                              n_jobs=-1)
        search.fit(X[train_idx], y[train_idx])
        guess = search.predict(X[test_idx])
        # Result is not used below; the call is kept from the original.
        cnf_matrix_mnb = confusion_matrix(held_out, guess)
        if guess == 1 and held_out == 1:
            hits_pos += 1
        elif guess == 0 and held_out == 0:
            hits_neg += 1
        else:
            misses += 1

    print()
    print("correct_1 %s" % hits_pos)
    print("correct_0 %s" % hits_neg)
    print("wrong %s" % misses)
def get_cv_method(targets, cvmethod='10', stratified=True):
    """Create a cross-validation iterator.

    Parameters
    ----------
    targets : sequence
        Class labels, in the same order as the samples.
    cvmethod : int or str
        A fold count (an ``int`` or a string of digits) for a K-fold scheme,
        or ``'loo'`` for leave-one-out.
    stratified : bool
        Whether a numeric ``cvmethod`` yields ``StratifiedKFold`` (true) or
        plain ``KFold`` (false).

    Returns
    -------
    ``StratifiedKFold`` / ``KFold`` for a numeric ``cvmethod``,
    ``LeaveOneOut`` for ``'loo'``.  Any other value falls through to
    ``StratifiedKFold(targets, int(cvmethod))``.
    """
    n = len(targets)

    # Resolve a fold count when cvmethod is an int or an all-digit string.
    n_folds = None
    if isinstance(cvmethod, int):
        n_folds = cvmethod
    elif isinstance(cvmethod, str) and cvmethod.isdigit():
        n_folds = int(cvmethod)

    if n_folds is not None:
        if stratified:
            return StratifiedKFold(targets, n_folds)
        return KFold(n, n_folds)

    if cvmethod == 'loo':
        return LeaveOneOut(n)

    # Last resort for anything else that ``int`` can still convert.
    return StratifiedKFold(targets, int(cvmethod))
def update_estimation(self):
    """Update the estimator with each app's usage, one held-out app at a time.

    For every ``(apps, usage)`` pair reported by ``self.cluster.apps_usage()``
    that has at least one app and a non-idle usage, each app in turn is passed
    to ``self.estimation.update_app`` together with the list of the other apps
    (in their original order) and ``usage.rate()``.  Finally, when
    ``self.print_estimation`` is true, ``self.estimation.print()`` is called.

    The leave-one-out enumeration is done in plain Python: the previous
    ``LeaveOneOut(len(apps))`` call depended on the removed
    ``sklearn.cross_validation`` API merely to produce the index pairs
    ``(all but i, [i])`` for ``i = 0 .. n-1``.
    """
    for apps, usage in self.cluster.apps_usage():
        if len(apps) > 0 and usage.is_not_idle():
            for held_out, app in enumerate(apps):
                others = [other for pos, other in enumerate(apps)
                          if pos != held_out]
                self.estimation.update_app(app, others, usage.rate())
    if self.print_estimation:
        self.estimation.print()
def evalDatasets(trainSets):
    """Score a decision tree by leave-one-out on every data set in ``evalSets``.

    For each entry the data are parsed with ``parseAndBinarize``, rule-based
    scores are obtained from ``rulePredScores``, and a
    ``DecisionTreeClassifier(min_samples_leaf=4, max_depth=4)`` is evaluated
    with leave-one-out cross-validation.  One tab-separated row
    (name, rule scores, classifier recall/precision/F1) is printed per data
    set, and ``("DecTree_" + name, clf)`` is appended to ``classifiers``,
    where ``clf`` is the tree fitted in the *last* fold.

    NOTE(review): the ``trainSets`` parameter is never read; the loop iterates
    ``evalSets`` and appends to ``classifiers``, neither of which is defined
    in this function -- presumably module-level names; confirm.

    Returns
    -------
    The ``classifiers`` list.
    """
    for inData in evalSets:
        seqs, vecs, labels = parseAndBinarize(inData)
        ggRec, ggPrec, ggF1, gcRec, gcPrec, gcF1 = rulePredScores(vecs, labels)

        cValPreds = []
        cValTests = []
        for train, test in LeaveOneOut(len(labels)):
            clf = tree.DecisionTreeClassifier(min_samples_leaf=4, max_depth=4)
            clf.fit(vecs[train], labels[train])
            cValPreds.append(clf.predict(vecs[test])[0])
            cValTests.append(labels[test][0])

        clfRec, clfPrec, clfF1 = scorePreds(cValTests, cValPreds)

        scores = [ggRec, ggPrec, gcRec, gcPrec, clfRec, clfPrec, clfF1]
        row = [inData] + ["%0.2f" % x for x in scores]
        # Single-argument call form: same output on Python 2 and Python 3.
        print("\t".join(row))

        classifiers.append(("DecTree_" + inData, clf))
    return classifiers
def test_model(self, n_folds=10, leave_one_out=False):
    """Cross-validate the model and report the mean fold score.

    Uses ``LeaveOneOut`` over the samples when ``leave_one_out`` is true,
    otherwise ``StratifiedKFold`` with ``n_folds`` folds.  A fresh model from
    ``self.build_model()`` is fitted per fold and scored on the held-out part.

    For a cross-validation example, see:
    http://scikit-learn.org/stable/auto_examples/plot_roc_crossval.html

    Returns
    -------
    The summed fold scores divided by ``len(y)`` (leave-one-out) or by
    ``n_folds`` (stratified K-fold); the value is also printed.
    """
    log.debug("Testing model ({} folds)".format(n_folds))
    X = self.data.data
    y = self.data.target

    if leave_one_out:
        folds = LeaveOneOut(len(y))
    else:
        folds = StratifiedKFold(y, n_folds=n_folds)

    total = 0.0
    for train_idx, test_idx in folds:
        fitted = self.build_model().fit(X[train_idx], y[train_idx])
        total += fitted.score(X[test_idx], y[test_idx])

    divisor = len(y) if leave_one_out else n_folds
    avg_score = total / divisor
    print("Average score: {}".format(avg_score))
    return avg_score
def loo(X, labels):
    """Leave-one-out evaluation of a linear SVC, saved as a confusion-matrix plot.

    Labels are integer-encoded with ``LabelEncoder``; one ``SVC(kernel='linear')``
    instance is refitted for every leave-one-out fold and its prediction for
    the held-out sample is collected.  The row-normalised confusion matrix is
    drawn with ``plot_confusion_matrix``, titled with the overall accuracy and
    written to ``../figures/conf_matrix.pdf``.  Nothing is returned.

    ``plt`` is used for all figure calls; the original mixed ``plt`` with
    ``sns.plt``, an alias for ``matplotlib.pyplot`` that newer seaborn
    releases no longer provide.
    """
    label_encoder = LabelEncoder()
    int_labels = label_encoder.fit_transform(labels)
    print(int_labels)

    clf = SVC(kernel='linear')
    silver, gold = [], []  # predicted / true encoded labels
    for train, test in LeaveOneOut(X.shape[0]):
        print('.')
        clf.fit(X[train], [int_labels[i] for i in train])
        silver.append(clf.predict(X[test])[0])
        gold.append(int_labels[test[0]])

    info = 'Accuracy after SVC-LOO:' + str(accuracy_score(silver, gold))

    # Confusion matrix on the original (decoded) label names.
    plt.clf()
    T = label_encoder.inverse_transform(gold)
    P = label_encoder.inverse_transform(silver)
    cm = confusion_matrix(T, P, labels=label_encoder.classes_)
    # Normalise each row (true class) to sum to one.
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    np.set_printoptions(precision=2)
    plt.figure()
    plot_confusion_matrix(cm_normalized, target_names=label_encoder.classes_)
    plt.title(info)
    plt.savefig('../figures/conf_matrix.pdf')
def leaveOneOut_error(Y, X):
    """Leave-one-out error of a negative-binomial GLM (statsmodels).

    A constant column is appended to ``X``; for every leave-one-out fold a
    ``sm.GLM`` with ``NegativeBinomial`` family is fitted on the training rows
    and the absolute error on the held-out row is recorded.  The mean training
    error and mean relative training error (averaged over folds) are printed.

    Parameters
    ----------
    Y : array
        Observed targets, indexed with integer index arrays.
    X : array
        Feature matrix with one row per entry of ``Y``.

    Returns
    -------
    tuple of four floats
        ``(mean absolute error, mean of Y, mean of per-sample error / Y,
        mean absolute error / mean of Y)``.

    Notes
    -----
    The third value used to be computed as ``np.mean(errors / Y)`` with
    ``errors`` a list of length-1 arrays; NumPy broadcast that to an
    ``(n, n)`` matrix of every error divided by every target.  The errors are
    now flattened so that each error is divided by its own target.  This
    relies on ``LeaveOneOut`` yielding test indices in order ``0 .. n-1``.
    """
    X = sm.add_constant(X, prepend=False)
    n_samples = len(Y)

    errors = []
    errs_train = np.zeros(2)  # running sums: [abs error, relative error]
    for train_idx, test_idx in LeaveOneOut(n_samples):
        # Train the NegativeBinomial model on everything but the held-out row.
        glm = sm.GLM(Y[train_idx], X[train_idx],
                     family=sm.families.NegativeBinomial())
        nbm = glm.fit()

        er_train = np.mean(np.abs(nbm.predict(X[train_idx]) - Y[train_idx]))
        errs_train += er_train, er_train / np.mean(Y[train_idx])

        errors.append(np.abs(nbm.predict(X[test_idx]) - Y[test_idx]))

    # Single-argument call form: same output on Python 2 and Python 3.
    print(errs_train / n_samples)

    errors = np.ravel(errors)
    relative_errors = errors / np.ravel(Y)
    return (np.mean(errors), np.mean(Y), np.mean(relative_errors),
            np.mean(errors) / np.mean(Y))
def loo_regressions(xs, ys, ft, dt, mt):
    """Leave-one-out ElasticNet regressions of every column of ``ys`` on ``xs``.

    ``xs`` is restricted to the columns of ``ys``, columns containing NaN are
    dropped, and both frames are transposed so that rows are the shared
    columns.  For each target column an ``ElasticNet(alpha=0.01)`` is fitted
    per leave-one-out fold; the held-out prediction and the fitted
    coefficients are stored.  Predictions are then correlated with the
    observations per target column and per row using ``pearson``.

    Parameters
    ----------
    xs, ys : DataFrame
        Predictor and response tables sharing column labels.
    ft, dt, mt
        Opaque tags copied into the printed messages and the returned tuples.

    Returns
    -------
    (list, tuple)
        The list of ``(ft, dt, name, mt, kind, correlation)`` tuples with
        ``kind`` either ``'metabolites'`` or ``'conditions'``, and
        ``(ft, dt, mt, y_betas)`` where ``y_betas`` maps each target column to
        its per-feature median coefficient over folds.
    """
    # Formatted single-argument prints reproduce the Python 2 statement output
    # ('[INFO]', ft, dt separated by single spaces) and also run on Python 3.
    print('[INFO] {} {}'.format(ft, dt))

    # Align matrices
    x = xs.loc[:, ys.columns].dropna(axis=1).T
    y = ys[x.index].T

    # Define cross-validation
    cv = LeaveOneOut(len(y))

    # Run regressions
    y_pred, y_betas = {}, {}
    for m in y:
        y_pred[m] = {}
        betas = []
        for train, test in cv:
            lm = ElasticNet(alpha=0.01).fit(x.ix[train], y.ix[train, m])
            y_pred[m][x.index[test][0]] = lm.predict(x.ix[test])[0]
            betas.append(dict(zip(x.columns, lm.coef_)))
        y_betas[m] = DataFrame(betas).median().to_dict()

    y_pred = DataFrame(y_pred).ix[y.index, y.columns]
    # The original message literal ends with a space, hence the double space.
    print('[INFO] Regression done:  {} {}'.format(ft, dt))

    # Perform correlation with predicted values
    metabolites_corr = [
        (ft, dt, f, mt, 'metabolites', pearson(y[f], y_pred[f])[0])
        for f in y_pred
    ]
    conditions_corr = [
        (ft, dt, s, mt, 'conditions', pearson(y.ix[s], y_pred.ix[s])[0])
        for s in y_pred.index
    ]

    return (metabolites_corr + conditions_corr), (ft, dt, mt, y_betas)
def run_analysis_pipeline(data, panel_size, output_file_name=None):
    """Run ``outer_cv_fold`` over every leave-one-out split in a process pool.

    All columns of ``data`` except the last are taken as features and the last
    column as labels.  Each leave-one-out ``(train, test)`` pair is mapped
    through ``outer_cv_fold`` (with features, labels, feature names and
    ``panel_size`` bound) on a pool of 10 workers.

    Parameters
    ----------
    data : DataFrame
        Feature columns followed by one label column.
    panel_size
        Forwarded to ``outer_cv_fold``.
    output_file_name : str, optional
        When given, the result list is written there with ``dump``.

    Returns
    -------
    The list of per-fold results returned by ``Pool.map``.
    """
    log.debug("Panel size %d", panel_size)

    n_rows, n_cols = data.shape[0], data.shape[1]
    n_features = n_cols - 1
    column_names = list(data)
    feature_labels = column_names[0:n_features]
    features = numpy.array(data.ix[:, :n_features])
    labels = numpy.array(data.ix[:, n_features])

    fold_worker = functools.partial(
        outer_cv_fold,
        features=features,
        labels=labels,
        feature_labels=feature_labels,
        panel_size=panel_size,
    )

    with Pool(10) as workers:
        results = workers.map(fold_worker, LeaveOneOut(n_rows))

    if output_file_name is not None:
        with open(output_file_name, 'wb') as outfile:
            dump(list(results), outfile)
        log.debug("Panel size %d results saved", panel_size)

    return results
def knn(X, Y):
    """Grid-search a PCA + k-NN pipeline with leave-one-out CV and plot accuracy.

    The grid covers 1..10 neighbours, distance weighting, and 5..30 PCA
    components in steps of 5.  After fitting, the mean accuracy of every grid
    point is drawn as a 3-D surface over (PCA components, neighbours), and the
    best parameters and score are printed.  Nothing is returned.
    """
    neighbors = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    weights = ['distance']
    n_components = [5, 10, 15, 20, 25, 30]
    parameters = [{
        'k_nn__n_neighbors': neighbors,
        'k_nn__weights': weights,
        'pca__n_components': n_components
    }]

    pipeline = Pipeline([
        ('pca', PCA()),
        ('k_nn', KNeighborsClassifier()),
    ])
    clf = GridSearchCV(pipeline, parameters, cv=LeaveOneOut(len(X)))
    clf.fit(X, Y)

    # Obtaining parameters from grid_scores_: each entry is indexed as
    # (parameter dict, mean score, ...).
    accuracy = [p[1] for p in clf.grid_scores_]
    pca_components = [p[0]['pca__n_components'] for p in clf.grid_scores_]
    knn_neighbors = [p[0]['k_nn__n_neighbors'] for p in clf.grid_scores_]

    # The grid is enumerated with parameter names sorted and the last name
    # ('pca__n_components') varying fastest, so one mesh row must hold one
    # full sweep over n_components.  The previous hard-coded (-1, 2) shape did
    # not match the 6-value sweep and scrambled the surface.
    mesh_shape = (-1, len(n_components))
    accuracy = np.reshape(accuracy, mesh_shape)
    pca_components = np.reshape(pca_components, mesh_shape)
    knn_neighbors = np.reshape(knn_neighbors, mesh_shape)

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    surf = ax.plot_surface(pca_components, knn_neighbors, accuracy,
                           rstride=1, cstride=1, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel('Number of PCA components')
    ax.set_ylabel('Number of neigbours')
    ax.set_zlabel('Accuracy')
    plt.show()

    # Single-argument call form: same output on Python 2 and Python 3.
    print("Best Parameters: {}".format(clf.best_params_))
    print("Accuracy: {}".format(clf.best_score_))
def OutlierDetector(TrainX, TrainY):
    """Keep the 80% of samples with the smallest leave-one-out regression error.

    A ``LinearRegression`` is refitted for every leave-one-out fold and the
    absolute error on the held-out sample is stored.  The samples are then
    ranked by that error and the best ``round(0.8 * n)`` are kept.

    Parameters
    ----------
    TrainX : DataFrame
        Features, indexed positionally with ``.iloc``.
    TrainY : Series or DataFrame
        Targets, indexed positionally with ``.iloc``.

    Returns
    -------
    (TrainX, TrainY)
        Both inputs restricted to the retained rows, ordered by increasing
        leave-one-out error.
    """
    Reg = linear_model.LinearRegression()
    n_samples = len(TrainY)
    MAE = np.zeros(n_samples)

    for fold, (train, test) in enumerate(LeaveOneOut(n_samples)):
        SubTrainX = TrainX.iloc[train, :]
        SubTrainY = TrainY.iloc[train]
        SubTestsX = TrainX.iloc[test, :]
        SubTestsY = TrainY.iloc[test]

        # Re-indexing: give every slice a fresh 0..k-1 index.
        SubTrainX.index = np.arange(0, len(SubTrainX))
        SubTrainY.index = np.arange(0, len(SubTrainY))
        SubTestsX.index = np.arange(0, len(SubTestsX))
        SubTestsY.index = np.arange(0, len(SubTestsY))

        Reg.fit(SubTrainX, SubTrainY)
        TestOutput = Reg.predict(SubTestsX)
        MAE[fold] = np.absolute(TestOutput - SubTestsY)

    # int(...) because round() returns a float on Python 2, which is not a
    # valid slice bound.
    n_keep = int(round(n_samples * 0.8))
    Good = MAE.argsort()[:n_keep]
    return TrainX.iloc[Good, :], TrainY.iloc[Good]
def fit(self):
    """Leave-one-out evaluation of ``self.clf`` on ``Datasetnew.csv``.

    The CSV is read without a header, NaNs are replaced via
    ``np.nan_to_num``, column 0 is taken as the label and columns 1..64 as
    features (normalised with ``preprocessing.normalize``).  ``self.clf`` is
    refitted for every leave-one-out fold and the single held-out prediction
    is tallied.  The three tallies are printed; nothing is returned.
    """
    frame = pd.read_csv('Datasetnew.csv', header=None)
    dataset = np.nan_to_num(np.asarray(frame))
    y = dataset[:, 0]
    X = preprocessing.normalize(dataset[:, 1:65])

    hits_pos = 0   # predicted 1, truth 1
    hits_neg = 0   # predicted 0, truth 0
    misses = 0     # everything else

    for train_idx, test_idx in LeaveOneOut(len(y)):
        held_out = y[test_idx]
        self.clf.fit(X[train_idx], y[train_idx])
        guess = self.clf.predict(X[test_idx])
        # Result is not used below; the call is kept from the original.
        cnf_matrix_mnb = confusion_matrix(held_out, guess)
        if guess == 1 and held_out == 1:
            hits_pos += 1
        elif guess == 0 and held_out == 0:
            hits_neg += 1
        else:
            misses += 1

    print()
    print("correct_1 %s" % hits_pos)
    print("correct_0 %s" % hits_neg)
    print("wrong %s" % misses)
def test_mvl_fuse_function(self):
    """Leave-one-out check of the multi-view linear fusion model.

    Four view models (taxi, POI, geo, demo) are trained on the training
    indices of every fold and combined with ``mvl_fuse_function``.  The fused
    model predicts the held-out sample from a leading 1 followed by the four
    view predictions.  The mean relative error (mean absolute error divided
    by the mean of ``Y``) is printed.
    """
    Y, D, P, T, G = generate_raw_samples()
    # Append a constant column to every view.
    T = sm.add_constant(T, prepend=False)
    P = sm.add_constant(P, prepend=False)
    D = sm.add_constant(D, prepend=False)
    G = sm.add_constant(G, prepend=False)

    er = []
    for train_idx, test_idx in LeaveOneOut(len(Y)):
        tm = taxi_view_model(train_idx, Y, T)
        pm = poi_view_model(train_idx, Y, P)
        gm = geo_view_model(train_idx, Y, G)
        dm = demo_view_model(train_idx, Y, D)
        models = [tm, pm, gm, dm]
        lm = mvl_fuse_function(models, train_idx, Y)

        # Element 0 of each view-model result is the object with .predict().
        tm_test = tm[0].predict(T[test_idx])
        pm_test = pm[0].predict(P[test_idx])
        gm_test = gm[0].predict(G[test_idx])
        dm_test = dm[0].predict(D[test_idx])

        newX_test = np.array([1, tm_test, pm_test, gm_test, dm_test])
        ybar = lm.predict(newX_test)
        er.append(ybar - Y[test_idx])

    mre = np.mean(np.abs(er)) / np.mean(Y)
    # Single-argument call form: same output on Python 2 and Python 3.
    print("MVL with linear fusion function MRE: {0}".format(mre))
def _train_clf(self, X, y, n_estimators=10):
    """Fit a random forest and, optionally, cross-validate it first.

    When ``self.cv_method`` is not ``None`` the classifier is scored with
    ``cross_val_score`` (``neg_log_loss``) using ``LeaveOneOut`` for ``"LOO"``,
    ``StratifiedKFold`` for ``"SKFold"``, or ``cv=None`` for any other value.
    The classifier is then fitted on all of ``X``/``y``.

    Returns
    -------
    (clf, mean_score, std_score)
        The fitted classifier plus the mean and standard deviation of the
        cross-validation scores; both are 0 when no cross-validation ran.
    """
    forest = RandomForestClassifier(n_estimators,
                                    n_jobs=self.threads,
                                    class_weight=self.class_weights)
    # Placeholder so that mean/std are defined when cross-validation is skipped.
    cv_scores = np.array([0])

    if self.cv_method is not None:
        if self.cv_method == "LOO":
            splitter = LeaveOneOut(len(y))
        elif self.cv_method == "SKFold":
            splitter = StratifiedKFold(y)
        else:
            splitter = None

        logger.info("Running cross-validation...")
        cv_scores = model_selection.cross_val_score(
            forest,
            X,
            y,
            cv=splitter,
            scoring='neg_log_loss',
            n_jobs=self.threads,
            verbose=1,
        )

    forest.fit(X, y)
    return forest, cv_scores.mean(), cv_scores.std()
def local_homography_loocv_error(theta, args):
    """Mean leave-one-out error of the local homography for ``theta``.

    ``args`` is a ``(src, tgt)`` pair.  For every leave-one-out split,
    ``local_homography_error`` is evaluated with the training points and the
    held-out point of both arrays; the mean of those errors is returned.
    """
    src, tgt = args
    fold_errors = []
    for fit_ix, held_ix in LeaveOneOut(len(src)):
        fold_errors.append(
            local_homography_error(theta, src[fit_ix], tgt[fit_ix],
                                   src[held_ix], tgt[held_ix]))
    return np.mean(fold_errors)
def calBestBandwidth(data):
    """Pick the Gaussian KDE bandwidth by leave-one-out grid search.

    One hundred bandwidths, log-spaced between 0.1 and 10, are scored with
    ``GridSearchCV`` on ``data`` reshaped to a single column.

    Returns
    -------
    The ``"bandwidth"`` entry of the best parameter set.
    """
    candidate_bandwidths = 10 ** np.linspace(-1, 1, 100)
    search = GridSearchCV(
        KernelDensity(kernel="gaussian"),
        {"bandwidth": candidate_bandwidths},
        cv=LeaveOneOut(len(data)),
    )
    search.fit(data[:, None])
    return search.best_params_.get("bandwidth")
def score(model, X, y):
    """Return the mean leave-one-out cross-validation score of ``model``.

    Scoring uses the ``scoring`` name from the enclosing scope and runs on all
    available cores (``n_jobs=-1``).
    """
    splitter = LeaveOneOut(X.shape[0])
    fold_scores = cross_val_score(model, X, y,
                                  cv=splitter,
                                  scoring=scoring,
                                  n_jobs=-1)
    return np.mean(fold_scores)
def test_KCSD2D_cross_validation_five_electrodes(self):
    """Leave-one-electrode-out selection of lambda must exceed 25."""
    # Candidates 100/2, 100/4, ..., 100/2**19.
    lambdas = np.array([100.0 / (1 << power) for power in range(1, 20)])
    electrode_count = self.k.elec_pos.shape[0]
    index_generator = LeaveOneOut(electrode_count)
    chosen = cv.choose_lambda(lambdas,
                              self.k.sampled_pots,
                              self.k.k_pot,
                              self.k.elec_pos,
                              index_generator)
    self.k.lambd = chosen
    self.assertGreater(self.k.lambd, 25.0)
def CV_determination(Y, Method):
    """Return the cross-validation iterator described by ``Method``.

    ``'loo'`` gives ``LeaveOneOut``.  Any other value must start with the fold
    count followed by a dash (everything before the first ``'-'`` is parsed as
    an integer) and gives a shuffled ``KFold`` with ``random_state=1``.

    Raises ``IndexError`` when a non-``'loo'`` ``Method`` contains no dash.
    """
    from sklearn.cross_validation import KFold, LeaveOneOut

    if Method == 'loo':
        return LeaveOneOut(len(Y))

    # Position of every dash; only the first is used.
    dash_positions = [pos for pos, char in enumerate(list(Method))
                      if char == '-']
    first_dash = dash_positions[0]
    n_folds = int(Method[:first_dash])
    return KFold(len(Y), n_folds, shuffle=True, random_state=1)
def getErrorAcrossDays(normedDays, period, phase, gamma):
    """Leave-one-day-out error of the model fitted with ``gamma``.

    For each day, the remaining days are merged with ``appendUnique``, turned
    into parameters by ``getParams`` and fitted with ``fitModel``; the
    resulting model is scored on the held-out day with ``getError``.

    Returns
    -------
    list
        One error per day, in leave-one-out iteration order.
    """
    days = array(normedDays)

    def held_out_error(train_idx, test_idx):
        # Fit on the training days, score on the single held-out day.
        training = appendUnique(days[train_idx])
        testing = days[test_idx][0]
        tExt, seriesExt = getParams(training, phase, period)
        model = fitModel(tExt, seriesExt, [gamma])[gamma]['model']
        return getError(model, testing, period, phase)

    return [held_out_error(train_idx, test_idx)
            for train_idx, test_idx in LeaveOneOut(len(days))]
def test_application(self):
    """``optimize_delta_bisection`` yields a positive delta for LOO and 5-fold."""
    from scot.var import VAR
    from sklearn.cross_validation import LeaveOneOut, KFold

    np.random.seed(42)
    x = np.random.randn(10, 3, 15)

    # Each schema receives (n, _) and returns a cross-validation iterator.
    schemas = (
        lambda n, _: LeaveOneOut(n),
        lambda n, _: KFold(n, 5),
    )
    for schema in schemas:
        var = VAR(3, xvschema=schema).optimize_delta_bisection(x)
        self.assertGreater(var.delta, 0)
def validate_each(known, model):
    """Write leave-one-out predictions into ``known`` in place.

    For every row, ``model`` is fitted on the ``XCOLS``/``YCOLS`` columns of
    all other rows and used to predict the held-out row.  The first and second
    components of that prediction are stored in the row's ``'pred_lat'`` and
    ``'pred_lon'`` columns.  Nothing is returned.
    """
    for train, test in LeaveOneOut(len(known)):
        fit_rows = known.iloc[train, :]
        held_rows = known.iloc[test, :]

        model.fit(fit_rows.loc[:, XCOLS], fit_rows.loc[:, YCOLS])
        prediction = model.predict(held_rows.loc[:, XCOLS])[0]

        held_labels = held_rows.index
        known.loc[held_labels, 'pred_lat'] = prediction[0]
        known.loc[held_labels, 'pred_lon'] = prediction[1]
def score_spatial_model(X, label, cv=None, two_level=False, null=False):
    """Give a score to a data labelling, with or without cross-validation.

    Parameters
    ==========
    X: array of shape (n_voxels, n_subjects),
       the data to be parcelled
    label: array of shape (n_voxels),
           an index array describing the parcellation
    cv: string, optional,
        evaluation scheme, one of
        (None, 'll', 'bic', 'log_lr', 'loo', 'kfold')
    two_level: bool, optional,
               whether a one- or two-level variance partition scheme is used
    null: bool, optional,
          whether the likelihood is estimated under H0 (mu=0) or not

    Returns
    =======
    score: float,
           the summed log-likelihood of the data under the parcellation
           (the summed BIC for 'bic', the log-likelihood difference between
           the non-null and null fits for 'log_lr')

    Raises
    ======
    ValueError when ``cv`` is none of the values above.
    """
    from sklearn.cross_validation import LeaveOneOut, KFold

    # In-sample criteria.
    if cv in ['bic', 'll', None]:
        ll, _, _, _, bic = parameter_map(X, label, two_level, null)
        return bic.sum() if cv == 'bic' else ll.sum()

    # Log-likelihood ratio between the non-null and null fits.
    if cv == 'log_lr':
        ll_alt, _, _, _, _ = parameter_map(X, label, two_level, False)
        ll_null, _, _, _, _ = parameter_map(X, label, two_level, True)
        return ll_alt.sum() - ll_null.sum()

    if cv not in ['loo', 'kfold']:
        raise ValueError(
            'unknown keyword from evaluation scheme (cv argument)')

    # Cross-validated log-likelihood over subjects (columns of X).
    n_subjects = X.shape[1]
    if cv == 'loo':
        folds = LeaveOneOut(n_subjects)
    else:
        folds = KFold(n_subjects, min(10, n_subjects))

    score = 0
    for k in np.unique(label):
        parcel = X[label == k]
        for train, test in folds:
            start_mu = 0 if null else None
            mu, sigma1, sigma2, _ = em_inference_regular(
                parcel[:, train], two_level=two_level, mu=start_mu)
            score += log_likelihood_regular(
                parcel[:, test], mu, sigma1, sigma2, two_level=two_level)
    return score
def loo_cv(X_train, y_train, clf):
    """Print the mean leave-one-out accuracy of ``clf`` and its standard error.

    ``clf`` is refitted on every leave-one-out training split and scored on
    the single held-out sample; labels and predictions are cast to ``int``
    before comparison.  Nothing is returned.
    """
    n_samples = X_train[:].shape[0]
    scores = np.zeros(n_samples)
    for train_index, test_index in LeaveOneOut(n_samples):
        X_train_cv, X_test_cv = X_train[train_index], X_train[test_index]
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
        clf = clf.fit(X_train_cv, y_train_cv)
        y_pred = clf.predict(X_test_cv)
        scores[test_index] = metrics.accuracy_score(
            y_test_cv.astype(int), y_pred.astype(int))
    # .format() must be applied to the string, not to print()'s return value:
    # the old ``print(...).format(...)`` only worked as a Python 2 statement.
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(np.mean(scores),
                                                    sem(scores)))
def rbf_analysis(X, Y, c, g, title, filename):
    """Leave-one-out evaluation of an RBF SVC, appended to a report file.

    An ``SVC(C=c, gamma=g, kernel='rbf')`` is fitted per leave-one-out fold.
    Sensitivity, specificity and accuracy are computed from the pooled
    predictions (labels 0/1) and appended to ``filename`` together with the
    Matthews correlation coefficient, the classification report and the
    confusion matrix.  The row-normalised confusion matrix is then drawn with
    ``plot_confusion_matrix``.  Nothing is returned.

    Changes from the previous version:
    * ``classification_report`` and ``confusion_matrix`` now receive
      ``(expected, predictions)``, i.e. ``(y_true, y_pred)``; with the
      arguments swapped the report's precision and recall were exchanged and
      the matrix was transposed.
    * the report file is opened in text append mode, because ``str`` is
      written to it (binary mode rejects ``str`` on Python 3).

    Raises ``ZeroDivisionError`` when ``Y`` contains no positive or no
    negative sample.
    """
    # Single-argument call form: same output on Python 2 and Python 3.
    print("Performing Cross Validation on Penalty: {}".format(c))

    predictions = []
    expected = []
    for train_index, test_index in LeaveOneOut(len(X)):
        clf = SVC(C=c, gamma=g, kernel='rbf')
        clf.fit(X[train_index], Y[train_index])
        predictions.append(clf.predict(X[test_index])[0])
        expected.append(Y[test_index][0])

    print("Calculating.....")
    TP, FN, TN, FP = 0, 0, 0, 0
    for prediction, truth in zip(predictions, expected):
        if prediction == 1 and truth == 1:
            TP += 1
        elif prediction == 0 and truth == 1:
            FN += 1
        elif prediction == 0 and truth == 0:
            TN += 1
        elif prediction == 1 and truth == 0:
            FP += 1

    Sensitivity = TP / float(TP + FN)
    Specificity = TN / float(TN + FP)
    Accuracy = (TP + TN) / float(TP + TN + FP + FN)

    # Saving data to file
    with open(filename, 'a') as f:
        f.write("Sensitivity of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Sensitivity, c, g))
        f.write("Specificity of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Specificity, c, g))
        f.write("Accuracy of Prediction: {} @ Penalty: {} @ Gamma: {}\n".format(Accuracy, c, g))
        f.write("Matthews Correlation Coeefficient Value: {}\n".format(matthews_corrcoef(expected, predictions)))
        f.write("Classification Report:\n")
        f.write(classification_report(expected, predictions))
        f.write("Confusion Matrix\n")
        cm = confusion_matrix(expected, predictions)
        f.write(str(cm))

    # Normalise each row (true class) to sum to one before plotting.
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    label1 = "Negative"
    label2 = "Positive"
    plt.figure()
    plot_confusion_matrix(cm, title, label1, label2)
def loo_cv(X_train, y_train, clf):
    """Print the mean leave-one-out accuracy of ``clf`` and its standard error.

    The estimator is refitted on every leave-one-out training split and scored
    on the single held-out sample; labels and predictions are cast to ``int``
    before comparison.  Nothing is returned.

    NOTE(review): fitting goes through ``clf.clf.fit`` while prediction uses
    ``clf.predict``.  This is kept as found -- presumably ``clf`` is a wrapper
    exposing the underlying estimator as ``.clf``; confirm against callers (a
    plain scikit-learn estimator has no ``.clf`` attribute).
    """
    n_samples = X_train[:].shape[0]  # number of rows
    scores = np.zeros(n_samples)
    for train_index, test_index in LeaveOneOut(n_samples):
        X_train_cv, X_test_cv = X_train[train_index], X_train[test_index]
        y_train_cv, y_test_cv = y_train[train_index], y_train[test_index]
        clf.clf.fit(X_train_cv, y_train_cv)
        y_pred = clf.predict(X_test_cv)
        scores[test_index] = metrics.accuracy_score(y_test_cv.astype(int),
                                                    y_pred.astype(int))
    # Fixed: the template used "{+/-{1:.3f}}", which str.format cannot parse,
    # and the mean was taken from the misspelled name "npp".
    print("Mean score: {0:.3f} (+/-{1:.3f})".format(np.mean(scores),
                                                    sem(scores)))
def loadVideos(self):
    """Load the video masks, extract features and train the HMM models.

    ``data/original_masks.mat`` is read once.  For every category, one feature
    array per person is extracted (two for person ``'lena_'`` in the ``run``,
    ``skip`` and ``walk`` categories, whose entries carry a ``'1'``/``'2'``
    suffix).  ``self.args.mhi`` selects ``extractMhiFeature`` over
    ``extractFeature``.  A model trained on all videos of the category goes to
    ``self.fullDataTrainHmm``; for each leave-one-out split the trained model,
    both scalers and the held-out video are appended to
    ``self.model[category]``.  Finally ``self.target_names`` is set to
    ``self.categories``.
    """
    masks = sio.loadmat('data/original_masks.mat')['original_masks']

    def features_for(key):
        # Extract the feature array of one stored video.
        video = masks[key][0][0]
        if self.args.mhi:
            return self.extractMhiFeature(video)
        return self.extractFeature(video)

    two_take_categories = ('run', 'skip', 'walk')

    for category_name in self.categories:
        images = []
        for person in self.persons:
            if person == 'lena_' and category_name in two_take_categories:
                # Lena has two recordings for these categories.
                images.append(features_for(person + category_name + '1'))
                images.append(features_for(person + category_name + '2'))
            else:
                images.append(features_for(person + category_name))

        if len(images) == 0:
            continue

        loo = LeaveOneOut(len(images))
        images = np.array(images)

        # Train the HMM on every video of the category.
        full_model, std_scale, std_scale1 = self.train(images)
        self.fullDataTrainHmm[category_name] = full_model

        per_fold = {}
        per_fold['hmm'] = []
        per_fold['std_scale'] = []
        per_fold['std_scale1'] = []
        per_fold['data'] = []
        self.model[category_name] = per_fold

        for train, test in loo:
            markov_model, std_scale, std_scale1 = self.train(images[train])
            per_fold['hmm'].append(markov_model)
            per_fold['std_scale'].append(std_scale)
            per_fold['std_scale1'].append(std_scale1)
            per_fold['data'].append(images[test])

    self.target_names = self.categories
def loo_cv(X_train, Y_train, clf):
    """Print the mean leave-one-out accuracy of ``clf`` and its standard error.

    ``clf`` is refitted on every leave-one-out training split and scored on
    the single held-out sample.  Nothing is returned.
    """
    n_samples = X_train[:].shape[0]
    scores = np.zeros(n_samples)
    for train_index, test_index in LeaveOneOut(n_samples):
        X_train_cv, X_test_cv = X_train[train_index], X_train[test_index]
        Y_train_cv, Y_test_cv = Y_train[train_index], Y_train[test_index]
        clf = clf.fit(X_train_cv, Y_train_cv)
        Y_pred = clf.predict(X_test_cv)
        # Open question from the original author: is astype(int) a problem here?
        scores[test_index] = metrics.accuracy_score(
            Y_test_cv.astype(int), Y_pred.astype(int))
    # .format() must be applied to the string, not to print()'s return value:
    # the old ``print(...).format(...)`` only worked as a Python 2 statement.
    print("Loo_cv mean score: {0:.3f} (+/-{1:.3f})".format(
        np.mean(scores), sem(scores)))
def rbf(X, Y):
    """Grid-search ``C`` and ``gamma`` of an RBF SVC with leave-one-out CV.

    Prints the best parameter set and its score.

    Returns
    -------
    dict
        ``best_params_`` of the fitted ``GridSearchCV``.
    """
    # Performing Grid Search for Parameter Selection
    C = [1, 2, 5, 10, 15, 20, 25, 30, 50, 100, 200, 500, 1000, 2000, 5000,
         10000]
    # Last entry fixed: it was written "0,000005", which is a syntax error on
    # Python 3 and the two values 0 and 5 on Python 2.
    gamma = [0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001,
             0.5, 0.05, 0.005, 0.0005, 0.00005, 0.000005]
    parameters = [{'kernel': ['rbf'], 'gamma': gamma, 'C': C}]

    estimator = SVC()
    lv = LeaveOneOut(len(X))
    clf = GridSearchCV(estimator, parameters, cv=lv)
    clf.fit(X, Y)

    print("Best Params for RBF: {}".format(clf.best_params_))
    print("Accuracy: {}".format(clf.best_score_))
    return clf.best_params_