def get_scores_from_gat(epochs, seed):
    """Fit a temporal-generalization decoder on a train split and score it.

    The target is binary: True for epochs whose third ``events`` column
    equals 2. 80% of the epochs are used for fitting and 20% for scoring.

    Parameters
    ----------
    epochs
        Object exposing ``get_data()`` and an ``events`` array
        (presumably ``mne.Epochs`` -- confirm against the caller).
    seed
        Passed to ``train_test_split`` as ``random_state``.

    Returns
    -------
    time_gen
        The fitted ``GeneralizingEstimator``.
    scores
        Result of ``time_gen.score`` on the held-out split.
    """
    from sklearn.svm import LinearSVC

    data = epochs.get_data()
    is_target = epochs.events[:, 2] == 2
    X_train, X_test, y_train, y_test = train_test_split(
        data, is_target, test_size=0.2, random_state=seed)

    pipeline = make_pipeline(
        StandardScaler(),
        LinearSVC(random_state=0, tol=1e-5, penalty='l2'))
    time_gen = GeneralizingEstimator(pipeline, scoring='roc_auc', n_jobs=-2)
    time_gen.fit(X_train, y_train)
    scores = time_gen.score(X_test, y_test)

    print(
        'Units with highest weights of a classifier trained to predict subject'
        's number:')
    # Rank the weights of the estimator stored at index 1 by decreasing
    # magnitude (negating |w| turns the ascending sort into a descending one)
    # and show the 20 largest as (index, -|weight|) pairs.
    neg_abs_weights = np.negative(
        np.abs(time_gen.estimators_[1]._final_estimator.coef_))
    top_units = np.transpose(np.argsort(neg_abs_weights))[0:20]
    top_values = np.transpose(np.sort(neg_abs_weights))[0:20]
    print(list(zip(top_units, top_values)))

    return time_gen, scores
def train_test(X_train, y_train, X_test, y_test, clf=clf, scoring=scoring,
               n_jobs=n_jobs):
    """Fit a ``GeneralizingEstimator`` on one set and score it on another.

    The defaults for ``clf``, ``scoring`` and ``n_jobs`` are taken from the
    module-level names of the same spelling at definition time.

    Returns whatever ``GeneralizingEstimator.score`` returns for the test set.
    """
    generalizer = GeneralizingEstimator(clf, scoring=scoring, n_jobs=n_jobs)
    generalizer.fit(X=X_train, y=y_train)
    test_scores = generalizer.score(X=X_test, y=y_test)
    return test_scores
def run_gat(name, decoder="ridge", n_jobs=4):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    name: str
        Name (pseudonym) of individual subject.
    decoder: str
        Specify type of classifier -'ridge' for Ridge Regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM and
        'log_reg' for Logistic Regression. The spellings 'svm-lin' and
        'svm-nonlin' (the only SVM forms the previous parser handled) are
        still accepted.
    n_jobs: int
        The number of jobs to run in parallel. Defaults to 4, the value
        that used to be hard-coded.

    Returns
    -------
    scores
        Output of ``cross_val_multiscore`` (5 folds, ROC-AUC) averaged over
        its first axis.
    preds: np.ndarray
        Array of shape ``(n_epochs, n_times, n_times)`` holding, for every
        epoch, the ``decision_function`` output of the model fitted on the
        folds that did not contain that epoch.

    Raises
    ------
    ValueError
        If ``decoder`` mentions "svm" but is not of the form 'svm',
        'svm-<kind>' or '<kind>-svm'.
    KeyError
        If ``decoder`` is not one of the supported classifiers.
    """
    # select the 'song' and 'voice' epochs of this subject
    epochs = get_epochs(name)['song', 'voice']

    # Work out whether a linear or nonlinear SVM was requested. For ridge
    # and log_reg the hint is unused: both are linear models.
    kernel_hint = ''
    if "svm" in decoder:
        head, sep, tail = decoder.partition("-")
        if head == "svm":
            # bare 'svm' is documented as the nonlinear (RBF) variant;
            # 'svm-<kind>' keeps its previous meaning
            kernel_hint = tail if sep else "nonlin"
        elif tail == "svm":
            # documented '<kind>-svm' spelling, e.g. 'lin-svm'
            kernel_hint = head
        else:
            raise ValueError("Unrecognised SVM decoder: %r" % (decoder,))
        decoder = "svm"

    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge": RidgeClassifier(class_weight='balanced',
                                 random_state=42,
                                 solver="sag"),
        "svm": SVC(class_weight='balanced',
                   kernel=("rbf" if "non" in kernel_hint else "linear"),
                   random_state=42),
        "log_reg": LogisticRegression(class_weight='balanced',
                                      random_state=42),
    }

    # get data and targets once; both steps below reuse them
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # build classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores
    scores = cross_val_multiscore(gen_clf, data, labels,
                                  cv=5, n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores
    # shuffle=True is required for random_state to be meaningful; recent
    # scikit-learn raises if random_state is given without shuffling.
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    n_times = data.shape[2]  # taken from the data instead of a fixed 225
    preds = np.empty((len(labels), n_times, n_times))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        preds[test] = gen_clf.decision_function(data[test])

    # return subject scores and prediction confidence
    return scores, preds
tmax = 0.400
decim = 2  # decimate to make the example faster to run
reject_criteria = dict(mag=5e-12)
epochs = mne.Epochs(raw, events,
                    event_id=event_id,
                    tmin=tmin,
                    tmax=tmax,
                    proj=True,
                    picks=picks,
                    baseline=None,
                    preload=True,
                    reject=reject_criteria,
                    decim=decim)

###############################################################################
# The classifier is trained on the 'Left' epochs and evaluated on the
# 'Right' epochs. In both cases the target is ``events[:, 2] > 2``
# (auditory vs visual, per the original example's description).
clf = make_pipeline(StandardScaler(), LogisticRegression())
time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=1,
                                 verbose=True)

# Fit on the epochs where the stimulus was presented to the left.
left_epochs = epochs['Left']
time_gen.fit(X=left_epochs.get_data(), y=left_epochs.events[:, 2] > 2)

###############################################################################
# Score on the epochs where the stimulus was presented to the right.
right_epochs = epochs['Right']
scores = time_gen.score(X=right_epochs.get_data(),
                        y=right_epochs.events[:, 2] > 2)

###############################################################################
# Plot the score matrix with both axes running from the first to the last
# epoch time, and mark time zero on each axis.
time_extent = epochs.times[[0, -1, 0, -1]]
fig, ax = plt.subplots(1)
im = ax.matshow(scores, vmin=0, vmax=1., cmap='RdBu_r', origin='lower',
                extent=time_extent)
ax.axhline(0., color='k')
ax.axvline(0., color='k')
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel('Testing Time (s)')
# Extract half of the epochs for similar SNR in all conditions.
# The shuffled index order is saved so the selection can be reproduced.
epochs_nr = len(epochs['left/grating'])
epochs_range = np.random.permutation(np.arange(0, epochs_nr, 1))
np.save('%s_epochs_range.npy' % subject, epochs_range)

# The same first half of the shuffled indices is applied to both conditions.
n_half = int(epochs_nr / 2)
half_idx = epochs_range[:n_half]
face_data = epochs["face"][half_idx].get_data()
grating_data = epochs["grating"][half_idx].get_data()
X = np.concatenate((face_data, grating_data))
y = np.concatenate((np.zeros(n_half), np.ones(n_half)))  # 0 = face, 1 = grating

cv = StratifiedKFold(n_splits=10, shuffle=True)

clf = make_pipeline(StandardScaler(), LogisticRegression())
time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=n_jobs)
# Fit on everything (this fitted object is what gets dumped below), then
# obtain cross-validated scores for the same data.
time_gen.fit(X, y)
scores = cross_val_multiscore(time_gen, X, y, cv=cv)

# Save results
joblib.dump(time_gen, "%s_time_gen_svm.jbl" % subject)
np.save("%s_time_gen_score_svm.npy" % subject, scores)

# Second decoder: face vs grating restricted to the 'left' epochs.
left_face_data = epochs["left/face"].get_data()
left_grating_data = epochs["left/grating"].get_data()
X_left = np.concatenate((left_face_data, left_grating_data))
y_left = np.concatenate((np.zeros(len(left_face_data)),
                         np.ones(len(left_grating_data))))

clf = make_pipeline(StandardScaler(), LogisticRegression())
time_gen_left = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=n_jobs)
time_gen_left.fit(X_left, y_left)
# Build the remaining contrasts from the sleep epochs.
X2, y2 = get_Xy_balanced(sleep_epochs, contrast2, n_sample=nsample)
X3, y3 = get_Xy_balanced(sleep_epochs, contrast3, n_sample=nsample)
X4, y4 = get_Xy_balanced(sleep_epochs, contrast4, n_sample=nsample)
X5, y5 = get_Xy_balanced(sleep_epochs, contrast5, n_sample=nsample)

# The epochs objects are not used again in this section.
del wake_epochs, sleep_epochs

base_pipeline = make_pipeline(StandardScaler(),
                              LogisticRegression(max_iter=4000))
clf = GeneralizingEstimator(base_pipeline, scoring='accuracy', n_jobs=6)
# Disabled alternative kept from the original:
# clf = GeneralizingEstimator(make_pipeline(StandardScaler(), SVC(kernel='linear')),
#                             scoring='accuracy', n_jobs=6)

cv = StratifiedKFold(n_splits=2, shuffle=True)

scores1, scores2, scores3, scores4, scores5 = [], [], [], [], []
# Each fold's model is fitted on the training part of contrast 1, scored on
# its held-out part, and then scored on the full data of contrasts 2-5.
transfer_sets = (
    (scores2, X2, y2),
    (scores3, X3, y3),
    (scores4, X4, y4),
    (scores5, X5, y5),
)
for train_idx, test_idx in cv.split(X1, y1):
    clf.fit(X1[train_idx], y=y1[train_idx])
    scores1.append(clf.score(X1[test_idx], y=y1[test_idx]))
    for bucket, X_other, y_other in transfer_sets:
        bucket.append(clf.score(X_other, y=y_other))

# Reorder the per-contrast score lists according to ``saveorder`` and save.
all_scores = [scores1, scores2, scores3, scores4, scores5]
results = [all_scores[i] for i in saveorder]
pickle_save(os.path.join(save_path, sbj + '.p'), results)
gat = GeneralizingEstimator(clf, scoring=make_scorer(scorer), n_jobs=24, **kwargs) y = np.array(y, dtype=float) elif ('cue_side' in analysis or 'cue_type' in analysis): clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression())) kwargs = dict() gat = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs) le = preprocessing.LabelEncoder() le.fit(y) y = le.transform(y) # only consider non NaN values if ('cue_side' in analysis or 'cue_type' in analysis): sel = np.where(y != 0)[0] else: sel = np.where(~np.isnan(y))[0] # Run decoding gat.fit(epochs._data[sel], y=y[sel]) scores = cross_val_multiscore(gat, epochs._data[sel], y=y[sel], cv=StratifiedKFold(12)) scores = scores.mean(axis=0) # save cross-validated scores np.save(fname, np.array(scores))
#--------------------------- # Use AUC because chance level is same regardless of the class balance clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression())) time_gen = GeneralizingEstimator(clf, n_jobs=1, scoring='roc_auc') # # We will train the classifier on all stim face vs house trials # and test on all images face vs house trials. #le = LabelEncoder() # train on stim time_gen.fit(X=np.array(mean_train1 + mean_train2), y=np.array([0] * len(mean_train1) + [1] * len(mean_train2))) # score on imagery scores = time_gen.score(X=np.array(mean_test1 + mean_test2), y=np.array([0] * len(mean_test1) + [1] * len(mean_test2))) # let's save the scores now fname_td = os.path.join( results_path, '%s-causal-highpass-2Hz-temp-gene-across-conditions-stim_vs_imag-ave4trials.mat' % (subject)) savemat(fname_td, { 'scores': scores,
def run_gat(subj, decoder="ridge", n_jobs=2):
    """
    Function to run Generalization Across Time (GAT).

    Parameters
    ----------
    subj: int
        Subject identifier passed to ``get_epochs``.
    decoder: str
        Specify type of classifier -'ridge' for Ridge Regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM and
        'log_reg' for Logistic Regression. The spellings 'svm-lin' and
        'svm-nonlin' (the only SVM forms the previous parser handled) are
        still accepted.
    n_jobs: int
        The number of jobs to run in parallel.

    Returns
    -------
    scores
        Output of ``cross_val_multiscore`` (5 folds, ROC-AUC) averaged over
        its first axis.
    preds: np.ndarray
        Array of shape ``(n_epochs, n_times, n_times)`` holding, for every
        epoch, the ``decision_function`` output of the model fitted on the
        folds that did not contain that epoch.
    patterns
        Result of ``get_coef(clf, 'patterns_', inverse_transform=True)`` for
        the pipeline fitted on the vectorized data.

    Raises
    ------
    ValueError
        If ``decoder`` mentions "svm" but is not of the form 'svm',
        'svm-<kind>' or '<kind>-svm'.
    KeyError
        If ``decoder`` is not one of the supported classifiers.
    """
    # load cue A and cue B epochs
    epochs = get_epochs(subj)['Correct A', 'Correct B']

    # Work out whether a linear or nonlinear SVM was requested. For ridge
    # and log_reg the hint is unused: both are linear models.
    kernel_hint = ''
    if "svm" in decoder:
        head, sep, tail = decoder.partition("-")
        if head == "svm":
            # bare 'svm' is documented as the nonlinear (RBF) variant;
            # 'svm-<kind>' keeps its previous meaning
            kernel_hint = tail if sep else "nonlin"
        elif tail == "svm":
            # documented '<kind>-svm' spelling, e.g. 'lin-svm'
            kernel_hint = head
        else:
            raise ValueError("Unrecognised SVM decoder: %r" % (decoder,))
        decoder = "svm"

    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge": RidgeClassifier(class_weight='balanced',
                                 random_state=42,
                                 solver="sag"),
        "svm": SVC(class_weight='balanced',
                   kernel=("rbf" if "non" in kernel_hint else "linear"),
                   random_state=42),
        "log_reg": LogisticRegression(class_weight='balanced',
                                      random_state=42),
    }

    # get data and targets
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # create classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores
    scores = cross_val_multiscore(gen_clf, data, labels,
                                  cv=5, n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    n_times = data.shape[2]
    preds = np.empty((len(labels), n_times, n_times))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        preds[test] = gen_clf.decision_function(data[test])

    # compute topographical patterns
    dat = Vectorizer().fit_transform(data)
    clf.fit(dat, labels)
    dat = dat - dat.mean(0, keepdims=True)

    # The classifier is always the last pipeline step, whichever decoder was
    # chosen, so take the weights from there.
    # NOTE(review): scikit-learn's SVC exposes ``coef_`` only for a linear
    # kernel, so the RBF variant is expected to fail on the next line --
    # confirm whether patterns are wanted for that case.
    final_step = clf.steps[-1][1]
    filt_ = final_step.coef_.copy()

    # Compute patterns using Haufe's trick: A = Cov_X . W . Precision_Y
    # cf.Haufe, et al., 2014, NeuroImage,
    # doi:10.1016/j.neuroimage.2013.10.067)
    inv_y = 1.
    patt_ = np.cov(dat.T).dot(filt_.T.dot(inv_y)).T

    # store the patterns on the classifier so get_coef can find them
    final_step.patterns_ = patt_

    # back transform using steps in pipeline
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)

    # return subject scores, prediction confidence and topographical patterns
    return scores, preds, patterns
y_train = np.array(y_train, dtype=float)

# only consider trials with correct fixation
sel = np.where(events['is_eye_fixed'] == 1)[0]
y_train = y_train[sel]
y_test = y_test[sel]
X = np.concatenate((X0, X1, X2), axis=2)
X = X[sel]

# Run decoding across condition: the model is fitted on y_train and scored
# both against y_test (other condition) and against y_train (same condition).
cv = StratifiedKFold(7)
scores = list()
scs = list()
if np.isnan(y_train).any():
    # only consider non NaN training targets
    sel = np.where(~np.isnan(y_train))[0]
    # Slice once up front instead of re-indexing X / y on every use.
    X_sel = X[sel]
    y_train_sel = y_train[sel]
    y_test_sel = y_test[sel]
    for train, test in cv.split(X_sel, y_train_sel):
        gat.fit(X_sel[train], y_train_sel[train])
        score = gat.score(X_sel[test], y_test_sel[test])
        sc = gat.score(X_sel[test], y_train_sel[test])  # test on same
        scores.append(score)
        scs.append(sc)
    scores = np.mean(scores, axis=0)
    scs = np.mean(scs, axis=0)
else:
    for train, test in cv.split(X, y_train):
        y_te = y_test[test]
        X_te = X[test]
        # Build the NaN mask BEFORE filtering y_te and apply the same mask to
        # both arrays. Recomputing it from the already-filtered y_te (as was
        # done previously) yields an all-True mask, so X_te would keep its
        # first len(y_te) rows and no longer line up with y_te.
        keep_te = ~np.isnan(y_te)
        X_te = X_te[keep_te]
        y_te = y_te[keep_te]
        y_tr = y_train[train]
        X_tr = X[train]
        # y_train contains no NaN in this branch, so this keeps every entry.
        y_tr = y_tr[np.where(~np.isnan(y_tr))[0]]
all_epochs.append(epochs) epochs = mne.concatenate_epochs(all_epochs) decim = 2 epochs.decimate(decim) # We will train the classifier on all stim face vs house trials # and test on all images face vs house trials. clf = make_pipeline(StandardScaler(), LogisticRegression()) time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=6) le = LabelEncoder() # train on stim time_gen.fit(X=epochs['stim'].get_data(), y=le.fit_transform(epochs['stim'].events[:, 2])) # score on imagery scores = time_gen.score(X=epochs['imag'].get_data(), y=le.fit_transform(epochs['imag'].events[:, 2])) # Plot fig, ax = plt.subplots(1) im = ax.matshow(scores, vmin=0, vmax=1., cmap='RdBu_r', origin='lower', extent=epochs.times[[0, -1, 0, -1]]) ax.axhline(0., color='k') ax.axvline(0., color='k')
def _print_pipeline_steps(pipeline):
    """Print the estimator held by each step of ``pipeline``, numbered from 1."""
    for step_no, (_, step) in enumerate(pipeline.steps, start=1):
        print('Step #{:}: {:}'.format(step_no, step))


def _make_standard_estimator(clf, decoding_method, generalize_across_time,
                             n_jobs):
    """Wrap ``clf`` in a per-time-point estimator for a 'standard*' method.

    The scorer is r2 or mean-squared-error for regression methods ('reg' in
    the method name, r2 when the name also contains 'r2') and 'accuracy'
    otherwise.
    """
    if 'reg' in decoding_method:
        if 'r2' in decoding_method:
            scoring = metrics.make_scorer(metrics.r2_score)
        else:
            scoring = metrics.make_scorer(metrics.mean_squared_error)
    else:
        scoring = 'accuracy'

    wrapper = GeneralizingEstimator if generalize_across_time else SlidingEstimator
    return wrapper(clf, scoring=scoring, n_jobs=n_jobs)


def decode(epochs,
           get_y_label_func,
           epoch_filter=None,
           decoding_method='standard',
           sliding_window_size=None,
           sliding_window_step=None,
           n_jobs=multiprocessing.cpu_count(),
           equalize_event_counts=True,
           only_fit=False,
           generalize_across_time=True):
    """
    Basic flow for decoding

    Parameters
    ----------
    epochs
        Epochs to decode. Must support indexing, ``copy()``, ``get_data()``,
        ``events``, ``event_id``, ``equalize_event_counts``, ``tmin`` and
        ``tmax`` (presumably ``mne.Epochs`` -- confirm against callers).
    get_y_label_func
        Callable that receives the (filtered) epochs and returns one label
        per epoch.
    epoch_filter
        If not None, ``epochs[epoch_filter]`` is decoded instead of all epochs.
    decoding_method: str
        A name starting with 'standard' (containing 'reg' for Ridge
        regression, 'r2' to score regression with r2, 'raw' to skip the
        sliding-window preprocessing), or one of 'ERP_cov', 'Xdawn_cov',
        'Hankel_cov'.
    sliding_window_size, sliding_window_step
        Required for 'standard' methods whose name does not contain 'raw'.
    n_jobs: int
        Number of parallel jobs for the per-time-point estimators.
    equalize_event_counts: bool
        Whether to equalize the number of epochs per label before decoding.
    only_fit: bool
        If True, fit the estimator on all data and skip cross-validation.
    generalize_across_time: bool
        For 'standard' methods, use a ``GeneralizingEstimator`` (True) or a
        ``SlidingEstimator`` (False).

    Returns
    -------
    dict
        Keys: ``procedure`` ('only_fit' or 'fit_and_score'), ``estimator``,
        ``scores`` (None when ``only_fit``), ``pipeline``, ``preprocess``,
        ``cv`` (None when ``only_fit``), ``times`` and ``config``. For the
        covariance-based methods in cross-validation mode ``estimator`` is
        the string 'None'.

    Raises
    ------
    ValueError
        If ``decoding_method`` is not recognised.
    """
    config = dict(equalize_event_counts=equalize_event_counts,
                  only_fit=only_fit,
                  sliding_window_size=sliding_window_size,
                  sliding_window_step=sliding_window_step,
                  decoding_method=decoding_method,
                  generalize_across_time=generalize_across_time,
                  epoch_filter=str(epoch_filter))

    if epoch_filter is not None:
        epochs = epochs[epoch_filter]

    #-- Classify epochs into groups (training epochs)
    y_labels = get_y_label_func(epochs)

    if equalize_event_counts:
        if epoch_filter is None:
            # The relabelling below overwrites event codes and drops epochs
            # in place. Without a filter ``epochs`` is still the caller's
            # object, so work on a copy to leave it untouched. (With a filter
            # the indexing above is assumed to have produced a new object
            # already, as MNE's Epochs indexing does.)
            epochs = epochs.copy()
        epochs.events[:, 2] = y_labels
        epochs.event_id = {str(label): label for label in np.unique(y_labels)}
        min_n_items_per_y_label = min(
            len(epochs[cond]) for cond in epochs.event_id)
        print("\nEqualizing the number of epochs to %d per condition..." %
              min_n_items_per_y_label)
        epochs.equalize_event_counts(list(epochs.event_id))
        y_labels = epochs.events[:, 2]

    groups, group_sizes = np.unique(np.array(y_labels), return_counts=True)
    print("The epochs were classified into %d groups:" % len(groups))
    for g, n_in_group in zip(groups, group_sizes):
        print("Group {:}: {:} epochs".format(g, n_in_group))

    #-- Create the decoding pipeline
    print("Creating the classification pipeline...")

    epochs_data = epochs.get_data()
    is_standard = decoding_method.startswith('standard')

    preprocess_pipeline = None

    if is_standard:
        if 'reg' in decoding_method:
            clf = make_pipeline(StandardScaler(), Ridge())
        else:
            clf = make_pipeline(
                StandardScaler(),
                svm.SVC(C=1, kernel='linear', class_weight='balanced'))

        if 'raw' not in decoding_method:
            assert sliding_window_size is not None, \
                "sliding_window_size is required for this decoding method"
            assert sliding_window_step is not None, \
                "sliding_window_step is required for this decoding method"
            preprocess_pipeline = make_pipeline(
                umne.transformers.SlidingWindow(
                    window_size=sliding_window_size,
                    step=sliding_window_step,
                    average=True))

    elif decoding_method == 'ERP_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(20), average=False),
            ERPCovariances(estimator='lwf'),  # todo how to apply sliding window?
            CSP(30, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))  # todo why logistic regression?

    elif decoding_method == 'Xdawn_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(50), average=False),
            XdawnCovariances(12, estimator='lwf', xdawn_estimator='lwf'),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    elif decoding_method == 'Hankel_cov':
        clf = make_pipeline(
            UnsupervisedSpatialFilter(PCA(70), average=False),
            HankelCovariances(delays=[1, 8, 12, 64], estimator='oas'),
            CSP(15, log=False),
            TangentSpace('logeuclid'),
            LogisticRegression('l2'))

    else:
        raise ValueError(
            'Unknown decoding method: {:}'.format(decoding_method))

    print('\nDecoding pipeline:')
    _print_pipeline_steps(clf)

    if preprocess_pipeline is not None:
        print('\nApplying the pre-processing pipeline:')
        _print_pipeline_steps(preprocess_pipeline)
        epochs_data = preprocess_pipeline.fit_transform(epochs_data)

    if only_fit:
        #-- Only fit the decoders
        procedure = 'only_fit'
        scores = None
        cv = None

        if is_standard:
            estimator = _make_standard_estimator(
                clf, decoding_method, generalize_across_time, n_jobs)
        else:
            estimator = clf

        estimator.fit(X=epochs_data, y=y_labels)

    else:
        #-- Classify & score -- cross-validation
        procedure = 'fit_and_score'
        print(
            "\nCreating a classifier and calculating accuracy scores (this may take some time)..."
        )

        cv = StratifiedKFold(n_splits=5)

        if is_standard:
            estimator = _make_standard_estimator(
                clf, decoding_method, generalize_across_time, n_jobs)
            scores = cross_val_multiscore(estimator=estimator,
                                          X=epochs_data,
                                          y=np.array(y_labels),
                                          cv=cv)
        else:
            scores = _run_cross_validation(X=epochs_data,
                                           y=np.array(y_labels),
                                           clf=clf,
                                           cv=cv)
            # Estimator is not defined in the case of Riemannian decoding;
            # the string placeholder is kept for existing consumers.
            estimator = 'None'

    # One time stamp per (possibly window-averaged) sample of the data.
    times = np.linspace(epochs.tmin, epochs.tmax, epochs_data.shape[2])

    return dict(procedure=procedure,
                estimator=estimator,
                scores=scores,
                pipeline=clf,
                preprocess=preprocess_pipeline,
                cv=cv,
                times=times,
                config=config)
X = epochs._data[sel]

# Encode the control-task labels as integers and drop the trials whose
# encoded label is 0; the same trials are dropped from the control data.
le = LabelEncoder()
y_con = le.fit_transform(y_con)
sel = np.flatnonzero(y_con != 0)
y_con = y_con[sel]
X_con = epochs_con._data[sel]

# Define estimators depending on the analysis
clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression()))
kwargs = dict()
est = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs)

# Run decoding: every fold is fitted on the WM task, then scored on the
# held-out WM trials and on the complete control-task data.
cv = StratifiedKFold(12)
scores = []
scores_con = []
for train, test in cv.split(X, y):
    est.fit(X[train], y[train])  # train during WM task
    score = est.score(X[test], y[test])  # test during WM task
    score_con = est.score(X_con, y_con)  # test during control task
    scores.append(score)
    scores_con.append(score_con)

# Average over folds
scores = np.mean(scores, axis=0)
scores_con = np.mean(scores_con, axis=0)

# save cross-validated scores (WM task first, then control task)
fname = results_folder + '%s_scores_%s.npy' % (subject, analysis)
np.save(fname, np.array(scores))
fname = results_folder + '%s_scores_%s_con.npy' % (subject, analysis)
np.save(fname, np.array(scores_con))
del IX_sing, IX_plur assert all(Y > 0) print(list(Y).count(1), list(Y).count(2)) # clf = make_pipeline(StandardScaler(), LinearSVC(class_weight='balanced')) clf = make_pipeline(LinearSVC(class_weight='balanced')) time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=-1, verbose=True) cv = StratifiedKFold(n_splits=5, random_state=0, shuffle=True) #cv = StratifiedShuffleSplit(n_splits=20, random_state=0) scores = [] for i, (train, test) in enumerate(cv.split(X, Y)): time_gen.fit(X[train], Y[train]) scores.append(time_gen.score(X[test], Y[test])) # list (len=#cv-splits) of sublists (len=#timepoints): curr_weights_clf = np.asarray([ np.squeeze(w._final_estimator.coef_) for w in time_gen.estimators_ ]) weights_clf[v + 1]['splits'].append(curr_weights_clf) weights_clf[v + 1]['mean'] = np.mean(np.asarray(weights_clf[v + 1]['splits']), axis=0) weights_clf[v + 1]['std'] = np.std(np.asarray(weights_clf[v + 1]['splits']), axis=0) ########
tmax=tmax, preload=True, baseline=(None, 0), decim=10) epochs.pick_types(meg=True, ref_meg=False) # Loop across analysis for analysis in analyses: fname = results_folder +\ '%s_scores_%s_%s.npy' % (subject, 'Cue', analysis) # define to-be-predicted values y = np.array(events_behavior[analysis]) clf = make_pipeline(StandardScaler(), LinearModel(LogisticRegression())) kwargs = dict() le = LabelEncoder() le.fit(y) y = le.transform(y) sel = np.where(y != 0)[0] # Run decoding cv = StratifiedKFold(12) scores = list() X = epochs._data gat = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=24, **kwargs) for train, test in cv.split(X[sel], y[sel]): gat.fit(X[sel][train], y[sel][train]) score = gat.score(X[sel][test], y[sel][test]) scores.append(score) scores = np.mean(scores, axis=0) # keep scores np.save(fname, np.array(scores))
scoring=make_scorer(scorer), n_jobs=24, **kwargs) y = np.array(y, dtype=float) # only consider non NaN values if ('cue_side' in analysis or 'cue_type' in analysis): sel = np.where(y != 0)[0] else: sel = np.where(~np.isnan(y))[0] # Run decoding cv = StratifiedKFold(12) scores = list() patterns = list() filters = list() for train, test in cv.split(X[sel], y[sel]): clf.fit(X[sel][train], y[sel][train]) score = clf.score(X[sel][test], y[sel][test]) scores.append(score) patterns.append(get_coef(clf, 'patterns_', inverse_transform=True)) filters.append(get_coef(clf, 'filters_', inverse_transform=True)) scores = np.mean(scores, axis=0) patterns = np.mean(patterns, axis=0) filters = np.mean(filters, axis=0) # save cross-validated scores, patterns and filters fname = results_folder +\ '%s_scores_%s_%s.npy' % (subject, epoch_type, analysis) np.save(fname, np.array(scores)) fname = results_folder +\ '%s_patterns_%s_%s.npy' % (subject, epoch_type, analysis) np.save(fname, np.array(patterns)) fname = results_folder +\
reject=dict(mag=5e-12), decim=decim, verbose='error') ############################################################################### # We will train the classifier on all left visual vs auditory trials # and test on all right visual vs auditory trials. clf = make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs')) time_gen = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=1, verbose=True) # Fit classifiers on the epochs where the stimulus was presented to the left. # Note that the experimental condition y indicates auditory or visual time_gen.fit(X=epochs['Left'].get_data(), y=epochs['Left'].events[:, 2] > 2) ############################################################################### # Score on the epochs where the stimulus was presented to the right. scores = time_gen.score(X=epochs['Right'].get_data(), y=epochs['Right'].events[:, 2] > 2) ############################################################################### # Plot fig, ax = plt.subplots(1) im = ax.matshow(scores, vmin=0, vmax=1., cmap='RdBu_r', origin='lower', extent=epochs.times[[0, -1, 0, -1]])
print(f'Splitting in {n_splits} splits') # Cross validation using sliding window ------------------------------- # Prepare predicted label matrix num_samples, num_times = X_all.shape[0], X_all.shape[2] y_pred_generalizing = np.zeros((num_samples, num_times, num_times)) # Cross validation skf = StratifiedKFold(n_splits=n_splits, shuffle=False) for train_index, test_index in skf.split(X_all, y_all): # Separate training and testing data X_train, y_train = X_all[train_index], y_all[train_index] X_test, y_test = X_all[test_index], y_all[test_index] # Fit estimator estimator.fit(X_train, y_train) # Predict y = estimator.predict(X_test) y_pred_generalizing[test_index] = y # Summary results output_dict = dict( times=times, y_all=y_all, y_pred_generalizing=y_pred_generalizing, ) # Save results with open(os.path.join(RESULTS_FOLDER, f'{running_name}_generalizing.pkl'), 'wb') as f: