def leaveMout(X, y):
    """Print every leave-2-out split of ``X`` and return the last one.

    Args:
        X: Indexable samples; must support indexing with an integer
            index array (e.g. a NumPy array).
        y: Targets aligned with ``X``, indexable the same way.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` taken from the final
        split yielded by ``LeavePOut(2).split(X)``. Earlier splits are
        only printed, not returned.
    """
    leaveout = LeavePOut(2)  # p = 2 samples held out per split

    # NOTE(review): the return is placed after the loop (last split wins);
    # confirm against the original layout of this function.
    for train_index, test_index in leaveout.split(X):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

    return X_train, X_test, y_train, y_test
def leave1out_fit(self):
    """Score an LDA classifier with leave-one-out CV, then fit on all data.

    For every sample, a fresh ``LDA`` is fitted on the remaining samples
    and used to predict the held-out one. Afterwards a final model is
    fitted on the complete data and stored on the instance.

    Reads ``self.X``, ``self.Y``, ``self.n_components`` and ``self.row_id``;
    writes ``self.model`` and ``self.result``.

    Returns:
        The ``ClassifierResult`` built from the accuracy (in percent), the
        full-data LDA projection, the labels and the held-out predictions.
    """
    X = self.X
    Y = self.Y

    lpo = LeavePOut(1)
    n_splits = lpo.get_n_splits(Y)
    # One slot per split; object dtype so any label type can be stored.
    predictions = np.zeros((n_splits, ), object)

    for train_idx, test_idx in lpo.split(Y):
        lda = LDA(n_components=self.n_components)
        # Only the fitted state is needed here; the projected training
        # fold returned by fit_transform is deliberately discarded.
        lda.fit_transform(X[train_idx, :], Y[train_idx])
        y_pred = lda.predict(X[test_idx, :])
        predictions[test_idx] = y_pred[0]

    full_model = LDA(n_components=self.n_components)
    new_X = full_model.fit_transform(X, Y)
    self.model = full_model

    accuracy = 100 * ((predictions == Y).sum() / Y.shape[0])
    self.result = ClassifierResult(accuracy, new_X, Y, predictions,
                                   row_id=self.row_id, model=full_model)
    return self.result
def leave1out_fit(self):
    """Score a GaussianNB classifier with leave-one-out CV, then refit.

    Each sample is predicted by a ``GaussianNB`` trained on all the other
    samples. A final model trained on the complete data is stored in
    ``self.model``, and the summary is stored in ``self.result``.

    Returns:
        The ``ClassifierResult`` holding the accuracy (in percent), the
        inputs, the labels and the held-out predictions.
    """
    features, labels = self.X, self.Y

    splitter = LeavePOut(1)
    n_splits = splitter.get_n_splits(labels)

    hits = 0
    predictions = np.zeros((n_splits, ), object)
    for fit_rows, held_out in splitter.split(labels):
        fold_model = GaussianNB().fit(features[fit_rows, :], labels[fit_rows])
        y_pred = fold_model.predict(features[held_out, :])
        predictions[held_out] = y_pred[0]
        hits += (labels[held_out] == y_pred).sum()

    full_model = GaussianNB()
    full_model.fit(features, labels)

    accuracy = 100 * (hits / n_splits)
    self.model = full_model
    self.result = ClassifierResult(
        accuracy,
        features,
        labels,
        predictions,
        row_id=self.row_id,
        model=full_model,
    )
    return self.result
def Leave_P_Out(dataset, p):
    """Return the mean leave-p-out MSE of a fixed linear mpg model.

    The model ``mpg = 40 - 0.15 * horsepower`` is not fitted; it is only
    evaluated on every test subset produced by ``LeavePOut(p)``.

    Args:
        dataset: Mapping/DataFrame with ``"horsepower"`` and ``"mpg"``
            columns.
        p: Number of samples held out for testing in each split. The
            number of splits grows combinatorially with ``p``.

    Returns:
        Mean of the per-split mean squared errors.
    """
    # Use the dataset that was passed in (previously a global was read and
    # the argument ignored). Converting to arrays makes the integer
    # positions yielded by LeavePOut index by position rather than by label.
    X = np.asarray(dataset["horsepower"])
    y = np.asarray(dataset["mpg"])

    errors = []
    lpo = LeavePOut(p)
    for _, test_index in lpo.split(X):
        X_test = X[test_index]
        y_test = y[test_index]
        y_pred = 40 - 0.15 * X_test
        errors.append(mean_squared_error(y_test, y_pred))

    return np.mean(errors)
def generate_evaluation_sets(self, bipartite_network, gene_linkage, rand_seed=0):
    """Build leave-p-out evaluation cases for every disease of every group.

    For each disease, genes are split with ``LeavePOut(self.leave_p_out)``
    into seed genes and dropped genes. A split is kept only when every
    dropped gene is reported available by ``gene_linkage``. The collected
    cases are finally sub-sampled at random to at most ``self.max_evals``.

    Args:
        bipartite_network: Passed through to ``self.get_nodes_weights``.
        gene_linkage: Object exposing ``is_gene_available(gene)``.
        rand_seed: Seed for NumPy's global random generator.

    Returns:
        Four parallel lists ``(seeds, seeds_weight, dropped, groups)``:
        seed genes, their weights, the dropped genes and the group name of
        each selected evaluation case.
    """
    np.random.seed(rand_seed)
    lpo = LeavePOut(p=self.leave_p_out)

    number_of_groups = len(self.groups)
    evals_per_group = self.max_evals / number_of_groups

    seeds = []
    seeds_weight = []
    dropped = []
    groups = []

    # self.groups is a dictionary with a list of diseases per group
    for group_name, diseases_in_group in self.groups.items():
        # to balance evaluation over groups
        number_of_diseases_in_group = len(diseases_in_group)
        evals_per_disease = evals_per_group / number_of_diseases_in_group

        for dis in diseases_in_group:
            # get weights of the genes in the disease and extra genes if
            # the mode allows it (PRINCE).
            disgens_weight, extragens_weight = self.get_nodes_weights(bipartite_network, dis)
            # de-duplicate to avoid repeated evaluations
            disease_genes = list(set(disgens_weight.keys()))

            evals_in_this_disease = int(min(lpo.get_n_splits(disease_genes), evals_per_disease))

            # select subset of evaluations
            i = 0
            for seeds_ix, dropped_ix in lpo.split(disease_genes):
                # The strict '>' lets one case beyond the truncated quota
                # through; the surplus is trimmed by the random sample below.
                if i > evals_in_this_disease:
                    break

                # could be more than 1 gene if leave many out
                dropped_genes = [disease_genes[drop_ix] for drop_ix in dropped_ix]

                # keep the split only if all the genes to drop are linkageable
                if all(gene_linkage.is_gene_available(gene) for gene in dropped_genes):
                    # add the disease genes to the seeds and also the extra
                    # genes given by the PRINCE method, except for those
                    # genes already in the dropped set.
                    seeds_genes = [disease_genes[seed] for seed in seeds_ix]
                    seeds.append(
                        seeds_genes
                        + [gen for gen in extragens_weight.keys() if gen not in dropped_genes]
                    )
                    seeds_weight.append(
                        [disgens_weight[gen] for gen in seeds_genes]
                        + [weight for gen, weight in extragens_weight.items()
                           if gen not in dropped_genes]
                    )
                    dropped.append(dropped_genes)
                    groups.append(group_name)
                    # NOTE(review): counting only accepted cases here;
                    # confirm the counter was not meant to count every split.
                    i += 1

    # force the cases to be exactly the number of evals.
    n_chosen = int(min(self.max_evals, len(seeds_weight)))
    chosen_ixes = np.random.choice(len(seeds_weight), n_chosen, replace=False)
    print("Number of test cases to perform: ", len(chosen_ixes))

    seeds_weight = [seeds_weight[chosen_ix] for chosen_ix in chosen_ixes]
    seeds = [seeds[chosen_ix] for chosen_ix in chosen_ixes]
    dropped = [dropped[chosen_ix] for chosen_ix in chosen_ixes]
    groups = [groups[chosen_ix] for chosen_ix in chosen_ixes]
    return seeds, seeds_weight, dropped, groups
def leave_pout():
    """
    Leave-p-out demo.
    Iterates over the splits of a small data set of N samples: each split
    keeps p samples as the test set and the other (N - p) samples as the
    training set, and logs the indices and the selected rows.
    :return: None
    """
    X = np.array([[1, 2, 3, 4],
                  [11, 12, 13, 14],
                  [21, 22, 23, 24],
                  [31, 32, 33, 34]])
    y = np.array([1, 5, 0, 0])

    # With p=1 this is the same as LeaveOneOut.
    # Named `splitter` so the local no longer shadows this function's name.
    splitter = LeavePOut(p=2)
    logger.info(splitter.get_n_splits(X))

    for train_index, test_index in splitter.split(X, y):
        # Arguments are passed to the logger instead of being pre-formatted,
        # so formatting only happens when the record is actually emitted.
        logger.info("Train Index:\n %s", train_index)
        logger.info("Test Index:\n %s", test_index)
        logger.info("X_train:\n %s", X[train_index])
        logger.info("X_test:\n %s", X[test_index])
        logger.info("y_train:\n %s", y[train_index])
        logger.info("y_test:\n %s", y[test_index])
        logger.info("\n\n")
import numpy as np
from sklearn.model_selection import LeavePOut

# Leave-P-Out holds out a chosen number of samples for testing and uses the
# remaining samples for training.

X = np.array([[1, 11], [2, 12], [3, 13], [4, 14], [5, 15],
              [6, 16], [7, 17], [8, 18], [9, 19], [10, 20]])
y = np.array([[1], [0], [1], [1], [0], [1], [1], [0], [0], [1]])

lpo = LeavePOut(4)  # 4 samples held out per split

print('number of splits = ', lpo.get_n_splits(X))
print("----------------------------------------------------------")

folds = lpo.split(X)
for train_index, test_index in folds:
    print('train : ', train_index, ' test : ', test_index)
    print('X_train \n ', X[train_index])
    print('X_test \n ', X[test_index])
    print('y_train \n ', y[train_index])
    print('y_test \n ', y[test_index])
    print("----------------------------------------------------------")
subj_permuts = joblib.load(permutations_path) if subj_ind == 0: allsubj_permuts = subj_permuts else: shift = allsubj_permuts.shape[1] allsubj_permuts = np.hstack([allsubj_permuts,shift+subj_permuts]) print(allsubj_permuts.shape) n_permuts = allsubj_permuts.shape[0] """ modality_list = ['A', 'V'] lnso_cv = LeavePOut(n_leftout_subjects) n_splits = lnso_cv.get_n_splits(subjects_list, subjects_list, subjects_list) print(n_splits) allsplits_xval_inds = [] for split_ind, (trainsubj_inds, testsubj_inds) in enumerate( lnso_cv.split(subjects_list, subjects_list, subjects_list)): # initialize struct for storing all train and test inds for this split xval_inds = dict() for modality in modality_list: xval_inds['train_{}'.format(modality)] = [] xval_inds['test_{}'.format(modality)] = [] shift_ind = 0
# ### Leave-p-out
#
# This is a type of validation in which no percentage is defined for the
# validation set; instead, a number $p$ of samples is used for validation and
# the remaining $n-p$ samples are used for training. In this case the number
# of repetitions is determined by the number of possible combinations.

# In[19]:


X = np.random.randn(10, 2)


# In[20]:


from sklearn.model_selection import LeavePOut
lpo = LeavePOut(2)
lpo.get_n_splits(X)


# This corresponds to the number of possible combinations, N choose 2.

# In[21]:


from itertools import combinations
len(list(combinations(range(X.shape[0]), 2)))


# LeavePOut(p=1) is equivalent to LeaveOneOut()

# ## Validation methodology for imbalanced problems
#
#
# If we have imbalanced problems and use a standard validation methodology,
# we can run into trouble because the minority class ends up very poorly
# represented in the training set.
import numpy as np
from sklearn.model_selection import LeavePOut

# ----------------------------------------------------
''' class sklearn.model_selection.LeavePOut(p) '''
# ----------------------------------------------------

# Four two-feature samples and their targets.
X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
y = np.array([1, 2, 3, 4])

# Alternatives tried for the first splitter:
#lpo = LeavePOut(1)
#lpo = LeavePOut(2)
lpo = LeavePOut(3)

# Report the number of splits and the splitter itself for p=3.
print(lpo.get_n_splits(X))
print(lpo)

# Walk through every split with p=2 and show the selected rows.
lpo = LeavePOut(p=2)
for train_index, test_index in lpo.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)

    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]

    print('X_train \n', X_train)
    print('X_test \n', X_test)
    print('y_train \n', y_train)
    print('y_test \n', y_test)
    print('*********************')