def leaveMout(X, y):
    """Print every leave-2-out split of ``X`` and return the last split's data.

    Args:
        X: Indexable samples; must accept an index array (``X[indices]``).
        y: Targets aligned with ``X``; indexed with the same index arrays.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` taken from the final split
        produced by the splitter. Earlier splits are only printed.
    """
    splitter = LeavePOut(2)  # hold out two samples per split
    splitter.get_n_splits(X)  # split count is computed but not used
    for train_rows, test_rows in splitter.split(X):
        # Show which rows land on each side of the current split.
        print("TRAIN:", train_rows, "TEST:", test_rows)
        X_train = X[train_rows]
        X_test = X[test_rows]
        y_train = y[train_rows]
        y_test = y[test_rows]

    # Only the values assigned during the last iteration survive the loop.
    return X_train, X_test, y_train, y_test
Example #2
0
    def leave1out_fit(self):
        """Evaluate an LDA classifier with leave-one-out, then fit it on all data.

        For every split produced by ``LeavePOut(1)`` a fresh ``LDA`` is fitted
        on the training rows and its prediction for the held-out row is
        recorded. Afterwards a final model is fitted on the complete data and
        stored in ``self.model``.

        Reads ``self.X``, ``self.Y``, ``self.n_components`` and
        ``self.row_id``.

        Returns:
            The ``ClassifierResult`` (also stored in ``self.result``) built
            from the accuracy in percent, the projection returned by the
            full model's ``fit_transform``, the labels, the held-out
            predictions, ``self.row_id`` and the full model.
        """
        X = self.X
        Y = self.Y
        lpo = LeavePOut(1)
        # One slot per split; object dtype stores the predicted labels as-is.
        predictions = np.zeros((lpo.get_n_splits(Y), ), object)
        for train_idx, test_idx in lpo.split(Y):
            lda = LDA(n_components=self.n_components)
            # Only the fitted classifier is needed per fold; the projected
            # training data was never used, so plain fit() is sufficient.
            lda.fit(X[train_idx, :], Y[train_idx])
            predictions[test_idx] = lda.predict(X[test_idx, :])[0]

        # Final model trained on every sample; its projection is reported.
        full_model = LDA(n_components=self.n_components)
        new_X = full_model.fit_transform(X, Y)
        self.model = full_model
        # Accuracy is the share of held-out predictions matching the labels.
        accuracy = 100 * ((predictions == Y).sum() / Y.shape[0])
        self.result = ClassifierResult(accuracy,
                                       new_X,
                                       Y,
                                       predictions,
                                       row_id=self.row_id,
                                       model=full_model)
        return self.result
Example #3
0
    def leave1out_fit(self):
        """Score a Gaussian naive Bayes model with leave-one-out, then refit on all data.

        Each split from ``LeavePOut(1)`` trains a fresh ``GaussianNB`` on the
        training rows and predicts the held-out row. The number of correct
        held-out predictions divided by the number of splits gives the
        accuracy (in percent). A final model fitted on the complete data is
        stored in ``self.model``.

        Reads ``self.X``, ``self.Y`` and ``self.row_id``.

        Returns:
            The ``ClassifierResult`` (also stored in ``self.result``) built
            from the accuracy, ``self.X``, ``self.Y``, the held-out
            predictions, ``self.row_id`` and the full model.
        """
        features, labels = self.X, self.Y
        splitter = LeavePOut(1)
        n_splits = splitter.get_n_splits(labels)
        hits = 0
        # One slot per split; object dtype stores the predicted labels as-is.
        predictions = np.zeros((n_splits, ), object)
        for train_rows, held_out in splitter.split(labels):
            fold_features = features[train_rows, :]
            fold_labels = labels[train_rows]
            y_pred = GaussianNB().fit(fold_features, fold_labels).predict(
                features[held_out, :])
            predictions[held_out] = y_pred[0]
            hits += (labels[held_out] == y_pred).sum()

        # Final model trained on every sample.
        full_model = GaussianNB()
        full_model.fit(features, labels)
        accuracy = 100 * (hits / n_splits)
        self.model = full_model
        self.result = ClassifierResult(
            accuracy,
            features,
            labels,
            predictions,
            row_id=self.row_id,
            model=full_model,
        )
        return self.result
Example #4
0
def Leave_P_Out(dataset, p):
    """Return the mean leave-p-out MSE of the fixed model ``mpg = 40 - 0.15 * horsepower``.

    Args:
        dataset: Table exposing ``"horsepower"`` and ``"mpg"`` columns through
            ``dataset[...]`` (e.g. a pandas DataFrame).
        p: Number of samples held out as the test set in each split. It is a
            required argument; the original note mentions 10 as the intended
            value, but no default is applied. The number of splits grows
            combinatorially with ``p``.

    Returns:
        The mean of the per-split mean squared errors.
    """
    # Read the columns from the argument rather than from a module-level
    # global, and convert them to arrays so the positional indices produced
    # by the splitter select rows by position even when the table's index is
    # not 0..n-1 (e.g. after rows were dropped).
    X = np.asarray(dataset["horsepower"])
    y = np.asarray(dataset["mpg"])
    lpo = LeavePOut(p)

    errors = []
    for _, test_index in lpo.split(X):
        # The model has fixed coefficients, so only the held-out rows matter.
        y_pred = 40 - 0.15 * X[test_index]
        errors.append(mean_squared_error(y[test_index], y_pred))

    return np.mean(errors)
Example #5
0
    def generate_evaluation_sets(self, bipartite_network, gene_linkage, rand_seed=0):
        """Build leave-p-out evaluation cases (seed genes vs. dropped genes) per disease.

        For every disease listed in ``self.groups`` the disease genes are
        split with ``LeavePOut(p=self.leave_p_out)``. Each accepted split
        yields one case made of the kept ("seed") genes with their weights and
        the held-out ("dropped") genes. Finally a random subset of at most
        ``self.max_evals`` cases is drawn without replacement.

        Args:
            bipartite_network: Passed through to ``self.get_nodes_weights``.
            gene_linkage: Object whose ``is_gene_available(gene)`` decides
                whether a held-out gene may be used in a case.
            rand_seed: Seed handed to ``np.random.seed`` before sampling.

        Returns:
            Four parallel lists ``(seeds, seeds_weight, dropped, groups)``;
            entry ``k`` of each list describes the same evaluation case.
        """
        np.random.seed(rand_seed)
        lpo = LeavePOut(p=self.leave_p_out)

        number_of_groups = len(self.groups)
        # True division: the per-group budget may be fractional.
        evals_per_group = self.max_evals/number_of_groups

        seeds = []
        seeds_weight = []
        dropped = []
        groups = []
        # self.groups is a dictionary with a list of diseases per group
        for group_name, diseases_in_group in self.groups.items():
            # to balance evaluation over groups
            number_of_diseases_in_group = len(diseases_in_group)
            evals_per_disease = evals_per_group/number_of_diseases_in_group
            for dis in diseases_in_group:
                # get weights of the genes in the disease and extra genes if the mode allows it (PRINCE).
                disgens_weight, extragens_weight = self.get_nodes_weights(bipartite_network, dis)
                # NOTE(review): set() iteration order is not guaranteed; if gene ids are
                # strings the split order may differ between interpreter runs -- confirm
                # whether reproducibility across runs matters here.
                disease_genes = list(set(list(disgens_weight.keys())))

                # to avoid repeated evaluations: cap by the number of available splits
                # and by the per-disease budget (truncated to an int)
                evals_in_this_disease = int(np.min([lpo.get_n_splits(disease_genes), evals_per_disease]))
                # select subset of evaluations
                i = 0
                for seeds_ix, dropped_ix in lpo.split(disease_genes):
                    # NOTE(review): i counts accepted cases starting at 0 and the test is
                    # `>`, so up to evals_in_this_disease + 1 cases can be accepted per
                    # disease -- confirm whether `>=` was intended.
                    if i > evals_in_this_disease:  # to avoid adding too many evals
                        break

                    # could be more than 1 gene if leave many out
                    dropped_genes = [disease_genes[drop_ix] for drop_ix in dropped_ix]
                    # keep the split only if every gene to drop is available in gene_linkage
                    if all([gene_linkage.is_gene_available(gene) for gene in dropped_genes]):
                        # add the disease genes to the seeds and also the extra genes given by PRINCE method except for
                        # those genes already in the dropped set.

                        seeds_genes = [disease_genes[seed] for seed in seeds_ix]
                        seeds.append(seeds_genes + [gen for gen in extragens_weight.keys() if gen not in dropped_genes])
                        # weights are listed in the same order as the genes appended to `seeds`
                        seeds_weight.append([disgens_weight[gen] for gen in seeds_genes] +
                                            [weight for gen, weight in extragens_weight.items() if
                                             gen not in dropped_genes])

                        dropped.append(dropped_genes)
                        groups.append(group_name)
                        i += 1

        # cap the cases at max_evals: draw min(max_evals, number of collected cases)
        # indices without replacement and keep the four lists aligned.
        chosen_ixes = np.random.choice(len(seeds_weight), int(np.min([self.max_evals, len(seeds_weight)])), replace=False)
        print("Number of test cases to perform: ", len(chosen_ixes))
        seeds_weight = [seeds_weight[chosen_ix] for chosen_ix in chosen_ixes]
        seeds = [seeds[chosen_ix] for chosen_ix in chosen_ixes]
        dropped = [dropped[chosen_ix] for chosen_ix in chosen_ixes]
        groups = [groups[chosen_ix] for chosen_ix in chosen_ixes]

        return seeds, seeds_weight, dropped, groups
Example #6
0
def leave_pout():
    """
    Log every leave-2-out split of a small hard-coded data set.

    Leave-p-out iterates over the data set; on each round p samples are held
    out as the test set and the remaining (N - p) samples form the training
    set.
    :return:
    """
    features = np.array([
        [1, 2, 3, 4],
        [11, 12, 13, 14],
        [21, 22, 23, 24],
        [31, 32, 33, 34],
    ])
    targets = np.array([1, 5, 0, 0])
    # With p=1 this is the same as LeaveOneOut.
    splitter = LeavePOut(p=2)
    logger.info(splitter.get_n_splits(features))
    for train_rows, test_rows in splitter.split(features, targets):
        X_train, X_test = features[train_rows], features[test_rows]
        y_train, y_test = targets[train_rows], targets[test_rows]
        # One log record per item, followed by a blank separator record.
        for message in (
            "Train Index:\n %s" % train_rows,
            "Test Index:\n %s" % test_rows,
            "X_train:\n %s" % X_train,
            "X_test:\n %s" % X_test,
            "y_train:\n %s" % y_train,
            "y_test:\n %s" % y_test,
            "\n\n",
        ):
            logger.info(message)
Example #7
0
import numpy as np
from sklearn.model_selection import LeavePOut
# LeavePOut holds out a number of samples that you choose for testing and
# keeps the remaining samples for training.

# Ten samples with two features each.
X = np.array([
    [1, 11], [2, 12], [3, 13], [4, 14], [5, 15],
    [6, 16], [7, 17], [8, 18], [9, 19], [10, 20],
])

# One label per sample, stored as a column vector.
y = np.array([[1], [0], [1], [1], [0], [1], [1], [0], [0], [1]])

# Hold out four samples in every split.
lpo = LeavePOut(4)
print('number of splits = ', str(lpo.get_n_splits(X)))

print("----------------------------------------------------------")

folds = lpo.split(X)

for train_index, test_index in folds:
    print('train : ', train_index, ' test : ', test_index)
    # Print each slice of the current split under its own caption.
    for caption, rows in (
        ('X_train \n ', X[train_index]),
        ('X_test  \n ', X[test_index]),
        ('y_train \n ', y[train_index]),
        ('y_test  \n ', y[test_index]),
    ):
        print(caption, rows)
    print("----------------------------------------------------------")
    subj_permuts = joblib.load(permutations_path)
    if subj_ind == 0:
        allsubj_permuts = subj_permuts
    else:
        shift = allsubj_permuts.shape[1]
        allsubj_permuts = np.hstack([allsubj_permuts,shift+subj_permuts])

print(allsubj_permuts.shape)

n_permuts = allsubj_permuts.shape[0]
"""

# Modality labels; they become the suffixes of the 'train_*' / 'test_*' keys
# created for every split below.
# NOTE(review): 'A' / 'V' presumably stand for audio / visual -- confirm.
modality_list = ['A', 'V']

# Leave-p-out splitter over subjects. `n_leftout_subjects` and `subjects_list`
# are not defined in this excerpt.
lnso_cv = LeavePOut(n_leftout_subjects)
# The same list is passed for all three positional arguments.
n_splits = lnso_cv.get_n_splits(subjects_list, subjects_list, subjects_list)

print(n_splits)

# NOTE(review): presumably collects one `xval_inds` dict per split; the code
# that fills it is not visible in this excerpt.
allsplits_xval_inds = []

for split_ind, (trainsubj_inds, testsubj_inds) in enumerate(
        lnso_cv.split(subjects_list, subjects_list, subjects_list)):
    # initialize struct for storing all train and test inds for this split:
    # one empty 'train_<modality>' and one empty 'test_<modality>' list each
    xval_inds = dict()
    for modality in modality_list:
        xval_inds['train_{}'.format(modality)] = []
        xval_inds['test_{}'.format(modality)] = []

    # NOTE(review): the code that uses shift_ind is cut off in this excerpt.
    shift_ind = 0
Example #9
0
# ### Leave-p-out
# 
# Este es un tipo de validación en la que no se define un porcentaje para el conjunto de validación, sino un número $p$ de muestras para validación, y las restantes $n-p$ quedan para el entrenamiento. En este caso el número de repeticiones estará definido por el número de combinaciones posibles, $\binom{n}{p}$.

# In[19]:


# Random toy data: 10 rows by 2 columns drawn with np.random.randn.
X=np.random.randn(10,2)


# In[20]:


from sklearn.model_selection import LeavePOut
# Hold out 2 samples per split and ask how many splits that gives for X.
lpo = LeavePOut(2)
lpo.get_n_splits(X)


# This corresponds to the number of possible combinations, N choose 2.

# In[21]:


from itertools import combinations 
# Cross-check: count every 2-element combination of the row indices of X.
len(list(combinations(range(X.shape[0]), 2)))

# `LeavePOut(p=1)` es igual a `LeaveOneOut()`.

# ## Metodología de validación para problemas desbalanceados
# 
# 
# Si tenemos problemas desbalanceados y usamos una metodología de validación estándar, podemos tener problemas porque la clase minoritaria queda muy mal representada en el conjunto de training.
Example #10
0
import numpy as np
from sklearn.model_selection import LeavePOut
# ----------------------------------------------------
'''

class sklearn.model_selection.LeavePOut(p)

'''
# ----------------------------------------------------

# Toy data: four samples with two features each, and one label per sample.
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
])
y = np.array([1, 2, 3, 4])

# First show the split count and the repr of a splitter with p=3 ...
lpo = LeavePOut(3)
print(lpo.get_n_splits(X))

print(lpo)

# ... then walk through every split with p=2.
lpo = LeavePOut(p=2)

for train_index, test_index in lpo.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]
    print('X_train \n', X_train)
    print('X_test \n', X_test)
    print('y_train \n', y_train)
    print('y_test \n', y_test)
    print('*********************')