def data_objects_fusion(obj1, obj2):
    """Combine two ``Data`` objects into a newly created one.

    The result is constructed from the module-level ``config`` entries
    ``'SPLITS'`` and ``'HOLDOUT'``; the settings stored on ``obj1`` and
    ``obj2`` are not consulted.

    Args:
        obj1: First object; supplies the leading part of every merged field.
        obj2: Second object; supplies the trailing part of every merged field.

    Returns:
        A new ``Data`` object whose ``Xs_train``, ``Xs_val``, ``ys_train``
        and ``ys_val`` are produced by ``merge_nparr`` and, when
        ``config['HOLDOUT']`` is truthy, whose four ``*_holdout`` fields are
        the concatenation of the corresponding fields of both inputs.
    """
    fused = Data(splits=config['SPLITS'], holdout=config['HOLDOUT'])

    # Cross-validation fields are combined by the project helper.
    for field in ('Xs_train', 'Xs_val', 'ys_train', 'ys_val'):
        merged = merge_nparr(getattr(obj1, field), getattr(obj2, field))
        setattr(fused, field, merged)

    if not config['HOLDOUT']:
        return fused

    # axis=None makes NumPy flatten both inputs before joining them, so the
    # hold-out fields of the result are always one-dimensional.
    for field in ('X_train_holdout', 'X_test_holdout',
                  'y_train_holdout', 'y_test_holdout'):
        pair = (getattr(obj1, field), getattr(obj2, field))
        setattr(fused, field, np.concatenate(pair, axis=None))

    return fused
def kfold_holdout(X, y, groups, splits, holdout):
    """Split ``X``/``y`` into group-aware folds, optionally with a hold-out set.

    Folds are taken from ``GroupKFold(n_splits=splits).split(X, y, groups)``
    and both index arrays of every fold are passed through ``shuffle``.

    * Without hold-out, every fold is appended to ``Xs_train`` / ``Xs_val`` /
      ``ys_train`` / ``ys_val`` of the returned object.
    * With hold-out, the first fold is stored as ``X_train_holdout`` /
      ``X_test_holdout`` / ``y_train_holdout`` / ``y_test_holdout`` and is not
      appended to the fold lists. For every later fold, indices belonging to
      the hold-out test part are dropped from the training indices before the
      fold is appended.

    Args:
        X: Samples; indexed with index arrays and index lists (presumably a
            NumPy array -- confirm against callers).
        y: Targets; indexed exactly like ``X``.
        groups: Group labels forwarded to ``GroupKFold.split``.
        splits: Number of folds requested from ``GroupKFold``.
        holdout: Flag selecting the hold-out behaviour; must compare equal to
            ``True`` or ``False``.

    Returns:
        The populated ``Data`` object.

    Raises:
        ValueError: If ``holdout`` compares equal to neither ``True`` nor
            ``False``.
    """
    # Validate before doing any work so a bad flag fails loudly with a
    # traceback instead of terminating the interpreter.
    if holdout not in (True, False):
        raise ValueError(
            f"holdout must be True or False, got {holdout!r}"
        )

    group_kfold = GroupKFold(n_splits=splits)
    d_obj = Data(splits=splits, holdout=holdout)

    # Indices of the hold-out test part; None until the first fold is seen.
    held_out = None

    for train_index, test_index in group_kfold.split(X, y, groups):
        # NOTE(review): relies on `shuffle` working in place (as
        # random.shuffle does); its return value is ignored -- confirm which
        # `shuffle` the file imports.
        shuffle(train_index)
        shuffle(test_index)

        if holdout:
            if held_out is None:
                # The first fold is reserved as the hold-out split only.
                d_obj.X_train_holdout = X[train_index]
                d_obj.X_test_holdout = X[test_index]
                d_obj.y_train_holdout = y[train_index]
                d_obj.y_test_holdout = y[test_index]
                # A set gives O(1) membership tests in the filter below.
                held_out = set(test_index)
                continue

            # Keep hold-out test samples out of every later training fold.
            # The result stays a list, preserving the shuffled order.
            train_index = [i for i in train_index if i not in held_out]

        d_obj.Xs_train.append(X[train_index])
        d_obj.Xs_val.append(X[test_index])
        d_obj.ys_train.append(y[train_index])
        d_obj.ys_val.append(y[test_index])

    return d_obj