def rgf_cv(X, y, n_folds, cv_params):
    """Run shuffled k-fold cross validation of an ``RGFC`` model.

    For each fold the held-out targets are written to
    ``<simdir>/data/y_test-<fold>``, an ``RGFC`` model is fitted on the
    training split using ``<simdir>/<fold>`` as its working directory, and
    the predicted probabilities for the held-out split are scored with
    ``ru.calc_ll_from_proba``.

    Parameters
    ----------
    X : 2-D array
        Feature matrix, indexed as ``X[rows, :]``.
    y : 1-D array
        Targets, indexed as ``y[rows]``.
    n_folds : int
        Number of folds handed to ``KFold``.
    cv_params : dict
        Must contain the keys ``'L2'``, ``'sL2'``, ``'reg_depth'``,
        ``'algorithm'``, ``'loss'``, ``'max_leaf_forest'``,
        ``'test_interval'`` and ``'simdir'``.

    Returns
    -------
    tuple
        ``(mean, std)`` of the per-fold scores returned by
        ``ru.calc_ll_from_proba`` (presumably a log-loss -- confirm against
        that helper).

    Raises
    ------
    KeyError
        If any of the required ``cv_params`` keys is missing.
    """
    reg_l2 = cv_params['L2']
    reg_sl2 = cv_params['sL2']
    reg_depth = cv_params['reg_depth']
    algorithm = cv_params['algorithm']
    loss = cv_params['loss']
    max_leaf_forest = cv_params['max_leaf_forest']
    test_interval = cv_params['test_interval']
    simdir = cv_params['simdir']

    # Two-element unpack keeps the implicit "X must be 2-D" check; only the
    # row count is needed.
    n_samples, _ = X.shape

    # Fixed seed so the fold assignment is reproducible between runs.
    kf = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True,
                                random_state=1234)

    # The held-out targets of every fold share one directory, so create it
    # once up front rather than on each iteration.
    mkdir_p(simdir + '/data')

    fold_scores = []
    for fold, (train_index, test_index) in enumerate(kf):
        fold_dir = simdir + '/' + str(fold)
        X_train, X_test = X[train_index, :], X[test_index, :]
        y_train, y_test = y[train_index], y[test_index]

        np.savetxt(simdir + '/data/y_test-%d' % fold, y_test)

        # NOTE(review): `n_labels` is neither a parameter nor assigned in
        # this function; it must be resolvable at module level or this call
        # raises NameError. Confirm where it is meant to come from.
        model = RGFC(simdir=fold_dir,
                     algorithm=algorithm,
                     loss=loss,
                     reg_L2=reg_l2,
                     reg_sL2=reg_sl2,
                     reg_depth=reg_depth,
                     max_leaf_forest=max_leaf_forest,
                     test_interval=test_interval,
                     n_labels=n_labels,
                     n_jobs=-1)
        model.fit(X_train, y_train)
        proba = model.predict_proba(X_test)
        fold_scores.append(ru.calc_ll_from_proba(proba, y_test))

    fold_scores = np.array(fold_scores)
    return fold_scores.mean(), fold_scores.std()