def _train_svr(mat_files, out_file, C=1000, gamma=0.001, epsilon=0.01):
    """
    Fits an SVR blockade model and stores it.

    The training set is obtained from ``mat_files`` through
    ``_get_peptides_signals``; ``C``, ``gamma`` and ``epsilon`` are forwarded
    unchanged to ``SvrBlockade.train``. The fitted model is passed to
    ``store_model`` together with ``out_file``. Nothing is returned.
    """
    train_peptides, train_signals = _get_peptides_signals(mat_files)

    svr_model = SvrBlockade()
    svr_model.train(train_peptides, train_signals, C, gamma, epsilon)

    store_model(svr_model, out_file)
def _train_random_forest(mat_files, out_file):
    """
    Fits a Random Forest blockade model and stores it.

    The training set is obtained from ``mat_files`` through
    ``_get_peptides_signals``. The fitted model is passed to ``store_model``
    together with ``out_file``. Nothing is returned.
    """
    train_peptides, train_signals = _get_peptides_signals(mat_files)

    forest_model = RandomForestBlockade()
    forest_model.train(train_peptides, train_signals)

    store_model(forest_model, out_file)
def _train_svr(mat_files, out_file, C=1000, gamma=0.001, epsilon=0.01):
    """
    Fits an SVR blockade model and stores it.

    The training set is obtained from ``mat_files`` through
    ``_get_peptides_signals``; ``C``, ``gamma`` and ``epsilon`` are forwarded
    unchanged to ``SvrBlockade.train``. The fitted model is passed to
    ``store_model`` together with ``out_file``. Nothing is returned.
    """
    train_peptides, train_signals = _get_peptides_signals(mat_files)

    svr_model = SvrBlockade()
    svr_model.train(train_peptides, train_signals, C, gamma, epsilon)

    store_model(svr_model, out_file)
def _train_random_forest(mat_files, out_file):
    """
    Fits a Random Forest blockade model and stores it.

    The training set is obtained from ``mat_files`` through
    ``_get_peptides_signals``. The fitted model is passed to ``store_model``
    together with ``out_file``. Nothing is returned.
    """
    train_peptides, train_signals = _get_peptides_signals(mat_files)

    forest_model = RandomForestBlockade()
    forest_model.train(train_peptides, train_signals)

    store_model(forest_model, out_file)
def _cross_validate(train_mats, cv_mats, db_file, out_file):
    """
    Chooses the best SVR parameters through cross-validation

    Runs an exhaustive grid search over C, gamma and epsilon. For every
    combination an SVR model is trained on the data from ``train_mats`` and
    scored by the mean of the ranks that ``pvalues_test`` returns for each
    entry of ``cv_mats``. The combination with the lowest mean rank wins; a
    model trained with it is handed to ``store_model`` with ``out_file``.

    Progress (one line per combination, then the winning parameters) is
    printed to stderr.

    Parameters:
        train_mats: training input, forwarded to ``_get_peptides_signals``
        cv_mats:    re-iterable collection of validation inputs, each one
                    forwarded to ``pvalues_test``
        db_file:    forwarded unchanged to ``pvalues_test``
        out_file:   destination forwarded to ``store_model``

    Raises:
        ValueError: if no combination produced a comparable score
                    (for instance when ``cv_mats`` is empty).
    """
    CLUSTER_SIZE = 10

    eps_vec = [0.01, 0.001, 0.0001, 0.00001]
    C_vec = [1, 10, 100, 1000, 10000, 100000]
    gamma_vec = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]

    # The training set does not depend on the hyper-parameters, so it is
    # loaded once instead of once per grid point.
    # NOTE(review): assumes SvrBlockade.train() does not modify its inputs.
    peptides, signals = _get_peptides_signals(train_mats)

    # float("inf") works on both Python 2 and 3 (sys.maxint is Python 2 only)
    best_score = float("inf")
    best_params = None

    print("C\tGam\tEps\tScore", file=sys.stderr)
    # A single null-device handle is shared by all pvalues_test calls and
    # closed afterwards; opening one per call leaked file descriptors.
    with open(os.devnull, "w") as devnull:
        for C in C_vec:
            for gamma in gamma_vec:
                for eps in eps_vec:
                    # The model is built here directly: _train_svr() takes
                    # an output file as its second argument and returns
                    # nothing, so it cannot supply a temporary model.
                    temp_model = SvrBlockade()
                    temp_model.train(peptides, signals, C, gamma, eps)

                    scores = []
                    for cv_mat in cv_mats:
                        _, rank = pvalues_test(cv_mat, CLUSTER_SIZE,
                                               temp_model, db_file,
                                               False, devnull)
                        scores.append(rank)
                    score = np.mean(scores)

                    print("{0}\t{1}\t{2}\t{3}".format(C, gamma, eps, score),
                          file=sys.stderr)
                    if score < best_score:
                        best_score = score
                        best_params = (C, gamma, eps)

    if best_params is None:
        # A NaN mean (e.g. empty cv_mats) never compares below the sentinel
        raise ValueError("cross-validation produced no valid score; "
                         "check that cv_mats is not empty")

    print(*best_params, file=sys.stderr)
    best_model = SvrBlockade()
    best_model.train(peptides, signals, *best_params)
    store_model(best_model, out_file)
def _cross_validate(train_mats, cv_mats, db_file, out_file):
    """
    Chooses the best SVR parameters through cross-validation

    Runs an exhaustive grid search over C, gamma and epsilon. For every
    combination an SVR model is trained on the data from ``train_mats`` and
    scored by the mean of the ranks that ``pvalues_test`` returns for each
    entry of ``cv_mats``. The combination with the lowest mean rank wins; a
    model trained with it is handed to ``store_model`` with ``out_file``.

    Progress (one line per combination, then the winning parameters) is
    printed to stderr.

    Parameters:
        train_mats: training input, forwarded to ``_get_peptides_signals``
        cv_mats:    re-iterable collection of validation inputs, each one
                    forwarded to ``pvalues_test``
        db_file:    forwarded unchanged to ``pvalues_test``
        out_file:   destination forwarded to ``store_model``

    Raises:
        ValueError: if no combination produced a comparable score
                    (for instance when ``cv_mats`` is empty).
    """
    CLUSTER_SIZE = 10

    eps_vec = [0.01, 0.001, 0.0001, 0.00001]
    C_vec = [1, 10, 100, 1000, 10000, 100000]
    gamma_vec = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]

    # The training set does not depend on the hyper-parameters, so it is
    # loaded once instead of once per grid point.
    # NOTE(review): assumes SvrBlockade.train() does not modify its inputs.
    peptides, signals = _get_peptides_signals(train_mats)

    # float("inf") works on both Python 2 and 3 (sys.maxint is Python 2 only)
    best_score = float("inf")
    best_params = None

    print("C\tGam\tEps\tScore", file=sys.stderr)
    # A single null-device handle is shared by all pvalues_test calls and
    # closed afterwards; opening one per call leaked file descriptors.
    with open(os.devnull, "w") as devnull:
        for C in C_vec:
            for gamma in gamma_vec:
                for eps in eps_vec:
                    # The model is built here directly: _train_svr() takes
                    # an output file as its second argument and returns
                    # nothing, so it cannot supply a temporary model.
                    temp_model = SvrBlockade()
                    temp_model.train(peptides, signals, C, gamma, eps)

                    scores = []
                    for cv_mat in cv_mats:
                        _, rank = pvalues_test(cv_mat, CLUSTER_SIZE,
                                               temp_model, db_file,
                                               False, devnull)
                        scores.append(rank)
                    score = np.mean(scores)

                    print("{0}\t{1}\t{2}\t{3}".format(C, gamma, eps, score),
                          file=sys.stderr)
                    if score < best_score:
                        best_score = score
                        best_params = (C, gamma, eps)

    if best_params is None:
        # A NaN mean (e.g. empty cv_mats) never compares below the sentinel
        raise ValueError("cross-validation produced no valid score; "
                         "check that cv_mats is not empty")

    print(*best_params, file=sys.stderr)
    best_model = SvrBlockade()
    best_model.train(peptides, signals, *best_params)
    store_model(best_model, out_file)