def main():
    """
    Command-line entry point for the blockades p-value test

    Parses the command line, loads the model named by ``model_file``
    and calls pvalues_test() with the parsed options, passing sys.stderr
    as its last argument. Always returns 0.
    """
    cli = argparse.ArgumentParser(
        description="Nano-Align protein "
                    "identification",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # (flags, keyword settings) pairs; registered in this exact order so
    # that positional parsing and the --help listing stay the same
    argument_table = [
        (("blockades_file",),
         dict(metavar="blockades_file",
              help="path to blockades file (in mat format)")),
        (("model_file",),
         dict(metavar="model_file",
              help="path to trained model file ('-' for MV model)")),
        (("-c", "--cluster-size"),
         dict(dest="cluster_size", type=int, default=10,
              help="blockades cluster size")),
        (("-d", "--database"),
         dict(dest="database", metavar="database",
              help="database file (in FASTA "
                   "format). If not set, random database is generated",
              default=None)),
        (("-s", "--single-blockades"),
         dict(action="store_true", default=False,
              dest="single_blockades",
              help="print statistics for each blockade in a cluster")),
        (("--version",),
         dict(action="version", version=__version__)),
    ]
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()

    trained_model = load_model(opts.model_file)
    pvalues_test(opts.blockades_file, opts.cluster_size, trained_model,
                 opts.database, opts.single_blockades, sys.stderr)
    return 0
def main():
    """
    Command-line entry point for the nanospectra p-value test

    Reads the nanospectra file, model file and optional settings from
    the command line, loads the model and calls pvalues_test() with
    sys.stderr as its last argument. Always returns 0.
    """
    cli_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Nano-Align protein "
                    "identification",
    )

    # Positional arguments (their order defines the command-line order)
    cli_parser.add_argument(
        "nanospectra_file",
        help="path to nanospectra file (in mat format)",
        metavar="nanospectra_file",
    )
    cli_parser.add_argument(
        "model_file",
        help="path to trained model file ('-' for MV model)",
        metavar="model_file",
    )

    # Optional arguments
    cli_parser.add_argument(
        "-c", "--cluster-size",
        default=10,
        type=int,
        dest="cluster_size",
        help="blockades cluster size",
    )
    cli_parser.add_argument(
        "-d", "--database",
        default=None,
        dest="database",
        metavar="database",
        help="database file (in FASTA "
             "format). If not set, random database is generated",
    )
    cli_parser.add_argument(
        "-s", "--single-nanospectra",
        default=False,
        action="store_true",
        dest="single_nanospectra",
        help="print statistics for each nanospectra in a cluster",
    )
    cli_parser.add_argument("--version", version=__version__,
                            action="version")

    parsed = cli_parser.parse_args()

    loaded_model = load_model(parsed.model_file)
    pvalues_test(
        parsed.nanospectra_file,
        parsed.cluster_size,
        loaded_model,
        parsed.database,
        parsed.single_nanospectra,
        sys.stderr,
    )
    return 0
def full_identify(blockades_file, model_file, db_file):
    """
    Computes pvalues for cluster sizes from 1 to 20 and plots them

    For every cluster size ``avg`` in 1..20, pvalues_test() is run ``avg``
    times on ``blockades_file`` with the model loaded from ``model_file``
    and the database ``db_file``. The p-values of each batch are collected,
    the batch median is printed to stderr next to ``avg``, and the list
    of all batches is finally handed to plot_pvalues().
    """
    blockade_model = load_model(model_file)

    boxes = []
    # One shared null stream for every run: the original opened os.devnull
    # anew for each call and never closed it, leaking a handle per run.
    with open(os.devnull, "w") as null_stream:
        for avg in range(1, 21):
            p_values = []
            for _ in range(avg):
                # The second returned value (rank) is not needed here
                p_value, _rank = pvalues_test(blockades_file, avg,
                                              blockade_model, db_file,
                                              False, null_stream)
                p_values.append(p_value)

            boxes.append(p_values)
            print(avg, np.median(p_values), file=sys.stderr)

    plot_pvalues(boxes)
def _cross_validate(train_mats, cv_mats, db_file, out_file):
    """
    Choosing the best parameters through cross-validation

    Every (C, gamma, eps) combination of a fixed grid is used to train
    a model with _train_svr() on ``train_mats``. Each model is scored by
    the mean of the ranks that pvalues_test() returns for the entries of
    ``cv_mats`` (lower is better; on ties the first combination wins).
    A table of all scores and the winning parameters are printed to
    stderr, then a model is retrained with the winning parameters and
    saved to ``out_file`` with store_model().

    Raises ValueError if no combination produced a usable score (for
    example when ``cv_mats`` is empty and the mean is NaN).
    """
    CLUSTER_SIZE = 10

    eps_vec = [0.01, 0.001, 0.0001, 0.00001]
    C_vec = [1, 10, 100, 1000, 10000, 100000]
    gamma_vec = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]

    # float("inf") rather than sys.maxint: the latter does not exist
    # on Python 3, while infinity works on both major versions
    best_score = float("inf")
    best_params = None

    print("C\tGam\tEps\tScore", file=sys.stderr)
    # A single null stream is shared by all evaluations instead of
    # opening (and never closing) os.devnull for every call
    with open(os.devnull, "w") as null_stream:
        for C in C_vec:
            for gamma in gamma_vec:
                for eps in eps_vec:
                    temp_model = _train_svr(train_mats, C, gamma, eps)

                    scores = []
                    for cv_mat in cv_mats:
                        # Only the rank is used for scoring
                        _pval, rank = pvalues_test(cv_mat, CLUSTER_SIZE,
                                                   temp_model, db_file,
                                                   False, null_stream)
                        scores.append(rank)
                    score = np.mean(scores)

                    print("{0}\t{1}\t{2}\t{3}".format(C, gamma, eps, score),
                          file=sys.stderr)
                    if score < best_score:
                        best_score = score
                        best_params = (C, gamma, eps)

    if best_params is None:
        # Previously this surfaced as an opaque TypeError when unpacking
        # None in the print call below
        raise ValueError("cross-validation produced no valid score; "
                         "check that cv_mats is not empty")

    print(*best_params, file=sys.stderr)
    best_model = _train_svr(train_mats, *best_params)
    store_model(best_model, out_file)