コード例 #1
0
ファイル: identify.py プロジェクト: fenderglass/Nano-Align
def main():
    """
    Command-line entry point for protein identification

    Parses the command line, loads the model named by ``model_file`` and
    calls pvalues_test with the parsed options and sys.stderr.
    Always returns 0.
    """
    cli = argparse.ArgumentParser(
        description="Nano-Align protein identification",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Positional arguments
    cli.add_argument(
        "blockades_file",
        metavar="blockades_file",
        help="path to blockades file (in mat format)",
    )
    cli.add_argument(
        "model_file",
        metavar="model_file",
        help="path to trained model file ('-' for MV model)",
    )

    # Optional arguments
    cli.add_argument(
        "-c", "--cluster-size",
        dest="cluster_size",
        type=int,
        default=10,
        help="blockades cluster size",
    )
    cli.add_argument(
        "-d", "--database",
        dest="database",
        metavar="database",
        default=None,
        help="database file (in FASTA format). "
             "If not set, random database is generated",
    )
    cli.add_argument(
        "-s", "--single-blockades",
        dest="single_blockades",
        action="store_true",
        default=False,
        help="print statistics for each blockade in a cluster",
    )
    cli.add_argument("--version", action="version", version=__version__)

    opts = cli.parse_args()

    trained_model = load_model(opts.model_file)
    pvalues_test(
        opts.blockades_file,
        opts.cluster_size,
        trained_model,
        opts.database,
        opts.single_blockades,
        sys.stderr,
    )
    return 0
コード例 #2
0
ファイル: identify.py プロジェクト: fenderglass/Nano-Align
def main():
    """
    Command-line entry point for protein identification

    Builds the argument parser, reads the options, loads the model given
    by ``model_file`` and passes everything, together with sys.stderr, to
    pvalues_test. Always returns 0.
    """
    description = "Nano-Align protein identification"
    arg_parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Required positional inputs
    arg_parser.add_argument("nanospectra_file", metavar="nanospectra_file",
                            help="path to nanospectra file (in mat format)")
    arg_parser.add_argument("model_file", metavar="model_file",
                            help="path to trained model file "
                                 "('-' for MV model)")

    # Tunable options
    arg_parser.add_argument("-c", "--cluster-size", dest="cluster_size",
                            type=int, default=10,
                            help="blockades cluster size")
    arg_parser.add_argument("-d", "--database", dest="database",
                            metavar="database", default=None,
                            help="database file (in FASTA format). If not "
                                 "set, random database is generated")
    arg_parser.add_argument("-s", "--single-nanospectra",
                            dest="single_nanospectra", action="store_true",
                            default=False,
                            help="print statistics for each nanospectra "
                                 "in a cluster")
    arg_parser.add_argument("--version", action="version",
                            version=__version__)

    parsed = arg_parser.parse_args()
    loaded_model = load_model(parsed.model_file)

    pvalues_test(parsed.nanospectra_file, parsed.cluster_size, loaded_model,
                 parsed.database, parsed.single_nanospectra, sys.stderr)
    return 0
コード例 #3
0
def full_identify(blockades_file, model_file, db_file):
    """
    Computes pvalues for cluster sizes 1..20 and plots them

    For every cluster size ``avg`` the p-value test is repeated ``avg``
    times. The median p-value of each size is printed to stderr, and the
    collected p-values (one list per cluster size) are passed to
    plot_pvalues.
    """
    blockade_model = load_model(model_file)

    boxes = []
    # One sink is shared by every pvalues_test call and closed on exit;
    # opening os.devnull anew for each call leaked a file handle per run.
    with open(os.devnull, "w") as devnull:
        for avg in range(1, 21):
            p_values = []
            for _ in range(avg):
                # Only the p-value is needed here; the rank is discarded
                p_value, _rank = pvalues_test(blockades_file, avg,
                                              blockade_model, db_file,
                                              False, devnull)
                p_values.append(p_value)

            boxes.append(p_values)
            print(avg, np.median(p_values), file=sys.stderr)

    plot_pvalues(boxes)
コード例 #4
0
def full_identify(blockades_file, model_file, db_file):
    """
    Computes pvalues over a range of cluster sizes and plots them

    Cluster sizes 1 through 20 are tried; for a size ``avg`` the test is
    run ``avg`` times. Each size's median p-value is reported on stderr
    and the per-size p-value lists are handed to plot_pvalues.
    """
    blockade_model = load_model(model_file)

    boxes = []
    # The per-run output is thrown away. A single os.devnull handle,
    # managed by ``with``, replaces the previously unclosed handle that was
    # opened for every call.
    with open(os.devnull, "w") as null_stream:
        for avg in range(1, 21):
            p_values = []
            for _ in range(avg):
                # The second element of the result (rank) is not used here
                p_value, _rank = pvalues_test(blockades_file, avg,
                                              blockade_model, db_file,
                                              False, null_stream)
                p_values.append(p_value)

            boxes.append(p_values)
            print(avg, np.median(p_values), file=sys.stderr)

    plot_pvalues(boxes)
コード例 #5
0
def _cross_validate(train_mats, cv_mats, db_file, out_file):
    """
    Choosing the best parameters through cross-validation

    A model is trained with _train_svr for every (C, gamma, eps)
    combination of the grid below and scored by the mean rank returned by
    pvalues_test over ``cv_mats``; the combination with the lowest score
    wins. The winning parameters are printed to stderr, a model is trained
    with them once more and passed to store_model together with
    ``out_file``.

    Raises ValueError if no combination yields a score lower than the
    initial one (for example, when ``cv_mats`` is empty).
    """
    CLUSTER_SIZE = 10

    eps_vec = [0.01, 0.001, 0.0001, 0.00001]
    C_vec = [1, 10, 100, 1000, 10000, 100000]
    gamma_vec = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]

    # float("inf") rather than sys.maxint, which is missing on Python 3
    best_score = float("inf")
    best_params = None

    print("C\tGam\tEps\tScore", file=sys.stderr)
    # A single discard stream is reused for every test run and closed at
    # the end, instead of leaking one open os.devnull handle per call.
    with open(os.devnull, "w") as devnull:
        for C in C_vec:
            for gamma in gamma_vec:
                for eps in eps_vec:
                    temp_model = _train_svr(train_mats, C, gamma, eps)

                    scores = []
                    for cv_mat in cv_mats:
                        # Only the rank contributes to the score
                        _pval, rank = pvalues_test(cv_mat, CLUSTER_SIZE,
                                                   temp_model, db_file,
                                                   False, devnull)
                        scores.append(rank)
                    score = np.mean(scores)

                    print("{0}\t{1}\t{2}\t{3}".format(C, gamma, eps, score),
                          file=sys.stderr)
                    if score < best_score:
                        best_score = score
                        best_params = (C, gamma, eps)

    if best_params is None:
        # Without this check print(*None) fails with an opaque TypeError
        raise ValueError("cross-validation produced no valid score; "
                         "check that cv_mats is not empty")

    print(*best_params, file=sys.stderr)
    best_model = _train_svr(train_mats, *best_params)
    store_model(best_model, out_file)
コード例 #6
0
def _cross_validate(train_mats, cv_mats, db_file, out_file):
    """
    Choosing the best parameters through cross-validation

    Every (C, gamma, eps) combination from the grid is used to train a
    model via _train_svr. Its score is the mean of the ranks that
    pvalues_test returns for each entry of ``cv_mats``, and the lowest
    score is kept. The best parameters are printed to stderr, then a model
    trained with them is given to store_model along with ``out_file``.

    Raises ValueError when no parameter combination produced a score that
    compares lower than the initial one (e.g. ``cv_mats`` is empty).
    """
    CLUSTER_SIZE = 10

    eps_vec = [0.01, 0.001, 0.0001, 0.00001]
    C_vec = [1, 10, 100, 1000, 10000, 100000]
    gamma_vec = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]

    # sys.maxint is unavailable on Python 3; infinity works on both
    best_score = float("inf")
    best_params = None

    print("C\tGam\tEps\tScore", file=sys.stderr)
    # Test output is discarded. The stream is opened once and closed by the
    # ``with`` block, so no os.devnull handle is left open per call.
    with open(os.devnull, "w") as null_stream:
        for C in C_vec:
            for gamma in gamma_vec:
                for eps in eps_vec:
                    temp_model = _train_svr(train_mats, C, gamma, eps)

                    scores = []
                    for cv_mat in cv_mats:
                        # The p-value is ignored; ranks drive the selection
                        _pval, rank = pvalues_test(cv_mat, CLUSTER_SIZE,
                                                   temp_model, db_file,
                                                   False, null_stream)
                        scores.append(rank)
                    score = np.mean(scores)

                    print("{0}\t{1}\t{2}\t{3}".format(C, gamma, eps, score),
                          file=sys.stderr)
                    if score < best_score:
                        best_score = score
                        best_params = (C, gamma, eps)

    if best_params is None:
        # Fail with a clear message instead of a TypeError on print(*None)
        raise ValueError("cross-validation produced no valid score; "
                         "check that cv_mats is not empty")

    print(*best_params, file=sys.stderr)
    best_model = _train_svr(train_mats, *best_params)
    store_model(best_model, out_file)