コード例 #1
0
def flip(blockades, model_file):
    """
    Reverses the event trace of every blockade whose signal is closer to the
    reversed peptide than to the forward one.

    The blockades are preprocessed into clusters of size 1. For every cluster
    the consensus signal is discretized to the peptide length and its distance
    (``Identifier.signal_protein_distance``) to the peptide of the first
    blockade is compared with the distance to that peptide reversed. When the
    forward distance is larger, the ``eventTrace`` of the cluster's first
    blockade is replaced by its reversed copy.

    A per-cluster table and a final summary are printed to stderr.

    Args:
        blockades: sequence of blockade objects; the peptide is taken from
            the first element.
        model_file: passed to ``load_model`` to build the identifier.

    Returns:
        List with the first blockade of every cluster, flipped where needed.
        An empty list when ``blockades`` is empty.
    """
    if len(blockades) == 0:
        # Nothing to orient: return before loading the model instead of
        # failing with an IndexError on ``blockades[0]`` below.
        return []

    blockade_model = load_model(model_file)
    identifier = Identifier(blockade_model)

    peptide = blockades[0].peptide
    reversed_peptide = peptide[::-1]
    clusters = sp.preprocess_blockades(blockades, cluster_size=1,
                                       min_dwell=0.0, max_dwell=1000)

    print("Num\tFwd_dst\tRev_dst\t\tNeeds_flip", file=sys.stderr)

    num_reversed = 0
    new_blockades = []
    for num, cluster in enumerate(clusters):
        discr_signal = sp.discretize(cluster.consensus, len(peptide))

        fwd_dist = identifier.signal_protein_distance(discr_signal, peptide)
        rev_dist = identifier.signal_protein_distance(discr_signal,
                                                      reversed_peptide)
        # The blockade is considered reversed when it matches the reversed
        # peptide strictly better than the forward one.
        needs_flip = fwd_dist > rev_dist
        print("{0}\t{1:5.2f}\t{2:5.2f}\t\t{3}"
              .format(num + 1, fwd_dist, rev_dist, needs_flip),
              file=sys.stderr)

        blockade = cluster.blockades[0]
        if needs_flip:
            # Reassigns the attribute on the blockade object taken from the
            # cluster; the trace itself is copied by the slice.
            blockade.eventTrace = blockade.eventTrace[::-1]
            num_reversed += 1
        new_blockades.append(blockade)

    print("Reversed:", num_reversed, "of", len(blockades), file=sys.stderr)
    return new_blockades
コード例 #2
0
def pvalues_test(blockades_file, cluster_size, blockade_model, db_file,
                 single_blockades, ostream):
    """
    Performs protein identification and reports the results.

    Blockades are read from ``blockades_file`` and grouped into clusters
    (dwell time limited to 0.5..20). Every cluster consensus is ranked
    against the database, and the position of the true peptide (taken from
    the first blockade) in that ranking gives the p-value
    ``target_rank / db_len``, where ``target_rank`` is 0-based.

    Args:
        blockades_file: file passed to ``read_mat``.
        cluster_size: cluster size passed to ``sp.preprocess_blockades``.
        blockade_model: model used to construct the ``Identifier``.
        db_file: database file, or None to use a random database of
            ``RANDOM_DB_SIZE`` entries with the target id "target".
        single_blockades: when true, ``_detalize_cluster`` is called for
            every cluster.
        ostream: writable text stream that receives the report.

    Returns:
        Tuple ``(median p-value, median target rank)``; the rank is 0-based
        and truncated to int.

    Raises:
        ValueError: if the target is missing from a ranking, or if no
            cluster was produced from the input.
    """
    RANDOM_DB_SIZE = 10000
    identifier = Identifier(blockade_model)

    blockades = read_mat(blockades_file)
    true_peptide = blockades[0].peptide
    if db_file is None:
        identifier.random_database(true_peptide, RANDOM_DB_SIZE)
        target_id = "target"
        db_len = RANDOM_DB_SIZE
    else:
        database, target_id = _make_database(db_file, true_peptide)
        identifier.set_database(database)
        db_len = len(database)

    clusters = sp.preprocess_blockades(blockades,
                                       cluster_size=cluster_size,
                                       min_dwell=0.5,
                                       max_dwell=20)

    ostream.write("\nNo\tSize\tBest_id\t\tBest_dst\tTrg_dst\t\tTrg_rank\t"
                  "Trg_pval\n")
    p_values = []
    ranks = []
    for num, cluster in enumerate(clusters):
        db_ranking = identifier.rank_db_proteins(cluster.consensus)

        # Scan the whole ranking; if the id occurred more than once the last
        # occurrence wins.
        target_rank = None
        target_dist = None
        for rank, (prot_id, prot_dist) in enumerate(db_ranking):
            if prot_id == target_id:
                target_rank = rank
                target_dist = prot_dist
        if target_rank is None:
            # Without this check float(None) below fails with an opaque
            # TypeError.
            raise ValueError("Target protein {0!r} is missing from the "
                             "database ranking of cluster {1}"
                             .format(target_id, num + 1))
        p_value = float(target_rank) / db_len

        p_values.append(p_value)
        ranks.append(target_rank)

        # The table shows the rank 1-based; the stored rank stays 0-based.
        ostream.write(
            "{0}\t{1}\t{2:10}\t{3:5.2f}\t\t{4:5.2f}\t\t{5}\t\t{6:6.4}\n".
            format(num + 1, len(cluster.blockades), db_ranking[0][0],
                   db_ranking[0][1], target_dist, target_rank + 1, p_value))
        if single_blockades:
            _detalize_cluster(identifier, cluster, db_ranking[0][0], target_id,
                              ostream)

    if not p_values:
        # The median of an empty list is NaN and int(NaN) raises a
        # ValueError with an unhelpful message; fail early and explicitly.
        raise ValueError("No clusters were produced from the blockades; "
                         "cannot compute median p-value and rank")

    median_p_value = np.median(p_values)
    median_rank = int(np.median(ranks))
    ostream.write("\nMedian p-value: {0:7.4f}\n".format(median_p_value))
    ostream.write("Median target rank: {0:d}\n".format(median_rank))

    return median_p_value, median_rank
コード例 #3
0
def pvalues_test(blockades_file, cluster_size, blockade_model, db_file,
                 single_blockades, ostream):
    """
    Performs protein identification and reports the results.

    Reads blockades with ``read_mat``, clusters them (dwell time limited to
    0.5..20) and ranks every cluster consensus against the database. The
    0-based position of the true peptide (the peptide of the first blockade)
    in the ranking, divided by the database length, is used as the p-value.

    Args:
        blockades_file: file passed to ``read_mat``.
        cluster_size: cluster size passed to ``sp.preprocess_blockades``.
        blockade_model: model used to construct the ``Identifier``.
        db_file: database file, or None to use a random database of
            ``RANDOM_DB_SIZE`` entries with the target id "target".
        single_blockades: when true, ``_detalize_cluster`` is called for
            every cluster.
        ostream: writable text stream that receives the report.

    Returns:
        Tuple ``(median p-value, median target rank)``; the rank is 0-based
        and truncated to int.

    Raises:
        ValueError: if the target is missing from a ranking, or if no
            cluster was produced from the input.
    """
    RANDOM_DB_SIZE = 10000
    identifier = Identifier(blockade_model)

    blockades = read_mat(blockades_file)
    true_peptide = blockades[0].peptide
    if db_file is None:
        identifier.random_database(true_peptide, RANDOM_DB_SIZE)
        target_id = "target"
        db_len = RANDOM_DB_SIZE
    else:
        database, target_id = _make_database(db_file, true_peptide)
        identifier.set_database(database)
        db_len = len(database)

    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)

    ostream.write("\nNo\tSize\tBest_id\t\tBest_dst\tTrg_dst\t\tTrg_rank\t"
                     "Trg_pval\n")
    p_values = []
    ranks = []
    for num, cluster in enumerate(clusters):
        db_ranking = identifier.rank_db_proteins(cluster.consensus)

        # Full scan of the ranking; a repeated id resolves to its last
        # occurrence.
        target_rank = None
        target_dist = None
        for rank, (prot_id, prot_dist) in enumerate(db_ranking):
            if prot_id == target_id:
                target_rank = rank
                target_dist = prot_dist
        if target_rank is None:
            # float(None) below would otherwise raise an opaque TypeError.
            raise ValueError("Target protein {0!r} is missing from the "
                             "database ranking of cluster {1}"
                             .format(target_id, num + 1))
        p_value = float(target_rank) / db_len

        p_values.append(p_value)
        ranks.append(target_rank)

        # The table shows the rank 1-based; the stored rank stays 0-based.
        ostream.write("{0}\t{1}\t{2:10}\t{3:5.2f}\t\t{4:5.2f}\t\t{5}\t\t{6:6.4}\n"
               .format(num + 1, len(cluster.blockades), db_ranking[0][0],
                       db_ranking[0][1], target_dist, target_rank + 1, p_value))
        if single_blockades:
            _detalize_cluster(identifier, cluster, db_ranking[0][0],
                              target_id, ostream)

    if not p_values:
        # np.median of an empty list is NaN, and int(NaN) raises a
        # ValueError with an unhelpful message; report the real cause.
        raise ValueError("No clusters were produced from the blockades; "
                         "cannot compute median p-value and rank")

    median_p_value = np.median(p_values)
    median_rank = int(np.median(ranks))
    ostream.write("\nMedian p-value: {0:7.4f}\n".format(median_p_value))
    ostream.write("Median target rank: {0:d}\n".format(median_rank))

    return median_p_value, median_rank