def flip(blockades, model_file):
    """
    Orient blockades so that their traces match the peptide direction.

    The blockades are passed through ``sp.preprocess_blockades`` with
    ``cluster_size=1``. For every resulting cluster the consensus is
    discretized to the peptide length and compared, via
    ``Identifier.signal_protein_distance``, against the peptide and against
    the reversed peptide. When the forward distance is larger than the
    reverse one, the ``eventTrace`` of the cluster's first blockade is
    reversed in place.

    A per-cluster table and a final summary are printed to ``sys.stderr``.

    :param blockades: sequence of blockades; the peptide is taken from the
                      first element
    :param model_file: passed to ``load_model`` to build the identifier
    :return: list with the first blockade of every cluster
    """
    identifier = Identifier(load_model(model_file))

    peptide = blockades[0].peptide
    clusters = sp.preprocess_blockades(blockades, cluster_size=1,
                                       min_dwell=0.0, max_dwell=1000)

    print("Num\tFwd_dst\tRev_dst\t\tNeeds_flip", file=sys.stderr)

    row_format = "{0}\t{1:5.2f}\t{2:5.2f}\t\t{3}"
    flipped_count = 0
    oriented = []
    for index, cluster in enumerate(clusters, 1):
        signal = sp.discretize(cluster.consensus, len(peptide))

        forward = identifier.signal_protein_distance(signal, peptide)
        backward = identifier.signal_protein_distance(signal, peptide[::-1])
        needs_flip = forward > backward
        print(row_format.format(index, forward, backward, needs_flip),
              file=sys.stderr)

        # Only the first blockade of each cluster is kept (and, if needed,
        # reversed in place).
        representative = cluster.blockades[0]
        oriented.append(representative)
        if needs_flip:
            representative.eventTrace = representative.eventTrace[::-1]
            flipped_count += 1

    print("Reversed:", flipped_count, "of", len(blockades), file=sys.stderr)
    return oriented
def pvalues_test(blockades_file, cluster_size, blockade_model, db_file,
                 single_blockades, ostream):
    """
    Run protein identification on clustered blockades and report the results.

    For every cluster, the database proteins are ranked against the cluster
    consensus. The (zero-based) position of the target protein in that
    ranking divided by the database size is reported as the p-value.

    :param blockades_file: passed to ``read_mat`` to load the blockades
    :param cluster_size: forwarded to ``sp.preprocess_blockades``
    :param blockade_model: passed to ``Identifier``
    :param db_file: database file; when ``None`` the identifier is asked for
                    a random database of ``RANDOM_DB_SIZE`` entries
    :param single_blockades: when true, ``_detalize_cluster`` is called for
                             every cluster
    :param ostream: object with a ``write`` method receiving the report
    :return: tuple ``(median p-value, median target rank as int)``
    """
    # NOTE(review): a function with this name appears to be defined twice in
    # this file; if both live in the same scope the later one wins.
    RANDOM_DB_SIZE = 10000

    identifier = Identifier(blockade_model)
    true_peptide = read_mat(blockades_file)[0].peptide \
        if False else None  # placeholder, replaced below
    blockades = read_mat(blockades_file)
    true_peptide = blockades[0].peptide

    if db_file is not None:
        database, target_id = _make_database(db_file, true_peptide)
        identifier.set_database(database)
        db_len = len(database)
    else:
        identifier.random_database(true_peptide, RANDOM_DB_SIZE)
        target_id = "target"
        db_len = RANDOM_DB_SIZE

    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)

    ostream.write("\nNo\tSize\tBest_id\t\tBest_dst\tTrg_dst\t\tTrg_rank\t"
                  "Trg_pval\n")
    row_format = "{0}\t{1}\t{2:10}\t{3:5.2f}\t\t{4:5.2f}\t\t{5}\t\t{6:6.4}\n"

    p_values = []
    ranks = []
    for index, cluster in enumerate(clusters, 1):
        db_ranking = identifier.rank_db_proteins(cluster.consensus)

        # Locate the target in the ranking; if it occurs more than once the
        # last occurrence is the one that is kept.
        target_rank = None
        target_dist = None
        for position, (candidate_id, candidate_dist) in enumerate(db_ranking):
            if candidate_id == target_id:
                target_rank, target_dist = position, candidate_dist

        p_value = float(target_rank) / db_len
        p_values.append(p_value)
        ranks.append(target_rank)

        best_id, best_dist = db_ranking[0][0], db_ranking[0][1]
        ostream.write(row_format.format(index, len(cluster.blockades),
                                        best_id, best_dist, target_dist,
                                        target_rank + 1, p_value))
        if single_blockades:
            _detalize_cluster(identifier, cluster, db_ranking[0][0],
                              target_id, ostream)

    median_p_value = np.median(p_values)
    ostream.write("\nMedian p-value: {0:7.4f}\n".format(median_p_value))
    median_rank = int(np.median(ranks))
    ostream.write("Median target rank: {0:d}\n".format(median_rank))

    return median_p_value, median_rank
def pvalues_test(blockades_file, cluster_size, blockade_model, db_file,
                 single_blockades, ostream):
    """
    Run protein identification on clustered blockades and report the results.

    For every cluster, the database proteins are ranked against the cluster
    consensus. The (zero-based) position of the target protein in that
    ranking divided by the database size is reported as the p-value.

    :param blockades_file: passed to ``read_mat`` to load the blockades
    :param cluster_size: forwarded to ``sp.preprocess_blockades``
    :param blockade_model: passed to ``Identifier``
    :param db_file: database file; when ``None`` the identifier is asked for
                    a random database of ``RANDOM_DB_SIZE`` entries
    :param single_blockades: when true, ``_detalize_cluster`` is called for
                             every cluster
    :param ostream: object with a ``write`` method receiving the report
    :return: tuple ``(median p-value, median target rank as int)``
    """
    # NOTE(review): a function with this name appears to be defined twice in
    # this file; if both live in the same scope the later one wins.
    RANDOM_DB_SIZE = 10000

    identifier = Identifier(blockade_model)
    blockades = read_mat(blockades_file)
    true_peptide = blockades[0].peptide

    if db_file is not None:
        database, target_id = _make_database(db_file, true_peptide)
        identifier.set_database(database)
        db_len = len(database)
    else:
        identifier.random_database(true_peptide, RANDOM_DB_SIZE)
        target_id = "target"
        db_len = RANDOM_DB_SIZE

    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)

    ostream.write("\nNo\tSize\tBest_id\t\tBest_dst\tTrg_dst\t\tTrg_rank\t"
                  "Trg_pval\n")
    row_format = "{0}\t{1}\t{2:10}\t{3:5.2f}\t\t{4:5.2f}\t\t{5}\t\t{6:6.4}\n"

    p_values = []
    ranks = []
    for index, cluster in enumerate(clusters, 1):
        db_ranking = identifier.rank_db_proteins(cluster.consensus)

        # Locate the target in the ranking; if it occurs more than once the
        # last occurrence is the one that is kept.
        target_rank = None
        target_dist = None
        for position, (candidate_id, candidate_dist) in enumerate(db_ranking):
            if candidate_id == target_id:
                target_rank, target_dist = position, candidate_dist

        p_value = float(target_rank) / db_len
        p_values.append(p_value)
        ranks.append(target_rank)

        best_id, best_dist = db_ranking[0][0], db_ranking[0][1]
        ostream.write(row_format.format(index, len(cluster.blockades),
                                        best_id, best_dist, target_dist,
                                        target_rank + 1, p_value))
        if single_blockades:
            _detalize_cluster(identifier, cluster, db_ranking[0][0],
                              target_id, ostream)

    median_p_value = np.median(p_values)
    ostream.write("\nMedian p-value: {0:7.4f}\n".format(median_p_value))
    median_rank = int(np.median(ranks))
    ostream.write("Median target rank: {0:d}\n".format(median_rank))

    return median_p_value, median_rank