def get_bias(blockades_file, model_file, cluster_size):
    """
    Gets AA-specific bias between the empirical and theoretical signals.

    Blockades are read from ``blockades_file`` and grouped into clusters of
    ``cluster_size``; the peptide is taken from the first blockade of the
    first cluster and the theoretical signal is produced by the model
    loaded from ``model_file``.

    Returns a ``defaultdict(list)`` keyed by amino acid. For every cluster
    and every window position whose k-mer contains no flank character, the
    difference (discretized consensus - model signal) at that position is
    appended once per residue of the k-mer.
    """
    WINDOW = 4
    flank = "-" * (WINDOW - 1)

    clusters = sp.preprocess_blockades(read_mat(blockades_file),
                                       cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    peptide = clusters[0].blockades[0].peptide
    model_signal = load_model(model_file).peptide_signal(peptide)

    # Neither value depends on the cluster, so build them once
    flanked_peptide = flank + peptide + flank
    num_peaks = len(peptide) + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))
        for pos in xrange(num_peaks):
            kmer = flanked_peptide[pos:pos + WINDOW]
            if "-" in kmer:
                # windows overlapping the flanks are not scored
                continue
            delta = discr_signal[pos] - model_signal[pos]
            for aa in kmer:
                errors[aa].append(delta)

    return errors
def get_bias(blockades_file, model_file, cluster_size):
    """
    Gets AA-specific bias between the empirical and theoretical signals.

    The result is a ``defaultdict(list)`` mapping each amino acid to the
    list of (discretized consensus - model signal) differences observed at
    every window position whose k-mer contains that residue. K-mers that
    overlap the "-" flanks are skipped. The peptide comes from the first
    blockade of the first cluster.
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    peptide = clusters[0].blockades[0].peptide
    blockade_model = load_model(model_file)
    model_signal = blockade_model.peptide_signal(peptide)

    # Cluster-independent: pad the peptide so every residue is covered by
    # WINDOW k-mers, and enumerate all k-mer start positions
    padding = "-" * (WINDOW - 1)
    flanked_peptide = padding + peptide + padding
    kmers = [flanked_peptide[start:start + WINDOW]
             for start in xrange(len(peptide) + WINDOW - 1)]

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))
        for start, kmer in enumerate(kmers):
            if "-" not in kmer:
                diff = discr_signal[start] - model_signal[start]
                for aa in kmer:
                    errors[aa].append(diff)
    return errors
def flip(blockades, model_file):
    """
    Flips blockades whose signal matches the reversed peptide better.

    Blockades are preprocessed into clusters of size one. For each cluster
    the discretized consensus is compared (via ``Identifier``) with the
    peptide of the first input blockade, read forward and backward. When
    the forward distance is larger, the ``eventTrace`` of the cluster's
    first blockade is reversed in place on that object.

    A per-cluster table and a summary line are written to stderr.
    Returns the list holding the first blockade of every cluster.
    """
    identifier = Identifier(load_model(model_file))
    peptide = blockades[0].peptide
    clusters = sp.preprocess_blockades(blockades, cluster_size=1,
                                       min_dwell=0.0, max_dwell=1000)

    print("Num\tFwd_dst\tRev_dst\t\tNeeds_flip", file=sys.stderr)
    row_format = "{0}\t{1:5.2f}\t{2:5.2f}\t\t{3}"
    reversed_peptide = peptide[::-1]

    num_reversed = 0
    new_blockades = []
    for number, cluster in enumerate(clusters, 1):
        discr_signal = sp.discretize(cluster.consensus, len(peptide))
        fwd_dist = identifier.signal_protein_distance(discr_signal, peptide)
        rev_dist = identifier.signal_protein_distance(discr_signal,
                                                      reversed_peptide)
        needs_flip = fwd_dist > rev_dist
        print(row_format.format(number, fwd_dist, rev_dist, needs_flip),
              file=sys.stderr)

        blockade = cluster.blockades[0]
        new_blockades.append(blockade)
        if needs_flip:
            blockade.eventTrace = blockade.eventTrace[::-1]
            num_reversed += 1

    print("Reversed:", num_reversed, "of", len(blockades), file=sys.stderr)
    return new_blockades
def main():
    """
    Command-line entry point for protein identification.

    Parses the blockades file, the model file and the optional settings
    (cluster size, database, per-blockade statistics), loads the model and
    runs ``pvalues_test`` with its report going to stderr. Returns 0.
    """
    parser = argparse.ArgumentParser(
        description="Nano-Align protein " "identification",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # positional arguments
    parser.add_argument(
        "blockades_file",
        metavar="blockades_file",
        help="path to blockades file (in mat format)",
    )
    parser.add_argument(
        "model_file",
        metavar="model_file",
        help="path to trained model file ('-' for MV model)",
    )

    # optional arguments
    parser.add_argument(
        "-c", "--cluster-size",
        dest="cluster_size",
        type=int,
        default=10,
        help="blockades cluster size",
    )
    parser.add_argument(
        "-d", "--database",
        dest="database",
        metavar="database",
        help="database file (in FASTA "
             "format). If not set, random database is generated",
        default=None,
    )
    parser.add_argument(
        "-s", "--single-blockades",
        action="store_true",
        default=False,
        dest="single_blockades",
        help="print statistics for each blockade in a cluster",
    )
    parser.add_argument("--version", action="version", version=__version__)

    options = parser.parse_args()

    model = load_model(options.model_file)
    pvalues_test(options.blockades_file, options.cluster_size, model,
                 options.database, options.single_blockades, sys.stderr)
    return 0
def full_identify(blockades_file, model_file, db_file):
    """
    Computes p-values for a range of cluster sizes and plots them.

    For every value ``avg`` from 1 to 20, ``pvalues_test`` is run ``avg``
    times with ``avg`` passed as its second argument, and the returned
    p-values are collected. The median of each group is printed to stderr,
    and the list of groups is handed to ``plot_pvalues``.

    blockades_file -- path passed through to ``pvalues_test``
    model_file     -- path of the model, loaded once with ``load_model``
    db_file        -- database argument passed through to ``pvalues_test``
    """
    blockade_model = load_model(model_file)

    boxes = []
    # The report stream of pvalues_test is discarded. Open the null device
    # once and close it deterministically, instead of leaking a new file
    # handle on every call.
    with open(os.devnull, "w") as devnull:
        for avg in xrange(1, 21):
            p_values = []
            for _ in xrange(avg):
                # the second returned value is not used here
                p_value, _rank = pvalues_test(blockades_file, avg,
                                              blockade_model, db_file,
                                              False, devnull)
                p_values.append(p_value)
            boxes.append(p_values)
            print(avg, np.median(p_values), file=sys.stderr)

    plot_pvalues(boxes)
def main():
    """
    Command-line entry point for protein identification from nanospectra.

    Builds the argument parser, loads the model named on the command line
    and passes everything to ``pvalues_test``, which reports to stderr.
    Always returns 0.
    """
    parser = argparse.ArgumentParser(
        description="Nano-Align protein " "identification",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    add = parser.add_argument
    add("nanospectra_file", metavar="nanospectra_file",
        help="path to nanospectra file (in mat format)")
    add("model_file", metavar="model_file",
        help="path to trained model file ('-' for MV model)")
    add("-c", "--cluster-size", dest="cluster_size", type=int, default=10,
        help="blockades cluster size")
    add("-d", "--database", dest="database", metavar="database",
        help="database file (in FASTA "
             "format). If not set, random database is generated",
        default=None)
    add("-s", "--single-nanospectra", action="store_true", default=False,
        dest="single_nanospectra",
        help="print statistics for each nanospectra in a cluster")
    add("--version", action="version", version=__version__)

    cli = parser.parse_args()

    model = load_model(cli.model_file)
    pvalues_test(cli.nanospectra_file, cli.cluster_size, model,
                 cli.database, cli.single_nanospectra, sys.stderr)
    return 0
def plot_blockades(blockades_file, model_files, cluster_size, show_text):
    """
    Pretty plotting of cluster consensus signals against model signals.

    Blockades are read from ``blockades_file`` and clustered; the peptide
    is taken from the first blockade of the first cluster. For each
    cluster one plot is drawn and shown, containing the consensus signal
    and, for every model in ``model_files``, the model signal linearly
    interpolated to the consensus length. The correlation of each model
    with the consensus (1 - correlation distance) is printed to stderr.
    With ``show_text`` set, the peptide letters are drawn on the plot.
    """
    WINDOW = 4

    clusters = sp.preprocess_blockades(read_mat(blockades_file),
                                       cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    peptide = clusters[0].blockades[0].peptide
    models = [load_model(model_file) for model_file in model_files]
    x_max = len(peptide) + 1

    for cluster in clusters:
        consensus = cluster.consensus
        signal_length = len(consensus)
        x_axis = np.linspace(0, x_max, signal_length)

        # axes cosmetics: bottom and left spines only
        matplotlib.rcParams.update({"font.size": 16})
        axes = plt.subplot()
        for side in ("right", "top"):
            axes.spines[side].set_visible(False)
        axes.get_xaxis().tick_bottom()
        axes.get_yaxis().tick_left()
        axes.set_xlim(0, x_max)
        axes.set_xlabel("Putative AA position")
        axes.set_ylabel("Normalized signal")

        axes.plot(x_axis, consensus, label="Empirical signal",
                  linewidth=1.5)

        for model in models:
            model_signal = model.peptide_signal(peptide)
            num_points = len(model_signal)
            # stretch the model sample positions over [0, signal_length]
            # NOTE(review): "/" is integer division for ints unless true
            # division is enabled for the module - confirm intent
            model_grid = [i * signal_length / (num_points - 1)
                          for i in xrange(num_points)]
            interp_fun = interp1d(model_grid, model_signal, kind="linear")
            model_interp = interp_fun(xrange(signal_length))

            corr = 1 - distance.correlation(consensus, model_interp)
            print("{0} correlation: {1:5.2f}\t".format(model.name, corr),
                  file=sys.stderr)
            axes.plot(x_axis, model_interp, label=model.name, linewidth=2)

        legend = axes.legend(loc="lower left", frameon=False)
        for line in legend.get_lines():
            line.set_linewidth(2)
        for text in legend.get_texts():
            text.set_fontsize(16)

        if show_text:
            # amino acid letters, drawn slightly below the signal mean
            text_height = np.mean(consensus) - 2
            acids_pos = _get_aa_positions(peptide, WINDOW, x_axis[-1])
            for i, aa in enumerate(peptide):
                axes.text(acids_pos[i], text_height, aa, fontsize=16)

        plt.show()
def plot_blockades(blockades_file, model_files, cluster_size, show_text):
    """
    Pretty plotting: empirical consensus versus model predictions.

    Every cluster built from ``blockades_file`` gets its own plot with the
    consensus signal and one curve per model from ``model_files``. Each
    model curve is the model's peptide signal linearly interpolated to the
    number of consensus samples. Each model's correlation with the
    consensus is reported on stderr. If ``show_text`` is true, the
    peptide's amino acid letters are added to the plot.
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    peptide = clusters[0].blockades[0].peptide

    models = [load_model(path) for path in model_files]

    for cluster in clusters:
        signal_length = len(cluster.consensus)
        right_edge = len(peptide) + 1
        x_axis = np.linspace(0, right_edge, signal_length)

        matplotlib.rcParams.update({"font.size": 16})
        plot = plt.subplot()
        plot.spines["right"].set_visible(False)
        plot.spines["top"].set_visible(False)
        plot.get_xaxis().tick_bottom()
        plot.get_yaxis().tick_left()
        plot.set_xlim(0, right_edge)
        plot.set_xlabel("Putative AA position")
        plot.set_ylabel("Normalized signal")

        # measured signal first, then one interpolated curve per model
        plot.plot(x_axis, cluster.consensus, label="Empirical signal",
                  linewidth=1.5)

        sample_points = xrange(signal_length)
        for model in models:
            model_signal = model.peptide_signal(peptide)
            model_grid = [
                i * signal_length / (len(model_signal) - 1)
                for i in xrange(len(model_signal))
            ]
            resample = interp1d(model_grid, model_signal, kind="linear")
            model_interp = resample(sample_points)

            corr = 1 - distance.correlation(cluster.consensus, model_interp)
            message = "{0} correlation: {1:5.2f}\t".format(model.name, corr)
            print(message, file=sys.stderr)
            plot.plot(x_axis, model_interp, label=model.name, linewidth=2)

        legend = plot.legend(loc="lower left", frameon=False)
        for handle in legend.get_lines():
            handle.set_linewidth(2)
        for caption in legend.get_texts():
            caption.set_fontsize(16)

        if show_text:
            # adding AAs text
            event_mean = np.mean(cluster.consensus)
            acids_pos = _get_aa_positions(peptide, WINDOW, x_axis[-1])
            for idx, aa in enumerate(peptide):
                plot.text(acids_pos[idx], event_mean - 2, aa, fontsize=16)

        # one window per cluster
        plt.show()