Ejemplo n.º 1
0
def correlation(mat_file_1, mat_file_2):
    """
    Draws a scatter plot of self- versus cross-dataset correlation

    Every blockade of the first file becomes one point: its x coordinate is
    the mean of (1 - correlation distance) to all traces of the first file
    (itself included), its y coordinate is the same mean computed against
    all traces of the second file. Red dashed lines mark the medians of
    both coordinates. The figure is shown interactively, nothing is returned.

    mat_file_1 -- path of the mat file with the reference blockades
    mat_file_2 -- path of the mat file with the blockades to compare with

    NOTE: the axis labels are hard-coded to "H3 tail" / "CCL5".
    """
    def _discretized_traces(mat_file):
        # Identical preprocessing for both inputs
        blockades = read_mat(mat_file)
        blockades = sp._fractional_blockades(blockades)
        blockades = sp._filter_by_duration(blockades, 0.5, 20)
        # Must be a real list, not a lazy map object: the traces are
        # traversed once per blockade in the nested loops below
        return [sp.discretize(sp._trim_flank_noise(b.eventTrace), 20)
                for b in blockades]

    traces_1 = _discretized_traces(mat_file_1)
    traces_2 = _discretized_traces(mat_file_2)

    self_corr = []
    cross_corr = []
    for trace in traces_1:
        block_self = [1 - distance.correlation(trace, other)
                      for other in traces_1]
        block_cross = [1 - distance.correlation(trace, other)
                       for other in traces_2]
        self_corr.append(np.mean(block_self))
        cross_corr.append(np.mean(block_cross))

    # Medians (not means) of the per-blockade averages
    median_self = np.median(self_corr)
    median_cross = np.median(cross_corr)

    matplotlib.rcParams.update({"font.size": 16})
    axes = plt.subplot()

    axes.spines["right"].set_visible(False)
    axes.spines["top"].set_visible(False)
    axes.get_xaxis().tick_bottom()
    axes.get_yaxis().tick_left()
    axes.set_xlim(-0.6, 0.6)
    axes.set_ylim(-0.6, 0.6)
    axes.set_xlabel("(H3 tail, H3 tail) correlation")
    axes.set_ylabel("(H3 tail, CCL5) correlation")

    # Thin black grid
    for tick in [-0.4, -0.2, 0, 0.2, 0.4]:
        plt.plot((-0.6, 0.6), (tick, tick), "--",
                 lw=0.5, color="black")
        plt.plot((tick, tick), (-0.6, 0.6), "--",
                 lw=0.5, color="black")

    # Thick red lines at the medians
    plt.plot((-0.6, 0.6), (median_cross, median_cross), "--",
             lw=1.5, color="red")
    plt.plot((median_self, median_self), (-0.6, 0.6), "--",
             lw=1.5, color="red")

    axes.scatter(self_corr, cross_corr, linewidth=0.5, c="dodgerblue",
                 s=30, edgecolor="blue")

    plt.tight_layout()
    plt.show()
Ejemplo n.º 2
0
def frequency_distribution(blockades_file, detailed):
    """
    Plots the frequency distribution

    For every blockade that passes the duration filter, the number of peaks
    found in the trace (with 1000 samples cut from each end) is divided by
    the blockade dwell time and scaled by 5 / 4. Two subplots are shown:
    dwell time against the deviation of this frequency from the mean, and
    a 100-bin histogram of the frequencies. Nothing is returned.

    blockades_file -- path of the mat file with blockades
    detailed       -- if true, detailed_plots() is called for each blockade
    """
    blockades = read_mat(blockades_file)
    blockades = sp._fractional_blockades(blockades)
    blockades = sp._filter_by_duration(blockades, 0.5, 20)

    # Kept in blockade order, so no blockade-keyed dict (and no hashable
    # blockade objects) is needed to match values back to blockades
    frequencies = []
    for blockade in blockades:
        if detailed:
            detailed_plots(blockade)

        signal = blockade.eventTrace[1000:-1000]
        peaks_x, _ = sp.find_peaks(signal)
        # NOTE(review): the rationale for the 5 / 4 factor is not visible here
        frequencies.append(len(peaks_x) / blockade.ms_Dwell * 5 / 4)

    mean = np.mean(frequencies)
    # Explicit lists: numpy / matplotlib cannot consume lazy map objects
    # or dict views
    errors = [freq - mean for freq in frequencies]
    lengths = [blockade.ms_Dwell for blockade in blockades]

    _, (scatter_axes, hist_axes) = plt.subplots(2)
    scatter_axes.scatter(lengths, errors)
    hist_axes.hist(frequencies, bins=100)
    plt.show()
Ejemplo n.º 3
0
def get_bias(blockades_file, model_file, cluster_size):
    """
    Collects AA-specific differences between empirical and model signals

    The consensus of every cluster is discretized to the peptide length and
    compared position by position with the model signal of the peptide.
    Each difference is attributed to all amino acids of the window that
    starts at that position; windows touching the "-" flanks are skipped.

    blockades_file -- path of the mat file with blockades
    model_file     -- path of the model file passed to load_model()
    cluster_size   -- cluster size passed to sp.preprocess_blockades()

    Returns a defaultdict mapping amino acid -> list of differences
    """
    WINDOW = 4

    clusters = sp.preprocess_blockades(read_mat(blockades_file),
                                       cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    # The peptide is taken from the first blockade of the first cluster
    peptide = clusters[0].blockades[0].peptide
    model_signal = load_model(model_file).peptide_signal(peptide)

    # These depend on the peptide only, so they are built once
    padding = "-" * (WINDOW - 1)
    flanked_peptide = padding + peptide + padding
    num_peaks = len(peptide) + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))

        for pos in xrange(num_peaks):
            kmer = flanked_peptide[pos:pos + WINDOW]
            if "-" in kmer:
                continue
            difference = discr_signal[pos] - model_signal[pos]
            for aa in kmer:
                errors[aa].append(difference)

    return errors
Ejemplo n.º 4
0
def get_bias(blockades_file, model_file, cluster_size):
    """
    Gathers per-amino-acid errors of the model against measured signals

    For each cluster the consensus is discretized to len(peptide) values.
    At every position whose WINDOW-long k-mer contains no "-" flank symbol,
    the difference (empirical - model) is appended to the error list of
    each amino acid of that k-mer.

    blockades_file -- path of the mat file with blockades
    model_file     -- path of the model file passed to load_model()
    cluster_size   -- cluster size passed to sp.preprocess_blockades()

    Returns a defaultdict: amino acid -> list of signal differences
    """
    WINDOW = 4

    raw_blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(raw_blockades,
                                       cluster_size=cluster_size,
                                       min_dwell=0.5,
                                       max_dwell=20)
    # All clusters are assumed to share the peptide of the first blockade
    peptide = clusters[0].blockades[0].peptide
    peptide_len = len(peptide)

    model = load_model(model_file)
    model_signal = model.peptide_signal(peptide)

    # Peptide-only quantities, computed once for all clusters
    flank = "-" * (WINDOW - 1)
    flanked_peptide = flank + peptide + flank
    num_peaks = peptide_len + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, peptide_len)

        for start in xrange(num_peaks):
            kmer = flanked_peptide[start:start + WINDOW]
            if "-" not in kmer:
                delta = discr_signal[start] - model_signal[start]
                for aa in kmer:
                    errors[aa].append(delta)

    return errors
Ejemplo n.º 5
0
def frequency_distribution(blockades_file, detailed):
    """
    Plots the frequency distribution

    For every blockade that passes the duration filter, the number of peaks
    found in the trace (with 1000 samples cut from each end) is divided by
    the blockade dwell time and scaled by 5 / 4. Two subplots are shown:
    dwell time against the deviation of this frequency from the mean, and
    a 100-bin histogram of the frequencies. Nothing is returned.

    blockades_file -- path of the mat file with blockades
    detailed       -- if true, detailed_plots() is called for each blockade
    """
    blockades = read_mat(blockades_file)
    blockades = sp._fractional_blockades(blockades)
    blockades = sp._filter_by_duration(blockades, 0.5, 20)

    # Kept in blockade order, so no blockade-keyed dict (and no hashable
    # blockade objects) is needed to match values back to blockades
    frequencies = []
    for blockade in blockades:
        if detailed:
            detailed_plots(blockade)

        signal = blockade.eventTrace[1000:-1000]
        peaks_x, _ = sp.find_peaks(signal)
        # NOTE(review): the rationale for the 5 / 4 factor is not visible here
        frequencies.append(len(peaks_x) / blockade.ms_Dwell * 5 / 4)

    mean = np.mean(frequencies)
    # Explicit lists: numpy / matplotlib cannot consume lazy map objects
    # or dict views
    errors = [freq - mean for freq in frequencies]
    lengths = [blockade.ms_Dwell for blockade in blockades]

    _, (scatter_axes, hist_axes) = plt.subplots(2)
    scatter_axes.scatter(lengths, errors)
    hist_axes.hist(frequencies, bins=100)
    plt.show()
Ejemplo n.º 6
0
def pvalues_test(blockades_file, cluster_size, blockade_model, db_file,
                 single_blockades, ostream):
    """
    Runs protein identification for every cluster and reports the results

    The true peptide is read from the first blockade of the file. Each
    cluster consensus is ranked against the database; the (zero-based) rank
    of the target divided by the database size is reported as its p-value.
    A tab-separated table and the median statistics are written to ostream.

    blockades_file   -- path of the mat file with blockades
    cluster_size     -- cluster size passed to sp.preprocess_blockades()
    blockade_model   -- model object handed to Identifier
    db_file          -- database file, or None to use a random database
                        of RANDOM_DB_SIZE entries
    single_blockades -- if true, _detalize_cluster() is called per cluster
    ostream          -- object with a write() method receiving the report

    Returns a tuple (median p-value, median target rank as int)
    """
    RANDOM_DB_SIZE = 10000
    identifier = Identifier(blockade_model)

    blockades = read_mat(blockades_file)
    true_peptide = blockades[0].peptide
    if db_file is not None:
        database, target_id = _make_database(db_file, true_peptide)
        identifier.set_database(database)
        db_len = len(database)
    else:
        identifier.random_database(true_peptide, RANDOM_DB_SIZE)
        target_id = "target"
        db_len = RANDOM_DB_SIZE

    clusters = sp.preprocess_blockades(blockades,
                                       cluster_size=cluster_size,
                                       min_dwell=0.5,
                                       max_dwell=20)

    header = ("\nNo\tSize\tBest_id\t\tBest_dst\tTrg_dst\t\tTrg_rank\t"
              "Trg_pval\n")
    row_format = "{0}\t{1}\t{2:10}\t{3:5.2f}\t\t{4:5.2f}\t\t{5}\t\t{6:6.4}\n"
    ostream.write(header)

    p_values = []
    ranks = []
    for number, cluster in enumerate(clusters, 1):
        db_ranking = identifier.rank_db_proteins(cluster.consensus)

        # If the target id occurs more than once, the last occurrence counts
        hits = [(position, dist)
                for position, (prot_id, dist) in enumerate(db_ranking)
                if prot_id == target_id]
        target_rank, target_dist = hits[-1] if hits else (None, None)
        p_value = float(target_rank) / db_len

        p_values.append(p_value)
        ranks.append(target_rank)

        # The rank is stored zero-based but printed one-based
        ostream.write(row_format.format(number, len(cluster.blockades),
                                        db_ranking[0][0], db_ranking[0][1],
                                        target_dist, target_rank + 1,
                                        p_value))
        if single_blockades:
            _detalize_cluster(identifier, cluster, db_ranking[0][0], target_id,
                              ostream)

    median_p_value = np.median(p_values)
    median_rank = int(np.median(ranks))
    ostream.write("\nMedian p-value: {0:7.4f}\n".format(median_p_value))
    ostream.write("Median target rank: {0:d}\n".format(median_rank))

    return median_p_value, median_rank
Ejemplo n.º 7
0
def main():
    """
    Merges the blockades of several mat files into one mat file

    Command line arguments: one or more input mat files followed by the
    output mat file. Returns 1 (after printing the usage) if fewer than
    two arguments are given, 0 otherwise.
    """
    arguments = sys.argv[1:]
    if len(arguments) < 2:
        print("usage: merge-mat.py mat_1[,mat_2..] out_mat\n\n"
              "Merge multiple files with blockades into one")
        return 1

    input_files, output_file = arguments[:-1], arguments[-1]

    merged = []
    for mat_file in input_files:
        # In-place list concatenation, same as extend()
        merged += read_mat(mat_file)

    write_mat(merged, output_file)
    return 0
Ejemplo n.º 8
0
def main():
    """
    Stores a protein sequence in every blockade of a mat file

    Command line arguments: the mat file and the protein sequence. The mat
    file is rewritten in place. Returns 1 (after printing the usage to
    stderr) on a wrong number of arguments, 0 otherwise.
    """
    if len(sys.argv) != 3:
        print("usage: protein-label.py mat_file prot_sequence\n\n"
              "Add protein sequence record into the mat file "
              "with blockades", file=sys.stderr)
        return 1

    mat_path, sequence = sys.argv[1], sys.argv[2]

    labeled = read_mat(mat_path)
    for record in labeled:
        record.peptide = sequence
    # The input file itself is overwritten
    write_mat(labeled, mat_path)
    return 0
Ejemplo n.º 9
0
def main():
    """
    Concatenates blockades from several mat files into a single file

    The last command line argument is the output mat file, all arguments
    before it are inputs. Prints the usage and returns 1 if fewer than two
    arguments are given; returns 0 after the merged file is written.
    """
    argv_tail = sys.argv[1:]
    if len(argv_tail) < 2:
        print("usage: merge-mat.py mat_1[,mat_2..] out_mat\n\n"
              "Merge multiple files with blockades into one")
        return 1

    out_mat = argv_tail[-1]
    all_blockades = []
    for in_mat in argv_tail[:-1]:
        # In-place list concatenation, same as extend()
        all_blockades += read_mat(in_mat)

    write_mat(all_blockades, out_mat)
    return 0
Ejemplo n.º 10
0
def pvalues_test(blockades_file, cluster_size, blockade_model, db_file,
                 single_blockades, ostream):
    """
    Identifies the protein for each blockade cluster and writes a report

    The true peptide comes from the first blockade of the file. For every
    cluster the database is ranked by the consensus signal; the zero-based
    rank of the target divided by the database size is taken as the
    p-value. One table row per cluster plus the medians go to ostream.

    blockades_file   -- path of the mat file with blockades
    cluster_size     -- cluster size passed to sp.preprocess_blockades()
    blockade_model   -- model object handed to Identifier
    db_file          -- database file, or None to use a random database
                        of RANDOM_DB_SIZE entries
    single_blockades -- if true, _detalize_cluster() is called per cluster
    ostream          -- object with a write() method receiving the report

    Returns a tuple (median p-value, median target rank as int)
    """
    RANDOM_DB_SIZE = 10000
    identifier = Identifier(blockade_model)

    blockades = read_mat(blockades_file)
    true_peptide = blockades[0].peptide
    use_random_db = db_file is None
    if use_random_db:
        identifier.random_database(true_peptide, RANDOM_DB_SIZE)
        target_id, db_len = "target", RANDOM_DB_SIZE
    else:
        database, target_id = _make_database(db_file, true_peptide)
        identifier.set_database(database)
        db_len = len(database)

    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)

    ostream.write("\nNo\tSize\tBest_id\t\tBest_dst\tTrg_dst\t\tTrg_rank\t"
                  "Trg_pval\n")
    line_template = ("{0}\t{1}\t{2:10}\t{3:5.2f}\t\t{4:5.2f}\t\t{5}\t\t"
                     "{6:6.4}\n")

    p_values, ranks = [], []
    for index, cluster in enumerate(clusters):
        db_ranking = identifier.rank_db_proteins(cluster.consensus)

        # Scan the whole ranking; a later entry with the target id
        # overrides an earlier one
        target_rank = target_dist = None
        for position, entry in enumerate(db_ranking):
            prot_id, prot_dist = entry
            if prot_id != target_id:
                continue
            target_rank, target_dist = position, prot_dist
        p_value = float(target_rank) / db_len

        p_values.append(p_value)
        ranks.append(target_rank)

        # Cluster number and target rank are printed one-based
        row = line_template.format(index + 1, len(cluster.blockades),
                                   db_ranking[0][0], db_ranking[0][1],
                                   target_dist, target_rank + 1, p_value)
        ostream.write(row)
        if single_blockades:
            _detalize_cluster(identifier, cluster, db_ranking[0][0],
                              target_id, ostream)

    p_value_median = np.median(p_values)
    rank_median = int(np.median(ranks))
    ostream.write("\nMedian p-value: {0:7.4f}\n".format(p_value_median))
    ostream.write("Median target rank: {0:d}\n".format(rank_median))

    return p_value_median, rank_median
Ejemplo n.º 11
0
def _get_peptides_signals(mat_files):
    """
    Builds parallel lists of peptides and discretized cluster signals

    Each mat file is preprocessed into clusters of TRAIN_AVG blockades.
    The peptide of a file is taken from the first blockade of its first
    cluster; every cluster consensus is discretized to the peptide length.

    mat_files -- iterable of mat file paths

    Returns (peptides, signals), with one entry per cluster in each list
    """
    TRAIN_AVG = 1

    peptides, signals = [], []
    for mat in mat_files:
        clusters = sp.preprocess_blockades(read_mat(mat),
                                           cluster_size=TRAIN_AVG,
                                           min_dwell=0.5, max_dwell=20)
        mat_peptide = clusters[0].blockades[0].peptide
        peptide_length = len(mat_peptide)

        # One peptide copy per cluster keeps both lists aligned
        peptides += [mat_peptide] * len(clusters)
        signals += [sp.discretize(cluster.consensus, peptide_length)
                    for cluster in clusters]

    return peptides, signals
Ejemplo n.º 12
0
def main():
    """
    Flips blockade signals and writes them into a new mat file

    Command line arguments, in order: input blockades file, model file,
    output file. The blockades are passed to flip() together with the
    model file. Returns 1 (after printing the usage) on a wrong number of
    arguments, 0 otherwise.
    """
    if len(sys.argv) != 4:
        print("usage: flip-blockades.py blockades_in model_file flipped_out\n\n"
              "Orients blockade signals according to the AA order "
              "in the protein of origin")
        return 1

    blockades_in, svr_file, blockades_out = sys.argv[1:]

    flipped = flip(read_mat(blockades_in), svr_file)
    write_mat(flipped, blockades_out)

    return 0
Ejemplo n.º 13
0
def frequency_plot(blockade_files):
    """
    Draws the plot

    For every mat file the fluctuation frequency of each blockade (peaks
    found in the trace with 1000 samples cut from each end, divided by the
    dwell time and scaled by 5 / 4) is computed. A Gaussian kernel density
    estimate of each dataset is drawn on shared axes; datasets are labeled
    by the file base name up to the first dot. The figure is shown
    interactively, nothing is returned.

    blockade_files -- iterable of mat file paths, one per dataset
    """
    COLORS = ["blue", "green", "red", "cyan"]
    KDE_BANDWIDTH_FACTOR = 0.25

    datasets_names = []
    frequencies = []
    for mat_file in blockade_files:
        blockades = read_mat(mat_file)
        blockades = sp._fractional_blockades(blockades)
        blockades = sp._filter_by_duration(blockades, 0.5, 20)

        dataset_freqs = []
        for blockade in blockades:
            peaks_x, _ = sp.find_peaks(blockade.eventTrace[1000:-1000])
            dataset_freqs.append(len(peaks_x) / blockade.ms_Dwell * 5 / 4)

        frequencies.append(dataset_freqs)
        datasets_names.append(os.path.basename(mat_file).split(".")[0])

    # Flatten once to get the common range of the x axis
    all_freqs = [freq for dataset in frequencies for freq in dataset]
    x_axis = np.arange(min(all_freqs) - 10, max(all_freqs) + 10, 0.1)
    matplotlib.rcParams.update({"font.size": 16})
    axes = plt.subplot()

    # Axes cosmetics do not depend on the dataset
    axes.spines["right"].set_visible(False)
    axes.spines["top"].set_visible(False)
    axes.get_xaxis().tick_bottom()
    axes.get_yaxis().tick_left()
    axes.set_ylim(0, 0.16)

    for index, (distr, name) in enumerate(zip(frequencies, datasets_names)):
        # Colors repeat, so datasets past the fourth are not dropped
        color = COLORS[index % len(COLORS)]
        # A scalar bw_method sets the bandwidth factor directly. The former
        # covariance_factor override never took effect because
        # _compute_covariance was referenced without being called
        density = gaussian_kde(distr, bw_method=KDE_BANDWIDTH_FACTOR)
        gauss_dens = density(x_axis)

        axes.plot(x_axis, gauss_dens, antialiased=True, linewidth=2,
                  color=color, alpha=0.7, label=name)
        axes.fill_between(x_axis, gauss_dens, alpha=0.5, antialiased=True,
                          color=color)

    axes.set_xlabel("Fluctuation frequency, 1/ms")
    legend = axes.legend(loc="upper left", frameon=False)
    for line in legend.get_lines():
        line.set_linewidth(3)
    for text in legend.get_texts():
        text.set_fontsize(16)
    plt.show()
Ejemplo n.º 14
0
def _get_peptides_signals(mat_files):
    """
    Collects peptides and discretized consensus signals from mat files

    Every file is clustered with a cluster size of TRAIN_AVG. For each
    cluster one peptide (taken from the first blockade of the file's first
    cluster) and one consensus discretized to the peptide length are
    emitted.

    mat_files -- iterable of mat file paths

    Returns (peptides, signals), two lists aligned by index
    """
    TRAIN_AVG = 1

    peptides, signals = [], []
    for mat in mat_files:
        raw_blockades = read_mat(mat)
        clusters = sp.preprocess_blockades(raw_blockades,
                                           cluster_size=TRAIN_AVG,
                                           min_dwell=0.5,
                                           max_dwell=20)
        mat_peptide = clusters[0].blockades[0].peptide
        num_points = len(mat_peptide)

        # Same peptide repeated for each cluster of this file
        peptides += [mat_peptide] * len(clusters)
        signals += [sp.discretize(cluster.consensus, num_points)
                    for cluster in clusters]

    return peptides, signals
Ejemplo n.º 15
0
def plot_blockades(blockades_file, model_files,
                   cluster_size, show_text):
    """
    Plots each cluster consensus together with the model signals

    One figure is shown per cluster. The signal of every model is linearly
    interpolated to the length of the consensus, its correlation
    (1 - correlation distance) with the consensus is printed to stderr
    and the interpolated curve is drawn over the empirical one.

    blockades_file -- path of the mat file with blockades
    model_files    -- iterable of model files passed to load_model()
    cluster_size   -- cluster size passed to sp.preprocess_blockades()
    show_text      -- if true, amino acid letters are drawn on the plot
    """
    WINDOW = 4

    clusters = sp.preprocess_blockades(read_mat(blockades_file),
                                       cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    # The peptide is taken from the first blockade of the first cluster
    peptide = clusters[0].blockades[0].peptide
    x_limit = len(peptide) + 1

    models = [load_model(model_file) for model_file in model_files]

    for cluster in clusters:
        signal_length = len(cluster.consensus)

        x_axis = np.linspace(0, x_limit, signal_length)
        matplotlib.rcParams.update({"font.size": 16})
        axes = plt.subplot()

        axes.spines["right"].set_visible(False)
        axes.spines["top"].set_visible(False)
        axes.get_xaxis().tick_bottom()
        axes.get_yaxis().tick_left()
        axes.set_xlim(0, x_limit)
        axes.set_xlabel("Putative AA position")
        axes.set_ylabel("Normalized signal")

        axes.plot(x_axis, cluster.consensus, label="Empirical signal",
                  linewidth=1.5)

        for model in models:
            model_signal = model.peptide_signal(peptide)
            # Spread the model points over [0, signal_length]
            last_index = len(model_signal) - 1
            model_grid = [i * signal_length / last_index
                          for i in xrange(len(model_signal))]

            # Resample the model signal at every consensus sample index
            interp_fun = interp1d(model_grid, model_signal, kind="linear")
            model_interp = interp_fun(xrange(signal_length))

            corr = 1 - distance.correlation(cluster.consensus, model_interp)
            print("{0} correlation: {1:5.2f}\t".format(model.name, corr),
                  file=sys.stderr)
            axes.plot(x_axis, model_interp, label=model.name, linewidth=2)

        legend = axes.legend(loc="lower left", frameon=False)
        for line in legend.get_lines():
            line.set_linewidth(2)
        for text in legend.get_texts():
            text.set_fontsize(16)

        if show_text:
            # Amino acid letters, drawn 2 units below the mean signal level
            text_y = np.mean(cluster.consensus) - 2
            acids_pos = _get_aa_positions(peptide, WINDOW, x_axis[-1])
            for index, aa in enumerate(peptide):
                axes.text(acids_pos[index], text_y, aa, fontsize=16)

        plt.show()
Ejemplo n.º 16
0
def plot_blockades(blockades_file, model_files, cluster_size, show_text):
    """
    Shows, cluster by cluster, the consensus signal against the models

    For every cluster a separate figure is displayed. Each model signal is
    linearly interpolated onto the consensus sample indices; its
    correlation (1 - correlation distance) with the consensus is printed to
    stderr and the curve is added to the figure.

    blockades_file -- path of the mat file with blockades
    model_files    -- iterable of model files passed to load_model()
    cluster_size   -- cluster size passed to sp.preprocess_blockades()
    show_text      -- if true, amino acid letters are drawn on the plot
    """
    WINDOW = 4

    raw_blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(raw_blockades,
                                       cluster_size=cluster_size,
                                       min_dwell=0.5,
                                       max_dwell=20)
    # All clusters are plotted against the peptide of the first blockade
    peptide = clusters[0].blockades[0].peptide
    axis_end = len(peptide) + 1

    models = [load_model(path) for path in model_files]

    for cluster in clusters:
        signal_length = len(cluster.consensus)

        x_axis = np.linspace(0, axis_end, signal_length)
        matplotlib.rcParams.update({"font.size": 16})
        axes = plt.subplot()

        for side in ("right", "top"):
            axes.spines[side].set_visible(False)
        axes.get_xaxis().tick_bottom()
        axes.get_yaxis().tick_left()
        axes.set_xlim(0, axis_end)
        axes.set_xlabel("Putative AA position")
        axes.set_ylabel("Normalized signal")

        axes.plot(x_axis,
                  cluster.consensus,
                  label="Empirical signal",
                  linewidth=1.5)

        for model in models:
            model_signal = model.peptide_signal(peptide)
            # Model points are spread over [0, signal_length]
            num_points = len(model_signal)
            model_grid = [
                i * signal_length / (num_points - 1)
                for i in xrange(num_points)
            ]

            # Evaluate the model at each consensus sample index
            interp_fun = interp1d(model_grid, model_signal, kind="linear")
            model_interp = interp_fun(xrange(signal_length))

            corr = 1 - distance.correlation(cluster.consensus, model_interp)
            print("{0} correlation: {1:5.2f}\t".format(model.name, corr),
                  file=sys.stderr)
            axes.plot(x_axis, model_interp, label=model.name, linewidth=2)

        legend = axes.legend(loc="lower left", frameon=False)
        for legend_line in legend.get_lines():
            legend_line.set_linewidth(2)
        for legend_text in legend.get_texts():
            legend_text.set_fontsize(16)

        if show_text:
            # Letters are placed 2 units below the mean of the consensus
            event_mean = np.mean(cluster.consensus)
            acids_pos = _get_aa_positions(peptide, WINDOW, x_axis[-1])
            for aa_index, aa in enumerate(peptide):
                axes.text(acids_pos[aa_index], event_mean - 2, aa,
                          fontsize=16)

        plt.show()
Ejemplo n.º 17
0
def correlation(mat_file_1, mat_file_2):
    """
    Draws a scatter plot of self- versus cross-dataset correlation

    Every blockade of the first file becomes one point: its x coordinate is
    the mean of (1 - correlation distance) to all traces of the first file
    (itself included), its y coordinate is the same mean computed against
    all traces of the second file. Red dashed lines mark the medians of
    both coordinates. The figure is shown interactively, nothing is returned.

    mat_file_1 -- path of the mat file with the reference blockades
    mat_file_2 -- path of the mat file with the blockades to compare with

    NOTE: the axis labels are hard-coded to "H3 tail" / "CCL5".
    """
    def _discretized_traces(mat_file):
        # Identical preprocessing for both inputs
        blockades = read_mat(mat_file)
        blockades = sp._fractional_blockades(blockades)
        blockades = sp._filter_by_duration(blockades, 0.5, 20)
        # Must be a real list, not a lazy map object: the traces are
        # traversed once per blockade in the nested loops below
        return [
            sp.discretize(sp._trim_flank_noise(b.eventTrace), 20)
            for b in blockades
        ]

    traces_1 = _discretized_traces(mat_file_1)
    traces_2 = _discretized_traces(mat_file_2)

    self_corr = []
    cross_corr = []
    for trace in traces_1:
        block_self = [
            1 - distance.correlation(trace, other) for other in traces_1
        ]
        block_cross = [
            1 - distance.correlation(trace, other) for other in traces_2
        ]
        self_corr.append(np.mean(block_self))
        cross_corr.append(np.mean(block_cross))

    # Medians (not means) of the per-blockade averages
    median_self = np.median(self_corr)
    median_cross = np.median(cross_corr)

    matplotlib.rcParams.update({"font.size": 16})
    axes = plt.subplot()

    axes.spines["right"].set_visible(False)
    axes.spines["top"].set_visible(False)
    axes.get_xaxis().tick_bottom()
    axes.get_yaxis().tick_left()
    axes.set_xlim(-0.6, 0.6)
    axes.set_ylim(-0.6, 0.6)
    axes.set_xlabel("(H3 tail, H3 tail) correlation")
    axes.set_ylabel("(H3 tail, CCL5) correlation")

    # Thin black grid
    for tick in [-0.4, -0.2, 0, 0.2, 0.4]:
        plt.plot((-0.6, 0.6), (tick, tick), "--", lw=0.5, color="black")
        plt.plot((tick, tick), (-0.6, 0.6), "--", lw=0.5, color="black")

    # Thick red lines at the medians
    plt.plot((-0.6, 0.6), (median_cross, median_cross), "--", lw=1.5,
             color="red")
    plt.plot((median_self, median_self), (-0.6, 0.6), "--", lw=1.5,
             color="red")

    axes.scatter(self_corr,
                 cross_corr,
                 linewidth=0.5,
                 c="dodgerblue",
                 s=30,
                 edgecolor="blue")

    plt.tight_layout()
    plt.show()