예제 #1
0
def correlation(mat_file_1, mat_file_2):
    """
    Draws a scatter plot of self- versus cross-correlation of blockades

    Every blockade from ``mat_file_1`` becomes one point: the x coordinate
    is its mean correlation with all blockades of ``mat_file_1`` (itself
    included), the y coordinate is its mean correlation with all blockades
    of ``mat_file_2``. The medians of both coordinates are marked with red
    dashed lines. The figure is shown interactively, nothing is returned.
    """

    def _discretized_traces(mat_file):
        # Identical preprocessing chain for both inputs. A real list is
        # returned on purpose: the traces are iterated several times below,
        # and a lazy map() object (Python 3) would be exhausted after the
        # first pass, silently dropping almost all points.
        blockades = read_mat(mat_file)
        blockades = sp._fractional_blockades(blockades)
        blockades = sp._filter_by_duration(blockades, 0.5, 20)
        return [sp.discretize(sp._trim_flank_noise(b.eventTrace), 20)
                for b in blockades]

    blockades_1 = _discretized_traces(mat_file_1)
    blockades_2 = _discretized_traces(mat_file_2)

    self_corr = []
    cross_corr = []
    for blockade in blockades_1:
        # distance.correlation() is a distance, so 1 - d is the correlation
        block_self = [1 - distance.correlation(blockade, other)
                      for other in blockades_1]
        block_cross = [1 - distance.correlation(blockade, other)
                       for other in blockades_2]
        self_corr.append(np.mean(block_self))
        cross_corr.append(np.mean(block_cross))

    median_self = np.median(self_corr)
    median_cross = np.median(cross_corr)

    matplotlib.rcParams.update({"font.size": 16})
    axes = plt.subplot()

    axes.spines["right"].set_visible(False)
    axes.spines["top"].set_visible(False)
    axes.get_xaxis().tick_bottom()
    axes.get_yaxis().tick_left()
    axes.set_xlim(-0.6, 0.6)
    axes.set_ylim(-0.6, 0.6)
    axes.set_xlabel("(H3 tail, H3 tail) correlation")
    axes.set_ylabel("(H3 tail, CCL5) correlation")

    # Thin dashed grid, horizontal and vertical
    for level in [-0.4, -0.2, 0, 0.2, 0.4]:
        plt.plot((-0.6, 0.6), (level, level), "--",
                 lw=0.5, color="black")
        plt.plot((level, level), (-0.6, 0.6), "--",
                 lw=0.5, color="black")

    # Medians of the cross- (horizontal) and self- (vertical) correlations
    plt.plot((-0.6, 0.6), (median_cross, median_cross), "--",
             lw=1.5, color="red")
    plt.plot((median_self, median_self), (-0.6, 0.6), "--",
             lw=1.5, color="red")

    axes.scatter(self_corr, cross_corr, linewidth=0.5, c="dodgerblue",
                 s=30, edgecolor="blue")

    plt.tight_layout()
    plt.show()
예제 #2
0
def get_bias(blockades_file, model_file, cluster_size):
    """
    Gets AA-specific bias between the empirical and theoretical signals

    Blockades are read from ``blockades_file`` and clustered with
    ``cluster_size``; the model loaded from ``model_file`` predicts the
    signal of the peptide taken from the first blockade of the first
    cluster. For every window of WINDOW residues that lies completely
    inside the peptide, the difference (empirical - model) at the window
    index is recorded once per residue of that window.

    Returns a defaultdict(list) mapping an amino acid character to the
    list of recorded differences.
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades,
                                       cluster_size=cluster_size,
                                       min_dwell=0.5,
                                       max_dwell=20)
    peptide = clusters[0].blockades[0].peptide

    blockade_model = load_model(model_file)
    model_signal = blockade_model.peptide_signal(peptide)

    # These depend on the peptide only, so build them once instead of
    # once per cluster
    flank = "-" * (WINDOW - 1)
    flanked_peptide = flank + peptide + flank
    num_peaks = len(peptide) + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))

        # range() instead of the Python-2-only xrange(); num_peaks is small
        for i in range(num_peaks):
            kmer = flanked_peptide[i:i + WINDOW]
            if "-" in kmer:
                # window overlaps the artificial flank, skip it
                continue
            diff = discr_signal[i] - model_signal[i]
            for aa in kmer:
                errors[aa].append(diff)

    return errors
예제 #3
0
def flip(blockades, model_file):
    """
    Reverses blockades that fit the reversed peptide better

    The blockades are preprocessed into clusters of size one. For each
    cluster the discretized consensus is compared with the peptide and
    with the reversed peptide; when the forward distance is larger, the
    eventTrace of the cluster's first blockade is reversed in place.
    A per-cluster table and the number of reversals go to stderr.

    Returns the list with the first blockade of every cluster.
    """
    identifier = Identifier(load_model(model_file))

    peptide = blockades[0].peptide
    reversed_peptide = peptide[::-1]
    clusters = sp.preprocess_blockades(blockades, cluster_size=1,
                                       min_dwell=0.0, max_dwell=1000)

    print("Num\tFwd_dst\tRev_dst\t\tNeeds_flip", file=sys.stderr)

    row_format = "{0}\t{1:5.2f}\t{2:5.2f}\t\t{3}"
    flipped = 0
    oriented = []
    for index, cluster in enumerate(clusters, 1):
        signal = sp.discretize(cluster.consensus, len(peptide))

        forward = identifier.signal_protein_distance(signal, peptide)
        backward = identifier.signal_protein_distance(signal,
                                                      reversed_peptide)
        needs_flip = forward > backward
        print(row_format.format(index, forward, backward, needs_flip),
              file=sys.stderr)

        representative = cluster.blockades[0]
        oriented.append(representative)
        if needs_flip:
            representative.eventTrace = representative.eventTrace[::-1]
            flipped += 1

    print("Reversed:", flipped, "of", len(blockades), file=sys.stderr)
    return oriented
예제 #4
0
def get_bias(blockades_file, model_file, cluster_size):
    """
    Gets AA-specific bias between the empirical and theoretical signals

    The peptide is taken from the first blockade of the first cluster and
    its theoretical signal is computed by the model from ``model_file``.
    For each cluster consensus (discretized to the peptide length) and
    each window of WINDOW residues fully inside the peptide, the value
    (empirical - model) at the window index is stored for every residue
    of the window.

    Returns a defaultdict(list): amino acid character -> list of
    differences.
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    peptide = clusters[0].blockades[0].peptide

    blockade_model = load_model(model_file)
    model_signal = blockade_model.peptide_signal(peptide)

    # Loop invariants: the flanked peptide and the number of windows are
    # the same for all clusters
    padding = "-" * (WINDOW - 1)
    flanked_peptide = padding + peptide + padding
    num_peaks = len(peptide) + WINDOW - 1

    errors = defaultdict(list)
    for cluster in clusters:
        discr_signal = sp.discretize(cluster.consensus, len(peptide))

        # xrange() exists in Python 2 only; range() works everywhere
        for i in range(num_peaks):
            kmer = flanked_peptide[i : i + WINDOW]
            if "-" in kmer:
                # windows touching the padding are ignored
                continue
            delta = discr_signal[i] - model_signal[i]
            for aa in kmer:
                errors[aa].append(delta)

    return errors
예제 #5
0
def _get_peptides_signals(mat_files):
    """
    Collects (peptide, discretized signal) pairs from the given mat files

    Each file is read and preprocessed into clusters of TRAIN_AVG
    blockades. The peptide of a file is taken from the first blockade of
    its first cluster; every cluster consensus is discretized to the
    length of that peptide.

    Returns two parallel lists: peptides and signals.
    """
    TRAIN_AVG = 1

    peptides, signals = [], []
    for mat_path in mat_files:
        file_clusters = sp.preprocess_blockades(read_mat(mat_path),
                                                cluster_size=TRAIN_AVG,
                                                min_dwell=0.5,
                                                max_dwell=20)
        file_peptide = file_clusters[0].blockades[0].peptide

        # one peptide entry per cluster keeps both lists aligned
        peptides += [file_peptide] * len(file_clusters)
        signals += [sp.discretize(c.consensus, len(file_peptide))
                    for c in file_clusters]

    return peptides, signals
예제 #6
0
def _get_peptides_signals(mat_files):
    """
    Builds parallel lists of peptides and discretized cluster signals

    For every mat file the blockades are grouped into clusters of
    TRAIN_AVG elements. The peptide comes from the first blockade of the
    first cluster and is repeated once per cluster; the consensus of each
    cluster is discretized to the peptide length.
    """
    TRAIN_AVG = 1

    all_peptides = []
    all_signals = []
    for path in mat_files:
        raw_blockades = read_mat(path)
        groups = sp.preprocess_blockades(raw_blockades,
                                         cluster_size=TRAIN_AVG,
                                         min_dwell=0.5,
                                         max_dwell=20)
        peptide = groups[0].blockades[0].peptide
        num_points = len(peptide)

        all_peptides.extend(peptide for _ in groups)
        all_signals.extend(sp.discretize(group.consensus, num_points)
                           for group in groups)

    return all_peptides, all_signals
예제 #7
0
    def rank_db_proteins(self, signal):
        """
        Orders the database proteins by their distance to a signal

        The signal is discretized to the length of each protein sequence
        (once per distinct length) and compared with that sequence via
        signal_protein_distance().

        Returns a list of (protein id, distance) pairs sorted by
        increasing distance. The database must be set.
        """
        assert self.database is not None

        signal_by_length = {}
        scores = {}
        for protein_id, sequence in self.database.items():
            seq_len = len(sequence)
            # proteins of equal length share one discretized signal
            if seq_len not in signal_by_length:
                signal_by_length[seq_len] = sp.discretize(signal, seq_len)

            scores[protein_id] = self.signal_protein_distance(
                signal_by_length[seq_len], sequence)

        return sorted(scores.items(), key=lambda pair: pair[1])
예제 #8
0
def correlation(mat_file_1, mat_file_2):
    """
    Draws the self- versus cross-correlation scatter plot

    For each blockade of ``mat_file_1`` the mean correlation with all
    blockades of ``mat_file_1`` (the blockade itself included) gives the
    x coordinate and the mean correlation with all blockades of
    ``mat_file_2`` gives the y coordinate. Red dashed lines mark the
    medians of both coordinates. The plot is shown with plt.show();
    the function returns nothing.
    """

    def _discretized_traces(mat_file):
        # Both files go through the same preprocessing. The result has to
        # be a list: it is traversed repeatedly in the nested loops below,
        # while the map() object of Python 3 can be consumed only once.
        blockades = read_mat(mat_file)
        blockades = sp._fractional_blockades(blockades)
        blockades = sp._filter_by_duration(blockades, 0.5, 20)
        return [
            sp.discretize(sp._trim_flank_noise(b.eventTrace), 20)
            for b in blockades
        ]

    blockades_1 = _discretized_traces(mat_file_1)
    blockades_2 = _discretized_traces(mat_file_2)

    self_corr = []
    cross_corr = []
    for blockade in blockades_1:
        # correlation = 1 - correlation distance
        block_self = [
            1 - distance.correlation(blockade, other)
            for other in blockades_1
        ]
        block_cross = [
            1 - distance.correlation(blockade, other)
            for other in blockades_2
        ]
        self_corr.append(np.mean(block_self))
        cross_corr.append(np.mean(block_cross))

    median_self = np.median(self_corr)
    median_cross = np.median(cross_corr)

    matplotlib.rcParams.update({"font.size": 16})
    axes = plt.subplot()

    axes.spines["right"].set_visible(False)
    axes.spines["top"].set_visible(False)
    axes.get_xaxis().tick_bottom()
    axes.get_yaxis().tick_left()
    axes.set_xlim(-0.6, 0.6)
    axes.set_ylim(-0.6, 0.6)
    axes.set_xlabel("(H3 tail, H3 tail) correlation")
    axes.set_ylabel("(H3 tail, CCL5) correlation")

    # Background grid
    for level in [-0.4, -0.2, 0, 0.2, 0.4]:
        plt.plot((-0.6, 0.6), (level, level), "--", lw=0.5, color="black")
        plt.plot((level, level), (-0.6, 0.6), "--", lw=0.5, color="black")

    # Median markers: horizontal for cross-, vertical for self-correlation
    plt.plot((-0.6, 0.6), (median_cross, median_cross), "--",
             lw=1.5, color="red")
    plt.plot((median_self, median_self), (-0.6, 0.6), "--",
             lw=1.5, color="red")

    axes.scatter(self_corr,
                 cross_corr,
                 linewidth=0.5,
                 c="dodgerblue",
                 s=30,
                 edgecolor="blue")

    plt.tight_layout()
    plt.show()