Ejemplo n.º 1
0
    def plot_scores(self, motifs, name=True, max_len=50):
        """Create motif scores boxplot of different clusters.

        Motifs can be specified as either motif or factor names.
        The motif scores are scaled per motif and plotted as z-scores.

        Parameters
        ----------
        motifs : iterable or str
            List of motif or factor names. A single name may be passed as a
            plain string.

        name : bool, optional
            Use factor names instead of motif names for plotting.

        max_len : int, optional
            Truncate the list of factors to this maximum length.

        Returns
        -------
        g : FacetGrid
            Returns the seaborn FacetGrid object with the plot.

        Raises
        ------
        ValueError
            If ``self.input`` does not have exactly one column.
        """
        if self.input.shape[1] != 1:
            raise ValueError(
                "Can't make a categorical plot with real-valued data")

        if isinstance(motifs, str):
            motifs = [motifs]

        plot_motifs = []
        for motif in motifs:
            if motif in self.motifs:
                matches = [motif]
            else:
                # Not a motif id: treat it as a factor name and collect every
                # motif that lists this factor.
                matches = [
                    m.id for m in self.motifs.values() if motif in m.factors
                ]
            # Keep each motif once so no duplicate columns are selected.
            for motif_id in matches:
                if motif_id not in plot_motifs:
                    plot_motifs.append(motif_id)

        # Explicit copy: the scaling below must never write into self.scores.
        data = self.scores[plot_motifs].copy()

        # Column-wise z-score using the population standard deviation
        # (ddof=0). Constant columns are only centered, not divided by zero.
        col_mean = data.mean(axis=0)
        col_std = data.std(axis=0, ddof=0)
        col_std = col_std.where(col_std != 0, 1.0)
        data = (data - col_mean) / col_std

        if name:
            # Relabel the motif columns with their (truncated) factor lists.
            data = data.T
            data["factors"] = [
                join_max(self.motifs[n].factors, max_len, ",", suffix=",(...)")
                for n in plot_motifs
            ]
            data = data.set_index("factors").T

        data = pd.melt(self.input.join(data), id_vars=["cluster"])
        data.columns = ["cluster", "motif", "z-score"]

        # `factorplot` was renamed to `catplot` in seaborn 0.9; prefer the
        # current name and fall back to the old one on older installs.
        categorical_plot = getattr(sns, "catplot", None) or sns.factorplot
        g = categorical_plot(data=data,
                             y="motif",
                             x="z-score",
                             hue="cluster",
                             kind="box",
                             aspect=2)
        return g
Ejemplo n.º 2
0
 def plot_scores(self, motifs, name=True, max_len=50):
     """Create motif scores boxplot of different clusters.

     Motifs can be specified as either motif or factor names.
     The motif scores are scaled per motif and plotted as z-scores.

     Parameters
     ----------
     motifs : iterable or str
         List of motif or factor names. A single name may be passed as a
         plain string.

     name : bool, optional
         Use factor names instead of motif names for plotting.

     max_len : int, optional
         Truncate the list of factors to this maximum length.

     Returns
     -------
     g : FacetGrid
         Returns the seaborn FacetGrid object with the plot.

     Raises
     ------
     ValueError
         If ``self.input`` does not have exactly one column.
     """
     if self.input.shape[1] != 1:
         raise ValueError("Can't make a categorical plot with real-valued data")

     if isinstance(motifs, str):
         motifs = [motifs]

     plot_motifs = []
     for motif in motifs:
         if motif in self.motifs:
             matches = [motif]
         else:
             # Not a motif id: treat it as a factor name and collect every
             # motif that lists this factor.
             matches = [m.id for m in self.motifs.values() if motif in m.factors]
         # Keep each motif once so no duplicate columns are selected.
         for motif_id in matches:
             if motif_id not in plot_motifs:
                 plot_motifs.append(motif_id)

     # Explicit copy: the scaling below must never write into self.scores.
     data = self.scores[plot_motifs].copy()

     # Column-wise z-score using the population standard deviation (ddof=0).
     # Constant columns are only centered, not divided by zero.
     col_mean = data.mean(axis=0)
     col_std = data.std(axis=0, ddof=0)
     col_std = col_std.where(col_std != 0, 1.0)
     data = (data - col_mean) / col_std

     if name:
         # Relabel the motif columns with their (truncated) factor lists.
         data = data.T
         data["factors"] = [join_max(self.motifs[n].factors, max_len, ",", suffix=",(...)") for n in plot_motifs]
         data = data.set_index("factors").T

     data = pd.melt(self.input.join(data), id_vars=["cluster"])
     data.columns = ["cluster", "motif", "z-score"]

     # `factorplot` was renamed to `catplot` in seaborn 0.9; prefer the
     # current name and fall back to the old one on older installs.
     categorical_plot = getattr(sns, "catplot", None) or sns.factorplot
     g = categorical_plot(data=data, y="motif", x="z-score", hue="cluster", kind="box", aspect=2)
     return g
Ejemplo n.º 3
0
    def plot_heatmap(self,
                     kind="final",
                     min_freq=0.01,
                     threshold=2,
                     name=True,
                     indirect=False,
                     figsize=None,
                     max_len=50,
                     aspect=1,
                     **kwargs):
        """Plot clustered heatmap of predicted motif activity.

        Motifs (rows) are filtered on activity and frequency, ordered by
        hierarchical clustering (Euclidean distance, Ward linkage) and drawn
        with ``sns.heatmap``. The color bar is placed in a thin axes below
        the heatmap.

        Parameters
        ----------
        kind : str, optional
            Which data type to use for plotting. Default is 'final', which will
            plot the result of the rank aggregation. Other options are 'freq'
            for the motif frequencies, or any of the individual activities such
            as 'rf.score'.

        min_freq : float, optional
            Minimum frequency of motif occurrence.

        threshold : float, optional
            Minimum activity (absolute) of the rank aggregation result.

        name : bool, optional
            Use factor names instead of motif names for plotting.

        indirect : bool, optional
            Include indirect factors. Default is False.

        max_len : int, optional
            Truncate the list of factors to this maximum length.

        figsize : tuple, optional
            Tuple of figure size (width, height). When omitted, the size is
            derived from the number of rows and columns to plot.

        aspect : int, optional
            Accepted for backward compatibility; not used by this
            implementation.

        kwargs : other keyword arguments
            All other keyword arguments are passed to sns.heatmap and
            override the defaults chosen here (e.g. ``cmap``, ``vmin``,
            ``vmax``, ``linewidths``).

        Returns
        -------
        cg : matplotlib Axes
            The Axes object returned by sns.heatmap.

        Raises
        ------
        ValueError
            If `kind` is unknown, if `kind` is 'freq' but no frequency data is
            available, or if no motif passes the filters.
        """

        filt = np.any(np.abs(self.result) >= threshold, 1)
        if hasattr(self, "freq"):
            filt = filt & np.any(np.abs(self.freq.T) >= min_freq, 1)
        else:
            filt = filt & (self.counts.sum() / self.counts.shape[0] > min_freq)

        idx = self.result.loc[filt].index
        if idx.shape[0] >= 100:
            logger.warning("The filtered matrix has more than 100 rows.")
            logger.warning(
                "It might be worthwhile to increase the threshold for visualization"
            )

        cmap = "RdBu_r"
        if kind == "final":
            data = self.result
        elif kind == "freq":
            if hasattr(self, "freq"):
                data = self.freq.T
                cmap = "Reds"
            else:
                raise ValueError(
                    "frequency plot only works with maelstrom output from clusters"
                )
        elif kind in self.activity:
            data = self.activity[kind]
            if kind in ["hypergeom.count", "mwu.score"]:
                cmap = "Reds"
        else:
            raise ValueError("Unknown dtype")

        # Copy so that adding the "factors" column below cannot write into
        # (or warn about) the underlying result table.
        m = data.loc[idx].copy()
        if m.shape[0] == 0:
            raise ValueError(
                "No motifs left to plot; try lowering threshold or min_freq")

        # Symmetric color limits taken from the 1st/99th percentiles so that
        # single extreme values do not dominate the color scale.
        vmax = max(abs(np.percentile(m, 1)), np.percentile(m, 99))
        vmin = -vmax
        if name:
            m["factors"] = [
                join_max(
                    _get_factor_list(self.motifs[n], indirect),
                    max_len,
                    ",",
                    suffix=",(...)",
                ) for n in m.index
            ]
            m = m.set_index("factors")

        if figsize is None:
            figsize = (3 + m.shape[1] / 4, 1 + m.shape[0] / 3)
        fig = plt.figure(figsize=figsize)
        # Reserve a fixed-height strip (in pixels) at the bottom for the
        # color bar; the heatmap takes the rest of the figure height.
        npixels = 30
        g = GridSpec(2,
                     1,
                     height_ratios=(fig.get_figheight() * fig.dpi - npixels,
                                    npixels))
        ax1 = fig.add_subplot(g[0, :])
        ax2 = fig.add_subplot(g[1, :])
        ax2.set_title("Significance (-log10(p-value))")

        # Order rows by hierarchical clustering. Linkage needs at least two
        # observations, so a single row is plotted as-is.
        if m.shape[0] > 1:
            dm = pdist(m, metric="euclidean")
            hc = linkage(dm, method="ward")
            leaves = dendrogram(hc, no_plot=True)["leaves"]
            m = m.iloc[leaves]

        # Defaults first, then caller overrides, so passing e.g. `cmap` or
        # `vmax` does not clash with the values chosen above.
        heatmap_kwargs = {
            "ax": ax1,
            "cbar_ax": ax2,
            "cbar_kws": {"orientation": "horizontal"},
            "cmap": cmap,
            "linewidths": 1,
            "vmin": vmin,
            "vmax": vmax,
        }
        heatmap_kwargs.update(kwargs)
        cg = sns.heatmap(m, **heatmap_kwargs)
        plt.tight_layout()
        return cg
Ejemplo n.º 4
0
    def plot_heatmap(self, kind="final", min_freq=0.01, threshold=2, name=True, max_len=50, aspect=1, **kwargs):
        """Plot clustered heatmap of predicted motif activity.

        Motifs (rows) are filtered on activity and frequency and drawn with
        ``sns.clustermap``; only rows are clustered.

        Parameters
        ----------
        kind : str, optional
            Which data type to use for plotting. Default is 'final', which will plot the
            result of the rank aggregation. Other options are 'freq' for the motif frequencies,
            or any of the individual activities such as 'rf.score'.

        min_freq : float, optional
            Minimum frequency of motif occurrence.

        threshold : float, optional
            Minimum activity (absolute) of the rank aggregation result.

        name : bool, optional
            Use factor names instead of motif names for plotting.

        max_len : int, optional
            Truncate the list of factors to this maximum length.

        aspect : int, optional
            Aspect ratio for tweaking the plot (scales the figure width).

        kwargs : other keyword arguments
            All other keyword arguments are passed to sns.clustermap

        Returns
        -------
        cg : ClusterGrid
            A seaborn ClusterGrid instance.

        Raises
        ------
        ValueError
            If `kind` is not 'final', 'freq' or a key of ``self.activity``.
        """

        filt = np.any(np.abs(self.result) >= threshold, 1) & np.any(np.abs(self.freq.T) >= min_freq, 1)

        idx = self.result[filt].index

        cmap = "RdBu_r"
        if kind == "final":
            data = self.result
        elif kind == "freq":
            data = self.freq.T
            cmap = "Reds"
        elif kind in self.activity:
            # Look up by `kind`; the previous code used an undefined name
            # (`dtype`) and raised NameError for every activity type.
            data = self.activity[kind]
            if kind in ["hypergeom.count", "mwu.score"]:
                cmap = "Reds"
        else:
            raise ValueError("Unknown dtype")

        # Copy so that adding the "factors" column below cannot write into
        # (or warn about) the underlying result table.
        m = data.loc[idx].copy()
        if name:
            m["factors"] = [join_max(self.motifs[n].factors, max_len, ",", suffix=",(...)") for n in m.index]
            m = m.set_index("factors")
        h, w = m.shape
        # Figure size grows with the matrix: half a unit per row/column.
        cg = sns.clustermap(m, cmap=cmap, col_cluster=False,
                            figsize=(2 + w * 0.5 * aspect, 0.5 * h), linewidths=1,
                            **kwargs)
        # Columns are not clustered, so hide the empty column dendrogram.
        cg.ax_col_dendrogram.set_visible(False)
        plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
        return cg
Ejemplo n.º 5
0
    def plot_heatmap(self,
                     kind="final",
                     min_freq=0.01,
                     threshold=2,
                     name=True,
                     max_len=50,
                     aspect=1,
                     **kwargs):
        """Plot clustered heatmap of predicted motif activity.

        Motifs (rows) are filtered on activity and frequency and drawn with
        ``sns.clustermap``; only rows are clustered.

        Parameters
        ----------
        kind : str, optional
            Which data type to use for plotting. Default is 'final', which
            will plot the result of the rank aggregation. Other options are
            'freq' for the motif frequencies, or any of the individual
            activities such as 'rf.score'.

        min_freq : float, optional
            Minimum frequency of motif occurrence.

        threshold : float, optional
            Minimum activity (absolute) of the rank aggregation result.

        name : bool, optional
            Use factor names instead of motif names for plotting.

        max_len : int, optional
            Truncate the list of factors to this maximum length.

        aspect : int, optional
            Aspect ratio for tweaking the plot (scales the figure width).

        kwargs : other keyword arguments
            All other keyword arguments are passed to sns.clustermap

        Returns
        -------
        cg : ClusterGrid
            A seaborn ClusterGrid instance.

        Raises
        ------
        ValueError
            If `kind` is not 'final', 'freq' or a key of ``self.activity``.
        """

        filt = np.any(np.abs(self.result) >= threshold, 1) & np.any(
            np.abs(self.freq.T) >= min_freq, 1)

        idx = self.result[filt].index

        cmap = "RdBu_r"
        if kind == "final":
            data = self.result
        elif kind == "freq":
            data = self.freq.T
            cmap = "Reds"
        elif kind in self.activity:
            # Look up by `kind`; the previous code used an undefined name
            # (`dtype`) and raised NameError for every activity type.
            data = self.activity[kind]
            if kind in ["hypergeom.count", "mwu.score"]:
                cmap = "Reds"
        else:
            raise ValueError("Unknown dtype")

        # Copy so that adding the "factors" column below cannot write into
        # (or warn about) the underlying result table.
        m = data.loc[idx].copy()
        if name:
            m["factors"] = [
                join_max(self.motifs[n].factors, max_len, ",", suffix=",(...)")
                for n in m.index
            ]
            m = m.set_index("factors")
        h, w = m.shape
        # Figure size grows with the matrix: half a unit per row/column.
        cg = sns.clustermap(m,
                            cmap=cmap,
                            col_cluster=False,
                            figsize=(2 + w * 0.5 * aspect, 0.5 * h),
                            linewidths=1,
                            **kwargs)
        # Columns are not clustered, so hide the empty column dendrogram.
        cg.ax_col_dendrogram.set_visible(False)
        plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
        return cg