Example #1
0
    def test_rotate_axis0_input(self):
        """Check the reordered indices when rotating with ``axis=0``.

        The plotter is built from the transposed frame, and its
        ``reordered_ind`` must equal the ``x_norm_leaves`` fixture.
        """
        options = dict(self.default_kws, rotate=True, axis=0)
        plotter = mat._DendrogramPlotter(self.df_norm.T, **options)

        npt.assert_array_equal(plotter.reordered_ind, self.x_norm_leaves)
Example #2
0
    def test_fastcluster_other_method(self):
        """Compare the plotter's linkage with fastcluster's 'average' linkage.

        The reference linkage is computed directly with
        ``fastcluster.linkage`` on the transposed array.
        """
        import fastcluster

        expected = fastcluster.linkage(
            self.x_norm.T, method='average', metric='euclidean')

        options = dict(self.default_kws, method='average')
        plotter = mat._DendrogramPlotter(self.x_norm, **options)
        npt.assert_array_equal(plotter.linkage, expected)
Example #3
0
 def test_label_false(self):
     """With ``label=False`` every tick/label attribute must be empty."""
     options = dict(self.default_kws, label=False)
     plotter = mat._DendrogramPlotter(self.df_norm, **options)

     # Ticks and tick labels are expected to be empty lists ...
     for attr in ("xticks", "yticks", "xticklabels", "yticklabels"):
         assert getattr(plotter, attr) == []

     # ... and both axis labels are expected to be empty strings.
     for attr in ("xlabel", "ylabel"):
         assert getattr(plotter, attr) == ""
Example #4
0
    def test_rotate_input(self):
        """Check data, tick labels and axis labels when ``rotate=True``."""
        options = dict(self.default_kws, rotate=True)
        plotter = mat._DendrogramPlotter(self.df_norm, **options)
        source = self.df_norm

        # The plotter stores the input transposed.
        npt.assert_array_equal(plotter.array.T, np.asarray(source))
        pdt.assert_frame_equal(plotter.data.T, source)

        # With rotation the labels are expected on the y axis only.
        npt.assert_array_equal(plotter.xticklabels, [])
        npt.assert_array_equal(plotter.yticklabels, self.df_norm_leaves)

        assert plotter.xlabel == ''
        assert plotter.ylabel == 'letters'
Example #5
0
    def test_linkage_scipy(self):
        """Compare ``_calculate_linkage_scipy`` with a direct scipy computation."""
        from scipy.cluster import hierarchy
        from scipy.spatial import distance

        plotter = mat._DendrogramPlotter(self.x_norm, **self.default_kws)
        observed = plotter._calculate_linkage_scipy()

        # Reference: pairwise distances on the transposed array, then linkage,
        # using the default metric and method.
        metric = self.default_kws['metric']
        method = self.default_kws['method']
        pairwise = distance.pdist(self.x_norm.T, metric=metric)
        expected = hierarchy.linkage(pairwise, method=method)

        npt.assert_array_equal(observed, expected)
Example #6
0
    def test_df_input(self):
        """Check data, linkage, dendrogram and labels for DataFrame input."""
        frame = self.df_norm
        plotter = mat._DendrogramPlotter(frame, **self.default_kws)

        # The plotter stores the input transposed.
        npt.assert_array_equal(plotter.array.T, np.asarray(frame))
        pdt.assert_frame_equal(plotter.data.T, frame)

        npt.assert_array_equal(plotter.linkage, self.x_norm_linkage)
        assert plotter.dendrogram == self.x_norm_dendrogram

        # x tick labels are the column names in leaf order; y has none.
        expected_xticklabels = np.asarray(frame.columns)[self.x_norm_leaves]
        npt.assert_array_equal(plotter.xticklabels, expected_xticklabels)
        npt.assert_array_equal(plotter.yticklabels, [])

        assert plotter.xlabel == 'letters'
        assert plotter.ylabel == ''
Example #7
0
    def test_ndarray_input(self):
        """Check data, linkage, dendrogram and labels for ndarray input."""
        values = self.x_norm
        plotter = mat._DendrogramPlotter(values, **self.default_kws)

        # The plotter stores the input transposed, wrapped in a DataFrame.
        npt.assert_array_equal(plotter.array.T, values)
        pdt.assert_frame_equal(plotter.data.T, pd.DataFrame(values))

        npt.assert_array_equal(plotter.linkage, self.x_norm_linkage)
        assert plotter.dendrogram == self.x_norm_dendrogram

        # Both the reordered indices and the x tick labels must equal the
        # expected leaves; there are no y tick labels.
        expected_leaves = self.x_norm_leaves
        npt.assert_array_equal(plotter.reordered_ind, expected_leaves)
        npt.assert_array_equal(plotter.xticklabels, expected_leaves)
        npt.assert_array_equal(plotter.yticklabels, [])

        assert plotter.xlabel is None
        assert plotter.ylabel == ''
Example #8
0
    def test_axis0_input(self):
        """Check data, linkage, dendrogram and labels when ``axis=0``."""
        transposed = self.df_norm.T
        options = dict(self.default_kws, axis=0)
        plotter = mat._DendrogramPlotter(transposed, **options)

        # With axis=0 the input is compared as-is, without transposition.
        npt.assert_array_equal(plotter.array, np.asarray(transposed))
        pdt.assert_frame_equal(plotter.data, transposed)

        npt.assert_array_equal(plotter.linkage, self.x_norm_linkage)
        assert plotter.dendrogram == self.x_norm_dendrogram

        npt.assert_array_equal(plotter.xticklabels, self.df_norm_leaves)
        npt.assert_array_equal(plotter.yticklabels, [])

        assert plotter.xlabel == 'letters'
        assert plotter.ylabel == ''
Example #9
0
    def test_df_multindex_input(self):
        """Check tick labels and axis label for a frame with a MultiIndex.

        The expected tick labels are the index levels joined with '-',
        arranged in the plotter's reordered order.
        """
        frame = self.df_norm.copy()
        level_values = [("A", 1), ("B", 2), ("C", 3), ("D", 4)]
        multi = pd.MultiIndex.from_tuples(level_values,
                                          names=["letter", "number"])
        multi.name = "letter-number"
        frame.index = multi

        options = dict(self.default_kws, label=True)
        plotter = mat._DendrogramPlotter(frame.T, **options)

        joined = ["A-1", "B-2", "C-3", "D-4"]
        expected = [joined[pos] for pos in plotter.reordered_ind]
        npt.assert_array_equal(plotter.xticklabels, expected)
        npt.assert_array_equal(plotter.yticklabels, [])
        assert plotter.xlabel == "letter-number"
Example #10
0
    def test_custom_linkage(self):
        """Check that a user-supplied linkage is used unchanged.

        The linkage comes from fastcluster when it can be imported, and
        from scipy otherwise. The plotter must expose that same linkage
        and the dendrogram scipy derives from it.
        """
        try:
            import fastcluster

            custom = fastcluster.linkage_vector(
                self.x_norm, method='single', metric='euclidean')
        except ImportError:
            pairwise = distance.pdist(self.x_norm, metric='euclidean')
            custom = hierarchy.linkage(pairwise, method='single')

        expected_dendrogram = hierarchy.dendrogram(
            custom, no_plot=True, color_threshold=-np.inf)

        options = dict(self.default_kws, linkage=custom)
        plotter = mat._DendrogramPlotter(self.df_norm, **options)

        npt.assert_array_equal(plotter.linkage, custom)
        assert plotter.dendrogram == expected_dendrogram
Example #11
0
def heatmap(df, corr_types=None, map_type='zaric', ax=None, face_color=None,
            annot=None, cbar=True, cbar_kws=None, mask=None,
            row_cluster=False, row_cluster_metric='euclidean',
            row_cluster_method='average', row_cluster_linkage=None,
            col_cluster=False, col_cluster_metric='euclidean',
            col_cluster_method='average', col_cluster_linkage=None,
            **kwargs):
    """
    Plots a heatmap.

    Arguments:
        df: The dataframe to plot.
        corr_types: Optionally specify correlation type using a dataframe of
            CorrType enums for each entry (can be obtained from the corr
            function). When specified, numeric correlations are plotted using
            different markers (only used for map_type 'zaric').
        map_type: One of 'zaric', 'standard', 'dendrograms':
            * 'zaric' (default): a special heatmap, where magnitude is
                indicated by size of the elements as well as their colour.
            * 'standard': a standard heatmap plotted using sns.heatmap;
            * 'dendrograms': a heatmap with dendrograms, using sns.clustermap.
        ax: The matplotlib axis to use for the plotting (not supported for
            map_type 'dendrograms'). Defaults to the current axis.
        face_color: The background colour. For 'zaric' it is forwarded
            unchanged to the plotting helper; for 'standard' and
            'dendrograms' it is applied to the heatmap axis and defaults to
            'black'.
        annot: Whether to also annotate the squares with numbers (defaults to
            True for map_type 'standard' and 'dendrograms'; for 'zaric'
            annotations are currently not displayed).
        cbar: Whether to include a colorbar.
        cbar_kws: Additional kwargs to use when plotting the colorbar.
        mask: An array or a dataframe that indicates whether a value should
            be masked out (True) or displayed (False).
        row_cluster: Whether to use hierarchical clustering to reorder the
            rows. Ignored for map_type 'dendrograms', where sns.clustermap
            does its own clustering.
        row_cluster_metric: The metric to use for clustering the rows
            (see _DendrogramPlotter in seaborn.matrix).
        row_cluster_method: The method to use for clustering the rows
            (see _DendrogramPlotter in seaborn.matrix).
        row_cluster_linkage: The linkage to use for clustering the rows
            (see _DendrogramPlotter in seaborn.matrix).
        col_cluster: Whether to use hierarchical clustering to reorder the
            cols. Ignored for map_type 'dendrograms'.
        col_cluster_metric: The metric to use for clustering the columns
            (see _DendrogramPlotter in seaborn.matrix).
        col_cluster_method: The method to use for clustering the columns
            (see _DendrogramPlotter in seaborn.matrix).
        col_cluster_linkage: The linkage to use for clustering the columns
            (see _DendrogramPlotter in seaborn.matrix).
        **kwargs: Any remaining kwargs are passed to the plotting function
            and override the defaults chosen here. For map_type 'standard'
            the defaults are center=0, square=True, linewidths=1; for
            'dendrograms' the same defaults apply except that 'square' is
            always removed.

    Raises:
        ValueError: If map_type is not one of the supported values, or if
            'ax' is given together with map_type 'dendrograms'.
    """
    # Reject unknown map types up front, before any figure is touched or any
    # clustering is computed.
    if map_type not in ('zaric', 'standard', 'dendrograms'):
        raise ValueError("Unknown map_type '{}'.".format(map_type))

    use_clustermap = map_type == 'dendrograms'

    if use_clustermap:
        if ax is not None:
            raise ValueError("Argument 'ax' is not supported for map_type == 'dendrograms'.")
    elif ax is None:
        ax = plt.gca()

    if mask is not None:
        mask = np.asarray(mask)

    if corr_types is not None:
        corr_types = np.asarray(corr_types)

    if row_cluster and not use_clustermap:
        row_ind = _DendrogramPlotter(
            df, axis=0, metric=row_cluster_metric,
            method=row_cluster_method, linkage=row_cluster_linkage,
            label=False, rotate=False
        ).reordered_ind

        # Positional reordering: unlike reindexing by label, this also works
        # when the index contains duplicate labels.
        df = df.iloc[row_ind]

        if mask is not None:
            mask = mask[row_ind, :]

        if corr_types is not None:
            corr_types = corr_types[row_ind, :]

    if col_cluster and not use_clustermap:
        col_ind = _DendrogramPlotter(
            df, axis=1, metric=col_cluster_metric,
            method=col_cluster_method, linkage=col_cluster_linkage,
            label=False, rotate=False
        ).reordered_ind

        df = df.iloc[:, col_ind]

        if mask is not None:
            mask = mask[:, col_ind]

        if corr_types is not None:
            corr_types = corr_types[:, col_ind]

    if map_type == "zaric":
        # One (index label, column label) pair per cell, in the same
        # row-major order as the flattened values below.
        pairs = np.asarray(list(itertools.product(df.index, df.columns)))
        x = pairs[:, 0]
        y = pairs[:, 1]
        v = df.values.reshape(-1)
        m = mask.reshape(-1) if mask is not None else None

        # Flag the cells whose correlation type is numeric-vs-numeric.
        circ = np.zeros(len(x))
        if corr_types is not None:
            circ[np.where(corr_types.reshape(-1) == CorrType.num_vs_num)] = True

        # Caller-supplied kwargs take precedence over the defaults.
        default_kwargs = dict(
            color=v,
            size=np.abs(v),
            circular=circ
        )
        default_kwargs.update(**kwargs)
        kwargs = default_kwargs

        # NOTE(review): x holds index labels and y holds column labels, while
        # x_order/y_order receive columns/index. This is consistent when index
        # and columns share the same labels; confirm what _zaric_heatmap
        # expects for other frames.
        _zaric_heatmap(
            x, y,
            ax=ax,
            face_color=face_color,
            cbar=cbar,
            cbar_kws=cbar_kws,
            mask=m,
            x_order=df.columns,
            y_order=df.index,
            **kwargs
        )

        ax.set_xlabel(df.columns.name)
        ax.set_ylabel(df.index.name)

    else:
        # map_type is 'standard' or 'dendrograms' (validated above).
        if annot is None:
            annot = True

        if face_color is None:
            face_color = 'black'

        # Caller-supplied kwargs take precedence over the defaults.
        default_kwargs = dict(center=0, square=True, linewidths=1,
                              annot=annot)
        default_kwargs.update(**kwargs)
        kwargs = default_kwargs

        if use_clustermap:
            # 'square' is never forwarded to sns.clustermap.
            kwargs.pop('square', None)
            grid = sns.clustermap(df, cbar=cbar, cbar_kws=cbar_kws,
                                  mask=mask, **kwargs)
            # Take the heatmap axis from the returned grid instead of
            # guessing its position in the current figure's axes list, which
            # shifts when extra axes (e.g. row/column colors) are present.
            ax = grid.ax_heatmap
        else:
            sns.heatmap(df, ax=ax, cbar=cbar, cbar_kws=cbar_kws,
                        mask=mask, **kwargs)

        ax.set_facecolor(face_color)
        ax.xaxis.set_tick_params(rotation=45)
        plt.setp(ax.get_xticklabels(),
                 rotation_mode="anchor", horizontalalignment="right")