예제 #1
0
 def __init__(self, image_height, image_width, show_display=True):
     """Store the image geometry and build the heat and cluster drawers.

     :param image_height: kept on ``self.image_height``
     :param image_width: kept on ``self.image_width``
     :param show_display: when truthy, ``_init_heatmap_display`` is called
         once the drawers exist
     """
     self.image_height = image_height
     self.image_width = image_width
     self.show_display = show_display

     # Bookkeeping containers and tuning constants.
     self.heatmaps = []
     self.object_history = []
     self.max_n_heatmaps = 2
     self.heatmap_threshold = 75

     # Heat drawer: black -> red -> yellow scale over [0, 255]; not in-place.
     heat_cmap = cmap_builder('black', 'red', 'yellow')
     heat_colorbar = Colorbar(ticks=np.array([0, 255]),
                              pos=np.array([0.03, 0.96]),
                              size=np.array([0.3, 0.01]))
     heat_color = DynamicColor(cmap=heat_cmap,
                               value_range=[0, 255],
                               colorbar=heat_colorbar)
     self.heat_drawer = Drawer(bbox_settings=BBoxSettings(color=heat_color),
                               inplace=False)

     # Cluster drawer: yellow -> lime -> cyan scale over [0, 20]; in-place.
     cluster_cmap = cmap_builder('yellow', 'lime (w3c)', 'cyan')
     cluster_colorbar = Colorbar(ticks=np.array([0, 10, 20]),
                                 pos=np.array([0.03, 0.90]),
                                 size=np.array([0.3, 0.01]))
     cluster_color = DynamicColor(cmap=cluster_cmap,
                                  value_range=[0, 20],
                                  colorbar=cluster_colorbar)
     self.cluster_drawer = Drawer(
         bbox_settings=BBoxSettings(color=cluster_color),
         inplace=True)

     if self.show_display:
         self._init_heatmap_display()
예제 #2
0
    def get_html_table(self, collapse_table=False, clip_threshold=2,
            index=False, header=True, escape=False):
        """Return the results as a colour-annotated HTML table.

        The table is built from a copy of ``self.df`` restricted to
        ``self.colnames_subset``, so the instance's dataframe is never
        modified by the formatting done here.

        :param collapse_table: forwarded to ``HTMLTable.to_html``
        :param clip_threshold: threshold used for the ``'clip'`` background
            colouring of the effect-size / Glass-delta columns
        :param index: forwarded to ``HTMLTable.to_html``
        :param header: forwarded to ``HTMLTable.to_html``
        :param escape: forwarded to ``HTMLTable.to_html``
        :return: whatever ``HTMLTable.to_html`` returns
        """
        cmap_clip = cmap_builder('#ffffff', '#0070FF')
        cmap_absmax = cmap_builder('green', 'white', 'red')

        # The copy is used because we change it afterwards (FDR text and
        # column names); working on self.df would leak those changes.
        df = self.df[self.colnames_subset].copy()

        colname = 'ANOVA_FEATURE_FDR'

        # Very small FDR values are displayed as a '<0.01' label.
        df.loc[df[colname] < 0.01, colname] = '<0.01'

        # Build the table from the edited copy, not from self.df; otherwise
        # the subset and the '<0.01' substitution above are silently lost.
        html = HTMLTable(df, 'notused')
        # Those columns should be links
        for this in ['FEATURE', 'DRUG_ID', 'ASSOC_ID']:
            html.add_href(this)

        for this in ['FEATURE_IC50_effect_size', 'FEATURE_neg_Glass_delta',
                'FEATURE_pos_Glass_delta']:
            html.add_bgcolor(this, cmap_clip, mode='clip',
                    threshold=clip_threshold)

        # normalise data and annotate with color
        html.add_bgcolor('FEATURE_delta_MEAN_IC50', cmap_absmax,
            mode='absmax')

        # Human-friendly column titles (underscores become spaces).
        html.df.columns = [x.replace("_", " ") for x in html.df.columns]
        return html.to_html(escape=escape, header=header, index=index,
                collapse_table=collapse_table, justify='center')
예제 #3
0
    def get_html_table(self,
                       collapse_table=False,
                       clip_threshold=2,
                       index=False,
                       header=True,
                       escape=False,
                       add_href=True):
        """Build the HTML table used in the reports.

        A copy of ``self.df`` limited to ``self.colnames_subset`` is
        formatted and handed to ``HTMLTable``.

        :param collapse_table: forwarded to ``HTMLTable.to_html``
        :param clip_threshold: threshold for the ``'clip'`` colouring
        :param index: forwarded to ``HTMLTable.to_html``
        :param header: forwarded to ``HTMLTable.to_html``
        :param escape: forwarded to ``HTMLTable.to_html``
        :param add_href: add href to the FEATURE, DRUG ID and ASSOC ID
        :return: the result of ``HTMLTable.to_html``
        """

        def _assoc_number(value):
            # Strip the "a" characters so the identifier becomes an integer
            # and the column sorts numerically.
            return int(str(value).replace("a", ""))

        blue_scale = cmap_builder('#ffffff', '#0070FF')
        diverging_scale = cmap_builder('green', 'white', 'red')

        # Work on a copy since the frame is edited below.
        table_df = self.df[self.colnames_subset].copy()

        fdr_column = 'ANOVA_FEATURE_FDR'
        below_cutoff = table_df[fdr_column] < 0.01
        table_df.loc[below_cutoff, fdr_column] = '<0.01'

        # The "a" prefix is dropped here for sorting; the link created below
        # puts it back through the ``url`` prefix.
        table_df.ASSOC_ID = table_df.ASSOC_ID.apply(_assoc_number)

        html = HTMLTable(table_df, 'notused')

        if add_href:
            html.add_href("FEATURE")
            # For the next two calls ``url`` acts as a prefix.
            html.add_href("ASSOC_ID", url="a", suffix=".html")
            html.add_href("DRUG_ID", url="drug_", suffix=".html")

        clipped_columns = (
            'FEATURE_IC50_effect_size',
            'FEATURE_neg_Glass_delta',
            'FEATURE_pos_Glass_delta',
        )
        for column in clipped_columns:
            html.add_bgcolor(column, blue_scale, mode='clip',
                             threshold=clip_threshold)

        # Normalise the data and annotate it with colour.
        html.add_bgcolor('FEATURE_delta_MEAN_IC50', diverging_scale,
                         mode='absmax')

        html.df.columns = [name.replace("_", " ") for name in html.df.columns]
        return html.to_html(escape=escape,
                            header=header,
                            index=index,
                            collapse_table=collapse_table,
                            justify='center')
예제 #4
0
    def __init__(self,
                 grid_generator,
                 force_train=False,
                 use_cache=True,
                 show_display=True):
        """Prepare the classifier, optional cache, thresholds and drawer.

        :param grid_generator: kept on ``self.grid_generator``
        :param force_train: when truthy, train and store a classifier even
            if ``_classifier_available()`` reports one
        :param use_cache: when truthy, create a ``ClassifierCache``
        :param show_display: when truthy, ``_init_display`` is called last
        """
        self.grid_generator = grid_generator
        self.use_cache = use_cache
        self.show_display = show_display

        # Train when nothing is available or when training is forced;
        # otherwise load what is already there.
        if not self._classifier_available() or force_train:
            self._train_classifier()
            self._store_classifier()
        else:
            self._load_classifier()

        if self.use_cache:
            self.cache = ClassifierCache(classifier_path=classifier_path)

        # NOTE(review): the three thresholds and classifier_path are not
        # defined in this block; presumably module-level names.
        self.low_threshold = cache_threshold
        self.medium_threshold = cluster_threshold
        self.high_threshold = tracking_threshold

        # Drawer with a yellow -> lime -> cyan scale over [0.5, 1.0].
        score_cmap = cmap_builder('yellow', 'lime (w3c)', 'cyan')
        score_colorbar = Colorbar(ticks=np.array([0.5, 0.75, 1.0]),
                                  pos=np.array([0.03, 0.97]),
                                  size=np.array([0.3, 0.01]))
        score_color = DynamicColor(cmap=score_cmap,
                                   value_range=[0.5, 1.0],
                                   colorbar=score_colorbar)
        self.drawer = Drawer(bbox_settings=BBoxSettings(color=score_color),
                             inplace=False)

        if self.show_display:
            self._init_display()
예제 #5
0
    def plot_confusion(self, species=None, cmap=None, tight_layout=False):
        """Show a confusion matrix with ``biokit.imshow``.

        :param species: key into ``self.confusion``; when ``None`` the
            ``self.confusion_all`` matrix is shown instead
        :param cmap: colormap to use; defaults to a white/blue/darkblue map
            built with ``colormap.cmap_builder``
        :param tight_layout: if ``True`` (and a species is given), call
            ``pylab.tight_layout()`` after drawing
        """
        if cmap is None:
            import colormap
            cmap = colormap.cmap_builder('white', 'blue', 'darkblue')
        from biokit import imshow

        # No species requested: show the global matrix and stop there.
        if species is None:
            imshow(self.confusion_all, cmap=cmap)
            return

        imshow(self.confusion[species], cmap=cmap)
        pylab.title(species, fontsize=20)
        if tight_layout is True:
            pylab.tight_layout()
예제 #6
0
    def __init__(self, show_display=True):
        """Initialise the tracker state and its two drawers.

        :param show_display: when truthy, ``_init_display`` is called after
            the drawers have been created
        """
        self.tracked_objects = []
        self.show_display = show_display
        self._init_params()

        # Raw tracks: yellow -> lime -> cyan scale over [0, 65], in-place.
        tracks_cmap = cmap_builder('yellow', 'lime (w3c)', 'cyan')
        tracks_colorbar = Colorbar(ticks=np.array([0, 30, 60]),
                                   pos=np.array([0.03, 0.96]),
                                   size=np.array([0.3, 0.01]))
        tracks_color = DynamicColor(cmap=tracks_cmap,
                                    value_range=[0, 65],
                                    colorbar=tracks_colorbar)
        self.raw_tracks_drawer = Drawer(
            bbox_settings=BBoxSettings(color=tracks_color),
            inplace=True)

        # Matches: fixed (0, 0, 0) colour with a border thickness of 2.
        match_settings = BBoxSettings(color=StaticColor((0, 0, 0)),
                                      border_thickness=2)
        self.match_drawer = Drawer(bbox_settings=match_settings, inplace=True)

        if self.show_display:
            self._init_display()

        self.first_frame = True
예제 #7
0
 def _set_default_cmap(self):
     """Set ``self.cm`` to the default '#AA0000' / white / darkblue map."""
     default_colors = ('#AA0000', 'white', 'darkblue')
     self.cm = cmap_builder(*default_colors)
예제 #8
0
    def plot(self, fig=None, grid=True,
             rotation=30, lower=None, upper=None,
             shrink=0.9, axisbg='white', colorbar=True, label_color='black',
             fontsize='small', edgecolor='black', method='ellipse',
             order_method='complete', order_metric='euclidean', cmap=None,
             ax=None, binarise_color=False, figsize=None):
        """
        plot the correlation matrix from the content of :attr:`df`
        (dataframe)

        By default, the correlation is shown on the upper and lower triangle and is
        symmetric wrt to the diagonal. The symbols are ellipses. The symbols can
        be changed to e.g. rectangle. The symbols are shown on upper and lower sides but
        you could choose a symbol for the upper side and another for the lower side using
        the **lower** and **upper** parameters.

        :param fig: Create a new figure by default. If an instance of an existing
            figure is provided, the corrplot is overlayed on the figure provided.
            Can also be the number of the figure.
        :param grid: add grid (Defaults to grey color). You can set it to False or a color.
        :param rotation: rotate labels on y-axis
        :param lower: if set to a valid method, plots the data on the lower
            triangle
        :param upper: if set to a valid method, plots the data on the upper
            triangle
        :param float shrink: maximum space used (in percent) by a symbol.
            If negative values are provided, the absolute value is taken.
            If greater than 1, the symbols will overlap.
        :param axisbg: color of the background (defaults to white).
        :param colorbar: add the colorbar (defaults to True).
        :param str label_color: (defaults to black).
        :param fontsize: size of the fonts defaults to 'small'.
        :param edgecolor: stored on ``self.edgecolor`` (defaults to black).
        :param method: shape to be used in 'ellipse', 'square', 'rectangle',
            'color', 'text', 'circle',  'number', 'pie'.

        :param order_method: see :meth:`order`.
        :param order_metric: see :meth:`order`.
        :param cmap: a valid cmap from matplotlib or colormap package (e.g.,
            'jet', or 'copper'), or a sequence of colors. Default is
            red/white/blue colors.
        :param ax: a matplotlib axes.
        :param binarise_color: stored on ``self.binarise_color`` before the
            patches are added.
        :param figsize: gives that parameter to the new created figure
        :return: ax (matplotlib axes)

        The colorbar can be tuned with the parameters stored in :attr:`params`.

        Here is an example. See notebook for other examples::

            c = corrplot.Corrplot(dataframe)
            c.plot(cmap=('Orange', 'white', 'green'))
            c.plot(method='circle')
            c.plot(colorbar=False, shrink=.8, upper='circle'  )

        """
        # default
        if cmap is not None:
            try:
                if isinstance(cmap, str):
                    self.cm = cmap_builder(cmap)
                else:
                    self.cm = cmap_builder(*cmap)
            except Exception:
                # Best effort: an unusable cmap falls back to the default,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                print("incorrect cmap. Use default one")
                self._set_default_cmap()
        else:
            self._set_default_cmap()

        self.shrink = abs(shrink)
        self.fontsize = fontsize
        self.edgecolor = edgecolor

        df = self.order(method=order_method, metric=order_metric)

        # figure can be a number or an instance; otherwise creates it
        params = dict(facecolor=axisbg)
        if isinstance(fig, int):
            # A plain figure number has no ``.number`` attribute; use it
            # directly.
            params["num"] = fig
        elif fig is not None:
            params["num"] = fig.number
        else:
            params["num"] = None
        if figsize is not None:
            params["figsize"] = figsize
        fig = plt.figure(**params)

        # do we have an axes to plot the data in ?
        if ax is None:
            ax = plt.subplot(1, 1, 1, aspect='equal', axisbg=axisbg)
        else:
            # if so, clear the axes. Colorbar cannot be removed easily.
            plt.sca(ax)
            ax.clear()

        # subplot resets the bg color, let us set it again
        fig.set_facecolor(axisbg)

        width, height = df.shape
        labels = (df.columns)

        # add all patches to the figure
        # TODO check value of lower and upper

        if upper is None and lower is None:
            mode = 'method'
        elif upper and lower:
            mode = 'both'
        elif lower is not None:
            mode = 'lower'
        elif upper is not None:
            mode = 'upper'

        self.binarise_color = binarise_color
        if mode == 'upper':
            self._add_patches(df, upper, 'upper',  ax, diagonal=True)
        elif mode == 'lower':
            self._add_patches(df, lower, 'lower',  ax, diagonal=True)
        elif mode == 'method':
            self._add_patches(df, method, 'both',  ax, diagonal=True)
        elif mode == 'both':
            self._add_patches(df, upper, 'upper',  ax, diagonal=False)
            self._add_patches(df, lower, 'lower',  ax, diagonal=False)

        # shift the limits to englobe the patches correctly
        ax.set_xlim(-0.5, width - .5)
        ax.set_ylim(-0.5, height - .5)

        # set xticks/xlabels on top
        ax.xaxis.tick_top()
        xtickslocs = np.arange(len(labels))
        ax.set_xticks(xtickslocs)
        ax.set_xticklabels(labels, rotation=rotation, color=label_color,
                           fontsize=fontsize, ha='left')

        ax.invert_yaxis()
        ytickslocs = np.arange(len(labels))
        ax.set_yticks(ytickslocs)
        ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
        plt.tight_layout()

        if grid is not False:
            # ``grid`` may be True (use grey) or directly a color.
            if grid is True:
                grid = 'grey'
            for i in range(0, width):
                ratio1 = float(i) / width
                ratio2 = float(i + 2) / width
                # TODO 1- set axis off
                # 2 - set xlabels along the diagonal
                # set colorbar either on left or bottom
                if mode == 'lower':
                    plt.axvline(i + .5, ymin=1 - ratio1, ymax=0., color=grid)
                    plt.axhline(i + .5, xmin=0, xmax=ratio2, color=grid)
                if mode == 'upper':
                    plt.axvline(i + .5, ymin=1 - ratio2, ymax=1, color=grid)
                    plt.axhline(i + .5, xmin=ratio1, xmax=1, color=grid)
                if mode in ['method', 'both']:
                    plt.axvline(i + .5, color=grid)
                    plt.axhline(i + .5, color=grid)

            # can probably be simplified
            if mode == 'lower':
                plt.axvline(-.5, ymin=0, ymax=1, color='grey')
                plt.axvline(width - .5, ymin=0, ymax=1. /
                            width, color='grey', lw=2)
                plt.axhline(width - .5, xmin=0, xmax=1, color='grey', lw=2)
                plt.axhline(-.5, xmin=0, xmax=1. / width, color='grey', lw=2)
                plt.xticks([])
                # labels are written by hand: along the diagonal and on the
                # left-hand side
                for i in range(0, width):
                    plt.text(i, i - .6, labels[i], fontsize=fontsize,
                             color=label_color,
                             rotation=rotation, verticalalignment='bottom')
                    plt.text(-.6, i, labels[i], fontsize=fontsize,
                             color=label_color,
                             rotation=0, horizontalalignment='right')
                plt.axis('off')
            # can probably be simplified
            elif mode == 'upper':
                plt.axvline(width - .5, ymin=0, ymax=1, color='grey', lw=2)
                plt.axvline(-.5, ymin=1 - 1. / width,
                            ymax=1, color='grey', lw=2)
                plt.axhline(-.5, xmin=0, xmax=1, color='grey', lw=2)
                plt.axhline(width - .5, xmin=1 - 1. / width,
                            xmax=1, color='grey', lw=2)
                plt.yticks([])
                # labels are written by hand: along the diagonal and on top
                for i in range(0, width):
                    plt.text(-.6 + i, i, labels[i], fontsize=fontsize,
                             color=label_color, horizontalalignment='right',
                             rotation=0)
                    plt.text(i, -.5, labels[i], fontsize=fontsize,
                             color=label_color, rotation=rotation, verticalalignment='bottom')
                plt.axis('off')

        # set all ticks length to zero
        ax = plt.gca()
        ax.tick_params(axis='both', which='both', length=0)

        if colorbar:
            N = self.params['colorbar.N'] + 1
            assert N >= 2
            cb = plt.gcf().colorbar(self.collection,
                                    orientation=self.params[
                                        'colorbar.orientation'],
                                    shrink=self.params['colorbar.shrink'],
                                    boundaries=np.linspace(0, 1, N), ticks=[0, .25, 0.5, 0.75, 1])
            cb.ax.set_yticklabels([-1, -.5, 0, .5, 1])
            # make sure it goes from -1 to 1 even though actual values may not
            # reach that range
            cb.set_clim(0, 1)

        return ax
예제 #9
0
    def plot(self, num=1, grid=True,
            rotation=30, colorbar_width=10, lower=None, upper=None,
            shrink=0.9, axisbg='white', colorbar=True, label_color='black',
            fontsize='small', edgecolor='black', method='ellipse', order=None,
            cmap=None
            ):
        """plot the correlation matrix from the content of :attr:`df`
        (dataframe)

        :param num: number of the figure to draw in; that figure is cleared
            before plotting
        :param grid: add grid (Defaults to True)
        :param rotation: rotate labels on y-axis
        :param colorbar_width: not used by this method
        :param lower: if set to a valid method, plots the data on the lower
            triangle
        :param upper: if set to a valid method, plots the data on the upper
            triangle
        :param shrink: stored on ``self.shrink``
        :param axisbg: color of the figure and axes background
        :param colorbar: add the colorbar (defaults to True)
        :param label_color: color of the tick labels
        :param fontsize: size of the fonts (defaults to 'small')
        :param edgecolor: stored on ``self.edgecolor``
        :param method: shape to be used in 'ellipse', 'square', 'rectangle',
            'color', 'text', 'circle',  'number', 'pie'.
        :param order: if set to 'hclust', the dataframe returned by
            ``self.order(method='hclust')`` is plotted instead of :attr:`df`
        :param cmap: a valid cmap name from matplotlib or the colormap
            package (e.g., 'jet'), or a sequence of colors that is passed to
            ``cmap_builder``. If it cannot be built, the default colormap is
            used.

        Here are some examples provided that the data is created and pass to c::

            c = corrplot.Corrplot(dataframe)
            c.plot(cmap=('Orange', 'white', 'green'))
            c.plot(method='circle')
            c.plot(colorbar=False, shrink=.8, upper='circle'  )


        """

        # default
        if cmap is not None:
            try:
                if isinstance(cmap, str):
                    self.cm = cmap_builder(cmap)
                else:
                    self.cm = cmap_builder(*cmap)
            except Exception:
                # Best effort: fall back to the default colormap without
                # swallowing KeyboardInterrupt/SystemExit.
                print("incorrect cmap. Use default one")
                self._set_default_cmap()
        else:
            self._set_default_cmap()

        self.shrink = shrink
        self.fontsize = fontsize
        self.edgecolor = edgecolor

        if order == 'hclust':
            df = self.order(method='hclust')
        else:
            df = self.df

        # Select the target figure first and clear *that* one; calling
        # plt.clf() beforehand would wipe whichever figure happens to be
        # current (or create a stray empty one).
        fig = plt.figure(num=num, facecolor=axisbg)
        fig.clf()

        ax = plt.subplot(1, 1, 1, aspect='equal', axisbg=axisbg)
        # subplot resets the bg color, let us set it again
        fig.set_facecolor(axisbg)

        width, height = df.shape
        labels = (df.columns)

        # add all patches to the figure
        # TODO check value of lower and upper

        if upper is None and lower is None:
            mode = 'method'
        elif upper and lower:
            mode = 'both'
        elif lower is not None:
            mode = 'lower'
        elif upper is not None:
            mode = 'upper'
        else:
            raise ValueError

        if mode == 'upper':
            self._add_patches(df, upper, 'upper',  ax, diagonal=True)
        elif mode == 'lower':
            self._add_patches(df, lower, 'lower',  ax, diagonal=True)
        elif mode == 'method':
            self._add_patches(df, method, 'both',  ax, diagonal=True)
        elif mode == 'both':
            self._add_patches(df, upper, 'upper',  ax, diagonal=False)
            self._add_patches(df, lower, 'lower',  ax, diagonal=False)

        # shift the limits to englobe the patches correctly
        ax.set_xlim(-0.5, width-.5)
        ax.set_ylim(-0.5, height-.5)

        # set xticks/xlabels on top
        ax.xaxis.tick_top()
        xtickslocs = np.arange(len(labels))
        ax.set_xticks(xtickslocs)
        ax.set_xticklabels(labels, rotation=rotation, color=label_color,
                fontsize=fontsize, ha='left')

        ax.invert_yaxis()
        ytickslocs = np.arange(len(labels))
        ax.set_yticks(ytickslocs)
        ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
        plt.tight_layout()

        if grid is True:
            for i in range(0, width):
                ratio1 = float(i)/width
                ratio2 = float(i+2)/width
                # TODO 1- set axis off
                # 2 - set xlabels along the diagonal
                # set colorbar either on left or bottom
                if mode == 'lower':
                    plt.axvline(i+.5, ymin=1-ratio1, ymax=0., color='grey')
                    plt.axhline(i+.5, xmin=0, xmax=ratio2, color='grey')
                if mode == 'upper':
                    plt.axvline(i+.5, ymin=1 - ratio2, ymax=1, color='grey')
                    plt.axhline(i+.5, xmin=ratio1, xmax=1, color='grey')
                if mode in ['method', 'both']:
                    plt.axvline(i+.5, color='grey')
                    plt.axhline(i+.5, color='grey')

            # can probably be simplified
            if mode == 'lower':
                plt.axvline(-.5, ymin=0, ymax=1, color='grey')
                plt.axvline(width-.5, ymin=0, ymax=1./width, color='grey', lw=2)
                plt.axhline(width-.5, xmin=0, xmax=1, color='grey',lw=2)
                plt.axhline(-.5, xmin=0, xmax=1./width, color='grey',lw=2)
                plt.xticks([])
                # labels are written by hand: along the diagonal and on the
                # left-hand side
                for i in range(0, width):
                    plt.text(i, i-.6 ,labels[i],fontsize=fontsize,
                            color=label_color,
                            rotation=rotation, verticalalignment='bottom')
                    plt.text(-.6, i ,labels[i],fontsize=fontsize,
                            color=label_color,
                            rotation=0, horizontalalignment='right')
                plt.axis('off')
            # can probably be simplified
            elif mode == 'upper':
                plt.axvline(width-.5, ymin=0, ymax=1, color='grey', lw=2)
                plt.axvline(-.5, ymin=1-1./width, ymax=1, color='grey', lw=2)
                plt.axhline(-.5, xmin=0, xmax=1, color='grey',lw=2)
                plt.axhline(width-.5, xmin=1-1./width, xmax=1, color='grey',lw=2)
                plt.yticks([])
                # labels are written by hand: along the diagonal and on top
                for i in range(0, width):
                    plt.text(-.6+i, i ,labels[i],fontsize=fontsize,
                            color=label_color, horizontalalignment='right',
                            rotation=0)
                    plt.text(i, -.5 ,labels[i],fontsize=fontsize,
                            color=label_color, rotation=rotation, verticalalignment='bottom')
                plt.axis('off')

        # set all ticks length to zero
        ax = plt.gca()
        ax.tick_params(axis='both',which='both', length=0)

        if colorbar:
            N = self.params['colorbar.N']
            cb = plt.gcf().colorbar(self.collection,
                    orientation=self.params['colorbar.orientation'], shrink=.9,
                boundaries= np.linspace(0,1,N), ticks=[0,.25, 0.5, 0.75,1])
            cb.ax.set_yticklabels([-1,-.5,0,.5,1])
            # make sure it goes from -1 to 1 even though actual values may
            # not reach that range
            cb.set_clim(0,1)
예제 #10
0
    def plot(self,
             num=1,
             cmap=None,
             colorbar=True,
             vmin=None,
             vmax=None,
             colorbar_position='right',
             gradient_span='None',
             figsize=(12, 8),
             fontsize=None):
        """Plot the heatmap with its optional dendrograms and side bars.

        Using as input::

            df = pd.DataFrame({'A':[1,0,1,1],
                               'B':[.9,0.1,.6,1],
                            'C':[.5,.2,0,1],
                            'D':[.5,.2,0,1]})

        we can plot the heatmap + dendogram as follows::

            h = Heatmap(df)
            h.plot(vmin=0, vmax=1.1)


        .. plot::
            :include-source:
            :width: 80%

            from sequana.viz import heatmap
            df = heatmap.get_heatmap_df()
            h = heatmap.Heatmap(df)
            h.category_column['A'] = 1
            h.category_column['C'] = 1
            h.category_column['D'] = 2
            h.category_column['B'] = 2
            h.plot()

        :param num: number of the figure to draw in (it is cleared first)
        :param cmap: colormap or colormap name; defaults to
            ``self.params.cmap``. It is passed through
            ``colormap.cmap_builder`` when possible, otherwise used as is.
        :param colorbar: if True, add the color legend
        :param vmin: lower bound of the color normalisation
        :param vmax: upper bound of the color normalisation
        :param colorbar_position: 'right' or 'top left'
        :param gradient_span: one of 'min_to_max_centered', 'only_max' or
            'only_min' to derive vmin/vmax from the data; any other value
            leaves vmin/vmax untouched
        :param figsize: size of the figure
        :param fontsize: if set, overrides the font size otherwise chosen
            from the number of rows/columns
        :return: dictionary of the axes/colorbar objects that were created
        :raises ValueError: if ``colorbar_position`` is not supported

        .. note:: ``matplotlib.rcParams['font.size']`` is changed globally
            and ``self.frame`` is reordered according to the dendrograms.

        """
        # save all parameters in a dict
        layout = {}

        if cmap is None:
            cmap = self.params.cmap
        try:
            cmap = colormap.cmap_builder(cmap)
        except Exception:
            # Deliberate best effort: keep the user-provided cmap untouched
            # if cmap_builder cannot interpret it.
            pass

        # keep track of row and column names for later.
        row_header = self.frame.index
        column_header = self.frame.columns

        # FIXME something clever for the fontsize
        # The tests must be chained (elif): with independent ``if``
        # statements the last if/else always overrode the sizes 6 and 7.
        if len(row_header) > 100 or len(column_header) > 100:
            matplotlib.rcParams['font.size'] = 6
        elif len(row_header) > 50 or len(column_header) > 50:
            matplotlib.rcParams['font.size'] = 7
        elif len(row_header) > 30 or len(column_header) > 30:
            matplotlib.rcParams['font.size'] = 8
        else:
            matplotlib.rcParams['font.size'] = 12
        if fontsize:
            matplotlib.rcParams['font.size'] = fontsize

        # scaling min/max range
        self.gradient_span = gradient_span  #'only_max'
        # min_to_max, min_to_max_centered, only_max, only_min

        if self.gradient_span == 'min_to_max_centered':
            vmax = self.frame.max().max()
            vmin = self.frame.min().min()
            vmax = max([vmax, abs(vmin)])
            vmin = vmax * -1
        if self.gradient_span == 'only_max':
            vmin = 0
            vmax = self.frame.max().max()
        if self.gradient_span == 'only_min':
            vmin = self.frame.min().min()
            vmax = 0
        norm = matplotlib.colors.Normalize(vmin, vmax)

        # Scale the figure window size #
        fig = pylab.figure(num=num, figsize=figsize)
        fig.clf()

        # LAYOUT --------------------------------------------------
        # ax1 (dendrogram 1) on the left of the heatmap
        [ax1_x, ax1_y, ax1_w, ax1_h] = [0.05, 0.22, 0.2, 0.6]
        width_between_ax1_axr = 0.004
        # distance between the top color bar axis and the matrix
        height_between_ax1_axc = 0.004
        # Sufficient size to show
        color_bar_w = 0.015

        # axr, placement of row side colorbar
        # second to last controls the width of the side color bar - 0.015 when showing
        [axr_x, axr_y, axr_w, axr_h] = [0.31, 0.1, color_bar_w, 0.6]
        axr_x = ax1_x + ax1_w + width_between_ax1_axr
        axr_y = ax1_y
        axr_h = ax1_h
        width_between_axr_axm = 0.004

        # axc, placement of column side colorbar #
        # last one controls the height of the top color bar - 0.015 when showing
        [axc_x, axc_y, axc_w, axc_h] = [0.4, 0.63, 0.5, color_bar_w]
        axc_x = axr_x + axr_w + width_between_axr_axm
        axc_y = ax1_y + ax1_h + height_between_ax1_axc
        height_between_axc_ax2 = 0.004

        # axm, placement of heatmap for the data matrix # why larger than 1?
        [axm_x, axm_y, axm_w, axm_h] = [0.4, 0.9, 2.5, 0.5]
        axm_x = axr_x + axr_w + width_between_axr_axm
        axm_y = ax1_y
        axm_h = ax1_h
        axm_w = axc_w

        # ax2 (dendrogram 2), on the top of the heatmap #
        [ax2_x, ax2_y, ax2_w, ax2_h] = [0.3, 0.72, 0.6, 0.15]
        ax2_x = axr_x + axr_w + width_between_axr_axm
        ax2_y = ax1_y + ax1_h + height_between_ax1_axc + axc_h + height_between_axc_ax2
        ax2_w = axc_w

        # axcb - placement of the color legend #
        if colorbar_position == 'top left':
            [axcb_x, axcb_y, axcb_w, axcb_h] = [0.07, 0.88, 0.18, 0.09]
        elif colorbar_position == 'right':
            [axcb_x, axcb_y, axcb_w, axcb_h] = [0.85, 0.2, 0.08, 0.6]
        else:
            raise ValueError("'top left' or 'right' accepted for now")

        # COMPUTATION DENDOGRAM 1 -------------------------------------
        if self.column_method:
            Y = self.linkage(self.frame.transpose(), self.column_method,
                             self.column_metric)
            ax2 = fig.add_axes([ax2_x, ax2_y, ax2_w, ax2_h], frame_on=True)

            #     p=30,    truncate_mode=None,    color_threshold=None,    get_leaves=True,
            # orientation='top    labels=None,    count_sort=False,    distance_sort=False,
            #     show_leaf_counts=True,    no_plot=False,    no_labels=False,    leaf_font_size=None,
            #     leaf_rotation=None,    leaf_label_func=None,    show_contracted=False,
            #     link_color_func=None,    ax=None,    above_threshold_color='b',            #

            # color_threshold=0 and above_threshold_color='k' colors all
            # dendogram into black
            Z = hierarchy.dendrogram(Y,
                                     color_threshold=0,
                                     above_threshold_color="k",
                                     distance_sort="descending")
            ind2 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                      self.cluster_criterion)

            ax2.set_xticks([])
            ax2.set_yticks([])
            # apply the clustering for the array-dendrograms to the actual matrix data
            idx2 = Z['leaves']
            self.frame = self.frame.iloc[:, idx2]
            # reorder the flat cluster to match the order of the leaves the dendrogram
            ind2 = ind2[idx2]
            layout['dendogram2'] = ax2
        else:
            idx2 = range(self.frame.shape[1])

        # COMPUTATION DENDOGRAM 2 ---------------------------------
        if self.row_method:
            Y = self.linkage(self.frame, self.row_method, self.row_metric)

            ax1 = fig.add_axes([ax1_x, ax1_y, ax1_w, ax1_h], frame_on=True)
            Z = hierarchy.dendrogram(Y,
                                     orientation='right',
                                     color_threshold=0,
                                     above_threshold_color="k",
                                     distance_sort="descending")
            ind1 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                      self.cluster_criterion)

            ax1.set_xticks([])
            ax1.set_yticks([])
            # apply the clustering for the array-dendrograms to the actual matrix data
            idx1 = Z['leaves']
            self.frame = self.frame.iloc[idx1, :]
            # reorder the flat cluster to match the order of the leaves the dendrogram
            ind1 = ind1[idx1]
            layout['dendogram1'] = ax1
        else:
            idx1 = range(self.frame.shape[0])

        # HEATMAP itself
        axm = fig.add_axes([axm_x, axm_y, axm_w, axm_h])
        axm.imshow(self.frame,
                   aspect='auto',
                   origin='lower',
                   interpolation='None',
                   cmap=cmap,
                   norm=norm)
        axm.set_xticks([])
        axm.set_yticks([])
        layout['heatmap'] = axm

        # TEXT
        # row_header/column_header hold the names in their original order,
        # hence the lookup through idx1/idx2.
        new_row_header = []
        new_column_header = []
        for i in range(self.frame.shape[0]):
            axm.text(self.frame.shape[1] - 0.5,
                     i,
                     '  ' + str(row_header[idx1[i]]),
                     verticalalignment="center")
            new_row_header.append(
                row_header[idx1[i]] if self.row_method else row_header[i])

        for i in range(self.frame.shape[1]):
            axm.text(i,
                     -0.9,
                     ' ' + str(column_header[idx2[i]]),
                     rotation=90,
                     verticalalignment="top",
                     horizontalalignment="center")
            new_column_header.append(column_header[idx2[i]] if self.
                                     column_method else column_header[i])

        # CATEGORY column ------------------------------
        if self.category_column:
            axc = fig.add_axes([axc_x, axc_y, axc_w, axc_h])

            category_col = [
                self.category_column[self.df.columns[i]] for i in idx2
            ]

            dc = np.array(category_col, dtype=int)
            # Use the length of the categories themselves: ``ind2`` only
            # exists when a column clustering method is set.
            dc.shape = (1, len(category_col))
            cmap_c = matplotlib.colors.ListedColormap(
                self.params.col_side_colors)
            axc.matshow(dc, aspect='auto', origin='lower', cmap=cmap_c)
            axc.set_xticks([])
            axc.set_yticks([])
            layout['category_column'] = axc

        # CATEGORY row -------------------------------
        if self.category_row:
            axr = fig.add_axes([axr_x, axr_y, axr_w, axr_h])
            # self.category_row must be a dictionary keyed by the index
            # labels of the dataframe.

            category_row = [self.category_row[self.df.index[i]] for i in idx1]

            dr = np.array(category_row, dtype=int)
            dr.shape = (len(category_row), 1)
            cmap_r = matplotlib.colors.ListedColormap(
                self.params.col_side_colors)
            axr.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r)
            axr.set_xticks([])
            axr.set_yticks([])
            layout['category_row'] = axr

        # COLORBAR ----------------------
        if colorbar == True:
            axcb = fig.add_axes([axcb_x, axcb_y, axcb_w, axcb_h],
                                frame_on=False)
            if colorbar_position == 'right':
                orientation = 'vertical'
            else:
                orientation = 'horizontal'
            cb = matplotlib.colorbar.ColorbarBase(ax=axcb,
                                                  cmap=cmap,
                                                  norm=norm,
                                                  orientation=orientation)
            #axcb.set_title("whatever")
            #max_cb_ticks = 5
            #axcb.xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(max_cb_ticks))
            layout['colorbar'] = cb
            layout['colorbar_scalablemap'] = axcb

        #   could be useful
        self.d = {'ordered': self.frame.copy(), 'rorder': idx1, 'corder': idx2}

        return layout
예제 #11
0
    def add_features(self):
        """Save the feature/drug summaries and build the drug and feature tables.

        The method

        * writes ``OUTPUT/features_summary.csv`` and
          ``OUTPUT/drugs_summary.csv`` under ``self.directory``;
        * sets ``self.jinja['drug_not_tested']`` (always an empty string);
        * when ``self.report.df`` is not empty, sets
          ``self.jinja['drug_table']`` and ``self.jinja['feature_table']``
          to HTML tables whose ``hits`` column counts the significant
          associations per drug / per feature.

        :return: None. Returns early, after the CSV files are written, when
            ``self.report.df`` is empty.
        """
        # feature summary
        df_features = self.report.feature_summary("feature_summary.png")
        filename = 'OUTPUT' + os.sep + 'features_summary.csv'
        df_features.to_csv(self.directory + os.sep + filename, sep=',')

        not_tested = ""
        self.jinja['drug_not_tested'] = not_tested

        df_drugs = self.report.drug_summary(filename="drug_summary.png")
        get_name = self.report.drug_decode.get_name
        if len(self.report.drug_decode.df) > 0:
            df_drugs.index = [
                "{}-{}".format(x, get_name(x)) for x in df_drugs.index
            ]
        filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
        df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

        # len() rather than truthiness: this is indexed like a DataFrame
        # below, and DataFrame truthiness is ambiguous.
        if len(self.report.df) == 0:
            return

        # --------------------------- Create table with links to all drugs
        groups = self.report.df.groupby('DRUG_ID')
        try:
            # Select the FDR column *before* averaging: averaging every
            # column first fails on recent pandas as soon as the frame
            # holds a non-numeric column.
            df = groups['ANOVA_FEATURE_FDR'].mean().sort_values()
        except AttributeError:
            # Very old pandas without ``sort_values``.
            # note double brackets for python3.3
            df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()

        df = df.reset_index()  # get back the Drug id in the dframe columns
        # let us add also the drug name
        df = self.report.drug_decode.drug_annotations(df)

        # let us also add number of associations computed
        df['Number of associations computed'] = [
            len(groups.groups[k]) for k in df.DRUG_ID
        ]
        significant = self.report.get_significant_set().groupby(
            'DRUG_ID').groups
        df['hits'] = [
            len(significant[drug]) if drug in significant else 0
            for drug in df['DRUG_ID'].values
        ]

        # add another set of drug_id but sorted in alpha numerical order
        table = HTMLTable(df, 'drugs')
        table.add_href('DRUG_ID', url="associations/drug_", suffix=".html")
        table.df.columns = [
            x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
            for x in table.df.columns
        ]
        table.add_bgcolor('hits',
                          mode='max',
                          cmap=cmap_builder('white', 'orange', 'red'))

        self.jinja['drug_table'] = table.to_html(escape=False,
                                                 header=True,
                                                 index=False)

        # ---------------------- Create full table with links to all features
        df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
        try:
            df.sort_values(by='FEATURE', inplace=True)
        except AttributeError:
            # Very old pandas without ``sort_values``.
            df.sort('FEATURE', inplace=True)

        significant = self.report.get_significant_set().groupby(
            'FEATURE').groups
        # A plain list is assigned positionally, i.e. in the sorted order.
        df['hits'] = [
            len(significant[feature]) if feature in significant else 0
            for feature in df['FEATURE'].values
        ]

        table = HTMLTable(df, 'features')
        table.sort('hits', ascending=False)
        table.add_href('FEATURE', url="associations/", suffix=".html")
        table.add_bgcolor('hits',
                          mode='max',
                          cmap=cmap_builder('white', 'orange', 'red'))
        self.jinja['feature_table'] = table.to_html(escape=False,
                                                    header=True,
                                                    index=False)
예제 #12
0
 def _set_default_cmap(self):
     """Store the default three-color colormap in ``self.cm``."""
     default_cmap = cmap_builder('#AA0000', 'white', 'darkblue')
     self.cm = default_cmap
예제 #13
0
    def plot(self,
             num=1,
             cmap="heat",
             colorbar=True,
             vmin=None,
             vmax=None,
             colorbar_position='right',
             gradient_span='None'):
        """Draw the heatmap, its dendrograms and side bars in one figure.

        :param num: number of the matplotlib figure to use; the figure is
            cleared before drawing.
        :param cmap: colormap specification passed to
            ``colormap.cmap_builder``.
        :param colorbar: if true, add the color legend.
        :param vmin: lower bound of the color normalisation.
        :param vmax: upper bound of the color normalisation.
        :param colorbar_position: ``'right'`` (vertical legend) or
            ``'top left'`` (horizontal legend).
        :param gradient_span: ``'min_to_max_centered'``, ``'only_max'`` or
            ``'only_min'`` to derive ``vmin``/``vmax``; any other value
            (the default is the string ``'None'``, as in R) leaves them as
            given. In centered mode a missing ``vmin``/``vmax`` is taken
            from the data.
        :return: dictionary of the created artists, with the keys
            ``'heatmap'`` and, when drawn, ``'dendogram1'``,
            ``'dendogram2'``, ``'category_row'``, ``'category_column'``
            and ``'colorbar'``.
        :raises ValueError: if a color legend is requested with an
            unsupported ``colorbar_position``.

        Side effects: ``matplotlib.rcParams['font.size']`` is changed,
        ``self.gradient_span`` is set, ``self.frame`` is reordered to follow
        the dendrogram leaves, and ``self.d`` receives the reordered frame
        together with the row and column orders.

        Using::

            df = pd.DataFrame({'A':[1,0,1,1],
                               'B':[.9,0.1,.6,1],
                               'C':[.5,.2,0,1],
                               'D':[.5,.2,0,1]})

        and ::

            h = Heatmap(df)
            h.plot(vmin=0, vmax=1.1)

        we seem to get the same as in R with ::

            df = data.frame(A=c(1,0,1,1), B=c(.9,.1,.6,1), C=c(.5,.2,0,1), D=c(.5,.2,0,1))
            heatmap((as.matrix(df)), scale='none')


        .. todo:: right now, the order of cols and rows is random somehow.
            could be ordered like in heatmap (r) by mean of the row and col
            or with a set of vector for col and rows.

            heatmap((as.matrix(df)), Rowv=c(3,2), Colv=c(1), scale='none')

            gives same as::

                df = get_heatmap_df()
                h = heatmap.Heatmap(df)
                h.plot(vmin=-0, vmax=1.1)

        """
        # save all parameters in a dict
        layout = {}

        cmap = colormap.cmap_builder(cmap)

        # keep track of row and column names for later (taken before
        # self.frame is reordered below).
        row_header = self.frame.index
        column_header = self.frame.columns

        # FIXME something clever for the fontsize
        # The branches must be exclusive, otherwise the smallest size is
        # immediately overwritten by the intermediate one.
        n_labels = max(len(row_header), len(column_header))
        if n_labels > 100:
            matplotlib.rcParams['font.size'] = 6
        elif n_labels > 50:
            matplotlib.rcParams['font.size'] = 7
        else:
            matplotlib.rcParams['font.size'] = 12

        # scaling min/max range
        # min_to_max, min_to_max_centered, only_max, only_min
        self.gradient_span = gradient_span
        if self.gradient_span == 'min_to_max_centered':
            # Fall back on the data range when no bound was given.
            if vmin is None:
                vmin = self.frame.min().min()
            if vmax is None:
                vmax = self.frame.max().max()
            vmax = max([vmax, abs(vmin)])
            vmin = vmax * -1
        elif self.gradient_span == 'only_max':
            vmin = 0
            vmax = self.frame.max().max()
        elif self.gradient_span == 'only_min':
            vmin = self.frame.min().min()
            vmax = 0
        norm = matplotlib.colors.Normalize(vmin, vmax)

        # Scale the figure window size #
        fig = pylab.figure(num=num, figsize=(12, 8))
        fig.clf()

        # LAYOUT --------------------------------------------------
        # ax1 (dendrogram 1) on the left of the heatmap
        ax1_x, ax1_y, ax1_w, ax1_h = 0.05, 0.22, 0.2, 0.6
        width_between_ax1_axr = 0.004
        # distance between the top color bar axis and the matrix
        height_between_ax1_axc = 0.004
        # Sufficient size to show
        color_bar_w = 0.015

        # axr, placement of row side colorbar
        axr_x = ax1_x + ax1_w + width_between_ax1_axr
        axr_y = ax1_y
        axr_w = color_bar_w
        axr_h = ax1_h
        width_between_axr_axm = 0.004

        # axc, placement of column side colorbar
        axc_x = axr_x + axr_w + width_between_axr_axm
        axc_y = ax1_y + ax1_h + height_between_ax1_axc
        axc_w = 0.5
        axc_h = color_bar_w
        height_between_axc_ax2 = 0.004

        # axm, placement of heatmap for the data matrix
        axm_x = axr_x + axr_w + width_between_axr_axm
        axm_y = ax1_y
        axm_w = axc_w
        axm_h = ax1_h

        # ax2 (dendrogram 2), on the top of the heatmap
        ax2_x = axr_x + axr_w + width_between_axr_axm
        ax2_y = ax1_y + ax1_h + height_between_ax1_axc + axc_h + height_between_axc_ax2
        ax2_w = axc_w
        ax2_h = 0.15

        # axcb - placement of the color legend
        if colorbar_position == 'top left':
            axcb_x, axcb_y, axcb_w, axcb_h = 0.07, 0.88, 0.18, 0.09
        elif colorbar_position == 'right':
            axcb_x, axcb_y, axcb_w, axcb_h = 0.85, 0.2, 0.08, 0.6
        elif colorbar:
            # Without this the legend code below fails on unbound names.
            raise ValueError("'top left' or 'right' accepted for now")

        # COMPUTATION DENDOGRAM 1 -------------------------------------
        if self.column_method:
            Y = self.get_linkage(self.frame.transpose(), self.column_method,
                                 self.column_metric)
            # the dendrogram is drawn in the axes that was created last
            ax2 = fig.add_axes([ax2_x, ax2_y, ax2_w, ax2_h], frame_on=True)
            Z = hierarchy.dendrogram(Y)
            ind2 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                      self.cluster_criterion)

            ax2.set_xticks([])
            ax2.set_yticks([])
            # apply the clustering for the array-dendrograms to the actual matrix data
            idx2 = Z['leaves']
            self.frame = self.frame.iloc[:, idx2]
            # reorder the flat cluster to match the order of the leaves the dendrogram
            ind2 = ind2[idx2]
            layout['dendogram2'] = ax2
        else:
            idx2 = range(self.frame.shape[1])

        # COMPUTATION DENDOGRAM 2 ---------------------------------
        if self.row_method:
            Y = self.get_linkage(self.frame, self.row_method, self.row_metric)

            ax1 = fig.add_axes([ax1_x, ax1_y, ax1_w, ax1_h], frame_on=True)
            Z = hierarchy.dendrogram(Y, orientation='right')
            ind1 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                      self.cluster_criterion)

            ax1.set_xticks([])
            ax1.set_yticks([])
            # apply the clustering for the array-dendrograms to the actual matrix data
            idx1 = Z['leaves']
            self.frame = self.frame.iloc[idx1, :]
            # reorder the flat cluster to match the order of the leaves the dendrogram
            ind1 = ind1[idx1]
            layout['dendogram1'] = ax1
        else:
            idx1 = range(self.frame.shape[0])

        # HEATMAP itself
        axm = fig.add_axes([axm_x, axm_y, axm_w, axm_h])
        axm.imshow(self.frame,
                   aspect='auto',
                   origin='lower',
                   interpolation='None',
                   cmap=cmap,
                   norm=norm)
        axm.set_xticks([])
        axm.set_yticks([])
        layout['heatmap'] = axm

        # TEXT: labels are looked up in the headers saved before reordering
        n_rows, n_cols = self.frame.shape
        for i in range(n_rows):
            axm.text(n_cols - 0.5,
                     i,
                     '  ' + str(row_header[idx1[i]]),
                     verticalalignment="center")

        for i in range(n_cols):
            axm.text(i,
                     -0.9,
                     ' ' + str(column_header[idx2[i]]),
                     rotation=90,
                     verticalalignment="top",
                     horizontalalignment="center")

        # CATEGORY column ------------------------------
        if self.column_method:
            axc = fig.add_axes([axc_x, axc_y, axc_w, axc_h])
            cmap_c = matplotlib.colors.ListedColormap(
                self.params.col_side_colors)
            dc = np.array(ind2, dtype=int)
            dc.shape = (1, len(ind2))
            axc.matshow(dc, aspect='auto', origin='lower', cmap=cmap_c)
            axc.set_xticks([])
            axc.set_yticks([])
            layout['category_column'] = axc

        # CATEGORY row -------------------------------
        if self.row_method:
            axr = fig.add_axes([axr_x, axr_y, axr_w, axr_h])
            dr = np.array(ind1, dtype=int)
            dr.shape = (len(ind1), 1)
            cmap_r = matplotlib.colors.ListedColormap(
                self.params.col_side_colors)
            axr.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r)
            axr.set_xticks([])
            axr.set_yticks([])
            layout['category_row'] = axr

        # COLORBAR ----------------------
        if colorbar:
            axcb = fig.add_axes([axcb_x, axcb_y, axcb_w, axcb_h],
                                frame_on=False)
            if colorbar_position == 'right':
                orientation = 'vertical'
            else:
                orientation = 'horizontal'
            cb = matplotlib.colorbar.ColorbarBase(axcb,
                                                  cmap=cmap,
                                                  norm=norm,
                                                  orientation=orientation)
            layout['colorbar'] = cb

        # could be useful: keep the reordered data and both orderings
        # (previously computed into a local and thrown away).
        self.d = {'ordered': self.frame.copy(), 'rorder': idx1, 'corder': idx2}

        return layout
예제 #14
0
def _build_colormap(color1, color2, color3):
    """Build a colormap from three given colors (given as strings).

    :param color1: first color of the map.
    :param color2: second color of the map.
    :param color3: third color of the map.
    :return: whatever ``cmap_builder`` returns for these three colors.
    """
    # The arguments used to be ignored in favour of a hard-coded
    # 'blue' / 'orange' / 'green' triple.
    return cmap_builder(color1, color2, color3)
예제 #15
0
    def _create_report(self, onweb=True):
        # A summary table
        diag = self.report.diagnostics()
        table = HTMLTable(diag, 'summary')
        txt = ''
        for index, row in diag.iterrows():
            if len(row.text) == 0 and len(row.value) == 0:
                txt += '----<br/>'
            else:
                txt += row.text + ": " +  str(row.value) + "<br/>"
        self.jinja['summary'] = txt

        print('Creating volcano plots')
        # this can be pretty slow. so keep only 1000 most relevant
        # values and 1000 random ones to get an idea of the distribution
        v = VolcanoANOVA(self.report.df, settings=self.settings)
        v.selector(v.df, 1500, 1500, inplace=True)
        v.volcano_plot_all()
        v.savefig_and_js("volcano_all_js")

        self.jinja['volcano'] = """
            <h3></h3>
            <a href="volcano_all_js.html">
                <img alt="volcano plot for all associations"
                    src="volcano_all_js.png">
            </a>
            <br/>
            <p> A javascript version is available
                <a href="volcano_all_js.html">here</a> (
                or click on the image).</p>
        """

        # MANOVA link
        N = len(self.report.get_significant_set())
        self.jinja['manova'] = """
        There were %(N)s significant associations found.
        All significant associations have been gatherered
        in the following link: <br/><a href="manova.html">manova results</a>.
        """ % {'N': N}

        # feature summary
        df_features = self.report.feature_summary("feature_summary.png")
        filename = 'OUTPUT' + os.sep + 'features_summary.csv'
        df_features.to_csv(self.directory + os.sep + filename, sep=',')

        # drug summary
        #not_tested = [x for x in self.report.gdsc.drugIds if x not in
        #        self.report.df.DRUG_ID.unique()]
        #if len(not_tested) > 0:
        #    not_tested = """%s drugs were not analysed due to
        #    lack of valid data points: """ % len(not_tested) + \
        #            ", ".join(not_tested)
        #else:
        #    not_tested = ""
        not_tested = ""
        self.jinja['drug_not_tested'] = not_tested

        df_drugs = self.report.drug_summary(filename="drug_summary.png")
        get_name = self.report.drug_decode.get_name
        if len(self.report.drug_decode.df) > 0:
            df_drugs.index = [x + "-" + get_name(x) for x in df_drugs.index]
        filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
        df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

        # --------------------------- Create table with links to all drugs
        groups = self.report.df.groupby('DRUG_ID')
        try:
            df = groups.mean()['ANOVA_FEATURE_FDR'].sort_values()
        except:
            # note double brackets for pythonn3.3
            df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
        df = df.reset_index() # get back the Drug id in the dframe columns

        # let us add also the drug name
        df = self.report.drug_decode.drug_annotations(df)

        # let us also add number of associations computed
        counts = [len(groups.groups[k]) for k in df.DRUG_ID]
        df['Number of associations computed'] = counts
        groups = self.report.get_significant_set().groupby('DRUG_ID').groups
        count = []
        for drug in df['DRUG_ID'].values:
            if drug in groups.keys():
                count.append(len(groups[drug]))
            else:
                count.append(0)
        df['hits'] = count

        # add another set of drug_id but sorted in alpha numerical order
        table = HTMLTable(df, 'drugs')
        table.add_href('DRUG_ID')
        table.df.columns = [x.replace('ANOVA_FEATURE_FDR',
            'mean FEATURE ANOVA FDR') for x in table.df.columns]
        table.add_bgcolor('hits', mode='max',
                cmap=cmap_builder('white', 'orange', 'red'))

        self.jinja['drug_table'] = table.to_html(escape=False,
                header=True, index=False)

        # ---------------------- Create full table with links to all features
        df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
        try:
            df.sort_values(by='FEATURE', inplace=True)
        except:
            df.sort('FEATURE', inplace=True)

        groups = self.report.get_significant_set().groupby('FEATURE').groups

        count = []
        for feature in df['FEATURE'].values:
            if feature in groups.keys():
                count.append(len(groups[feature]))
            else:
                count.append(0)
        df['hits'] = count

        table = HTMLTable(df, 'features')
        table.sort('hits', ascending=False)
        table.add_href('FEATURE')
        table.add_bgcolor('hits', mode='max',
                cmap=cmap_builder('white', 'orange', 'red'))
        self.jinja['feature_table'] = table.to_html(escape=False,
                header=True, index=False)

        # -------------------------------------- COSMIC table for completeness
        colnames = self.report.gdsc.features._special_names
        df = self.report.gdsc.features.df[colnames]

        # TODO
        # add other columns if possible e.g., GDSC1, GDSC2, TCGA

        df = df.reset_index()
        table = HTMLTable(df)
        url = "http://cancer.sanger.ac.uk/cell_lines/sample/overview?id="
        table.add_href('COSMIC_ID', url=url, newtab=True)
        self.jinja['cosmic_table'] = table.to_html()

        # -------------------------------------- settings and INPUT files
        input_dir = self.directory + os.sep + 'INPUT'
        filename = 'ANOVA_input.csv'
        filename = os.sep.join([input_dir, filename])
        self.report.gdsc.ic50.to_csv(filename)
        filename = os.sep.join(['INPUT', 'ANOVA_input.csv'])
        self.jinja['ic50_file'] = filename

        # the genomic features, which may be the default version
        # one provided by the user. It may have been changed
        gf_filename = os.sep.join([input_dir, 'genomic_features.csv'])
        self.report.gdsc.features.to_csv(gf_filename)
        html = """Saved <a href="INPUT/genomic_features.csv">Genomic
                  Features</a> file<br/> (possibly the default
                  version)."""
        self.jinja['gf_file'] = html

        # Always save DRUG_DECODE file even if empty
        # It may be be interpreted in other pipeline or for reproducibility
        output_filename = input_dir + os.sep + 'DRUG_DECODE.csv'
        self.report.drug_decode.to_csv(output_filename)
        html = 'Get <a href="INPUT/DRUG_DECODE.csv">Drug DECODE file</a>'
        if len(self.report.drug_decode) == 0:
            html += 'Note that DRUG_DECODE file was not provided (empty?).'
        self.jinja['drug_decode'] = html

        # Save settings as json file
        filename = os.sep.join([input_dir, 'settings.json'])
        self.settings.to_json(filename)
        filename = os.path.basename(filename)
        self.jinja['settings'] = \
                """Get the settings as a <a href="INPUT/%s">
                json file</a>.""" % filename

        # Save all Results dataframe
        filename = os.sep.join([self.settings.directory, 'OUTPUT',
            'results.csv'])
        ANOVAResults(self.report.df).to_csv(filename)

        code = """from gdsctools import *
import os

def getfile(filename, where='../INPUT'):
    return os.sep.join([where, filename])

# reback the IC50 and genomic features matrices
gdsc = ANOVA(getfile('%(ic50)s'), getfile('%(gf_filename)s'),
        getfile('DRUG_DECODE.csv'))
gdsc.settings.from_json(getfile('settings.json'))
gdsc.init()

# Analyse the data
results = gdsc.anova_all()

# Create the HTML report
r = ANOVAReport(gdsc, results)
r.create_html_pages(onweb=False)"""
        code = code % {
                'ic50': 'ANOVA_input.csv',
                'gf_filename': 'genomic_features.csv'}

        filename = os.sep.join([self.settings.directory, 'code','rerun.py'])
        fh = open(filename, 'w')
        fh.write(code)
        fh.close()
예제 #16
0
파일: heatmap.py 프로젝트: biokit/biokit
    def plot(self, num=1, cmap=None, colorbar=True, vmin=None,
             vmax=None, colorbar_position='right', gradient_span='None'
             ):
        """Draw the heatmap, its dendrograms and category bars in one figure.

        :param num: number of the matplotlib figure to use; the figure is
            cleared before drawing.
        :param cmap: colormap specification; ``self.params.cmap`` is used
            when None. It is passed to ``colormap.cmap_builder`` and used
            as given if that call fails.
        :param colorbar: if true, add the color legend.
        :param vmin: lower bound of the color normalisation.
        :param vmax: upper bound of the color normalisation.
        :param colorbar_position: ``'right'`` (vertical legend) or
            ``'top left'`` (horizontal legend).
        :param gradient_span: ``'min_to_max_centered'``, ``'only_max'`` or
            ``'only_min'`` to derive ``vmin``/``vmax``; any other value
            (the default is the string ``'None'``, as in R) leaves them as
            given. In centered mode a missing ``vmin``/``vmax`` is taken
            from the data.
        :return: dictionary of the created artists, with the keys
            ``'heatmap'`` and, when drawn, ``'dendogram1'``,
            ``'dendogram2'``, ``'category_row'``, ``'category_column'``
            and ``'colorbar'``.
        :raises ValueError: if ``colorbar_position`` is not supported.

        Side effects: ``matplotlib.rcParams['font.size']`` is changed,
        ``self.gradient_span`` is set, ``self.frame`` is reordered to follow
        the dendrogram leaves, and ``self.d`` receives the reordered frame
        together with the row and column orders.

        Using::

            df = pd.DataFrame({'A':[1,0,1,1],
                               'B':[.9,0.1,.6,1],
                               'C':[.5,.2,0,1],
                               'D':[.5,.2,0,1]})

        and ::

            h = Heatmap(df)
            h.plot(vmin=0, vmax=1.1)

        we seem to get the same as in R with ::

            df = data.frame(A=c(1,0,1,1), B=c(.9,.1,.6,1), C=c(.5,.2,0,1), D=c(.5,.2,0,1))
            heatmap((as.matrix(df)), scale='none')


        .. todo:: right now, the order of cols and rows is random somehow.
            could be ordered like in heatmap (r) by mean of the row and col
            or with a set of vector for col and rows.

            heatmap((as.matrix(df)), Rowv=c(3,2), Colv=c(1), scale='none')

            gives same as::

                df = get_heatmap_df()
                h = heatmap.Heatmap(df)
                h.plot(vmin=-0, vmax=1.1)

        """
        # save all parameters in a dict
        layout = {}

        if cmap is None:
            cmap = self.params.cmap
        try:
            cmap = colormap.cmap_builder(cmap)
        except Exception:
            # Best effort: keep ``cmap`` as given when it cannot be built.
            pass

        # keep track of row and column names for later (taken before
        # self.frame is reordered below).
        row_header = self.frame.index
        column_header = self.frame.columns

        # FIXME something clever for the fontsize
        # The branches must be exclusive, otherwise the smallest size is
        # immediately overwritten by the intermediate one.
        n_labels = max(len(row_header), len(column_header))
        if n_labels > 100:
            matplotlib.rcParams['font.size'] = 6
        elif n_labels > 50:
            matplotlib.rcParams['font.size'] = 7
        else:
            matplotlib.rcParams['font.size'] = 12

        # scaling min/max range
        # min_to_max, min_to_max_centered, only_max, only_min
        self.gradient_span = gradient_span
        if self.gradient_span == 'min_to_max_centered':
            # Fall back on the data range when no bound was given.
            if vmin is None:
                vmin = self.frame.min().min()
            if vmax is None:
                vmax = self.frame.max().max()
            vmax = max([vmax, abs(vmin)])
            vmin = vmax * -1
        elif self.gradient_span == 'only_max':
            vmin = 0
            vmax = self.frame.max().max()
        elif self.gradient_span == 'only_min':
            vmin = self.frame.min().min()
            vmax = 0
        norm = matplotlib.colors.Normalize(vmin, vmax)

        # Scale the figure window size #
        fig = pylab.figure(num=num, figsize=(12, 8))
        fig.clf()

        # LAYOUT --------------------------------------------------
        # ax1 (dendrogram 1) on the left of the heatmap
        ax1_x, ax1_y, ax1_w, ax1_h = 0.05, 0.22, 0.2, 0.6
        width_between_ax1_axr = 0.004
        # distance between the top color bar axis and the matrix
        height_between_ax1_axc = 0.004
        # Sufficient size to show
        color_bar_w = 0.015

        # axr, placement of row side colorbar
        axr_x = ax1_x + ax1_w + width_between_ax1_axr
        axr_y = ax1_y
        axr_w = color_bar_w
        axr_h = ax1_h
        width_between_axr_axm = 0.004

        # axc, placement of column side colorbar
        axc_x = axr_x + axr_w + width_between_axr_axm
        axc_y = ax1_y + ax1_h + height_between_ax1_axc
        axc_w = 0.5
        axc_h = color_bar_w
        height_between_axc_ax2 = 0.004

        # axm, placement of heatmap for the data matrix
        axm_x = axr_x + axr_w + width_between_axr_axm
        axm_y = ax1_y
        axm_w = axc_w
        axm_h = ax1_h

        # ax2 (dendrogram 2), on the top of the heatmap
        ax2_x = axr_x + axr_w + width_between_axr_axm
        ax2_y = ax1_y + ax1_h + height_between_ax1_axc + axc_h + height_between_axc_ax2
        ax2_w = axc_w
        ax2_h = 0.15

        # axcb - placement of the color legend
        if colorbar_position == 'top left':
            axcb_x, axcb_y, axcb_w, axcb_h = 0.07, 0.88, 0.18, 0.09
        elif colorbar_position == 'right':
            axcb_x, axcb_y, axcb_w, axcb_h = 0.85, 0.2, 0.08, 0.6
        else:
            raise ValueError("'top left' or 'right' accepted for now")

        # COMPUTATION DENDOGRAM 1 -------------------------------------
        if self.column_method:
            Y = self.linkage(self.frame.transpose(), self.column_method,
                             self.column_metric)
            # the dendrogram is drawn in the axes that was created last
            ax2 = fig.add_axes([ax2_x, ax2_y, ax2_w, ax2_h], frame_on=True)
            Z = hierarchy.dendrogram(Y)
            ind2 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                      self.cluster_criterion)

            ax2.set_xticks([])
            ax2.set_yticks([])
            # apply the clustering for the array-dendrograms to the actual matrix data
            idx2 = Z['leaves']
            self.frame = self.frame.iloc[:, idx2]
            # reorder the flat cluster to match the order of the leaves the dendrogram
            ind2 = ind2[idx2]
            layout['dendogram2'] = ax2
        else:
            idx2 = range(self.frame.shape[1])

        # COMPUTATION DENDOGRAM 2 ---------------------------------
        if self.row_method:
            Y = self.linkage(self.frame, self.row_method, self.row_metric)

            ax1 = fig.add_axes([ax1_x, ax1_y, ax1_w, ax1_h], frame_on=True)
            Z = hierarchy.dendrogram(Y, orientation='right')
            ind1 = hierarchy.fcluster(Y, 0.7 * max(Y[:, 2]),
                                      self.cluster_criterion)

            ax1.set_xticks([])
            ax1.set_yticks([])
            # apply the clustering for the array-dendrograms to the actual matrix data
            idx1 = Z['leaves']
            self.frame = self.frame.iloc[idx1, :]
            # reorder the flat cluster to match the order of the leaves the dendrogram
            ind1 = ind1[idx1]
            layout['dendogram1'] = ax1
        else:
            idx1 = range(self.frame.shape[0])

        # HEATMAP itself
        axm = fig.add_axes([axm_x, axm_y, axm_w, axm_h])
        axm.imshow(self.frame, aspect='auto', origin='lower',
                   interpolation='None', cmap=cmap, norm=norm)
        axm.set_xticks([])
        axm.set_yticks([])
        layout['heatmap'] = axm

        # TEXT: labels are looked up in the headers saved before reordering
        n_rows, n_cols = self.frame.shape
        for i in range(n_rows):
            axm.text(n_cols - 0.5, i, '  ' + str(row_header[idx1[i]]),
                     verticalalignment="center")

        for i in range(n_cols):
            axm.text(i, -0.9, ' ' + str(column_header[idx2[i]]),
                     rotation=90, verticalalignment="top",
                     horizontalalignment="center")

        # CATEGORY column ------------------------------
        if self.category_column:
            axc = fig.add_axes([axc_x, axc_y, axc_w, axc_h])
            cmap_c = matplotlib.colors.ListedColormap(
                self.params.col_side_colors)
            category_col = [
                self.category_column[self.df.columns[i]] for i in idx2
            ]

            dc = np.array(category_col, dtype=int)
            # Sized from the list itself: the flat clusters (ind2) only
            # exist when a column clustering method is set.
            dc.shape = (1, len(category_col))
            axc.matshow(dc, aspect='auto', origin='lower', cmap=cmap_c)
            axc.set_xticks([])
            axc.set_yticks([])
            layout['category_column'] = axc

        # CATEGORY row -------------------------------
        if self.category_row:
            axr = fig.add_axes([axr_x, axr_y, axr_w, axr_h])
            # idx1 is a row order, so the category of each row is looked
            # up through the row labels (index), not the column names.
            category_row = [
                self.category_row[self.df.index[i]] for i in idx1
            ]

            dr = np.array(category_row, dtype=int)
            dr.shape = (len(category_row), 1)
            cmap_r = matplotlib.colors.ListedColormap(
                self.params.col_side_colors)
            axr.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r)
            axr.set_xticks([])
            axr.set_yticks([])
            layout['category_row'] = axr

        # COLORBAR ----------------------
        if colorbar:
            axcb = fig.add_axes([axcb_x, axcb_y, axcb_w, axcb_h],
                                frame_on=False)
            if colorbar_position == 'right':
                orientation = 'vertical'
            else:
                orientation = 'horizontal'
            cb = matplotlib.colorbar.ColorbarBase(axcb, cmap=cmap,
                                                  norm=norm,
                                                  orientation=orientation)
            layout['colorbar'] = cb

        # could be useful: keep the reordered data and both orderings
        self.d = {'ordered': self.frame.copy(), 'rorder': idx1, 'corder': idx2}

        return layout
예제 #17
0
    def _create_report(self, onweb=True):
        # A summary table
        diag = self.report.diagnostics()
        table = HTMLTable(diag, 'summary')
        txt = ''
        for index, row in diag.iterrows():
            if len(row.text) == 0 and len(row.value) == 0:
                txt += '----<br/>'
            else:
                txt += row.text + ": " + str(row.value) + "<br/>"
        self.jinja['summary'] = txt

        print('Creating volcano plots')
        # this can be pretty slow. so keep only 1000 most relevant
        # values and 1000 random ones to get an idea of the distribution
        v = VolcanoANOVA(self.report.df, settings=self.settings)
        v.selector(v.df, 1500, 1500, inplace=True)
        v.volcano_plot_all()
        v.savefig_and_js("volcano_all_js")

        self.jinja['volcano'] = """
            <h3></h3>
            <a href="volcano_all_js.html">
                <img alt="volcano plot for all associations"
                    src="volcano_all_js.png">
            </a>
            <br/>
            <p> A javascript version is available
                <a href="volcano_all_js.html">here</a> (
                or click on the image).</p>
        """

        # MANOVA link
        N = len(self.report.get_significant_set())
        self.jinja['manova'] = """
        There were %(N)s significant associations found.
        All significant associations have been gatherered
        in the following link: <br/><a href="manova.html">manova results</a>.
        """ % {
            'N': N
        }

        # feature summary
        df_features = self.report.feature_summary("feature_summary.png")
        filename = 'OUTPUT' + os.sep + 'features_summary.csv'
        df_features.to_csv(self.directory + os.sep + filename, sep=',')

        # drug summary
        #not_tested = [x for x in self.report.gdsc.drugIds if x not in
        #        self.report.df.DRUG_ID.unique()]
        #if len(not_tested) > 0:
        #    not_tested = """%s drugs were not analysed due to
        #    lack of valid data points: """ % len(not_tested) + \
        #            ", ".join(not_tested)
        #else:
        #    not_tested = ""
        not_tested = ""
        self.jinja['drug_not_tested'] = not_tested

        df_drugs = self.report.drug_summary(filename="drug_summary.png")
        get_name = self.report.drug_decode.get_name
        if len(self.report.drug_decode.df) > 0:
            df_drugs.index = [x + "-" + get_name(x) for x in df_drugs.index]
        filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
        df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

        # --------------------------- Create table with links to all drugs
        groups = self.report.df.groupby('DRUG_ID')
        try:
            df = groups.mean()['ANOVA_FEATURE_FDR'].sort_values()
        except:
            # note double brackets for pythonn3.3
            df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()
        df = df.reset_index()  # get back the Drug id in the dframe columns

        # let us add also the drug name
        df = self.report.drug_decode.drug_annotations(df)

        # let us also add number of associations computed
        counts = [len(groups.groups[k]) for k in df.DRUG_ID]
        df['Number of associations computed'] = counts
        groups = self.report.get_significant_set().groupby('DRUG_ID').groups
        count = []
        for drug in df['DRUG_ID'].values:
            if drug in groups.keys():
                count.append(len(groups[drug]))
            else:
                count.append(0)
        df['hits'] = count

        # add another set of drug_id but sorted in alpha numerical order
        table = HTMLTable(df, 'drugs')
        table.add_href('DRUG_ID')
        table.df.columns = [
            x.replace('ANOVA_FEATURE_FDR', 'mean FEATURE ANOVA FDR')
            for x in table.df.columns
        ]
        table.add_bgcolor('hits',
                          mode='max',
                          cmap=cmap_builder('white', 'orange', 'red'))

        self.jinja['drug_table'] = table.to_html(escape=False,
                                                 header=True,
                                                 index=False)

        # ---------------------- Create full table with links to all features
        df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
        try:
            df.sort_values(by='FEATURE', inplace=True)
        except:
            df.sort('FEATURE', inplace=True)

        groups = self.report.get_significant_set().groupby('FEATURE').groups

        count = []
        for feature in df['FEATURE'].values:
            if feature in groups.keys():
                count.append(len(groups[feature]))
            else:
                count.append(0)
        df['hits'] = count

        table = HTMLTable(df, 'features')
        table.sort('hits', ascending=False)
        table.add_href('FEATURE')
        table.add_bgcolor('hits',
                          mode='max',
                          cmap=cmap_builder('white', 'orange', 'red'))
        self.jinja['feature_table'] = table.to_html(escape=False,
                                                    header=True,
                                                    index=False)

        # -------------------------------------- COSMIC table for completeness
        colnames = self.report.gdsc.features._special_names
        df = self.report.gdsc.features.df[colnames]

        # TODO
        # add other columns if possible e.g., GDSC1, GDSC2, TCGA

        df = df.reset_index()
        table = HTMLTable(df)
        url = "http://cancer.sanger.ac.uk/cell_lines/sample/overview?id="
        table.add_href('COSMIC_ID', url=url, newtab=True)
        self.jinja['cosmic_table'] = table.to_html()

        # -------------------------------------- settings and INPUT files
        input_dir = self.directory + os.sep + 'INPUT'
        filename = 'ANOVA_input.csv'
        filename = os.sep.join([input_dir, filename])
        self.report.gdsc.ic50.to_csv(filename)
        filename = os.sep.join(['INPUT', 'ANOVA_input.csv'])
        self.jinja['ic50_file'] = filename

        # the genomic features, which may be the default version
        # one provided by the user. It may have been changed
        gf_filename = os.sep.join([input_dir, 'genomic_features.csv'])
        self.report.gdsc.features.to_csv(gf_filename)
        html = """Saved <a href="INPUT/genomic_features.csv">Genomic
                  Features</a> file<br/> (possibly the default
                  version)."""
        self.jinja['gf_file'] = html

        # Always save DRUG_DECODE file even if empty
        # It may be be interpreted in other pipeline or for reproducibility
        output_filename = input_dir + os.sep + 'DRUG_DECODE.csv'
        self.report.drug_decode.to_csv(output_filename)
        html = 'Get <a href="INPUT/DRUG_DECODE.csv">Drug DECODE file</a>'
        if len(self.report.drug_decode) == 0:
            html += 'Note that DRUG_DECODE file was not provided (empty?).'
        self.jinja['drug_decode'] = html

        # Save settings as json file
        filename = os.sep.join([input_dir, 'settings.json'])
        self.settings.to_json(filename)
        filename = os.path.basename(filename)
        self.jinja['settings'] = \
                """Get the settings as a <a href="INPUT/%s">
                json file</a>.""" % filename

        # Save all Results dataframe
        filename = os.sep.join(
            [self.settings.directory, 'OUTPUT', 'results.csv'])
        ANOVAResults(self.report.df).to_csv(filename)

        code = """from gdsctools import *
import os

def getfile(filename, where='../INPUT'):
    return os.sep.join([where, filename])

# reback the IC50 and genomic features matrices
gdsc = ANOVA(getfile('%(ic50)s'), getfile('%(gf_filename)s'),
        getfile('DRUG_DECODE.csv'))
gdsc.settings.from_json(getfile('settings.json'))
gdsc.init()

# Analyse the data
results = gdsc.anova_all()

# Create the HTML report
r = ANOVAReport(gdsc, results)
r.create_html_pages(onweb=False)"""
        code = code % {
            'ic50': 'ANOVA_input.csv',
            'gf_filename': 'genomic_features.csv'
        }

        filename = os.sep.join([self.settings.directory, 'code', 'rerun.py'])
        fh = open(filename, 'w')
        fh.write(code)
        fh.close()
예제 #18
0
    def plot(self,
             num=1,
             cmap=None,
             colorbar=True,
             figsize=(12, 8),
             fontsize=None):
        """Plot a dendrogram of the rows of :attr:`frame`.

        The rows are clustered with :meth:`linkage`, drawn as a dendrogram
        (leaves on the y-axis) and :attr:`frame` is reordered in place to
        follow the order of the leaves. If :attr:`category` is set, a
        coloured side bar showing the category of each row is added.

        :param num: matplotlib figure number.
        :param cmap: colormap (or colormap name); defaults to
            ``self.params.cmap``.
        :param colorbar: not used by this method.
        :param figsize: size of the figure.
        :param fontsize: if set, overrides the font size otherwise chosen
            from the number of rows.

        Using as input::

            df = pd.DataFrame({'A':[1,0,1,1],
                               'B':[.9,0.1,.6,1],
                            'C':[.5,.2,0,1],
                            'D':[.5,.2,0,1]})

        .. plot::
            :include-source:
            :width: 80%

            from sequana.viz import heatmap
            df = heatmap.get_heatmap_df()
            h = heatmap.Heatmap(df)
            h.category_row['A'] = 1
            h.category_row['C'] = 1
            h.category_row['D'] = 2
            h.category_row['B'] = 2
            h.plot()


        """
        # save all parameters in a dict
        layout = {}

        if cmap is None:
            cmap = self.params.cmap
        try:
            cmap = colormap.cmap_builder(cmap)
        except Exception:
            # best effort: keep cmap as provided if it cannot be built
            pass

        # keep track of row names for later.
        header = self.frame.index

        # FIXME something clever for the fontsize
        # The more labels, the smaller the font. An if/elif chain is
        # required here: independent "if" statements would let the last
        # test overwrite the sizes chosen for the larger thresholds.
        n_labels = len(header)
        if n_labels > 100:
            auto_size = 6
        elif n_labels > 50:
            auto_size = 7
        elif n_labels > 30:
            auto_size = 8
        else:
            auto_size = 12
        # note that this changes matplotlib's global settings
        matplotlib.rcParams['font.size'] = fontsize if fontsize else auto_size

        # scaling min/max range

        # Scale the figure window size #
        fig = pylab.figure(num=num, figsize=figsize)
        fig.clf()

        linkage_matrix = self.linkage(self.frame, self.method, self.metric)

        Z = hierarchy.dendrogram(linkage_matrix,
                                 orientation='right',
                                 color_threshold=0,
                                 above_threshold_color="k",
                                 distance_sort="descending")
        ind1 = hierarchy.fcluster(linkage_matrix,
                                  0.7 * max(linkage_matrix[:, 2]),
                                  self.cluster_criterion)

        # apply the clustering for the array-dendrograms to the actual matrix data
        idx1 = Z['leaves']

        # Rearrange the data frame in the order of the dendogram
        self.frame = self.frame.iloc[idx1, :]
        ticks = pylab.yticks()[0]
        pylab.yticks(ticks, self.frame.index)
        pylab.tight_layout()

        # reorder the flat cluster to match the order of the leaves the dendrogram
        ind1 = ind1[idx1]

        if self.category:
            gca = pylab.gca()
            # corners of the dendrogram axes in figure coordinates:
            # (x0, y0) and (x1, y1)
            lower_left, upper_right = gca.get_position().get_points()
            f = pylab.gcf()
            # thin vertical strip spanning the height of the dendrogram
            ax = f.add_axes([lower_left[0], lower_left[1], 0.02,
                             upper_right[1] - lower_left[1]])

            # NOTE(review): categories follow self.df.index, not the
            # reordered self.frame.index -- confirm this is intended.
            category = [self.category[x] for x in self.df.index]
            dr = np.array(category, dtype=int)
            dr.shape = (len(category), 1)
            cmap_r = matplotlib.colors.ListedColormap(self.params.side_colors)
            ax.matshow(dr, aspect='auto', origin='lower', cmap=cmap_r)
            ax.set_xticks([])
            ax.set_yticks([])
예제 #19
0
    def add_features(self):
        """Add the feature and drug summaries and tables to :attr:`jinja`.

        Saves the feature and drug summaries as CSV files under ``OUTPUT``
        and, unless the results dataframe is empty, fills
        ``jinja['drug_table']`` and ``jinja['feature_table']`` with HTML
        tables linking to the individual association pages.
        """
        # feature summary
        df_features = self.report.feature_summary("feature_summary.png")
        filename = 'OUTPUT' + os.sep + 'features_summary.csv'
        df_features.to_csv(self.directory + os.sep + filename, sep=',')

        not_tested = ""
        self.jinja['drug_not_tested'] = not_tested

        df_drugs = self.report.drug_summary(filename="drug_summary.png")
        get_name = self.report.drug_decode.get_name
        if len(self.report.drug_decode.df) > 0:
            df_drugs.index = ["{}-{}".format(x, get_name(x))
                              for x in df_drugs.index]
        filename = 'OUTPUT' + os.sep + 'drugs_summary.csv'
        df_drugs.to_csv(self.directory + os.sep + filename, sep=',')

        # nothing to tabulate without any association
        if len(self.report.df) == 0:
            return

        # --------------------------- Create table with links to all drugs
        groups = self.report.df.groupby('DRUG_ID')
        try:
            df = groups.mean()['ANOVA_FEATURE_FDR'].sort_values()
        except AttributeError:
            # old pandas versions have no sort_values();
            # note double brackets for python3.3
            df = groups.mean()[['ANOVA_FEATURE_FDR']].sort()

        df = df.reset_index() # get back the Drug id in the dframe columns
        # let us add also the drug name
        df = self.report.drug_decode.drug_annotations(df)

        # let us also add number of associations computed
        counts = [len(groups.groups[k]) for k in df.DRUG_ID]
        df['Number of associations computed'] = counts

        # number of significant associations per drug (0 if none)
        significant = \
            self.report.get_significant_set().groupby('DRUG_ID').groups
        df['hits'] = [len(significant[drug]) if drug in significant else 0
                      for drug in df['DRUG_ID'].values]

        # add another set of drug_id but sorted in alpha numerical order
        table = HTMLTable(df, 'drugs')
        table.add_href('DRUG_ID', url="associations/drug_", suffix=".html")
        table.df.columns = [x.replace('ANOVA_FEATURE_FDR',
            'mean FEATURE ANOVA FDR') for x in table.df.columns]
        table.add_bgcolor('hits', mode='max',
                cmap=cmap_builder('white', 'orange', 'red'))

        self.jinja['drug_table'] = table.to_html(escape=False,
                header=True, index=False)

        # ---------------------- Create full table with links to all features
        df = pd.DataFrame({'FEATURE': self.report.df['FEATURE'].unique()})
        try:
            df.sort_values(by='FEATURE', inplace=True)
        except AttributeError:
            # old pandas versions have no sort_values()
            df.sort('FEATURE', inplace=True)

        # number of significant associations per feature (0 if none)
        significant = \
            self.report.get_significant_set().groupby('FEATURE').groups
        df['hits'] = [len(significant[feature])
                      if feature in significant else 0
                      for feature in df['FEATURE'].values]

        table = HTMLTable(df, 'features')
        table.sort('hits', ascending=False)
        table.add_href('FEATURE', url="associations/", suffix=".html")
        table.add_bgcolor('hits', mode='max',
                cmap=cmap_builder('white', 'orange', 'red'))
        self.jinja['feature_table'] = table.to_html(escape=False,
                header=True, index=False)
예제 #20
0
파일: htmltable.py 프로젝트: damca/reports
    def add_bgcolor(self, colname, cmap='copper', mode='absmax',
            threshold=2):
        """Change column content into HTML paragraph with background color

        :param colname: name of the column of :attr:`df` to be coloured.
        :param cmap: a colormap (matplotlib) or created using
            colormap package (from pypi).
        :param mode: type of normalisation in 'absmax', 'max', 'clip'
            (see details below)
        :param threshold: used if mode is set to 'clip'

        Colormaps take values between 0 and 1, so the data must be
        normalised between 0 and 1. There are 3 modes to normalise the
        data so far.

        If mode is set to 'absmax', negative and positive values are
        expected to be found in a range from -inf to inf. Values are
        scaled in between [0,1] using X' = (X / M + 1) / 2, where M is the
        absolute maximum. Ideally a colormap should be made of 3 colors,
        the first color used for negative values, the second for zeros and
        the third color for positive values.

        If mode is set to 'clip', values are clipped to a max value
        (parameter *threshold*) and values are normalised by that same
        threshold.

        If mode is set to 'max', values are normalised by the max.

        Any other mode leaves the values un-normalised. The column is
        modified in place: each cell becomes an HTML ``<p>`` string.
        Nothing is done if the column is empty.

        """
        try:
            # if a cmap is provided, it may be just a known cmap name
            cmap = cmap_builder(cmap)
        except Exception:
            # best effort: cmap is used as provided (e.g. already a
            # colormap instance)
            pass

        data = self.df[colname].values

        if len(data) == 0:
            return

        if mode == 'clip':
            data = [min(x, threshold)/float(threshold) for x in data]
        elif mode == 'absmax':
            m = abs(data.min())
            M = abs(data.max())
            M = max([m, M])
            # all-zero columns are left as is to avoid a division by zero
            if M != 0:
                data = (data / M + 1)/2.
        elif mode == 'max':
            if data.max() != 0:
                data = data / float(data.max())

        # the expected RGB values for a given data point
        rgbcolors = [cmap(x)[0:3] for x in data]
        hexcolors = [rgb2hex(*x, normalised=True) for x in rgbcolors]

        # need to read original data again
        data = self.df[colname].values
        # need to set precision since this is going to be a text not a number
        # so pandas will not use the precision for those cases:

        def prec(x):
            try:
                # this may fail if for instance x is nan or inf
                return easydev.precision(x, self.pd_options['precision'])
            except Exception:
                # fall back on the raw value
                return x

        data = [prec(x) for x in data]
        html_formatter = '<p style="background-color:{0}">{1}</p>'
        self.df[colname] = [html_formatter.format(x, y)
                for x, y in zip(hexcolors, data)]
예제 #21
0
    def plot(self,
             fig=None,
             grid=True,
             rotation=30,
             lower=None,
             upper=None,
             shrink=0.9,
             axisbg='white',
             colorbar=True,
             label_color='black',
             fontsize='small',
             edgecolor='black',
             method='ellipse',
             order_method='complete',
             order_metric='euclidean',
             cmap=None,
             ax=None,
             binarise_color=False):
        """plot the correlation matrix from the content of :attr:`df`
        (dataframe)

        By default, the correlation is shown on the upper and lower triangle and is
        symmetric wrt to the diagonal. The symbols are ellipses. The symbols can
        be changed to e.g. rectangle. The symbols are shown on upper and lower sides but
        you could choose a symbol for the upper side and another for the lower side using
        the **lower** and **upper** parameters.

        :param fig: Create a new figure by default. If an instance of an existing
            figure is provided, the corrplot is overlayed on the figure provided.
            Can also be the number of the figure.
        :param grid: add grid (Defaults to grey color). You can set it to False or a color.
        :param rotation: rotation of the labels shown along the x-axis.
        :param lower: if set to a valid method, plots the data on the lower
            triangle
        :param upper: if set to a valid method, plots the data on the upper
            triangle
        :param float shrink: maximum space used (in percent) by a symbol.
            If negative values are provided, the absolute value is taken.
            If greater than 1, the symbols will overlap.
        :param axisbg: color of the background (defaults to white).
        :param colorbar: add the colorbar (defaults to True).
        :param str label_color: (defaults to black).
        :param fontsize: size of the fonts defaults to 'small'.
        :param edgecolor: stored in :attr:`edgecolor` (defaults to black).
        :param method: shape to be used in 'ellipse', 'square', 'rectangle',
            'color', 'text', 'circle',  'number', 'pie'.

        :param order_method: see :meth:`order`.
        :param order_metric: see :meth:`order`.
        :param cmap: a valid cmap from matplotlib or colormap package (e.g.,
            'jet', or 'copper'). Default is red/white/blue colors.
        :param ax: a matplotlib axes.
        :param binarise_color: stored in :attr:`binarise_color` before the
            patches are added (presumably read by :meth:`_add_patches`).

        The colorbar can be tuned with the parameters stored in :attr:`params`.

        Here is an example. See notebook for other examples::

            c = corrplot.Corrplot(dataframe)
            c.plot(cmap=('Orange', 'white', 'green'))
            c.plot(method='circle')
            c.plot(colorbar=False, shrink=.8, upper='circle'  )

        """
        # default
        if cmap is not None:
            try:
                # a single name, or a sequence of colors to be combined
                if isinstance(cmap, str):
                    self.cm = cmap_builder(cmap)
                else:
                    self.cm = cmap_builder(*cmap)
            except Exception:
                print("incorrect cmap. Use default one")
                self._set_default_cmap()
        else:
            self._set_default_cmap()

        self.shrink = abs(shrink)
        self.fontsize = fontsize
        self.edgecolor = edgecolor

        df = self.order(method=order_method, metric=order_metric)

        # figure can be a number or an instance; otherwise creates it
        if isinstance(fig, int):
            fig = plt.figure(num=fig, facecolor=axisbg)
        elif fig is not None:
            fig = plt.figure(num=fig.number, facecolor=axisbg)
        else:
            fig = plt.figure(num=None, facecolor=axisbg)

        # do we have an axes to plot the data in ?
        if ax is None:
            ax = plt.subplot(1, 1, 1, aspect='equal', axisbg=axisbg)
        else:
            # if so, clear the axes. Colorbar cannot be removed easily.
            plt.sca(ax)
            ax.clear()

        # subplot resets the bg color, let us set it again
        fig.set_facecolor(axisbg)

        width, height = df.shape
        labels = (df.columns)

        # add all patches to the figure
        # TODO check value of lower and upper

        # Select which triangle(s) are drawn. The last two branches cover
        # every case where exactly one of upper/lower is usable.
        if upper is None and lower is None:
            mode = 'method'
        elif upper and lower:
            mode = 'both'
        elif lower is not None:
            mode = 'lower'
        elif upper is not None:
            mode = 'upper'

        self.binarise_color = binarise_color
        if mode == 'upper':
            self._add_patches(df, upper, 'upper', ax, diagonal=True)
        elif mode == 'lower':
            self._add_patches(df, lower, 'lower', ax, diagonal=True)
        elif mode == 'method':
            self._add_patches(df, method, 'both', ax, diagonal=True)
        elif mode == 'both':
            # the diagonal is left empty when two different symbols are used
            self._add_patches(df, upper, 'upper', ax, diagonal=False)
            self._add_patches(df, lower, 'lower', ax, diagonal=False)

        # shift the limits to englobe the patches correctly
        ax.set_xlim(-0.5, width - .5)
        ax.set_ylim(-0.5, height - .5)

        # set xticks/xlabels on top
        ax.xaxis.tick_top()
        xtickslocs = np.arange(len(labels))
        ax.set_xticks(xtickslocs)
        ax.set_xticklabels(labels,
                           rotation=rotation,
                           color=label_color,
                           fontsize=fontsize,
                           ha='left')

        ax.invert_yaxis()
        ytickslocs = np.arange(len(labels))
        ax.set_yticks(ytickslocs)
        ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
        plt.tight_layout()

        if grid is not False:
            if grid is True:
                grid = 'grey'
            for i in range(0, width):
                # fractions of the axes used to stop the grid lines at the
                # diagonal in the 'lower' and 'upper' modes
                ratio1 = float(i) / width
                ratio2 = float(i + 2) / width
                # TODO 1- set axis off
                # 2 - set xlabels along the diagonal
                # set colorbar either on left or bottom
                if mode == 'lower':
                    plt.axvline(i + .5, ymin=1 - ratio1, ymax=0., color=grid)
                    plt.axhline(i + .5, xmin=0, xmax=ratio2, color=grid)
                if mode == 'upper':
                    plt.axvline(i + .5, ymin=1 - ratio2, ymax=1, color=grid)
                    plt.axhline(i + .5, xmin=ratio1, xmax=1, color=grid)
                if mode in ['method', 'both']:
                    plt.axvline(i + .5, color=grid)
                    plt.axhline(i + .5, color=grid)

            # can probably be simplified
            if mode == 'lower':
                # outer border of the triangle
                plt.axvline(-.5, ymin=0, ymax=1, color='grey')
                plt.axvline(width - .5,
                            ymin=0,
                            ymax=1. / width,
                            color='grey',
                            lw=2)
                plt.axhline(width - .5, xmin=0, xmax=1, color='grey', lw=2)
                plt.axhline(-.5, xmin=0, xmax=1. / width, color='grey', lw=2)
                plt.xticks([])
                # labels are written along the diagonal and on the left
                for i in range(0, width):
                    plt.text(i,
                             i - .6,
                             labels[i],
                             fontsize=fontsize,
                             color=label_color,
                             rotation=rotation,
                             verticalalignment='bottom')
                    plt.text(-.6,
                             i,
                             labels[i],
                             fontsize=fontsize,
                             color=label_color,
                             rotation=0,
                             horizontalalignment='right')
                plt.axis('off')
            # can probably be simplified
            elif mode == 'upper':
                # outer border of the triangle
                plt.axvline(width - .5, ymin=0, ymax=1, color='grey', lw=2)
                plt.axvline(-.5,
                            ymin=1 - 1. / width,
                            ymax=1,
                            color='grey',
                            lw=2)
                plt.axhline(-.5, xmin=0, xmax=1, color='grey', lw=2)
                plt.axhline(width - .5,
                            xmin=1 - 1. / width,
                            xmax=1,
                            color='grey',
                            lw=2)
                plt.yticks([])
                # labels are written along the diagonal and on the top
                for i in range(0, width):
                    plt.text(-.6 + i,
                             i,
                             labels[i],
                             fontsize=fontsize,
                             color=label_color,
                             horizontalalignment='right',
                             rotation=0)
                    plt.text(i,
                             -.5,
                             labels[i],
                             fontsize=fontsize,
                             color=label_color,
                             rotation=rotation,
                             verticalalignment='bottom')
                plt.axis('off')

        # set all ticks length to zero
        ax = plt.gca()
        ax.tick_params(axis='both', which='both', length=0)

        if colorbar:
            from mpl_toolkits.axes_grid1 import make_axes_locatable
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.00)
            N = self.params['colorbar.N'] + 1
            assert N >= 2
            cb = plt.gcf().colorbar(
                self.collection,
                cax=cax,
                orientation=self.params['colorbar.orientation'],
                # shrink=self.params['colorbar.shrink'],
                boundaries=np.linspace(0, 1, N),
                ticks=[0, .25, 0.5, 0.75, 1])
            # colors are in [0, 1] but the labels show correlations in [-1, 1]
            cb.ax.set_yticklabels([-1, -.5, 0, .5, 1])
            # make sure it goes from -1 to 1 even though actual values may
            # not reach that range
            cb.set_clim(0, 1)
예제 #22
0
파일: corrplot.py 프로젝트: cokelaer/biokit
    def plot(self, num=1, grid=True,
            rotation=30, colorbar_width=10, lower=None, upper=None,
            shrink=0.9, axisbg='white', colorbar=True, label_color='black',
            fontsize='small', edgecolor='black', method='ellipse', order=None,
            cmap=None
            ):
        """plot the correlation matrix from the content of :attr:`df`
        (dataframe)

        :param grid: add grid (Defaults to True)
        :param rotation: rotate labels on y-axis
        :param lower: if set to a valid method, plots the data on the lower
            left triangle
        :param upper: if set to a valid method, plots the data on the upper
            left triangle
        :param method: shape to be used in 'ellipse', 'square', 'rectangle', 
            'color', 'text', 'circle',  'number', 'pie'.
        :param cmap: a valid cmap from matplotlib of colormap package (e.g.,
        jet, or 

        Here are some examples provided that the data is created and pass to c::

            c = corrplot.Corrplor(dataframe)
            c.plot(cmap=('Orange', 'white', 'green'))
            c.plot(method='circle')
            c.plot(colorbar=False, shrink=.8, upper='circle'  )


        """

        # default
        if cmap != None:
            try:
                if isinstance(cmap, str):
                    self.cm = cmap_builder(cmap)
                else:
                    self.cm = cmap_builder(*cmap)
            except:
                print("incorrect cmap. Use default one")
                self._set_default_cmap()
        else:
            self._set_default_cmap()

        self.shrink = shrink
        self.fontsize = fontsize
        self.edgecolor = edgecolor

        if order == 'hclust':
            df = self.order(method='hclust')
        else:
            df = self.df

        plt.clf()
        fig = plt.figure(num=num, facecolor=axisbg)

        ax = plt.subplot(1, 1, 1, aspect='equal', axisbg=axisbg)
        # subplot resets the bg color, let us set it again
        fig.set_facecolor(axisbg)

        width, height = df.shape
        labels = (df.columns)

        # add all patches to the figure
        # TODO check value of lower and upper

        if upper is None and lower is None:
            mode = 'method'
            diagonal = True
        elif upper and lower:
            mode = 'both'
            diagonal = False
        elif lower is not None:
            mode = 'lower'
            diagonal = True
        elif upper is not None:
            mode = 'upper'
            diagonal = True
        else:
            raise ValueError

        if mode == 'upper':
            self._add_patches(df, upper, 'upper',  ax, diagonal=True)
        elif mode == 'lower':
            self._add_patches(df, lower, 'lower',  ax, diagonal=True)
        elif mode == 'method':
            self._add_patches(df, method, 'both',  ax, diagonal=True)
        elif mode == 'both':
            self._add_patches(df, upper, 'upper',  ax, diagonal=False)
            self._add_patches(df, lower, 'lower',  ax, diagonal=False)

        # shift the limits to englobe the patches correctly
        ax.set_xlim(-0.5, width-.5)
        ax.set_ylim(-0.5, height-.5)

        # set xticks/xlabels on top
        ax.xaxis.tick_top()
        xtickslocs = np.arange(len(labels))
        ax.set_xticks(xtickslocs)
        ax.set_xticklabels(labels, rotation=rotation, color=label_color,
                fontsize=fontsize, ha='left')

        ax.invert_yaxis()
        ytickslocs = np.arange(len(labels))
        ax.set_yticks(ytickslocs)
        ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
        plt.tight_layout()

        if grid is True:
            for i in range(0, width):
                ratio1 = float(i)/width
                ratio2 = float(i+2)/width
                # TODO 1- set axis off
                # 2 - set xlabels along the diagonal
                # set colorbar either on left or bottom
                if mode == 'lower':
                    plt.axvline(i+.5, ymin=1-ratio1, ymax=0., color='grey')
                    plt.axhline(i+.5, xmin=0, xmax=ratio2, color='grey')
                if mode == 'upper':
                    plt.axvline(i+.5, ymin=1 - ratio2, ymax=1, color='grey')
                    plt.axhline(i+.5, xmin=ratio1, xmax=1, color='grey')
                if mode in ['method', 'both']:
                    plt.axvline(i+.5, color='grey')
                    plt.axhline(i+.5, color='grey')

            # can probably be simplified
            if mode == 'lower':
                plt.axvline(-.5, ymin=0, ymax=1, color='grey')
                plt.axvline(width-.5, ymin=0, ymax=1./width, color='grey', lw=2)
                plt.axhline(width-.5, xmin=0, xmax=1, color='grey',lw=2)
                plt.axhline(-.5, xmin=0, xmax=1./width, color='grey',lw=2)
                plt.xticks([])
                for i in range(0, width):
                    plt.text(i, i-.6 ,labels[i],fontsize=fontsize,
                            color=label_color,
                            rotation=rotation, verticalalignment='bottom')
                    plt.text(-.6, i ,labels[i],fontsize=fontsize,
                            color=label_color,
                            rotation=0, horizontalalignment='right')
                plt.axis('off')
            # can probably be simplified
            elif mode == 'upper':
                plt.axvline(width-.5, ymin=0, ymax=1, color='grey', lw=2)
                plt.axvline(-.5, ymin=1-1./width, ymax=1, color='grey', lw=2)
                plt.axhline(-.5, xmin=0, xmax=1, color='grey',lw=2)
                plt.axhline(width-.5, xmin=1-1./width, xmax=1, color='grey',lw=2)
                plt.yticks([])
                for i in range(0, width):
                    plt.text(-.6+i, i ,labels[i],fontsize=fontsize,
                            color=label_color, horizontalalignment='right',
                            rotation=0)
                    plt.text(i, -.5 ,labels[i],fontsize=fontsize,
                            color=label_color, rotation=rotation, verticalalignment='bottom')
                plt.axis('off')

        # set all ticks length to zero
        ax = plt.gca()
        ax.tick_params(axis='both',which='both', length=0)

        if colorbar:
            N = self.params['colorbar.N']
            cb = plt.gcf().colorbar(self.collection,
                    orientation=self.params['colorbar.orientation'], shrink=.9,
                boundaries= np.linspace(0,1,N), ticks=[0,.25, 0.5, 0.75,1])
            cb.ax.set_yticklabels([-1,-.5,0,.5,1])
            cb.set_clim(0,1) # make sure it goes from -1 to 1 even though actual values may not reach that range