예제 #1
0
def create_mask(data, use_sig=True, p_value=0.1, fold_change_cutoff=1.5):
    """Split data into the three regions of a volcano plot.

    Visual layout of the regions; region 0 holds the measurements that meet
    the significance criteria::

        #    0    #    1   #      0     #
        #         #        #            #
        #################################
        #         #        #            #
        #    2    #    2   #      2     #
        #         #        #            #
        #################################

    Only the p-value, fold-change and significance-flag columns are kept.
    P-values are converted to ``-log10`` and the fold-change column is passed
    through ``log2_normalize_df`` before the regions are computed.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain the p-value, fold-change and significance-flag columns.
    use_sig : bool
        If True, split only on the significance-flag column and ignore
        ``p_value`` and ``fold_change_cutoff``.
    p_value : float
        p-value threshold (untransformed), only used if ``use_sig`` is False
    fold_change_cutoff : float
        fold change threshold (untransformed), only used if ``use_sig`` is
        False

    Returns
    -------
    sec_0 : pd.DataFrame
        Rows that are significant (flagged, or passing both thresholds).
    sec_1 : pd.DataFrame or None
        Rows passing the p-value threshold but below the fold-change
        threshold. None when ``use_sig`` is True.
    sec_2 : pd.DataFrame
        Rows that are not flagged (``use_sig=True``) or that fail the p-value
        threshold (``use_sig=False``).

    """
    # Select with a list: a tuple indexer is treated as a single key when the
    # columns are a MultiIndex, a list always means "these columns".
    tmp = data.loc[:, [p_val, fold_change, flag]].copy()

    # convert p-values to -log10 scale
    tmp[p_val] = -1 * np.log10(tmp[p_val])

    # convert fold change to log2 space
    tmp = log2_normalize_df(tmp, column=fold_change)

    if use_sig:
        # NOTE(review): assumes the flag column is boolean - confirm upstream
        is_significant = tmp[flag]
        return tmp[is_significant], None, tmp[~is_significant]

    # Thresholds are moved into the same transformed space as the data. They
    # get their own names so the ``p_value`` argument is not overwritten.
    fc_threshold = np.log2(fold_change_cutoff)
    p_threshold = -1 * np.log10(p_value)

    abs_fc = np.abs(tmp[fold_change])
    passes_p = tmp[p_val] >= p_threshold

    sec_0 = tmp[passes_p & (abs_fc >= fc_threshold)]
    # explicit "<" (not a negated mask) so NaN fold changes fall in no region
    sec_1 = tmp[passes_p & (abs_fc < fc_threshold)]
    sec_2 = tmp[tmp[p_val] < p_threshold]
    return sec_0, sec_1, sec_2
예제 #2
0
    def create_histogram_measurements(self,
                                      exp_data_type,
                                      save_name=None,
                                      y_range=None,
                                      out_dir=None):
        """
        Plots a histogram of log2 fold change for one experiment type

        Rows with a missing p-value or a non-finite fold change are dropped
        before plotting. Counts are shown on a log10 y axis.

        Parameters
        ----------
        exp_data_type: str
            Which data to plot
        save_name: str
            Name of figure. The figure is only saved if this is provided.
        out_dir: str, path
            Path to location to save figure
        y_range: array_like
            Range of data to show, as (lower, upper). Despite the name this
            is applied to the x axis (the fold change axis).


        Returns
        -------
        matplotlib.figure.Figure or None
            None is returned if the experiment type check fails.

        """

        if not self._check_experiment_type_existence(exp_type=exp_data_type):
            return
        data = self.data[self.data[exp_method] == exp_data_type].copy()
        data = data.dropna(subset=[p_val])
        # np.isfinite is False for NaN as well as +/-inf, so this also
        # removes missing fold changes.
        data = data[np.isfinite(data[fold_change])]

        values = np.array(log2_normalize_df(data, fold_change)[fold_change])

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(values, bins=50, color='gray')
        if y_range is not None:
            # set limits on this axes explicitly rather than on whatever
            # pyplot considers the current axes
            ax.set_xlim(y_range[0], y_range[1])

        # Base 10 is the default for log scales. The ``basey`` keyword was
        # removed in Matplotlib 3.5, so it is not passed.
        ax.set_yscale('log')
        ax.set_xlabel('log$_2$ Fold Change', fontsize=16)
        ax.set_ylabel('Count', fontsize=16)
        fig.tight_layout()
        if save_name is not None:
            v_plot.save_plot(fig, save_name, out_dir)
        return fig
예제 #3
0
    def create_histogram_measurements(self, save_name=None,
                                      y_range=None, out_dir=None):
        """
        Plots a histogram of log2 fold change

        Rows with a missing p-value or a non-finite fold change are dropped
        before plotting. Counts are shown on a log10 y axis.

        Parameters
        ----------
        save_name: str
            Name of figure. The figure is only saved if this is provided.
        out_dir: str, path
            Path to location to save figure
        y_range: array_like
            Range of data to show, as (lower, upper). Despite the name this
            is applied to the x axis (the fold change axis).


        Returns
        -------
        matplotlib.figure.Figure

        """
        data = self.copy()
        data = data.dropna(subset=[p_val])
        # np.isfinite is False for NaN as well as +/-inf, so this also
        # removes missing fold changes.
        data = data[np.isfinite(data[fold_change])]

        values = np.array(log2_normalize_df(data, fold_change)[fold_change])

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(values, bins=50, color='gray')
        if y_range is not None:
            # set limits on this axes explicitly rather than on whatever
            # pyplot considers the current axes
            ax.set_xlim(y_range[0], y_range[1])

        # Base 10 is the default for log scales. The ``basey`` keyword was
        # removed in Matplotlib 3.5, so it is not passed.
        ax.set_yscale('log')
        ax.set_xlabel('log$_2$ Fold Change', fontsize=16)
        ax.set_ylabel('Count', fontsize=16)
        fig.tight_layout()
        if save_name is not None:
            v_plot.save_plot(fig, save_name, out_dir)
        return fig
예제 #4
0
    def plot_histogram(self, save_name=None, y_range=None, out_dir=None):
        """
        Plots a histogram of log2 fold change

        Rows with a missing p-value or a non-finite fold change are dropped
        before plotting. Counts are shown on a log10 y axis.

        Parameters
        ----------
        save_name: str
            Name of figure. The figure is only saved if this is provided.
        out_dir: str, path
            Path to location to save figure
        y_range: array_like
            Range of data to show, as (lower, upper). Despite the name this
            is applied to the x axis (the fold change axis).


        Returns
        -------
        matplotlib.figure.Figure

        """
        data = self.copy()
        data = data.dropna(subset=[p_val])
        # np.isfinite is False for NaN as well as +/-inf, so this also
        # removes missing fold changes.
        data = data[np.isfinite(data[fold_change])]

        values = np.array(log2_normalize_df(data, fold_change)[fold_change])

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(values, bins=50, color='gray')
        if y_range is not None:
            # set limits on this axes explicitly rather than on whatever
            # pyplot considers the current axes
            ax.set_xlim(y_range[0], y_range[1])

        # Base 10 is the default for log scales. The ``basey`` keyword was
        # removed in Matplotlib 3.5, so it is not passed.
        ax.set_yscale('log')
        ax.set_xlabel('log$_2$ Fold Change', fontsize=16)
        ax.set_ylabel('Count', fontsize=16)
        fig.tight_layout()
        if save_name is not None:
            v_plot.save_plot(fig, save_name, out_dir)
        return fig
예제 #5
0
def plot_species(df, species_list=None, save_name='test', out_dir=None,
                 title=None, plot_type='plotly', image_format='pdf',
                 close_plots=False):
    """
    Plot log2 fold change across samples for each species.

    The data is grouped by identifier and then by label; each
    (identifier, label) group becomes one line, with its flagged
    (significant) points drawn again using a distinct marker.

    Parameters
    ----------
    df: pandas.DataFrame
        magine formatted dataframe
    species_list: list
        List of genes to be plotted. If None, all rows are used.
    save_name: str
        Filename to be saved as
    out_dir: str
        Path for output to be saved. Created if it does not exist.
    title: str
        Title of plot, useful when list of genes corresponds to a GO term
    plot_type : str
        Use plotly to generate html output or matplotlib to generate pdf
    image_format : str
        pdf or png, only used if plot_type="matplotlib"
    close_plots : bool
        Close plot after making, use when creating lots of plots in parallel.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure is returned only when plot_type="matplotlib" and
        close_plots is False. None is returned in every other case,
        including when there are no sample ids to plot.

    """
    # local import keeps this function self-contained
    from itertools import cycle

    ldf = df.copy()

    if out_dir is not None and not os.path.exists(out_dir):
        # makedirs (unlike mkdir) also creates missing parent directories
        os.makedirs(out_dir)

    # gather x axis points
    x_points = sorted(ldf[sample_id].unique())
    if len(x_points) == 0:
        return

    # Float sample ids are plotted at their own value; anything else is
    # plotted at its rank in sorted order. ``np.float`` was only an alias of
    # the builtin ``float`` and was removed in NumPy 1.24.
    if isinstance(x_points[0], float):
        x_point_dict = {point: point for point in x_points}
    else:
        x_point_dict = {point: pos for pos, point in enumerate(x_points)}

    if species_list is not None:
        ldf = ldf.loc[ldf[identifier].isin(species_list)].copy()

    ldf = log2_normalize_df(ldf, column=fold_change)

    n_plots = len(ldf[identifier].unique())
    num_colors = len(ldf[label_col].unique())
    color_list = sns.color_palette("tab20", num_colors)
    if plot_type == 'matplotlib':
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_prop_cycle(plt.cycler('color', color_list))

    # The palette is sized by the number of unique labels, but one colour is
    # consumed per (identifier, label) group. Cycling avoids running out of
    # colours when the same label occurs under several identifiers.
    colors = cycle(color_list)

    plotly = []
    names_list = []
    total_counter = 0
    for name, species_df in ldf.groupby(identifier):
        index_counter = 0
        for label_name, label_df in species_df.groupby(label_col):

            x = np.array(label_df[sample_id])
            if len(x) < 1:
                continue
            # read the same column that was log2 normalized above
            y = np.array(label_df[fold_change])
            sig_flag = np.array(label_df[flag])

            # order points by sample id
            order = np.argsort(x)
            x = x[order]
            y = y[order]
            s_flag = sig_flag[order]

            # x values with scaled values (only changes things if non-float
            # values are used for sample_id)
            x_index = np.array([x_point_dict[ind] for ind in x])

            index_counter += 1
            total_counter += 1

            # create matplotlib plot
            if plot_type == 'matplotlib':
                label = "\n".join(wrap(label_name, 40))
                line = ax.plot(x_index, y, '.-', label=label)
                if len(s_flag) != 0:
                    # re-draw the flagged points in the line's colour
                    color = line[0].get_color()
                    ax.plot(x_index[s_flag], y[s_flag], '^', color=color)

            # create plotly plot
            elif plot_type == 'plotly':
                c = next(colors)
                plotly.append(
                    _ploty_graph(x_index, y, label_name, label_name, c)
                )
                if len(s_flag) != 0:
                    # the marker trace counts as an extra trace
                    index_counter += 1
                    total_counter += 1
                    plotly.append(
                        _ploty_graph(x_index[s_flag], y[s_flag], label_name,
                                     label_name, c, marker='x-open-dot')
                    )
        names_list.append([name, index_counter])

    if plot_type == 'matplotlib':
        lgd = _format_mpl(ax, x_point_dict, x_points)
        if save_name is not None:
            tmp_savename = "{}.{}".format(save_name, image_format)
            if out_dir is not None:
                tmp_savename = os.path.join(out_dir, tmp_savename)
            # save this figure explicitly rather than pyplot's current figure
            fig.savefig(tmp_savename, bbox_extra_artists=(lgd,),
                        bbox_inches='tight')

        if close_plots:
            plt.close(fig)
        else:
            return fig

    elif plot_type == 'plotly':
        fig = _create_plotly(total_counter, n_plots, names_list, x_point_dict,
                             title, x_points, plotly)
        if save_name:
            _save_ploty_output(fig, out_dir, save_name)
        else:
            # no file name given: display inline in a notebook instead
            init_notebook_mode(connected=True)
            iplot(fig)
예제 #6
0
def plot_species(df, species_list=None, save_name='test', out_dir=None,
                 title=None, plot_type='plotly', image_format='pdf',
                 close_plots=False):
    """
    Plot log2 fold change across samples for each species.

    The data is grouped by identifier and then by label; each
    (identifier, label) group becomes one line, with its flagged
    (significant) points drawn again using a distinct marker.

    Parameters
    ----------
    df: pandas.DataFrame
        magine formatted dataframe
    species_list: list
        List of genes to be plotted. If None, all rows are used.
    save_name: str
        Filename to be saved as
    out_dir: str
        Path for output to be saved. Created if it does not exist.
    title: str
        Title of plot, useful when list of genes corresponds to a GO term
    plot_type : str
        Use plotly to generate html output or matplotlib to generate pdf
    image_format : str
        pdf or png, only used if plot_type="matplotlib"
    close_plots : bool
        Close plot after making, use when creating lots of plots in parallel.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure is returned only when plot_type="matplotlib" and
        close_plots is False. None is returned in every other case,
        including when there are no sample ids to plot.

    """
    # local import keeps this function self-contained
    from itertools import cycle

    ldf = df.copy()

    if out_dir is not None and not os.path.exists(out_dir):
        # makedirs (unlike mkdir) also creates missing parent directories
        os.makedirs(out_dir)

    # gather x axis points
    x_points = sorted(ldf[sample_id].unique())
    if len(x_points) == 0:
        return

    # Float sample ids are plotted at their own value; anything else is
    # plotted at its rank in sorted order. ``np.float`` was only an alias of
    # the builtin ``float`` and was removed in NumPy 1.24.
    if isinstance(x_points[0], float):
        x_point_dict = {point: point for point in x_points}
    else:
        x_point_dict = {point: pos for pos, point in enumerate(x_points)}

    if species_list is not None:
        ldf = ldf.loc[ldf[identifier].isin(species_list)].copy()

    ldf = log2_normalize_df(ldf, column=fold_change)

    n_plots = len(ldf[identifier].unique())
    num_colors = len(ldf[label_col].unique())
    color_list = sns.color_palette("tab20", num_colors)
    if plot_type == 'matplotlib':
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_prop_cycle(plt.cycler('color', color_list))

    # The palette is sized by the number of unique labels, but one colour is
    # consumed per (identifier, label) group. Cycling avoids running out of
    # colours when the same label occurs under several identifiers.
    colors = cycle(color_list)

    plotly = []
    names_list = []
    total_counter = 0
    for name, species_df in ldf.groupby(identifier):
        index_counter = 0
        for label_name, label_df in species_df.groupby(label_col):

            x = np.array(label_df[sample_id])
            if len(x) < 1:
                continue
            # read the same column that was log2 normalized above
            y = np.array(label_df[fold_change])
            sig_flag = np.array(label_df[flag])

            # order points by sample id
            order = np.argsort(x)
            x = x[order]
            y = y[order]
            s_flag = sig_flag[order]

            # x values with scaled values (only changes things if non-float
            # values are used for sample_id)
            x_index = np.array([x_point_dict[ind] for ind in x])

            index_counter += 1
            total_counter += 1

            # create matplotlib plot
            if plot_type == 'matplotlib':
                label = "\n".join(wrap(label_name, 40))
                line = ax.plot(x_index, y, '.-', label=label)
                if len(s_flag) != 0:
                    # re-draw the flagged points in the line's colour
                    color = line[0].get_color()
                    ax.plot(x_index[s_flag], y[s_flag], '^', color=color)

            # create plotly plot
            elif plot_type == 'plotly':
                c = next(colors)
                plotly.append(
                    _ploty_graph(x_index, y, label_name, label_name, c)
                )
                if len(s_flag) != 0:
                    # the marker trace counts as an extra trace
                    index_counter += 1
                    total_counter += 1
                    plotly.append(
                        _ploty_graph(x_index[s_flag], y[s_flag], label_name,
                                     label_name, c, marker='x-open-dot')
                    )
        names_list.append([name, index_counter])

    if plot_type == 'matplotlib':
        lgd = _format_mpl(ax, x_point_dict, x_points)
        if save_name is not None:
            tmp_savename = "{}.{}".format(save_name, image_format)
            if out_dir is not None:
                tmp_savename = os.path.join(out_dir, tmp_savename)
            # save this figure explicitly rather than pyplot's current figure
            fig.savefig(tmp_savename, bbox_extra_artists=(lgd,),
                        bbox_inches='tight')

        if close_plots:
            plt.close(fig)
        else:
            return fig

    elif plot_type == 'plotly':
        fig = _create_plotly(total_counter, n_plots, names_list, x_point_dict,
                             title, x_points, plotly)
        if save_name:
            _save_ploty_output(fig, out_dir, save_name)
        else:
            # no file name given: display inline in a notebook instead
            init_notebook_mode(connected=True)
            iplot(fig)