Python plot_all 예제들, ezplotly.plot_all Python 예제들

예제 #1

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def qqplot(
    data: Sequence[float],
    distr: str = "norm",
    sparams: List[Any] = (),
    title: Optional[str] = None,
    name: Optional[str] = None,
    marker_color: str = "blue",
    line_color: str = "red",
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[Tuple[EZPlotlyPlot, EZPlotlyPlot]]:
    """
    Build a qq-plot of data against a hypothesis distribution.

    The quantile pairs and fit parameters are taken from `sc.probplot`. The pairs are drawn as a scatter
    series ("Expected" vs "Observed") and the fit as a line between the first and last expected quantile.

    :param data: The data to use as `Sequence[float]`
    :param distr: The hypothetical null distribution as `str`
    :param sparams: Distributional params as `List[Any]`
    :param title: The title of the plot as `Optional[str]`
    :param name: The name of the series as `Optional[str]`
    :param marker_color: The color of a marker as `str`
    :param line_color: The color of the line as `str`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: If specified, save to this file as `Optional[str]`
    :return:
        `Optional[Tuple[EZPlotlyPlot, EZPlotlyPlot]]` of (scatter plot, line plot) when withhold is set,
        otherwise nothing is returned and the plots are passed to `ep.plot_all`
    """
    probplot_out = sc.probplot(data, dist=distr, sparams=sparams)
    quantile_pairs = probplot_out[0]
    fit_params = probplot_out[1]
    expected = quantile_pairs[0]
    observed = quantile_pairs[1]

    # the scatter series keeps the caller's name as given (possibly None)
    pts_scatter = ep.scattergl(
        x=expected,
        y=observed,
        title=title,
        xlabel="Expected",
        ylabel="Observed",
        marker_size=5,
        marker_color=marker_color,
        name=name,
    )

    # the fit line only needs its two end points: fit_params[0] scales x, fit_params[1] is added
    line_x = np.array([expected[0], expected[-1]])
    line_prefix = "" if name is None else name
    line_plot = ep.line(
        x=line_x,
        y=fit_params[1] + fit_params[0] * line_x,
        width=3,
        color=line_color,
        title=title,
        name=(line_prefix + " (distribution=" + distr + ")"),
    )

    # withhold (return) or plot both series in the same panel
    if withhold:
        return pts_scatter, line_plot
    ep.plot_all(plots=[pts_scatter, line_plot],
                panels=[1, 1],
                outfile=outfile)
    return None

예제 #2

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def chr_distr(
    data: pd.DataFrame,
    chr_df: pd.DataFrame,
    distr_name: str,
    title: Optional[str] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[EZPlotlyPlot]:
    """
    Bar plot of per-chromosome p-values for the fit of data to a named distribution.

    For every unique chromosome value, the matching rows of data are passed to `sc.kstest` together
    with distr_name, and the second element of the result is plotted as that chromosome's p-value.

    :param data: The data in a `pd.DataFrame`
    :param chr_df: Chrs specified in a `pd.DataFrame`
        (NOTE(review): `.unique()` is called on it, so presumably a single column / Series - confirm)
    :param distr_name: The distribution name as `str`
    :param title: The title of the plot as `Optional[str]`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: The file to write plot to as `Optional[str]`
    :return:
        `Optional[EZPlotlyPlot]` representing plot (only returned when withhold is set)
    """
    chromosomes = chr_df.unique()

    # one p-value per chromosome, in the order the chromosomes first appear
    pvals = np.zeros((len(chromosomes), ))
    for slot, chrom in enumerate(chromosomes):
        test_result = sc.kstest(data[chr_df == chrom], distr_name)
        pvals[slot] = test_result[1]

    bar_plot = ep.bar(
        pvals,
        x=chromosomes,
        title=title,
        xlabel="Chromosome",
        ylabel="P-value (Null: Data is " + distr_name + ")",
        ylim=[0, 1],
    )

    # withhold (return) or plot
    if withhold:
        return bar_plot
    ep.plot_all([bar_plot], outfile=outfile)
    return None

예제 #3

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def chr_rolling_median(
    chr_pos_df: pd.DataFrame,
    chr_val_df: pd.DataFrame,
    rollwinsize: int,
    ylabel: Optional[str] = None,
    title: Optional[str] = None,
    xlim: Optional[List[int]] = None,
    ylim: Optional[List[int]] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[EZPlotlyPlot]:
    """
    Line plot of the rolling median of chromosome values against chromosome position.

    :param chr_pos_df: Chromosome positions in a dataframe as `pd.DataFrame`
    :param chr_val_df: Chromosome values in a dataframe as `pd.DataFrame`
    :param rollwinsize: The size of the rolling window as `int`
    :param ylabel: The y-axis label of the plot as `Optional[str]`
    :param title: The title of the plot as `Optional[str]`
    :param xlim: The x-axis limits [x_left_lim, x_right_lim] as `Optional[List[int]]`
    :param ylim: The y-axis limits [y_left_lim, y_right_lim] as `Optional[List[int]]`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: The file to write plot to as `Optional[str]`
    :return:
        `Optional[EZPlotlyPlot]` representing rolling median plot (only returned when withhold is set)
    """
    # x is the raw position array; y is the windowed median of the values
    positions = chr_pos_df.values
    rolling_median = chr_val_df.rolling(rollwinsize).median()

    line_plot = ep.line(
        x=positions,
        y=rolling_median,
        title=title,
        xlabel="Chr Position",
        ylabel=ylabel,
        xlim=xlim,
        ylim=ylim,
    )

    # plot unless the caller asked for the plot object back
    if not withhold:
        ep.plot_all([line_plot], outfile=outfile)
        return None
    return line_plot

예제 #4

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def chr_count(
    bool_vals: pd.DataFrame,
    chr_df: pd.DataFrame,
    title: Optional[str] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[EZPlotlyPlot]:
    """
    Bar plot with one bar per chromosome, each bar being the sum of the booleans for that chromosome.

    :param bool_vals: Dataframe of booleans as `pd.DataFrame`
    :param chr_df: Chrs specified in a `pd.DataFrame`
        (NOTE(review): `.unique()` is called on it, so presumably a single column / Series - confirm)
    :param title: The title of the plot as `Optional[str]`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: The file to write plot to as `Optional[str]`
    :return:
        `Optional[EZPlotlyPlot]` representing chr bar plot (only returned when withhold is set)
    """
    chromosomes = chr_df.unique()

    # tally the True entries chromosome by chromosome
    counts = np.zeros((len(chromosomes), ))
    for slot, chrom in enumerate(chromosomes):
        counts[slot] = np.sum(bool_vals[chr_df == chrom])

    bar_plot = ep.bar(
        counts,
        x=chromosomes,
        title=title,
        xlabel="Chromosome",
        ylabel="Count",
    )

    # withhold (return) or plot
    if withhold:
        return bar_plot
    ep.plot_all([bar_plot], outfile=outfile)
    return None

예제 #5

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def manhattan_plot(
    df: pd.DataFrame,
    title: Optional[str] = None,
    height: Optional[int] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[List[EZPlotlyPlot]]:
    """
    Plot a manhattan plot of p-values from a pandas dataframe containing chromosome position p-values.
    Makes a scatter series separately for each chromosome specified in dataframe.

    Chromosomes are visited in the fixed order chr1..chr22, chrX, chrY; chromosomes with no rows in df
    are skipped. Each series is shifted along x by the sum of the maximum positions of the chromosomes
    plotted before it, and its y values are -log10(pval).

    :param df: A `pd.DataFrame` containing columns (chr, pos, pval).
    :param title: The title of the overall plot as `Optional[str]`
    :param height: The height of the plot as `Optional[int]`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: The file to write plot to as `Optional[str]`
    :return:
        figs_list: `Optional[List[EZPlotlyPlot]]` of figures containing produced EZPlotlyPlot set
        (only returned when withhold is set)
    """

    # candidate chromosomes, in plotting order
    chr_names = ["chr" + str(i) for i in range(1, 23)]
    chr_names.append("chrX")
    chr_names.append("chrY")

    # one panel entry per candidate chromosome; every entry assigns panel 1
    panels = [1] * len(chr_names)

    # prepare manhattan plots per chromosome
    running_pos = 0
    figs_list = list()
    for chr_name in chr_names:
        chr_df = df[df.chr == chr_name]
        if len(chr_df) == 0:
            continue
        positions = chr_df["pos"].values
        figs_list.append(
            ep.scattergl(
                positions + running_pos,
                -1 * np.log10(chr_df["pval"].values),
                name=chr_name,
                ylabel="-log10(p)",
                title=title,
                marker_size=3,
            ))

        # the next chromosome starts where this one's largest position ends
        running_pos = running_pos + np.max(positions)

    # withhold (return) or plot
    if withhold:
        return figs_list
    ep.plot_all(
        figs_list,
        panels=panels,
        numcols=1,
        showlegend=True,
        height=height,
        outfile=outfile,
    )
    return None

예제 #6

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def nonparametric_ci(
    x: np.array,
    y_data: np.array,
    conf: float = 0.95,
    ci_plot_type: str = "line",
    color: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    title: Optional[str] = None,
    name: Optional[str] = None,
    xscale: Optional[str] = None,
    yscale: Optional[str] = None,
    x_dtick: Optional[float] = None,
    y_dtick: Optional[float] = None,
    xlim: Optional[List[float]] = None,
    ylim: Optional[List[float]] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Union[List[EZPlotlyPlot], EZPlotlyPlot]:
    """
    Plot a data series together with its non-parametric (i.e. N%) confidence interval.

    The center series and the lower/upper limits come from `yr.nonparametric_ci`. With
    ci_plot_type "line" the limits are drawn as two dotted lines next to the center line; with
    "point" they are passed to a single line plot as `ucl` / `lcl` distances from the center.

    :param x: x data series as `np.array[float]`
    :param y_data: y data series as `np.array[float]`
    :param conf: The % confidence interval as `float`
    :param ci_plot_type: The confidence interval plot type as `str` (either line or point)
    :param color: The line or marker color as `Optional[str]`
    :param xlabel: X-axis label as `Optional[str]`
    :param ylabel: Y-axis label as `Optional[str]`
    :param title: Plot title as `Optional[str]`
    :param name: The name of the scatter plot as `Optional[str]` (useful for plotting series)
    :param xscale: The scale of the x-axis ('log', 'linear') as `Optional[str]`
    :param yscale: The scale of the y-axis ('log', 'linear') as `Optional[str]`
    :param x_dtick: The plotting delta tick (i.e. tick length) of the x-axis as `Optional[float]`
    :param y_dtick: The plotting delta tick (i.e. tick length) of the y-axis as `Optional[float]`
    :param xlim: The x-axis limits [x_left_lim, x_right_lim] as `Optional[List[float]]`
    :param ylim: The y-axis limits [y_left_lim, y_right_lim] as `Optional[List[float]]`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: If specified, save to this file as `Optional[str]`
    :return:
        When withhold is set: a `List[EZPlotlyPlot]` of [center, lower, upper] for "line", or a single
        `EZPlotlyPlot` for "point". Otherwise nothing is returned and the plot is rendered.
    :raises ValueError: If ci_plot_type is neither "line" nor "point"
    """
    # compute confidence intervals
    center, lower, upper = yr.nonparametric_ci(data=y_data, conf=conf)

    # keyword arguments shared by the center line in both plot types
    center_kwargs = dict(
        color=color,
        name=name,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        xscale=xscale,
        yscale=yscale,
        x_dtick=x_dtick,
        y_dtick=y_dtick,
        xlim=xlim,
        ylim=ylim,
    )

    if ci_plot_type == "line":
        # both limit lines share one legend name
        ci_suffix = " (CI=" + str(conf * 100.0) + "%" + ")"
        ci_name = ci_suffix if name is None else name + ci_suffix

        lower_line = ep.line(x=x, y=lower, color=color, dash="dot", name=ci_name)
        upper_line = ep.line(x=x, y=upper, color=color, dash="dot", name=ci_name)
        center_line = ep.line(x=x, y=center, **center_kwargs)
        figs_list = [center_line, lower_line, upper_line]

        # make plot (or withhold)
        if withhold:
            return figs_list
        ep.plot_all(figs_list, panels=[1, 1, 1], outfile=outfile)
        return None

    if ci_plot_type == "point":
        # limits are handed over as absolute distances from the center series
        plot = ep.line(
            x=x,
            y=center,
            ucl=np.abs(upper - center),
            lcl=np.abs(lower - center),
            **center_kwargs,
        )

        # make plot (or withhold)
        if withhold:
            return plot
        ep.plot_all([plot], outfile=outfile)
        return None

    raise ValueError("ci_plot_type must be line or point.")

예제 #7

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def corr_plot(
    x: np.array,
    y: np.array,
    error_y: Optional[Sequence[float]] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    title: Optional[str] = None,
    name: Optional[str] = None,
    xscale: Optional[str] = None,
    yscale: Optional[str] = None,
    x_dtick: Optional[float] = None,
    y_dtick: Optional[float] = None,
    xlim: Optional[List[float]] = None,
    ylim: Optional[List[float]] = None,
    plot_type: str = "scatter",
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[EZPlotlyPlot]:
    """
    Plot two data vectors x and y against each other and show their correlation in the legend.

    The correlation is computed with `pd.core.nanops.nancorr`, rounded to 3 decimals, and embedded in
    the series name.

    :param x: First data vector as 1-D `np.array`
    :param y: Second data vector as 1-D `np.array`
    :param error_y: Error bar length as `Optional[Sequence[float]]`
    :param xlabel: X-axis label as `Optional[str]`
    :param ylabel: Y-axis label as `Optional[str]`
    :param title: Plot title as `Optional[str]`
    :param name: The name of the scatter plot as `Optional[str]` (useful for plotting series)
    :param xscale: The scale of the x-axis ('log', 'linear') as `Optional[str]`
    :param yscale: The scale of the y-axis ('log', 'linear') as `Optional[str]`
    :param x_dtick: The plotting delta tick (i.e. tick length) of the x-axis as `Optional[float]`
    :param y_dtick: The plotting delta tick (i.e. tick length) of the y-axis as `Optional[float]`
    :param xlim: The x-axis limits [x_left_lim, x_right_lim] as `Optional[List[float]]`
    :param ylim: The y-axis limits [y_left_lim, y_right_lim] as `Optional[List[float]]`
    :param plot_type: The plot type as `str` (either scatter or line)
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: If specified, save to this file as `Optional[str]`
    :return:
        `Optional[EZPlotlyPlot]` representing correlation plot (only returned when withhold is set)
    :raises ValueError: If plot_type is neither "scatter" nor "line"
    """

    # legend text carrying the correlation value
    corr_val = pd.core.nanops.nancorr(x, y)
    corr_label = "corr=" + str(np.round(corr_val, 3))
    if name is None:
        series_name = corr_label
    else:
        series_name = name + " (" + corr_label + ")"

    # pick the plotting function; both take the same keyword arguments here
    if plot_type == "scatter":
        make_plot = ep.scattergl
    elif plot_type == "line":
        make_plot = ep.line
    else:
        raise ValueError("plot_type must be {scatter, line}.")

    plot = make_plot(
        x=x,
        y=y,
        error_y=error_y,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        name=series_name,
        xscale=xscale,
        yscale=yscale,
        x_dtick=x_dtick,
        y_dtick=y_dtick,
        xlim=xlim,
        ylim=ylim,
    )

    # make plot (or withhold)
    if withhold:
        return plot
    ep.plot_all(plots=[plot], outfile=outfile, showlegend=True)
    return None

예제 #8

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def rcdf(
    data: np.array,
    min_bin: Optional[float] = None,
    max_bin: Optional[float] = None,
    bin_size: Optional[float] = None,
    title: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    norm: bool = False,
    name: Optional[str] = None,
    xscale: Optional[str] = None,
    yscale: Optional[str] = None,
    x_dtick: Optional[List[int]] = None,
    y_dtick: Optional[List[int]] = None,
    color: Optional[str] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[EZPlotlyPlot]:
    """
    Line plot of the reverse cumulative frequency distribution of data.

    The data is histogrammed over bins from min_bin to max_bin in steps of bin_size. At each left bin
    edge the plot shows one minus the cumulative fraction up to and including that bin, either as a
    fraction (norm) or scaled back by the total histogram count (not norm).

    :param data: The data to plot as `np.array`
    :param min_bin: The left bin edge of the estimating histogram as `Optional[float]`
        (defaults to the NaN-ignoring minimum of data)
    :param max_bin: The right bin edge of the estimating histogram as `Optional[float]`
        (defaults to the NaN-ignoring maximum of data)
    :param bin_size: The bin size of a bin in the estimating histogram as `Optional[float]` (defaults to 1)
    :param title: The title of the plot as `Optional[str]`
    :param xlabel: The x-axis label as `Optional[str]`
    :param ylabel: The y-axis label as `Optional[str]`
        (defaults to "CDF" when norm is set, otherwise "Reverse Cum Freq")
    :param norm: Whether to normalize the CDF or not as `bool`
    :param name: The name of the series as `Optional[str]`
    :param xscale: The scale of the x-axis ('log', 'linear') as `Optional[str]`
    :param yscale: The scale of the y-axis ('log', 'linear') as `Optional[str]`
    :param x_dtick: The plotting delta tick (i.e. tick length) of the x-axis
    :param y_dtick: The plotting delta tick (i.e. tick length) of the y-axis
    :param color: The color of the line as `Optional[str]`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: If specified, save to this file as `Optional[str]`
    :return:
        `Optional[EZPlotlyPlot]` represent rcdf (only returned when withhold is set)
    """

    # default binning
    lo = np.nanmin(data) if min_bin is None else min_bin
    hi = np.nanmax(data) if max_bin is None else max_bin
    step = 1 if bin_size is None else bin_size

    # histogram data and produce cdf
    requested_edges = np.arange(lo, hi + step, step)
    counts, bin_edges = np.histogram(data, bins=requested_edges)
    total = np.sum(counts)
    cdf = np.cumsum(counts / total)

    # the two variants differ only in y values, default y label and the fixed y range
    if norm:
        fallback_ylabel = "CDF"
        y_vals = np.round(1.0 - cdf, 5)
        extra_kwargs = {"ylim": [0, 1.0]}
    else:
        fallback_ylabel = "Reverse Cum Freq"
        y_vals = np.round(total * (1.0 - cdf), 5)
        extra_kwargs = {}
    if ylabel is None:
        ylabel = fallback_ylabel

    cdf_line = ep.line(
        x=bin_edges[:-1],
        y=y_vals,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        xlim=[lo, hi + step],
        name=name,
        xscale=xscale,
        yscale=yscale,
        x_dtick=x_dtick,
        y_dtick=y_dtick,
        color=color,
        **extra_kwargs,
    )

    # make plot (or withhold)
    if withhold:
        return cdf_line
    ep.plot_all(plots=[cdf_line], outfile=outfile)
    return None

예제 #9

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def roc(
    preds: np.array,
    gt: np.array,
    panel: int = 1,
    names: Optional[List[str]] = None,
    title: Optional[str] = None,
    xscale: Optional[str] = None,
    yscale: Optional[str] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[Tuple[List[EZPlotlyPlot], List[int]]]:
    """
    Make Receiver Operating Characteristic (ROC) curves, one per prediction row, plus a chance diagonal.

    Each curve's legend entry carries its AUC (from `yr.auc`, rounded to 3 decimals). All curves are
    assigned to the same panel.

    :param preds: The raw prediction values as an N x L matrix np.array[float] where N is the number of methods and L
        is the number of labels.
    :param gt: The ground truth labels as an (L,) np.array[int]
    :param panel: The panel to plot the ROC curve as `int`
    :param names: The names of the methods as `Optional[List[str]]`
    :param title: The title of the plot as `Optional[str]`
    :param xscale: The scale of the x-axis ('log', 'linear') as `Optional[str]`
    :param yscale: The scale of the y-axis ('log', 'linear') as `Optional[str]`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: If specified, save to this file as `Optional[str]`
    :return:
        plots: List[EZPlotlyPlot] of generated plots (chance curve first, then one per method)
        panels: List[int] of panels for plots
        (only returned when withhold is set)
    """

    # chance curve: the diagonal from (0, 0) to (1, 1); it also carries the axis labels and title
    chance_curve = ep.line(
        x=np.arange(0.0, 1.01, 0.01),
        y=np.arange(0.0, 1.01, 0.01),
        width=2,
        name="Chance Curve",
        color="black",
        xlabel="False Positive Rate",
        ylabel="True Positive Rate",
        title=title,
        xlim=[0, 1.01],
        ylim=[0, 1.01],
        xscale=xscale,
        yscale=yscale,
        x_dtick=0.1,
        y_dtick=0.1,
    )
    plots = [chance_curve]

    # one ROC curve per predictor (row of preds)
    for row in range(preds.shape[0]):
        fpr, tpr = yr.roc(preds[row, :], gt)
        auc_label = "AUC=" + str(np.round(yr.auc(fpr, tpr), 3))
        if names is None:
            curve_name = auc_label
        else:
            curve_name = names[row] + "(" + auc_label + ")"
        plots.append(
            ep.line(
                x=fpr,
                y=tpr,
                width=2,
                name=curve_name,
                xlim=[0, 1.0],
                ylim=[0, 1.01],
                xscale=xscale,
                yscale=yscale,
                x_dtick=0.1,
                y_dtick=0.1,
            ))

    # every plot goes to the same panel
    panels = [panel] * len(plots)

    # make plot (or withhold)
    if withhold:
        return plots, panels
    ep.plot_all(plots=plots,
                panels=panels,
                outfile=outfile,
                showlegend=True)
    return None

예제 #10

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def chr_qq(
    df: pd.DataFrame,
    data_col_name: str,
    distr: str = "norm",
    sparams: List = (),
    title: Optional[str] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[List[EZPlotlyPlot]]:
    """
    Makes up to 24 qq plots for data, 1 for each chromosome present in df (chr1..chr22, chrX, chrY).
    Specify a pandas dataframe with columns (chr, data_col_name).
    Specify what the hypothetical distribution is using distr and sparams.

    Each chromosome contributes two plots (scatter and fit line from `qqplot`) that share one panel.

    :param df: `pd.DataFrame` of data containing columns (chr, data_col_name)
    :param data_col_name: The data column name in df as `str`
    :param distr: The hypothetical null distribution as `str`
    :param sparams: Distributional params as `List`
    :param title: The title of the plot as `Optional[str]`
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: If specified, save to this file as `Optional[str]`
    :return:
        `Optional[List[EZPlotlyPlot]]` of plots representing each chromosome qq-plot
        (only returned when withhold is set)
    """

    # candidate chromosomes in display order
    chrs_list = ["chr" + str(i) for i in range(1, 23)] + ["chrX", "chrY"]

    # chromosomes that actually occur in the data
    present = df["chr"].unique()

    # make chromosome level qq-plots
    plots = []
    panels = []
    next_panel = 1
    for chrom in chrs_list:
        if chrom not in present:
            continue
        values = df[data_col_name][df["chr"] == chrom].values
        scatter_part, line_part = qqplot(values,
                                         sparams=sparams,
                                         distr=distr,
                                         title=chrom,
                                         withhold=True)

        # scatter and fit line of one chromosome share a panel
        plots.extend([scatter_part, line_part])
        panels.extend([next_panel, next_panel])
        next_panel += 1

    # make plot (or withhold)
    if withhold:
        return plots
    ep.plot_all(
        plots,
        panels=panels,
        numcols=5,
        height=1000,
        title=title,
        chrpacked=True,
        outfile=outfile,
    )
    return None

예제 #11

0

파일 보기

파일: ezplotly_bio.py 프로젝트: prateekt/EasyPlotly

def chr_hist(
    df: pd.DataFrame,
    data_col_name: str,
    min_bin: Optional[float] = None,
    max_bin: Optional[float] = None,
    bin_size: Optional[int] = None,
    title: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    histnorm: Optional[str] = None,
    x_dtick: Optional[List[int]] = None,
    y_dtick: Optional[List[int]] = None,
    withhold: bool = False,
    outfile: Optional[str] = None,
) -> Optional[List[EZPlotlyPlot]]:
    """
    Makes up to 24 histograms of data, 1 for each chromosome present in df (chr1..chr22, chrX, chrY).
    Data is passed in as a pandas dataframe with columns (chr, data_col_name).

    :param df: The data frame containing columns (chr, data_col_name) as `pd.DataFrame`
    :param data_col_name: The data column to histogram as `str`
    :param min_bin: The left bin edge of the histogram as `Optional[float]`
    :param max_bin: The right bin edge of the histogram as `Optional[float]`
    :param bin_size: The size of a histogram bin as `Optional[int]`
    :param title: The title of the plot as `Optional[str]`
    :param xlabel: The x-axis label as `Optional[str]` (defaults to data_col_name)
    :param ylabel: The y-axis label as `Optional[str]` (defaults to "Count")
    :param histnorm: The normalization scheme to use as `Optional[str]`
    :param x_dtick: The plotting delta tick (i.e. tick length) of the x-axis
    :param y_dtick: The plotting delta tick (i.e. tick length) of the y-axis
    :param withhold: Whether to plot the plot or withhold and return the plot to the user as `bool`
    :param outfile: If specified, save to this file as `Optional[str]`
    :return:
        `Optional[List[EZPlotlyPlot]]` representing figures list of histograms
        (only returned when withhold is set)
    """

    # labels
    xlabel = data_col_name if xlabel is None else xlabel
    ylabel = "Count" if ylabel is None else ylabel

    # candidate chromosomes in display order
    chrs_list = ["chr" + str(i) for i in range(1, 23)] + ["chrX", "chrY"]

    # chromosomes that actually occur in the data
    present = df["chr"].unique()

    # one histogram per chromosome that has data, titled with the chromosome name
    hists = [
        ep.hist(
            df[data_col_name][df["chr"] == chrom],
            title=chrom,
            xlabel=xlabel,
            min_bin=min_bin,
            max_bin=max_bin,
            bin_size=bin_size,
            ylabel=ylabel,
            color="#1ad1ff",
            histnorm=histnorm,
            x_dtick=x_dtick,
            y_dtick=y_dtick,
        )
        for chrom in chrs_list
        if chrom in present
    ]

    # make plot (or withhold)
    if withhold:
        return hists
    ep.plot_all(hists,
                numcols=5,
                title=title,
                chrpacked=True,
                outfile=outfile)
    return None