Python unique Examples

Programming Language: Python

Namespace/Package Name: gseapy.parser

Method/Function: unique

Examples at hotexamples.com: 2

Python unique - 2 examples found. These are the top-rated real-world Python examples of gseapy.parser.unique, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: plot.py Project: ranikay/GSEApy

def dotplot(df, cutoff=0.05, figsize=(3.5, 6), top_term=10, scale=1):
    """Visualize enrichr results as a dot plot.

    Each retained term becomes one dot: x is -log10 of the adjusted p-value,
    the dot area grows with the term's hit count and the colour encodes the
    rounded 'Combined Score'.

    :param df: GSEApy DataFrame results. The code reads 'Term' and
               'Combined Score' plus either 'Overlap' and 'Adjusted P-value'
               (enrichr) or 'fdr', 'matched_size' and 'gene_set_size' (gsea).
               The caller's DataFrame is left unmodified.
    :param cutoff: p-adjust cut-off; terms with a larger value are dropped.
    :param figsize: figure size handed to matplotlib.
    :param top_term: number of enriched terms to show.
    :param scale: dotplot point size scale.
    :return: a matplotlib figure holding the dotplot, or None when no term
             passes the cut-off.

    """
    # Work on a private copy: the original renamed/added columns directly on
    # the frame owned by the caller.
    df = df.copy()

    if 'fdr' in df.columns:
        # gsea results
        df = df.rename(columns={'fdr': 'Adjusted P-value'})
        df['hits_ratio'] = df['matched_size'] / df['gene_set_size']
        # Dot sizes are driven by 'Count'; without this the gsea branch died
        # with a KeyError further down.
        # NOTE(review): matched_size is assumed to be the hit count - confirm.
        df['Count'] = df['matched_size']
    else:
        # enrichr results: 'Overlap' looks like "<hits>/<background>"
        overlap = df['Overlap'].str.split("/")
        df['Count'] = overlap.str[0].astype(int)
        df['Background'] = overlap.str[1].astype(int)
        df['hits_ratio'] = df['Count'] / df['Background']

    # pvalue cut off
    df = df[df['Adjusted P-value'] <= cutoff]

    if len(df) < 1:
        logging.warning("Warning: No enrich terms when cuttoff = %s", cutoff)
        return None

    # Sorted by descending p-value, the most significant terms come last, so
    # tail() keeps the `top_term` best ones (head() kept the worst) and the
    # best term ends up at the top of the y axis.
    df = df.sort_values(by='Adjusted P-value', ascending=False)
    df = df.tail(top_term)

    # x axis values
    x = -df['Adjusted P-value'].apply(np.log10)
    combined_score = df['Combined Score'].round().astype('int')
    # y axis index and values
    y = list(range(len(df)))
    labels = df.Term.values

    # Plain numpy arrays so that argmax/argmin below are unambiguously
    # positional, whatever the DataFrame index looks like after filtering.
    counts = df['Count'].values
    area = np.pi * (counts * scale) ** 2

    # create scatter plot
    if hasattr(sys, 'ps1'):
        # working inside python console, show figure
        fig, ax = plt.subplots(figsize=figsize)
    else:
        # If working on commandline, don't show figure
        fig = Figure(figsize=figsize)
        FigureCanvas(fig)  # the canvas registers itself on the figure
        ax = fig.add_subplot(111)

    # The percentile of a single number is that number, so the colour range
    # is simply the min/max of the scores.
    vmin = combined_score.min()
    vmax = combined_score.max()
    sc = ax.scatter(x=x,
                    y=y,
                    s=area,
                    edgecolors='face',
                    c=combined_score,
                    cmap=plt.cm.RdBu,
                    vmin=vmin,
                    vmax=vmax)
    ax.set_xlabel("-log$_{10}$(Adjust P-value)", fontsize=16)
    ax.yaxis.set_major_locator(plt.FixedLocator(y))
    ax.yaxis.set_major_formatter(plt.FixedFormatter(labels))
    ax.set_yticklabels(labels, fontsize=16)
    ax.grid()

    # colorbar
    cax = fig.add_axes([0.93, 0.20, 0.07, 0.22])
    cbar = fig.colorbar(
        sc,
        cax=cax,
    )
    # tick_params expects a bool; the string 'off' is truthy.
    cbar.ax.tick_params(right=False)
    cbar.ax.set_title('Com-\nscore', loc='left', fontsize=12)

    # Choose the rows shown in the size legend (positions into `area`).
    if len(df) >= 3:
        # largest dot, the dot closest to the mean area, smallest dot
        idx = unique([
            area.argmax(),
            np.abs(area - area.mean()).argmin(),
            area.argmin(),
        ])
    else:
        # fewer than 3 terms: show all of them
        idx = list(range(len(df)))

    # scale of dots: one empty scatter per legend entry, sized like the
    # corresponding plotted dot so the legend matches the plot.
    ax2 = fig.add_axes([0.93, 0.55, 0.09, 0.06 * len(idx)])
    handles = [
        ax2.scatter([], [], s=area[i], edgecolors='none') for i in idx
    ]
    legend_labels = [str(counts[i]) for i in idx]
    # `nrow` is not a legend keyword and raised TypeError; entries already
    # stack in a single column by default.
    ax.legend(handles,
              legend_labels,
              frameon=True,
              fontsize=12,
              handlelength=2,
              loc=8,
              borderpad=1.8,
              handletextpad=1,
              title='Gene\nRatio',
              scatterpoints=1)

    return fig

Example #2

Show file

File: plot.py Project: jiawu/GSEApy

def dotplot(df,
            column='Adjusted P-value',
            title='',
            cutoff=0.05,
            top_term=10,
            sizes=None,
            norm=None,
            legend=True,
            figsize=(6, 5.5),
            cmap='RdBu_r',
            ofname=None,
            **kwargs):
    """Visualize enrichr results as a dot plot.

    :param df: GSEApy DataFrame results. The code reads the 'Term',
               'Overlap' and 'Combined Score' columns plus `column`. The
               caller's DataFrame is left unmodified.
    :param column: which column of DataFrame to show. Default: Adjusted P-value
    :param title: figure title
    :param cutoff: when `column` is 'Adjusted P-value' or 'P-value', only
                   terms with a value <= cut-off are shown; ignored for any
                   other column.
    :param top_term: number of enriched terms to show.
    :param sizes: tuple, (min, max) scatter size. Not functional for now
    :param norm: None, a tuple of ``Normalize`` arguments, or a
                 matplotlib.colors.Normalize object used to scale dot sizes.
                 A Normalize object passed in gets ``clip`` enabled and is
                 autoscaled if it has no limits yet.
    :param legend: bool, whether to show legend.
    :param figsize: tuple, figure size.
    :param cmap: matplotlib colormap
    :param ofname: output file name. If None, don't save figure
    :param kwargs: accepted but not used by this function.
    :return: the warning message (str) when no term passes the cut-off,
             None after saving to `ofname`, otherwise the matplotlib Axes.
    :raises ValueError: if a p-value cannot be converted to float, or if
                        `norm` is of an unsupported type.

    """

    colname = column
    # sorting the dataframe for better visualization
    if colname in ['Adjusted P-value', 'P-value']:
        # check if any values in `df[colname]` can't be coerced to floats
        can_be_coerced = df[colname].map(isfloat)
        if not can_be_coerced.all():
            raise ValueError(
                'some value in %s could not be typecast to `float`' % colname)
        # assign() builds a new frame; the original wrote the converted
        # values back into the DataFrame owned by the caller.
        df = df.assign(**{colname: df[colname].map(float)})
        df = df[df[colname] <= cutoff]
        if len(df) < 1:
            msg = "Warning: No enrich terms when cutoff = %s" % cutoff
            return msg
        df = df.assign(logAP=lambda x: -x[colname].apply(np.log10))
        colname = 'logAP'
    # ascending sort + last `top_term` rows: the largest values are kept and
    # the largest one is drawn at the top of the y axis
    df = df.sort_values(by=colname).iloc[-top_term:, :]

    # 'Overlap' looks like "<hits>/<background>"
    overlap = df['Overlap'].str.split("/", expand=True).astype(int)
    df = df.assign(Hits=overlap.iloc[:, 0], Background=overlap.iloc[:, 1])
    df = df.assign(Hits_ratio=lambda x: x.Hits / x.Background)
    # x axis values
    x = df.loc[:, colname].values
    combined_score = df['Combined Score'].round().astype('int')
    # y axis index and values
    y = list(range(len(df)))
    ylabels = df['Term'].values

    # control the size of scatter and legend marker
    levels = np.sort(df['Hits'].unique())
    if norm is None:
        norm = Normalize()
    elif isinstance(norm, tuple):
        norm = Normalize(*norm)
    elif not isinstance(norm, Normalize):
        # the parameter is called `norm`; the old message named `size_norm`
        raise ValueError("``norm`` must be None, tuple, or Normalize object.")
    min_width, max_width = np.r_[20, 100] * plt.rcParams["lines.linewidth"]
    norm.clip = True
    if not norm.scaled():
        # calling the norm once autoscales vmin/vmax to the hit counts
        norm(np.asarray(levels))
    scaled = norm(levels)
    widths = np.asarray(min_width + scaled * (max_width - min_width))
    if scaled.mask.any():
        widths[scaled.mask] = 0
    # Kept under its own name so the (currently unused) `sizes` parameter is
    # not silently overwritten.
    size_by_hits = dict(zip(levels, widths))
    area = df['Hits'].map(size_by_hits).values

    # create scatter plot
    if hasattr(sys, 'ps1') and (ofname is None):
        # working inside python console, show figure
        fig, ax = plt.subplots(figsize=figsize)
    else:
        # If working on commandline, don't show figure
        fig = Figure(figsize=figsize)
        FigureCanvas(fig)  # the canvas registers itself on the figure
        ax = fig.add_subplot(111)

    # The percentile of a single number is that number, so the colour range
    # is simply the min/max of the scores.
    vmin = combined_score.min()
    vmax = combined_score.max()
    sc = ax.scatter(x=x,
                    y=y,
                    s=area,
                    edgecolors='face',
                    c=combined_score,
                    cmap=cmap,
                    vmin=vmin,
                    vmax=vmax)

    if column in ['Adjusted P-value', 'P-value']:
        xlabel = "-log$_{10}$(%s)" % column
    else:
        xlabel = column
    ax.set_xlabel(xlabel, fontsize=14, fontweight='bold')
    ax.yaxis.set_major_locator(plt.FixedLocator(y))
    ax.yaxis.set_major_formatter(plt.FixedFormatter(ylabels))
    ax.set_yticklabels(ylabels, fontsize=16)

    ax.grid()
    # colorbar
    cax = fig.add_axes([0.95, 0.20, 0.03, 0.22])
    cbar = fig.colorbar(
        sc,
        cax=cax,
    )
    cbar.ax.tick_params(right=True)
    cbar.ax.set_title('Combined\nScore', loc='left', fontsize=12)

    # Choose the rows shown in the size legend (positions into `area`).
    if len(df) >= 3:
        # largest dot, the dot closest to the mean size, smallest dot
        idx = unique([
            area.argmax(),
            np.abs(area - area.mean()).argmin(),
            area.argmin(),
        ])
    else:
        # fewer than 3 terms: show all of them
        idx = range(len(df))
    label = df.iloc[idx, df.columns.get_loc('Hits')]

    if legend:
        # one empty scatter per entry, sized like the corresponding dot
        legend_markers = [
            ax.scatter([], [], s=area[ix], c='b') for ix in idx
        ]
        ax.legend(legend_markers, label, title='Hits')
    ax.set_title(title, fontsize=20, fontweight='bold')

    if ofname is not None:
        fig.savefig(ofname, bbox_inches='tight', dpi=300)
        return
    return ax