Beispiel #1
0
def Venn_Upset(adata,genelists,size_height=3):
    """Draw an UpSet plot from membership columns stored in ``adata.var``.

    Parameters
    ----------
    adata
        Object exposing a ``var`` table that can be indexed with a list of
        column names (presumably an AnnData -- confirm against callers).
    genelists
        Names of the ``adata.var`` columns used as set-membership
        indicators, e.g. ``['Deep_1', 'Deep_2']``. Any sequence of names
        is accepted.
    size_height
        Forwarded to ``UpSet`` as ``intersection_plot_elements``.

    Returns
    -------
    The ``UpSet`` object, after ``plot()`` has been called on it.
    """
    from upsetplot import UpSet

    # Normalise to a list: list concatenation and DataFrame.set_index both
    # treat a tuple differently from a list of column names.
    set_columns = list(genelists)

    # 'highly_variable' is selected alongside the set columns and is the only
    # data column left once the set columns are moved into the index.
    membership = pd.DataFrame(adata.var[set_columns + ['highly_variable']])
    membership = membership.set_index(set_columns)

    upset = UpSet(membership, subset_size='count',
                  intersection_plot_elements=size_height)
    upset.plot()
    return upset
def UpSetFromLists(listOflist,labels,size_height=3,showplot=True):
    """Build (and optionally draw) an UpSet plot from several collections.

    Parameters
    ----------
    listOflist
        Sequence of collections; each collection is one set in the plot.
    labels
        One name per collection in ``listOflist``, in the same order.
    size_height
        Forwarded to ``UpSet`` as ``intersection_plot_elements``.
    showplot
        When truthy, ``plot()`` is called on the ``UpSet`` object before it
        is returned.

    Returns
    -------
    The ``UpSet`` object.
    """
    from upsetplot import UpSet

    groups = list(listOflist)
    names = list(labels)

    # Every distinct element across all collections, used both as the values
    # and as the index so each row is identified by the element itself.
    universe = list({item for group in groups for item in group})
    elements = pd.Series(universe, index=universe)

    # One boolean column per collection, plus an always-True 'all' column
    # that stays behind as data after the label columns become the index.
    indicator_columns = [elements.isin(group) for group in groups]
    indicator_columns.append(elements.isin(elements))
    membership = pd.concat(indicator_columns, axis=1)
    membership.columns = names + ['all']
    membership = membership.set_index(names)

    upset = UpSet(membership, subset_size='count',
                  intersection_plot_elements=size_height)
    if showplot:
        upset.plot()
    return upset
Beispiel #3
0
def test_add_catplot():
    """Check ``add_catplot`` argument validation for Series and DataFrame data."""
    pytest.importorskip('seaborn')
    figure = matplotlib.figure.Figure()

    # --- Series input: no ``value`` may be given -------------------------
    series_upset = UpSet(generate_data(n_samples=100))
    series_upset.add_catplot('violin')  # smoke test
    series_upset.plot(figure)

    with pytest.raises(ValueError):
        series_upset.add_catplot('violin', value='foo')

    # the rejected call above must leave the object in a plottable state
    series_upset.plot(figure)

    # --- DataFrame input: ``value`` is required --------------------------
    named_series = generate_data(n_samples=100)
    named_series.name = 'foo'
    frame_upset = UpSet(named_series.to_frame(), sum_over=False)

    with pytest.raises(ValueError):
        frame_upset.add_catplot('violin')
    frame_upset.add_catplot('violin', value='foo')
    with pytest.raises(ValueError):
        # 'bar' is not a column of the frame
        frame_upset.add_catplot('violin', value='bar')
    frame_upset.plot(figure)

    # An unknown plot kind is accepted when added and only fails on plot()
    frame_upset.add_catplot('foobar', value='foo')
    with pytest.raises(AttributeError):
        frame_upset.plot(figure)
Beispiel #4
0
def plot_upset(sets, path):
    """Save an UpSet plot of ``sets`` to ``path``.

    Parameters
    ----------
    sets
        Sized container passed to ``from_contents``; at least two entries
        are required for a plot to be produced.
    path
        Destination handed to ``Figure.savefig``.

    With fewer than two sets nothing is plotted and a message is printed
    instead.
    """
    if len(sets) <= 1:
        print(f'plot_upset: No sets to intersect for {path}')
        return

    df_upset = from_contents(sets)
    upset_plot = UpSet(df_upset,
                       sort_by='degree',
                       sort_categories_by='cardinality',
                       show_counts=True,
                       show_percentages=True)
    fig = plt.figure()
    try:
        upset_plot.plot(fig=fig)
        fig.savefig(path)
    finally:
        # pyplot keeps a reference to every figure it creates; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
Beispiel #5
0
def lp_dist(data, percentage=False, scale=1, fname=None):
    """Plot pattern combination frequencies as an UpSet plot.

    Parameters
    ----------
    data : AnnData
        Spatial formatted AnnData
    percentage : bool, optional
        If True, label each bar as a percentage else label as a count, by default False
    scale : int, optional
        scale > 1 scales the plot larger, scale < 1 scales the plot smaller, by default 1
    fname : str, optional
        Save the figure to specified filename, by default None
    """
    # One long-format frame per pattern, indexed by (cell, gene), keeping only
    # entries that have a value for that pattern.
    per_pattern = []
    for pattern in PATTERN_NAMES:
        long_df = data.to_df(pattern).reset_index().melt(id_vars="cell")
        long_df = long_df[~long_df["value"].isna()]
        per_pattern.append(long_df.set_index(["cell", "gene"]))

    # Boolean indicator table: True where the pattern value equals 1.
    sample_labels = pd.concat(per_pattern, axis=1) == 1
    sample_labels.columns = PATTERN_NAMES

    # Sort by degree, then pattern name. The degree is negated so that, under
    # the descending sort below, rows with fewer patterns come first.
    sample_labels["degree"] = -sample_labels[PATTERN_NAMES].sum(axis=1)
    sample_labels = (
        sample_labels.reset_index()
        .sort_values(["degree"] + PATTERN_NAMES, ascending=False)
        .drop("degree", axis=1)
    )

    upset = UpSet(
        from_indicators(PATTERN_NAMES, data=sample_labels),
        element_size=scale * 40,
        # Hide combinations covering less than 0.1% of all samples.
        min_subset_size=sample_labels.shape[0] * 0.001,
        facecolor="lightgray",
        sort_by=None,
        show_counts=(not percentage),
        show_percentages=percentage,
    )

    # Colour the single-pattern subsets, skipping patterns with no samples.
    for pattern, color in zip(PATTERN_NAMES, PATTERN_COLORS):
        if sample_labels[pattern].sum() > 0:
            upset.style_subsets(present=pattern, max_degree=1, facecolor=color)

    upset.plot()
    plt.suptitle(
        f"Localization Patterns\n{data.n_obs} cells, {data.n_vars} genes")

    # Save only when a filename was requested.
    if fname is not None:
        plt.savefig(fname)
Beispiel #6
0
def test_sort_sets_by_deprecation(x, sort_sets_by):
    """``sort_sets_by`` warns as deprecated but renders like ``sort_categories_by``."""
    import warnings

    def render_raw(upset):
        # Draw onto a fresh figure and return its raw pixel bytes.
        fig = matplotlib.figure.Figure()
        upset.plot(fig)
        buffer = io.BytesIO()
        fig.savefig(buffer, format='raw')
        return buffer.getvalue()

    with pytest.warns(DeprecationWarning, match='sort_sets_by'):
        upset1 = UpSet(x, sort_sets_by=sort_sets_by)

    # ``pytest.warns(None)`` is deprecated in pytest 7 and rejected by
    # pytest 8; record warnings with the stdlib instead and check that the
    # replacement keyword does not trigger the deprecation message.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        upset2 = UpSet(x, sort_categories_by=sort_sets_by)
    assert not [w for w in caught if 'sort_sets_by' in str(w.message)]

    assert render_raw(upset1) == render_raw(upset2)
Beispiel #7
0
 def gen_upset_plot(self, className=None):
     """Render an UpSet plot of per-sample peptides and wrap it in a card.

     The plot is built from ``self.results.samples`` (each sample's
     ``sample_name`` and ``peptides``), saved as ``upsetplot.svg`` under
     ``self.fig_dir`` and embedded in the returned markup as a base64
     data URI.

     Parameters
     ----------
     className
         Passed as ``className`` to the outermost ``div``.

     Returns
     -------
     A ``div`` containing the card with the embedded image.
     """
     def _restyle_count_labels(axes, y_shift):
         # Collapse each text label onto one line, rotate it to vertical and
         # move it up by ``y_shift``.
         for child in axes.get_children():
             if isinstance(child, plotText):
                 child.set_text(child.get_text().replace('\n', ' '))
                 child.set_rotation('vertical')
                 x_pos, y_pos = child.get_position()
                 child.set_position((x_pos, y_pos + y_shift))

     total_peps = np.sum([len(s.peptides) for s in self.results.samples])
     data = from_contents({s.sample_name: set(s.peptides)
                           for s in self.results.samples})

     # Drop intersections whose size is below 0.5% of the summed per-sample
     # peptide counts. The unique index values are computed once up front,
     # so dropping rows inside the loop is safe.
     for intersection in data.index.unique():
         if len(data.loc[intersection, :])/total_peps < 0.005:
             data.drop(index=intersection, inplace=True)
     data['peptide_length'] = np.vectorize(len)(data['id'])

     n_sets = len(data.index.unique())
     if n_sets <= 100:  # Plot horizontal
         upset = UpSet(data,
                       sort_by='cardinality',
                       show_counts=True)
         upset.add_catplot(value='peptide_length', kind='boxen', color='gray')
         axes_by_name = upset.plot()
         axes_by_name['totals'].grid(False)
         ylim = axes_by_name['intersections'].get_ylim()[1]
         # Extra headroom so the rotated labels fit above the bars.
         axes_by_name['intersections'].set_ylim((0, ylim * 1.1))
         _restyle_count_labels(axes_by_name['intersections'], 0.02 * ylim)
     else:  # plot vertical
         upset = UpSet(data, subset_size='count',
                       orientation='vertical',
                       sort_by='cardinality',
                       sort_categories_by=None,
                       show_counts=True)
         upset.add_catplot(value='peptide_length', kind='boxen', color='gray')
         axes_by_name = upset.plot()
         lim = axes_by_name['intersections'].get_xlim()
         axes_by_name['intersections'].set_xlim([0, lim[1] * 1.6])
         axes_by_name['totals'].grid(False)
         ylim = axes_by_name['totals'].get_ylim()[1]
         _restyle_count_labels(axes_by_name['totals'], 0.1 * ylim)
         plt.draw()

     upset_fig = str(self.fig_dir / "upsetplot.svg")
     plt.savefig(upset_fig, bbox_inches="tight")
     # Read the saved SVG back through a context manager so the handle is
     # closed deterministically.
     with open(upset_fig, 'rb') as svg_file:
         encoded_upset_fig = base64.b64encode(svg_file.read()).decode()

     card = div(className='card', style="height: 100%")
     card.add(div([b('UpSet Plot'), p('Only intersections > 0.5% are displayed')], className='card-header'))
     plot_body = div(img(src=f'data:image/svg+xml;base64,{encoded_upset_fig}',
                         className='img-fluid',
                         style='width: 100%; height: auto'),
                     className='card-body')
     card.add(plot_body)
     return div(card, className=className)
Beispiel #8
0
# Boolean mask of the selected features: True where a value exceeds its
# column median. Columns get a '>' suffix to tell them apart from the data.
boston_above_avg = (boston_df > boston_df.median(axis=0))[top_features]
boston_above_avg = boston_above_avg.rename(columns=lambda name: name + '>')

# Attach the mask, move it into the index, and expose the target
# (median house value) as a regular column.
boston_df = (
    pd.concat([boston_df, boston_above_avg], axis=1)
    .set_index(list(boston_above_avg.columns))
    .assign(median_value=boston.target)
)

# Draw the same UpSet plot with catplots once per orientation.
for orientation in ('horizontal', 'vertical'):
    upset = UpSet(boston_df,
                  subset_size='count',
                  intersection_plot_elements=3,
                  orientation=orientation)
    for column, color in (('median_value', 'blue'), ('AGE', 'black')):
        upset.add_catplot(value=column, kind='strip', color=color)
    upset.plot()
    plt.title("UpSet with catplots, for orientation='{}'".format(orientation))
    plt.show()
x_df_3_binary = x_df_3_binary.set_index(selected_genes)

# Keep only the columns of x_df_3 selected by the boolean mask y > 0.
y_ind = y > 0
x_df_mets_3 = x_df_3.T[y_ind].T

x_df_mets_3_binary = x_df_mets_3.T > 0.
# Function-call form of print: valid on Python 3 and prints the same single
# value on Python 2.
print(x_df_mets_3_binary.shape)

x_df_mets_3_binary = x_df_mets_3_binary.set_index(selected_genes)

font = {'family': 'Arial', 'weight': 'normal', 'size': 5}
matplotlib.rc('font', **font)

# Re-index on the three genes of interest so they become the UpSet categories.
dd = x_df_3_binary.reset_index().set_index(['AR', 'TP53', 'MDM4'])

upset = UpSet(dd,
              subset_size='count',
              intersection_plot_elements=6,
              show_counts=True,
              with_lines=True,
              element_size=10)
fig = plt.figure(constrained_layout=False, figsize=(8, 6))
upset.plot(fig)
fig.subplots_adjust(bottom=0.2, top=0.9, left=0.08, right=0.99)

saving_dir = join(PLOTS_PATH, 'figure4')
filename = join(saving_dir, 'figure4_ar_tp53_mdm4.png')
# Save through the figure handle rather than pyplot's "current figure".
fig.savefig(filename, dpi=300)
# The PDF font type is set only after the PNG is written and before the PDF
# is saved.
matplotlib.rcParams['pdf.fonttype'] = 42
filename = join(saving_dir, 'figure4_ar_tp53_mdm4.pdf')
fig.savefig(filename)