Esempio n. 1
0
def test_filter_subsets(min, max):
    """Check UpSet's size filtering against a manual filter of the same data.

    ``min`` and ``max`` are the subset-size bounds handed to ``UpSet`` as
    ``min_subset_size`` / ``max_subset_size``. They shadow the builtins, but
    they are this test's parameter names and therefore kept as-is.
    """
    samples = generate_samples(0, 5000, 3)
    unfiltered = UpSet(samples, subset_size='auto')
    filtered = UpSet(samples,
                     subset_size='auto',
                     min_subset_size=min,
                     max_subset_size=max)

    # Reproduce the filter by hand on the unfiltered aggregation.
    all_sizes = unfiltered.intersections
    full_df = unfiltered._df
    within_bounds = np.logical_and(all_sizes >= min, all_sizes <= max)
    expected_sizes = all_sizes[within_bounds]
    expected_df = full_df[full_df.index.isin(expected_sizes.index)]
    assert_series_equal(filtered.intersections, expected_sizes)

    def _pack_binary(X):
        """Fold the columns of X into one number per row (first column = MSB)."""
        frame = pd.DataFrame(X)
        packed = 0
        for _, column in frame.items():
            packed = packed * 2 + column
        return packed

    # Map every row's packed membership code to the position of that code
    # among the retained intersections.
    row_codes = _pack_binary(expected_df.index.to_frame())
    kept_codes = _pack_binary(expected_sizes.index.to_frame())
    code_to_position = pd.Series(np.arange(len(kept_codes)),
                                 index=kept_codes)
    expected_df['_bin'] = pd.Series(row_codes).map(code_to_position)
    assert_frame_equal(filtered._df, expected_df)
Esempio n. 2
0
def Venn_Upset(adata,genelists,size_height=3):
    """Draw an UpSet plot of the membership columns found in ``adata.var``.

    adata: object exposing a ``var`` table that can be indexed by a list of
        column names (presumably an AnnData -- confirm against the caller).
    genelists: list of ``adata.var`` column names used as the set
        memberships, e.g. ['Deep_1','Deep_2'].
    size_height: forwarded to UpSet as ``intersection_plot_elements``.

    Returns the UpSet object after it has been plotted.
    """
    from upsetplot import UpSet, plot

    # 'highly_variable' rides along as the single remaining data column
    # once the membership columns have been moved into the index.
    selected_columns = genelists + ['highly_variable']
    membership = pd.DataFrame(adata.var[selected_columns]).set_index(genelists)

    upset = UpSet(
        membership,
        subset_size='count',
        intersection_plot_elements=size_height,
    )
    upset.plot()
    return upset
Esempio n. 3
0
def UpSetFromLists(listOflist,labels,size_height=3,showplot=True):
    """Build (and optionally draw) an UpSet plot from several lists of items.

    Parameters
    ----------
    listOflist : sequence of iterables
        One collection of hashable items per set.
    labels : sequence of str
        Set names, in the same order as ``listOflist``.
    size_height : int, optional
        Forwarded to UpSet as ``intersection_plot_elements``. This argument
        used to be ignored in favour of a hard-coded 3; the default keeps
        the previous appearance.
    showplot : bool, optional
        The plot is drawn only when this is exactly ``True``.

    Returns
    -------
    The constructed UpSet object.
    """
    from upsetplot import UpSet

    # Materialise the inputs once so tuples and other iterables work too.
    groups = [list(group) for group in listOflist]
    labels = list(labels)

    # Union of all items in first-seen order (a plain set would make the
    # row order vary between runs).
    universe = list(dict.fromkeys(item for group in groups for item in group))
    items = pd.Series(universe, index=universe)

    # One boolean membership column per input list, plus an always-true
    # 'all' column that remains as the data column after indexing.
    membership = pd.concat(
        [items.isin(group) for group in [*groups, items]], axis=1)
    membership.columns = labels + ['all']
    membership = membership.set_index(labels)

    upset = UpSet(membership,
                  subset_size='count',
                  intersection_plot_elements=size_height)
    if showplot is True:
        upset.plot()
    return upset
Esempio n. 4
0
def test_index_must_be_bool(x):
    """UpSet accepts 0/1 integer index levels but rejects other integers.

    ``x`` is the data under test; it is expected to carry the index levels
    'cat0', 'cat1' and 'cat2' (supplied from outside this block).
    """
    categories = ['cat0', 'cat1', 'cat2']

    # Truthy ints are okay
    x = x.reset_index()
    # The assignment target previously read ['cat0', 'cat2', 'cat2'], which
    # left 'cat1' unconverted and wrote 'cat2' twice.
    x[categories] = x[categories].astype(int)
    x = x.set_index(categories).iloc[:, 0]

    UpSet(x)

    # other ints are not
    x = x.reset_index()
    x[categories] = x[categories] + 1
    x = x.set_index(categories).iloc[:, 0]
    with pytest.raises(ValueError, match='not boolean'):
        UpSet(x)
Esempio n. 5
0
def plot_upset(sets, path):
    """Save an UpSet plot of the intersections between ``sets`` to ``path``.

    sets: sized container passed to ``from_contents``; at least two entries
        are needed for a plot.
    path: destination handed to ``Figure.savefig``.

    With fewer than two sets nothing is plotted and a notice is printed.
    """
    if len(sets) <= 1:
        print(f'plot_upset: No sets to intersect for {path}')
        return

    df_upset = from_contents(sets)
    upset_plot = UpSet(df_upset,
                       sort_by='degree',
                       sort_categories_by='cardinality',
                       show_counts=True,
                       show_percentages=True)
    fig = plt.figure()
    try:
        upset_plot.plot(fig=fig)
        fig.savefig(path)
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
Esempio n. 6
0
def test_vertical():
    """The vertical layout must have a smaller width/height ratio than the horizontal one."""
    X = generate_data(n_samples=100)

    def _grid_figsize(orientation):
        """Lay out the grid on a fresh Figure and return (width, height)."""
        fig = matplotlib.figure.Figure()
        UpSet(X, orientation=orientation).make_grid(fig)
        return fig.get_figwidth(), fig.get_figheight()

    horz_width, horz_height = _grid_figsize('horizontal')
    assert horz_height < horz_width

    vert_width, vert_height = _grid_figsize('vertical')
    assert horz_width / horz_height > vert_width / vert_height

    # TODO: test axes positions, plot order, bar orientation
Esempio n. 7
0
def test_element_size():
    """Figure size must track ``element_size``, and stay untouched when it is None."""
    X = generate_data(n_samples=100)

    widths = []
    heights = []
    for size in range(10, 50, 5):
        fig = matplotlib.figure.Figure()
        UpSet(X, element_size=size).make_grid(fig)
        widths.append(fig.get_figwidth())
        heights.append(fig.get_figheight())

    # Absolute width increases
    width_steps = np.diff(widths)
    assert np.all(width_steps > 0)

    # Font size stays constant, so aspect ratio decreases
    aspect = np.divide(widths, heights)
    assert np.all(np.diff(aspect) < 0)
    # But doesn't decrease by much
    step_ratio = aspect[:-1] / aspect[1:]
    assert np.all(step_ratio < 1.1)

    # element_size=None must leave the figure dimensions alone.
    fig = matplotlib.figure.Figure()
    size_before = (fig.get_figwidth(), fig.get_figheight())
    UpSet(X, element_size=None).make_grid(fig)
    size_after = (fig.get_figwidth(), fig.get_figheight())
    assert size_before == size_after
Esempio n. 8
0
def test_sort_sets_by_deprecation(x, sort_sets_by):
    """The deprecated ``sort_sets_by`` must warn yet render like ``sort_categories_by``.

    ``x`` is the input data and ``sort_sets_by`` the sorting value under
    test; both are supplied from outside this block.
    """
    import warnings

    with pytest.warns(DeprecationWarning, match='sort_sets_by'):
        upset1 = UpSet(x, sort_sets_by=sort_sets_by)

    # ``pytest.warns(None)`` is deprecated in pytest 7 and rejected by
    # pytest 8, so warnings are recorded with the standard library instead.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        upset2 = UpSet(x, sort_categories_by=sort_sets_by)
    # The replacement keyword must not trigger the deprecation notice.
    assert not any(
        issubclass(w.category, DeprecationWarning)
        and 'sort_sets_by' in str(w.message)
        for w in caught)

    # Render both to raw pixel buffers and require byte-identical output.
    fig = matplotlib.figure.Figure()
    upset1.plot(fig)
    raw1 = io.BytesIO()
    fig.savefig(raw1, format='raw')

    fig = matplotlib.figure.Figure()
    upset2.plot(fig)
    raw2 = io.BytesIO()
    fig.savefig(raw2, format='raw')

    assert raw1.getvalue() == raw2.getvalue()
def plot_intersection(data: dict, plot_outfile: str = "upsetplot.pdf"):
    """
    Take a dict of lists of unique identifiers, make quantitative venn diagram.

    Arguments:
        (REQUIRED) data: dict mapping a set name to its identifiers; it is
                   converted with from_contents before plotting. Input that
                   is not a dict (e.g. already converted with from_contents)
                   is passed to UpSet unchanged.
        (OPTIONAL) plot_outfile: save the figure here; when empty or None
                   the figure is shown interactively instead
    """
    if isinstance(data, dict):
        # UpSet expects membership-indexed pandas data, not a raw mapping,
        # so the advertised dict input has to be converted first.
        from upsetplot import from_contents
        data = from_contents(data)

    upset = UpSet(data,
                  show_counts=True,
                  show_percentages=True,
                  sort_categories_by=None)
    upset.plot()
    if plot_outfile:
        plt.savefig(plot_outfile)
    else:
        plt.show()
Esempio n. 10
0
def upsetplot_miss(data):
    """Build an UpSet object describing which columns are missing together.

    Only columns of ``data`` that contain at least one null are considered.
    A boolean '<column>_NA' index level is added for each of them, and rows
    whose index equals the all-False tuple are left out.

    Returns the (not yet plotted) UpSet object.
    """
    has_missing = data.isnull().any()
    incomplete = data.loc[:, has_missing]

    na_flags = pd.isna(incomplete).rename(columns=lambda name: name + '_NA')

    # The first flag replaces the original index; later ones are appended
    # as additional index levels.
    for position, flag_name in enumerate(na_flags.columns):
        incomplete = incomplete.set_index(na_flags[flag_name],
                                          append=position > 0)

    nothing_missing = (False, ) * sum(has_missing)
    rows_with_gaps = incomplete.loc[incomplete.index != nothing_missing, :]

    return UpSet(rows_with_gaps,
                 subset_size='count',
                 show_counts=True,
                 sort_by='cardinality')
Esempio n. 11
0
def lp_dist(data, percentage=False, scale=1, fname=None):
    """Plot pattern combination frequencies as an UpSet plot.

    Parameters
    ----------
    data : AnnData
        Spatial formatted AnnData
    percentage : bool, optional
        If True, label each bar as a percentage else label as a count, by default False
    scale : int, optional
        scale > 1 scales the plot larger, scale < 1 scales the plot smaller, by default 1
    fname : str, optional
        Save the figure to specified filename, by default None.
        NOTE(review): not referenced in this function body; presumably
        consumed by a decorator or by the caller -- confirm.
    """
    per_pattern = []
    for pattern in PATTERN_NAMES:
        long_df = data.to_df(pattern).reset_index().melt(id_vars="cell")
        labelled = long_df[long_df["value"].notna()]
        per_pattern.append(labelled.set_index(["cell", "gene"]))

    # One boolean column per pattern: True where the stored value equals 1.
    sample_labels = pd.concat(per_pattern, axis=1) == 1
    sample_labels.columns = PATTERN_NAMES

    # Samples without any pattern are kept (no filtering happens here).

    # "degree" holds the negated number of patterns per sample; sorting it
    # descending therefore puts samples with fewer patterns first, with the
    # pattern columns breaking ties.
    sample_labels["degree"] = -sample_labels[PATTERN_NAMES].sum(axis=1)
    sort_keys = ["degree"] + PATTERN_NAMES
    sample_labels = sample_labels.reset_index()
    sample_labels = sample_labels.sort_values(sort_keys, ascending=False)
    sample_labels = sample_labels.drop("degree", axis=1)

    upset = UpSet(
        from_indicators(PATTERN_NAMES, data=sample_labels),
        element_size=scale * 40,
        min_subset_size=sample_labels.shape[0] * 0.001,
        facecolor="lightgray",
        sort_by=None,
        show_counts=(not percentage),
        show_percentages=percentage,
    )

    # Colour the single-pattern subsets of every pattern that occurs at all.
    for pattern, color in zip(PATTERN_NAMES, PATTERN_COLORS):
        if not sample_labels[pattern].sum() > 0:
            continue
        upset.style_subsets(present=pattern, max_degree=1, facecolor=color)

    upset.plot()
    plt.suptitle(
        f"Localization Patterns\n{data.n_obs} cells, {data.n_vars} genes")
Esempio n. 12
0
def plotUpset(adata,
              study_col=None,
              ct_col=None,
              mn_key="MetaNeighborUS",
              metaclusters="MetaNeighborUS_1v1_metaclusters",
              outlier_label="outliers",
              show=True):
    """Plot UpSet plot for intersections between datasets and metaclusters

    Shows how replicability depends on the input dataset

    Arguments:
        adata {AnnData} -- AnnData object containing the output of MetaNeighborUS 1vBest, and extractMetaClusters

    Keyword Arguments:
        study_col {str} -- If None, read from adata.uns[f'{mn_key}_params']['study_col'], else must be a key of adata.obs (default: {None})
        ct_col {str} -- If None, read from adata.uns[f'{mn_key}_params']['ct_col'], else must be a key of adata.obs (default: {None})
        mn_key {str} -- Location of MetaNeighborUS results (default: {'MetaNeighborUS'})
        metaclusters {str} -- Key in adata.uns holding the extractMetaClusters results, or the metacluster Series itself (default: {'MetaNeighborUS_1v1_metaclusters'})
        outlier_label {str} -- Name of outlier_label in metaclusters (extractMetaClusters results) (default: {'outliers'})
        show {bool} -- If True, draw and show the plot and return None; otherwise return the UpSet object without drawing (default: {True})
    """

    if study_col is None:
        study_col = adata.uns[f"{mn_key}_params"]["study_col"]
    else:
        assert study_col in adata.obs_keys(), "Study Col not in adata"
    if ct_col is None:
        ct_col = adata.uns[f"{mn_key}_params"]["ct_col"]
    else:
        assert ct_col in adata.obs_keys(), "Cluster Col not in adata"

    if isinstance(metaclusters, str):
        assert (metaclusters in adata.uns_keys()
                ), "Run extractMetaClusters or pass Metacluster Series"
        metaclusters = adata.uns[metaclusters]
    pheno, _, _ = create_cell_labels(adata, study_col, ct_col)
    pheno = pheno.drop_duplicates().set_index("study_ct")

    # For every metacluster, the list of studies its members come from.
    membership = {
        name: pheno.loc[members, study_col].values.tolist()
        for name, members in zip(metaclusters.index, metaclusters.values)
    }
    df = pd.DataFrame(
        [{name: True
          for name in names} for names in membership.values()],
        index=membership.keys(),
    )
    df = df.fillna(False)
    df = df[df.index != outlier_label]

    # Count metaclusters per combination of studies.
    df = df.groupby(df.columns.tolist(), as_index=False).size()
    if not isinstance(df, pd.Series):
        # Newer pandas returns a DataFrame with a trailing "size" column
        # here (pandas < 1.0.0 already returns the Series); move the study
        # columns into the index to obtain the Series UpSet needs.
        cols = df.columns[:-1].copy()
        for col in cols:
            df.set_index(df[col], append=True, inplace=True)
        df.index = df.index.droplevel(0)
        df = df["size"]
    us = UpSet(df, sort_by="cardinality")
    if show:
        # Constructing UpSet draws nothing; without this call plt.show()
        # had no UpSet figure to display.
        us.plot()
        plt.show()
    else:
        return us
Esempio n. 13
0
def test_add_catplot():
    """Exercise ``UpSet.add_catplot`` for Series and DataFrame input.

    Skipped when seaborn is not installed.
    """
    pytest.importorskip('seaborn')
    fig = matplotlib.figure.Figure()

    # --- Series input -----------------------------------------------------
    series_data = generate_data(n_samples=100)
    series_upset = UpSet(series_data)
    # smoke test
    series_upset.add_catplot('violin')
    series_upset.plot(fig)

    # can't provide value with Series
    with pytest.raises(ValueError):
        series_upset.add_catplot('violin', value='foo')

    # check the above add_catplot did not break the state
    series_upset.plot(fig)

    # --- DataFrame input --------------------------------------------------
    frame_data = generate_data(n_samples=100)
    frame_data.name = 'foo'
    frame_data = frame_data.to_frame()
    frame_upset = UpSet(frame_data, sum_over=False)

    # must provide value with DataFrame
    with pytest.raises(ValueError):
        frame_upset.add_catplot('violin')
    frame_upset.add_catplot('violin', value='foo')
    # not a known column
    with pytest.raises(ValueError):
        frame_upset.add_catplot('violin', value='bar')
    frame_upset.plot(fig)

    # invalid plot kind raises error when plotting
    frame_upset.add_catplot('foobar', value='foo')
    with pytest.raises(AttributeError):
        frame_upset.plot(fig)
Esempio n. 14
0
from matplotlib import pyplot as plt
from upsetplot import UpSet

# Load the dataset into a DataFrame (one column per feature name)
boston = load_boston()
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)

# Rank features by Spearman correlation with the target (median house
# value). sort_values() is ascending, so the last five entries are the
# five largest signed correlations.
correls = boston_df.corrwith(pd.Series(boston.target),
                             method='spearman').sort_values()
top_features = correls.index[-5:]

# Get a binary indicator of whether each top feature is above its column
# median; the indicator columns get a '>' suffix so that they do not clash
# with the original feature columns.
boston_above_avg = boston_df > boston_df.median(axis=0)
boston_above_avg = boston_above_avg[top_features]
boston_above_avg = boston_above_avg.rename(columns=lambda x: x + '>')

# Make this indicator mask an index of boston_df
boston_df = pd.concat([boston_df, boston_above_avg], axis=1)
boston_df = boston_df.set_index(list(boston_above_avg.columns))

# Also give us access to the target (median house value)
boston_df = boston_df.assign(median_value=boston.target)

# UpSet plot it! Two strip plots are added to the figure: one for the
# target and one for the AGE feature.
upset = UpSet(boston_df, sum_over=False, intersection_plot_elements=3)
upset.add_catplot(value='median_value', kind='strip', color='blue')
upset.add_catplot(value='AGE', kind='strip', color='black')
upset.plot()
plt.show()
Esempio n. 15
0
def test_param_validation(kw):
    """Constructing UpSet with the keyword arguments in ``kw`` must raise ValueError."""
    samples = generate_data(n_samples=100)
    with pytest.raises(ValueError):
        UpSet(samples, **kw)
# Move the selected gene columns of the binary table into its index.
x_df_3_binary = x_df_3_binary.set_index(selected_genes)

# Keep only the entries for which y is positive.
y_ind = y > 0
x_df_mets_3 = x_df_3.T[y_ind].T

x_df_mets_3_binary = x_df_mets_3.T > 0.
# Call form of print: same output for this single value on Python 2, and
# valid syntax on Python 3 (the bare print statement is not).
print(x_df_mets_3_binary.shape)

x_df_mets_3_binary = x_df_mets_3_binary.set_index(selected_genes)

# Small Arial font for every text element of the figure.
font = {'family': 'Arial', 'weight': 'normal', 'size': 5}
matplotlib.rc('font', **font)
# Re-index by the three genes shown in the plot.
dd = x_df_3_binary.reset_index().set_index(['AR', 'TP53', 'MDM4'])

upset = UpSet(dd,
              subset_size='count',
              intersection_plot_elements=6,
              show_counts=True,
              with_lines=True,
              element_size=10)
fig = plt.figure(constrained_layout=False, figsize=(8, 6))
upset.plot(fig)
fig.subplots_adjust(bottom=0.2, top=0.9, left=0.08, right=0.99)

# Save the figure as PNG, then as PDF. The PDF font type is switched to 42
# only after the PNG has been written.
saving_dir = join(PLOTS_PATH, 'figure4')
filename = join(saving_dir, 'figure4_ar_tp53_mdm4.png')
plt.savefig(filename, dpi=300)
matplotlib.rcParams['pdf.fonttype'] = 42
filename = join(saving_dir, 'figure4_ar_tp53_mdm4.pdf')
plt.savefig(filename)
Esempio n. 17
0
# Keep the last five entries of the correlation ranking.
# NOTE(review): `correls` is defined outside this view; presumably sorted
# ascending so that these are the strongest correlations -- confirm.
top_features = correls.index[-5:]

# Get a binary indicator of whether each top feature is above its column
# median; the indicator columns get a '>' suffix so that they do not clash
# with the original feature columns.
boston_above_avg = boston_df > boston_df.median(axis=0)
boston_above_avg = boston_above_avg[top_features]
boston_above_avg = boston_above_avg.rename(columns=lambda x: x + '>')

# Make this indicator mask an index of boston_df
boston_df = pd.concat([boston_df, boston_above_avg], axis=1)
boston_df = boston_df.set_index(list(boston_above_avg.columns))

# Also give us access to the target (median house value)
boston_df = boston_df.assign(median_value=boston.target)

# UpSet plot it! Two strip plots are added to the figure: one for the
# target and one for the AGE feature.
upset = UpSet(boston_df, subset_size='count', intersection_plot_elements=3)
upset.add_catplot(value='median_value', kind='strip', color='blue')
upset.add_catplot(value='AGE', kind='strip', color='black')
upset.plot()
plt.title("UpSet with catplots, for orientation='horizontal'")
plt.show()

# And again in vertical orientation, with the same two strip plots.
# This second figure is plotted but not shown within the lines visible here.

upset = UpSet(boston_df,
              subset_size='count',
              intersection_plot_elements=3,
              orientation='vertical')
upset.add_catplot(value='median_value', kind='strip', color='blue')
upset.add_catplot(value='AGE', kind='strip', color='black')
upset.plot()
Esempio n. 18
0
 def gen_upset_plot(self, className=None):
     """Render an UpSet plot of peptide overlap between samples as a card.

     Peptides of every sample in ``self.results.samples`` are intersected.
     Intersections holding less than 0.5% of the summed per-sample peptide
     counts are dropped. Up to 100 remaining intersections are plotted
     horizontally, more than that vertically, each with a boxen plot of
     peptide lengths. The figure is saved as ``upsetplot.svg`` inside
     ``self.fig_dir`` and embedded base64-encoded into the returned element.

     className: passed through as the ``className`` of the outer ``div``.

     Returns the ``div`` wrapping the card.
     """
     def _restyle_text_labels(axis, lift):
         """Put each text label of *axis* on one line, rotate it vertical and raise it by *lift*."""
         for child in axis.get_children():
             if isinstance(child, plotText):
                 child.set_text(child.get_text().replace('\n', ' '))
                 child.set_rotation('vertical')
                 x_pos, y_pos = child.get_position()
                 child.set_position((x_pos, y_pos + lift))

     # total_peps = len([pep for s in self.results.samples for pep in s.peptides])
     total_peps = np.sum([len(s.peptides) for s in self.results.samples])
     data = from_contents({s.sample_name: set(s.peptides)
                           for s in self.results.samples})
     # Iterates over a snapshot of the unique index values, so dropping
     # rows inside the loop is safe.
     for intersection in data.index.unique():
         if len(data.loc[intersection, :])/total_peps < 0.005:
             data.drop(index=intersection, inplace=True)
     data['peptide_length'] = np.vectorize(len)(data['id'])
     n_sets = len(data.index.unique())
     if n_sets <= 100:  # Plot horizontal
         upset = UpSet(data,
                       sort_by='cardinality',
                       show_counts=True)
         upset.add_catplot(value='peptide_length', kind='boxen', color='gray')
         plot = upset.plot()
         plot['totals'].grid(False)
         ylim = plot['intersections'].get_ylim()[1]
         # Extra head-room for the rotated count labels.
         plot['intersections'].set_ylim((0, ylim * 1.1))
         _restyle_text_labels(plot['intersections'], 0.02 * ylim)
     else:  # plot vertical
         upset = UpSet(data, subset_size='count',
                       orientation='vertical',
                       sort_by='cardinality',
                       sort_categories_by=None,
                       show_counts=True)
         upset.add_catplot(value='peptide_length', kind='boxen', color='gray')
         plot = upset.plot()
         lim = plot['intersections'].get_xlim()
         plot['intersections'].set_xlim([0, lim[1] * 1.6])
         plot['totals'].grid(False)
         ylim = plot['totals'].get_ylim()[1]
         _restyle_text_labels(plot['totals'], 0.1 * ylim)
         plt.draw()
     upset_fig = str(self.fig_dir / "upsetplot.svg")
     plt.savefig(upset_fig, bbox_inches="tight")
     # Read the SVG back through a context manager so that the file handle
     # is closed deterministically.
     with open(upset_fig, 'rb') as svg_file:
         encoded_upset_fig = base64.b64encode(svg_file.read()).decode()
     card = div(className='card', style="height: 100%")
     card.add(div([b('UpSet Plot'), p('Only intersections > 0.5% are displayed')], className='card-header'))
     plot_body = div(img(src=f'data:image/svg+xml;base64,{encoded_upset_fig}',
                         className='img-fluid',
                         style='width: 100%; height: auto'),
                     className='card-body')
     card.add(plot_body)
     return div(card, className=className)