def test_three_sets_int(self):
    '''
    Checks ``iterables.subsets`` on three overlapping sets of integers.

    Keys of the expected dictionary are membership strings with one
    character per input set (``'1'`` when the elements belong to that
    set); combinations without exclusive elements map to an empty set.
    '''

    groups = [{0, 1}, {2, 3}, {1, 3, 4}]

    expected = {
        '100': {0},
        '010': {2},
        '001': {4},
        '110': set(),
        '101': {1},
        '011': {3},
        '111': set(),
    }

    self.assertDictEqual(iterables.subsets(groups), expected)
def upset_wrap(N, labels=None, drop_empty=False, drop_single=False, **kwargs):
    r'''
    Wrapper for UpSetPlot package. Mostly just generates the Boolean
    multi-indexed ``pandas.Series`` the ``upsetplot.plot`` function
    needs as input.

    * Arguments:
        - *N* [list]: Or any iterable type containing [set] objects.
        - *labels* [list]: Optional, ``None`` by default. Labels for the
          sets following the same order as provided in *N*. If none is
          passed they will be labelled ``'set0'``, ``'set1'`` and so on.
        - *drop_empty* [bool]: Optional, ``False`` by default. Whether
          to remove the empty set intersections from the plot or not.
        - *drop_single* [bool]: Optional, ``False`` by default. Whether
          to drop the non-intersecting sets from the plot.
        - *\*\*kwargs*: Optional. Additional keyword arguments passed to
          ``upsetplot.UpSet`` class.

    * Returns:
        [dict]: Contains the ``matplotlib.axes.Axes`` instances for the
        UpSetPlot figure.
    '''

    ss = subsets(N)

    # Every key is a string of '0'/'1' flags (one per set); turn it into
    # a tuple of booleans so it can act as a row of the MultiIndex.
    ids = [tuple(bool(int(flag)) for flag in key) for key in ss]
    counts = [len(members) for members in ss.values()]

    if labels is None:
        labels = ['set%d' % i for i in range(len(N))]

    idx = pd.MultiIndex.from_tuples(ids, names=labels)
    series = pd.Series(counts, index=idx)

    if drop_empty:
        # Filter with a mask rather than writing NaN into the integer
        # series: that assignment upcasts the counts to float and is
        # deprecated by pandas (incompatible dtype on setitem).
        series = series[series != 0]

    if drop_single:
        # sum() over a Boolean index tuple counts how many sets take part
        # in the combination; keep only real intersections (two or more).
        series = series.iloc[[
            pos for pos, membership in enumerate(series.index)
            if sum(membership) > 1
        ]]

    return usp.plot(series, **kwargs)
def venn(N, labels=['A', 'B', 'C', 'D', 'E'], c=['C0', 'C1', 'C2', 'C3', 'C4'],
         pct=False, sizes=False, title=None, filename=None, figsize=None):
    '''
    Plots a Venn diagram from a list of sets *N*. Number of sets must be
    between 2 and 5 (inclusive).

    * Arguments:
        - *N* [list]: Or any iterable type containing [set] objects.
        - *labels* [list]: Optional, ``['A', 'B', 'C', 'D', 'E']`` by
          default. Labels for the sets following the same order as
          provided in *N*.
        - *c* [list]: Optional, ``['C0', 'C1', 'C2', 'C3', 'C4']`` by
          default (matplotlib default colors). Any iterable containing
          color arguments tolerated by matplotlib (e.g.: ``['r', 'b']``
          for red and blue). Must contain at least the same number of
          elements as *N* (if more are provided, they will be ignored).
        - *pct* [bool]: Optional, ``False`` by default. Indicates
          whether to show percentages instead of absolute counts.
        - *sizes* [bool]: Optional, ``False`` by default. Whether to
          include the size of the sets in the legend or not.
        - *title* [str]: Optional, ``None`` by default. Defines the plot
          title.
        - *filename* [str]: Optional, ``None`` by default. If passed,
          indicates the file name or path where to store the figure.
          Format must be specified (e.g.: .png, .pdf, etc)
        - *figsize* [tuple]: Optional, ``None`` by default (default
          matplotlib size). Any iterable containing two values denoting
          the figure size (in inches) as [width, height].

    * Returns:
        - [matplotlib.figure.Figure]: The figure object containing the
          Venn diagram, unless *filename* is provided (the figure is
          then saved to that path and nothing is returned).
        - [str]: A message instead of a figure if the number of sets in
          *N* is not between 2 and 5.

    * Example:
        >>> N = [{0, 1}, {2, 3}, {1, 3, 4}] # Sets A, B, C
        >>> venn(N)

        .. image:: ../figures/venn_example.png
            :align: center
            :scale: 100
    '''

    # NOTE: the list defaults of *labels* and *c* are only read, never
    # mutated, so sharing them across calls is harmless.

    def ellipse(ax, x, y, w, h, a, color, alpha=1, label=None):
        # Draws one set as an ellipse patch on the given axes.
        e = matplotlib.patches.Ellipse(xy=(x, y), width=w, height=h, angle=a,
                                       color=color, alpha=alpha, label=label)
        ax.add_patch(e)

    # Hand-tuned layouts. For every supported number of sets:
    #   x, y, w, h, a -> centre, width, height and angle of each ellipse
    #   xt, yt        -> position of the text of each region
    #   keys          -> membership string of the region drawn at the
    #                    matching (xt, yt) position
    if len(N) == 2:
        # Ellipse parameters
        x = [-.25, .25]
        y = [0, 0]
        w = [1, 1]
        h = [1.5, 1.5]
        a = [0, 0]
        # Text (counts) parameters
        xt = [-.5, .5, 0]
        yt = [0, 0, 0]
        keys = ['10', '01', '11']

    elif len(N) == 3:
        # Ellipse parameters
        x = [0, -.25, .25]
        y = [.33, -.33, -.33]
        w = [1, 1, 1]
        h = [1.5, 1.5, 1.5]
        a = [0, 0, 0]
        # Text (counts) parameters
        xt = [0, -.5, .5, -.33, .33, 0, 0]
        yt = [.6, -.5, -.5, .15, .15, -.6, -.1]
        keys = ['100', '010', '001', '110', '101', '011', '111']

    elif len(N) == 4:
        # Ellipse parameters
        x = [-.15, -.35, .15, .35]
        y = [.15, -.25, .15, -.25]
        w = [2, 2, 2, 2]
        h = [1, 1, 1, 1]
        a = [-60, -60, 60, 60]
        # Text (counts) parameters
        xt = [-.5, -.8, .5, .8, -.6, 0, .4, -.4, 0, .6, -.3, .2, .3, -.2, 0]
        yt = [.8, -.1, .8, -.1, .33, .4, -.55, -.55, -.9, .33, 0, -.68, 0,
              -.68, -.33]
        keys = ['1000', '0100', '0010', '0001', '1100', '1010', '1001',
                '0110', '0101', '0011', '1110', '1101', '1011', '0111',
                '1111']

    elif len(N) == 5:
        # Ellipse parameters
        x = [0, -.2125, -.2375, -.03125, .125]
        y = [0, -.05, -.275, -.35, -.1875]
        w = [1.25, 1.25, 1.25, 1.25, 1.25]
        h = [2, 2, 2, 2, 2]
        a = [0, 71, 154, 37, 108]
        # Text (counts) parameters
        xt = [0, -1, -.7, .5, .9, -.41, .1, .2, .51, -.87, -.67, .69, -.25,
              -.69, .6, -.1, -.51, .54, -.06, .33, .3, -.72, -.79, .63, -.48,
              -.38, .3, .49, -.13, -.69, -.1]
        yt = [.8, .1, -1, -1, .05, .55, .55, -.91, .3, -.3, .3, -.25, -1,
              -.65, -.65, .5, .43, 0, -.95, .4, -.78, 0, -.41, -.47, -.76,
              .33, .27, -.4, -.82, -.25, -.2]
        keys = ['10000', '01000', '00100', '00010', '00001', '11000',
                '10100', '10010', '10001', '01100', '01010', '01001',
                '00110', '00101', '00011', '11100', '11010', '11001',
                '10110', '10101', '10011', '01110', '01101', '01011',
                '00111', '11110', '11101', '11011', '10111', '01111',
                '11111']

    else:
        # Kept as a returned message (not an exception) for backwards
        # compatibility; this branch is also reached with fewer than two
        # sets.
        return 'The maximum number of sets supported is 5.'

    ssets = subsets(N)

    # Subset counts
    text = dict(zip(ssets.keys(), map(len, ssets.values())))

    if pct:
        total = float(sum(text.values()))
        # The dict view has to be materialised into a list: np.array() on
        # a view yields a 0-d object array and the arithmetic then fails.
        shares = np.round(100 * np.array(list(text.values())) / total,
                          decimals=2)
        text = dict(zip(text.keys(), shares))

    fig, ax = plt.subplots(figsize=figsize)

    for i in range(len(N)):
        ellipse(ax, x[i], y[i], w[i], h[i], a[i], alpha=.25, color=c[i],
                label='%s (%d)' % (labels[i], len(N[i])) if sizes
                else labels[i])

    # One text per region, placed at its hand-tuned position
    for key, tx, ty in zip(keys, xt, yt):
        ax.text(tx, ty, text[key], fontdict={'ha': 'center'})

    ax.set_xlim(-1.5, 1.5)
    ax.set_ylim(-1.5, 1.5)
    ax.set_title(title)
    ax.legend()
    ax.axis('off')

    fig.tight_layout()

    if filename:
        fig.savefig(filename)

    else:
        return fig
def test_two_sets_int(self):
    '''
    Checks ``iterables.subsets`` on two sets of integers sharing a
    single element.

    Keys of the expected dictionary are membership strings with one
    character per input set (``'1'`` when the elements belong to that
    set).
    '''

    groups = [{0, 1, 2}, {2, 3, 4}]

    expected = {
        '10': {0, 1},
        '01': {3, 4},
        '11': {2},
    }

    self.assertDictEqual(iterables.subsets(groups), expected)