Python subsets Examples, data_tools.iterables.subsets Python Examples

Example #1

0

Show file

File: test_iterables.py Project: siyer32/data_tools

 def test_three_sets_int(self):
     # Each expected key carries one '0'/'1' flag per input set (in input
     # order); its value lists the elements expected in exactly the sets
     # flagged '1'. Empty regions are expected to be reported as well.
     observed = iterables.subsets([{0, 1}, {2, 3}, {1, 3, 4}])
     expected = {
         '100': {0},
         '010': {2},
         '001': {4},
         '110': set(),
         '101': {1},
         '011': {3},
         '111': set(),
     }

     self.assertDictEqual(observed, expected)

Example #2

0

Show file

File: plots.py Project: Nic-Nic/data_tools

def upset_wrap(N, labels=None, drop_empty=False, drop_single=False, **kwargs):
    r'''
    Wrapper for UpSetPlot package. Mostly just generates the Boolean
    multi-indexed ``pandas.Series`` the ``upsetplot.plot`` function
    needs as input.

    * Arguments:
        - *N* [list]: Or any iterable type containing [set] objects.
        - *labels* [list]: Optional, ``None`` by default. Labels for the
          sets following the same order as provided in *N*. If none is
          passed they will be labelled ``'set0'``, ``'set1'`` and so on.
        - *drop_empty* [bool]: Optional, ``False`` by default. Whether
          to remove the empty set intersections from the plot or not.
        - *drop_single* [bool]: Optional, ``False`` by default. Whether
          to drop the non-intersecting sets from the plot.
        - *\*\*kwargs*: Optional. Additional keyword arguments passed to
          ``upsetplot.UpSet`` class.

    * Returns:
        [dict]: Contains the ``matplotlib.axes.Axes`` instances for the
        UpSetPlot figure.
    '''

    ss = subsets(N)
    # Every key is a string of '0'/'1' flags (one per set); convert each
    # into the tuple of booleans used as a row of the multi-index.
    ids = [tuple(bool(int(flag)) for flag in key) for key in ss.keys()]
    counts = [len(members) for members in ss.values()]

    if labels is None:
        labels = ['set%d' % i for i in range(len(N))]

    idx = pd.MultiIndex.from_tuples(ids, names=labels)
    series = pd.Series(counts, index=idx)

    if drop_empty:
        # Filter with a boolean mask instead of writing NaN into the
        # integer series: that assignment forces an int -> float upcast,
        # which recent pandas versions deprecate.
        series = series[series != 0]

    if drop_single:
        # A row belongs to an actual intersection only when more than one
        # of its membership flags is set.
        shared = [
            pos for pos, flags in enumerate(series.index) if sum(flags) > 1
        ]
        series = series.iloc[shared]

    return usp.plot(series, **kwargs)

Example #3

0

Show file

def venn(N, labels=['A', 'B', 'C', 'D', 'E'], c=['C0', 'C1', 'C2', 'C3', 'C4'],
         pct=False, sizes=False, title=None, filename=None, figsize=None):
    '''
    Plots a Venn diagram from a list of sets *N*. Number of sets must be
    between 2 and 5 (inclusive).

    * Arguments:
        - *N* [list]: Or any iterable type containing [set] objects.
        - *labels* [list]: Optional, ``['A', 'B', 'C', 'D', 'E']`` by
          default. Labels for the sets following the same order as
          provided in *N*.
        - *c* [list]: Optional, ``['C0', 'C1', 'C2', 'C3', 'C4']`` by
          default (matplotlib default colors). Any iterable containing
          color arguments tolerated by matplotlib (e.g.: ``['r', 'b']``
          for red and blue). Must contain at least the same number of
          elements as *N* (if more are provided, they will be ignored).
        - *pct* [bool]: Optional, ``False`` by default. Indicates
          whether to show percentages instead of absolute counts.
        - *sizes* [bool]: Optional, ``False`` by default. Whether to
          include the size of the sets in the legend or not.
        - *title* [str]: Optional, ``None`` by default. Defines the plot
          title.
        - *filename* [str]: Optional, ``None`` by default. If passed,
          indicates the file name or path where to store the figure.
          Format must be specified (e.g.: .png, .pdf, etc)
        - *figsize* [tuple]: Optional, ``None`` by default (default
          matplotlib size). Any iterable containing two values denoting
          the figure size (in inches) as [width, height].

    * Returns:
        - [matplotlib.figure.Figure]: The figure object containing the
          Venn diagram, unless *filename* is provided (in that case the
          figure is saved to that path and nothing is returned). If the
          number of sets in *N* is not between 2 and 5, no figure is
          created and a message [str] is returned instead.

    * Example:
        >>> N = [{0, 1}, {2, 3}, {1, 3, 4}] # Sets A, B, C
        >>> venn(N)

        .. image:: ../figures/venn_example.png
           :align: center
           :scale: 100
    '''

    def ellipse(ax, x, y, w, h, a, color, alpha=1, label=None):
        # Draws one set as an ellipse patch on the given axes.
        e = matplotlib.patches.Ellipse(xy=(x, y), width=w, height=h, angle=a,
                                       color=color, alpha=alpha, label=label)

        ax.add_patch(e)

    n_sets = len(N)

    # Hard-coded layouts: for each supported number of sets, the ellipse
    # geometry (centre x/y, width, height, angle) plus the position of the
    # text of every region, listed in the same order as ``keys``.
    if n_sets == 2:
        # Ellipse parameters
        x = [-.25, .25]
        y = [0, 0]
        w = [1, 1]
        h = [1.5, 1.5]
        a = [0, 0]

        # Text (counts) parameters
        xt = [-.5, .5, 0]
        yt = [0, 0, 0]
        keys = ['10', '01', '11']

    elif n_sets == 3:
        # Ellipse parameters
        x = [0, -.25, .25]
        y = [.33, -.33, -.33]
        w = [1, 1, 1]
        h = [1.5, 1.5, 1.5]
        a = [0, 0, 0]

        # Text (counts) parameters
        xt = [0, -.5, .5, -.33, .33, 0, 0]
        yt = [.6, -.5, -.5, .15, .15, -.6, -.1]
        keys = ['100', '010', '001', '110', '101', '011', '111']

    elif n_sets == 4:
        # Ellipse parameters
        x = [-.15, -.35, .15, .35]
        y = [.15, -.25, .15, -.25]
        w = [2, 2, 2, 2]
        h = [1, 1, 1, 1]
        a = [-60, -60, 60, 60]

        # Text (counts) parameters
        xt = [-.5, -.8, .5, .8, -.6, 0, .4, -.4, 0, .6, -.3, .2, .3, -.2, 0]
        yt = [.8, -.1, .8, -.1, .33, .4, -.55, -.55, -.9, .33, 0, -.68, 0,
              -.68, -.33]
        keys = ['1000', '0100', '0010', '0001', '1100', '1010', '1001', '0110',
                '0101', '0011', '1110', '1101', '1011', '0111', '1111']

    elif n_sets == 5:
        # Ellipse parameters
        x = [0, -.2125, -.2375, -.03125, .125]
        y = [0, -.05, -.275, -.35, -.1875]
        w = [1.25, 1.25, 1.25, 1.25, 1.25]
        h = [2, 2, 2, 2, 2]
        a = [0, 71, 154, 37, 108]

        # Text (counts) parameters
        xt = [0, -1, -.7, .5, .9, -.41, .1, .2, .51, -.87, -.67, .69, -.25,
              -.69, .6, -.1, -.51, .54, -.06, .33, .3, -.72, -.79, .63, -.48,
              -.38, .3, .49, -.13, -.69, -.1]
        yt = [.8, .1, -1, -1, .05, .55, .55, -.91, .3, -.3, .3, -.25, -1, -.65,
              -.65, .5, .43, 0, -.95, .4, -.78, 0, -.41, -.47, -.76, .33, .27,
              -.4, -.82, -.25, -.2]
        keys = ['10000', '01000', '00100', '00010', '00001', '11000', '10100',
                '10010', '10001', '01100', '01010', '01001', '00110', '00101',
                '00011', '11100', '11010', '11001', '10110', '10101', '10011',
                '01110', '01101', '01011', '00111', '11110', '11101', '11011',
                '10111', '01111', '11111']

    else:
        # Any other number of sets (including fewer than 2) has no layout;
        # the message is returned rather than raised, as callers may rely
        # on that.
        return 'The maximum number of sets supported is 5.'

    ssets = subsets(N)
    # Subset counts
    text = dict(zip(ssets.keys(), map(len, ssets.values())))

    if pct:
        total = float(sum(text.values()))
        # ``list`` is needed on Python 3: ``dict.values()`` is a view there
        # and NumPy would wrap it in a 0-d object array, which breaks the
        # arithmetic below.
        shares = np.round(100 * np.array(list(text.values())) / total,
                          decimals=2)
        text = dict(zip(text.keys(), shares))

    fig, ax = plt.subplots(figsize=figsize)

    for i in range(n_sets):
        ellipse(ax, x[i], y[i], w[i], h[i], a[i], alpha=.25, color=c[i],
                label='%s (%d)' % (labels[i], len(N[i])) if sizes
                else labels[i])

    # Write the count (or percentage) of every region at its position.
    for x_pos, y_pos, key in zip(xt, yt, keys):
        ax.text(x_pos, y_pos, text[key], fontdict={'ha': 'center'})

    ax.set_xlim(-1.5, 1.5)
    ax.set_ylim(-1.5, 1.5)

    ax.set_title(title)

    ax.legend()

    ax.axis('off')
    fig.tight_layout()

    if filename:
        fig.savefig(filename)

    else:
        return fig

Example #4

0

Show file

File: test_iterables.py Project: siyer32/data_tools

 def test_two_sets_int(self):
     # Expected regions for two overlapping sets: elements only in the
     # first ('10'), only in the second ('01') and in both ('11').
     observed = iterables.subsets([{0, 1, 2}, {2, 3, 4}])
     expected = {
         '10': {0, 1},
         '01': {3, 4},
         '11': {2},
     }

     self.assertDictEqual(observed, expected)