Example #1
0
def _homogenize_dict(frames, intersect=True, dtype=None):
    """
    Conform set of DataFrame-like objects to either an intersection
    of indices / columns or a union.

    Parameters
    ----------
    frames : dict
    intersect : boolean, default True

    Returns
    -------
    dict of aligned frames, index, columns
    """
    result = {}

    adj_frames = {}
    for k, v in frames.iteritems():
        if isinstance(v, dict):
            adj_frames[k] = DataFrame(v)
        else:
            adj_frames[k] = v

    all_indexes = [df.index for df in adj_frames.values()]
    all_columns = [df.columns for df in adj_frames.values()]

    index = _get_combined_index(all_indexes, intersect=intersect)
    columns = _get_combined_index(all_columns, intersect=intersect)

    for key, frame in adj_frames.iteritems():
        result[key] = frame.reindex(index=index, columns=columns,
                                    copy=False)

    return result, index, columns
Example #2
0
    def _get_comb_axis(self, i):
        if self._is_series:
            all_indexes = [x.index for x in self.objs]
        else:
            all_indexes = [x._data.axes[i] for x in self.objs]

        return _get_combined_index(all_indexes, intersect=self.intersect)
Example #3
0
def _extract_axis(data, axis=0, intersect=False):
    if len(data) == 0:
        index = Index([])
    elif len(data) > 0:
        raw_lengths = []
        indexes = []

        have_raw_arrays = False
        have_frames = False

        for v in data.values():
            if isinstance(v, DataFrame):
                have_frames = True
                indexes.append(v._get_axis(axis))
            else:
                have_raw_arrays = True
                raw_lengths.append(v.shape[axis])

        if have_frames:
            index = _get_combined_index(indexes, intersect=intersect)

        if have_raw_arrays:
            lengths = list(set(raw_lengths))
            if len(lengths) > 1:
                raise ValueError('ndarrays must match shape on axis %d' % axis)

            if have_frames:
                assert(lengths[0] == len(index))
            else:
                index = Index(np.arange(lengths[0]))

    return _ensure_index(index)
Example #4
0
    def _get_comb_axis(self, i):
        if self._is_series:
            all_indexes = [x.index for x in self.objs]
        else:
            all_indexes = [x._data.axes[i] for x in self.objs]

        return _get_combined_index(all_indexes, intersect=self.intersect)
Example #5
0
def _extract_axis(data, axis=0, intersect=False):
    if len(data) == 0:
        index = Index([])
    elif len(data) > 0:
        raw_lengths = []
        indexes = []

        have_raw_arrays = False
        have_frames = False

        for v in data.values():
            if isinstance(v, DataFrame):
                have_frames = True
                indexes.append(v._get_axis(axis))
            else:
                have_raw_arrays = True
                raw_lengths.append(v.shape[axis])

        if have_frames:
            index = _get_combined_index(indexes, intersect=intersect)

        if have_raw_arrays:
            lengths = list(set(raw_lengths))
            if len(lengths) > 1:
                raise ValueError('ndarrays must match shape on axis %d' % axis)

            if have_frames:
                assert (lengths[0] == len(index))
            else:
                index = Index(np.arange(lengths[0]))

    return _ensure_index(index)
Example #6
0
    def _get_new_axes(self):
        ndim = self.objs[0].ndim
        new_axes = [None] * ndim

        if self.ignore_index:
            concat_axis = None
        else:
            concat_axis = self._get_concat_axis()

        new_axes[self.axis] = concat_axis

        if self.join_axes is None:
            for i in range(ndim):
                if i == self.axis:
                    continue
                all_indexes = [x._data.axes[i] for x in self.objs]
                comb_axis = _get_combined_index(all_indexes,
                                                intersect=self.intersect)
                new_axes[i] = comb_axis

        else:
            assert(len(self.join_axes) == ndim - 1)

            # ufff...
            indices = range(ndim)
            indices.remove(self.axis)

            for i, ax in zip(indices, self.join_axes):
                new_axes[i] = ax

        return new_axes
Example #7
0
    def _get_new_axes(self):
        ndim = self.objs[0].ndim
        new_axes = [None] * ndim

        if self.ignore_index:
            concat_axis = None
        else:
            concat_axis = _concat_indexes([x._data.axes[self.axis]
                                           for x in self.objs])
            self._maybe_check_integrity(concat_axis)

        new_axes[self.axis] = concat_axis

        if self.join_axes is None:
            for i in range(ndim):
                if i == self.axis:
                    continue
                all_indexes = [x._data.axes[i] for x in self.objs]
                comb_axis = _get_combined_index(all_indexes,
                                                intersect=self.intersect)
                new_axes[i] = comb_axis

        else:
            assert(len(self.join_axes) == ndim - 1)

            # ufff...
            indices = range(ndim)
            indices.remove(self.axis)

            for i, ax in zip(indices, self.join_axes):
                new_axes[i] = ax

        return new_axes
Example #8
0
    def _get_comb_axis(self, i):
        if self._is_series:
            all_indexes = [x.index for x in self.objs]
        else:
            try:
                all_indexes = [x._data.axes[i] for x in self.objs]
            except IndexError:
                types = [type(x).__name__ for x in self.objs]
                raise TypeError("Cannot concatenate list of %s" % types)

        return _get_combined_index(all_indexes, intersect=self.intersect)
Example #9
0
    def _extract_axis(self, data, axis=0, intersect=False):

        index = None
        if len(data) == 0:
            index = Index([])
        elif len(data) > 0:
            raw_lengths = []
            indexes = []

        have_raw_arrays = False
        have_frames = False

        for v in data.values():
            if isinstance(v, self._constructor_sliced):
                have_frames = True
                indexes.append(v._get_axis(axis))
            elif v is not None:
                have_raw_arrays = True
                raw_lengths.append(v.shape[axis])

        if have_frames:
            index = _get_combined_index(indexes, intersect=intersect)

        if have_raw_arrays:
            lengths = list(set(raw_lengths))
            if len(lengths) > 1:
                raise ValueError('ndarrays must match shape on axis %d' % axis)

            if have_frames:
                if lengths[0] != len(index):
                    raise AssertionError('Length of data and index must match')
            else:
                index = Index(np.arange(lengths[0]))

        if index is None:
            index = Index([])

        return _ensure_index(index)
Example #10
0
    def _extract_axis(self, data, axis=0, intersect=False):

        index = None
        if len(data) == 0:
            index = Index([])
        elif len(data) > 0:
            raw_lengths = []
            indexes = []

        have_raw_arrays = False
        have_frames = False

        for v in data.values():
            if isinstance(v, self._constructor_sliced):
                have_frames = True
                indexes.append(v._get_axis(axis))
            elif v is not None:
                have_raw_arrays = True
                raw_lengths.append(v.shape[axis])

        if have_frames:
            index = _get_combined_index(indexes, intersect=intersect)

        if have_raw_arrays:
            lengths = list(set(raw_lengths))
            if len(lengths) > 1:
                raise ValueError('ndarrays must match shape on axis %d' % axis)

            if have_frames:
                if lengths[0] != len(index):
                    raise AssertionError('Length of data and index must match')
            else:
                index = Index(np.arange(lengths[0]))

        if index is None:
            index = Index([])

        return _ensure_index(index)
Example #11
0
def test_get_combined_index():
    from pandas.core.index import _get_combined_index, NULL_INDEX

    result = _get_combined_index([])
    assert(result is NULL_INDEX)
Example #12
0
def crosstab(index, columns, values=None, rownames=None, colnames=None,
             aggfunc=None, margins=False, margins_name='All', dropna=True,
             normalize=False):
    """
    Compute a simple cross-tabulation of two (or more) factors. By default
    computes a frequency table of the factors unless an array of values and an
    aggregation function are passed

    Parameters
    ----------
    index : array-like, Series, or list of arrays/Series
        Values to group by in the rows
    columns : array-like, Series, or list of arrays/Series
        Values to group by in the columns
    values : array-like, optional
        Array of values to aggregate according to the factors.
        Requires `aggfunc` be specified.
    aggfunc : function, optional
        If specified, requires `values` be specified as well
    rownames : sequence, default None
        If passed, must match number of row arrays passed
    colnames : sequence, default None
        If passed, must match number of column arrays passed
    margins : boolean, default False
        Add row/column margins (subtotals)
    margins_name : string, default 'All'
        Name of the row / column that will contain the totals
        when margins is True.

        .. versionadded:: 0.21.0

    dropna : boolean, default True
        Do not include columns whose entries are all NaN
    normalize : boolean, {'all', 'index', 'columns'}, or {0,1}, default False
        Normalize by dividing all values by the sum of values.

        - If passed 'all' or `True`, will normalize over all values.
        - If passed 'index' will normalize over each row.
        - If passed 'columns' will normalize over each column.
        - If margins is `True`, will also normalize margin values.

        .. versionadded:: 0.18.1


    Notes
    -----
    Any Series passed will have their name attributes used unless row or column
    names for the cross-tabulation are specified.

    Any input passed containing Categorical data will have **all** of its
    categories included in the cross-tabulation, even if the actual data does
    not contain any instances of a particular category.

    In the event that there aren't overlapping indexes an empty DataFrame will
    be returned.

    Examples
    --------
    >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar",
    ...               "bar", "bar", "foo", "foo", "foo"], dtype=object)
    >>> b = np.array(["one", "one", "one", "two", "one", "one",
    ...               "one", "two", "two", "two", "one"], dtype=object)
    >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny",
    ...               "shiny", "dull", "shiny", "shiny", "shiny"],
    ...               dtype=object)

    >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
    ... # doctest: +NORMALIZE_WHITESPACE
    b   one        two
    c   dull shiny dull shiny
    a
    bar    1     2    1     0
    foo    2     2    1     2

    >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
    >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
    >>> crosstab(foo, bar)  # 'c' and 'f' are not represented in the data,
    ...                     # but they still will be counted in the output
    ... # doctest: +SKIP
    col_0  d  e  f
    row_0
    a      1  0  0
    b      0  1  0
    c      0  0  0

    Returns
    -------
    crosstab : DataFrame
    """

    index = com._maybe_make_list(index)
    columns = com._maybe_make_list(columns)

    rownames = _get_names(index, rownames, prefix='row')
    colnames = _get_names(columns, colnames, prefix='col')

    obs_idxes = [obj.index for objs in (index, columns) for obj in objs
                 if hasattr(obj, 'index')]
    if obs_idxes:
        common_idx = _get_combined_index(obs_idxes, intersect=True)
    else:
        common_idx = None

    data = {}
    data.update(zip(rownames, index))
    data.update(zip(colnames, columns))

    if values is None and aggfunc is not None:
        raise ValueError("aggfunc cannot be used without values.")

    if values is not None and aggfunc is None:
        raise ValueError("values cannot be used without an aggfunc.")

    df = DataFrame(data, index=common_idx)
    if values is None:
        df['__dummy__'] = 0
        kwargs = {'aggfunc': len, 'fill_value': 0}
    else:
        df['__dummy__'] = values
        kwargs = {'aggfunc': aggfunc}

    table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
                           margins=margins, margins_name=margins_name,
                           dropna=dropna, **kwargs)

    # Post-process
    if normalize is not False:
        table = _normalize(table, normalize=normalize, margins=margins,
                           margins_name=margins_name)

    return table
Example #13
0
def test_get_combined_index():
    from pandas.core.index import _get_combined_index, NULL_INDEX

    result = _get_combined_index([])
    assert (result is NULL_INDEX)
Example #14
0
def test_get_combined_index():
    from pandas.core.index import _get_combined_index

    result = _get_combined_index([])
    assert result.equals(Index([]))