Exemple #1
0
def panel_index(time, panels, names=['time', 'panel']):
    """
    Build a two-level MultiIndex (time, panel) for a panel-like DataFrame.

    Parameters
    ----------
    time : array-like
        Values for the time level; does not have to repeat.
    panels : array-like
        Values for the panel level; does not have to repeat.
    names : list, optional
        Names given to the two levels, passed through to ``MultiIndex``
        unchanged.

    Returns
    -------
    multi_index : MultiIndex
        The first level holds the time values, the second the panels.

    Notes
    -----
    Both inputs are first passed through ``_ensure_like_indices``
    (presumably to bring them to a common length -- confirm against that
    helper) and are then factorized independently.

    Examples
    --------
    >>> years = range(1960,1963)
    >>> panels = ['A', 'B', 'C']
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1961, 'A'), (1962, 'A'), (1960, 'B'),
                (1961, 'B'), (1962, 'B'), (1960, 'C'), (1961, 'C'),
                (1962, 'C')], dtype=object)

    or

    >>> import numpy as np
    >>> years = np.repeat(range(1960,1963), 3)
    >>> panels = np.tile(['A', 'B', 'C'], 3)
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1960, 'B'), (1960, 'C'), (1961, 'A'),
                (1961, 'B'), (1961, 'C'), (1962, 'A'), (1962, 'B'),
                (1962, 'C')], dtype=object)
    """
    time, panels = _ensure_like_indices(time, panels)

    # Factorize the axes in level order: time first, panels second.
    factors = [Factor.from_array(axis) for axis in (time, panels)]

    label_arrays = [factor.labels for factor in factors]
    level_arrays = [factor.levels for factor in factors]
    return MultiIndex(level_arrays, label_arrays, sortorder=None,
                      names=names)
Exemple #2
0
def _bins_to_cuts(x,
                  bins,
                  right=True,
                  labels=None,
                  retbins=False,
                  precision=3,
                  name=None):
    if name is None and isinstance(x, Series):
        name = x.name
    x = np.asarray(x)

    side = 'left' if right else 'right'
    ids = bins.searchsorted(x, side=side)

    mask = com.isnull(x)
    has_nas = mask.any()

    if labels is not False:
        if labels is None:
            labels = bins
        else:
            if len(labels) != len(bins):
                raise ValueError('labels must be same length as bins')

        fmt = lambda v: _format_label(v, precision=precision)
        if right:
            levels = [
                '(%s, %s]' % (fmt(a), fmt(b))
                for a, b in zip(labels, labels[1:])
            ]
        else:
            levels = [
                '[%s, %s)' % (fmt(a), fmt(b))
                for a, b in zip(labels, labels[1:])
            ]

        levels = np.asarray(levels, dtype=object)

        if has_nas:
            np.putmask(ids, mask, 0)

        fac = Factor(ids - 1, levels, name=name)
    else:
        fac = ids - 1
        if has_nas:
            fac = ids.astype(np.float64)
            np.putmask(fac, mask, np.nan)

    if not retbins:
        return fac

    return fac, bins
Exemple #3
0
    def _read_panel_table(self, group, where=None):
        """
        Rebuild a Panel from the long-format table stored under ``group``.

        Parameters
        ----------
        group : object
            Node exposing a ``table`` child whose ``_v_attrs`` carry
            ``fields``, ``index_kind`` and ``columns_kind``.
        where : optional
            Selection criteria, forwarded unchanged to ``Selection``.

        Returns
        -------
        wp : Panel
            Panel built from the selected rows. If the selection has a
            column filter, the minor axis is reindexed to the sorted
            intersection of the minor axis and that filter.
        """
        table = group.table

        # create the selection
        sel = Selection(table, where, table._v_attrs.index_kind)
        sel.select()
        fields = table._v_attrs.fields

        columns = _maybe_convert(sel.values['column'],
                                 table._v_attrs.columns_kind)
        index = _maybe_convert(sel.values['index'], table._v_attrs.index_kind)
        values = sel.values['values']

        major = Factor.from_array(index)
        minor = Factor.from_array(columns)

        # Encode every (major, minor) label pair as one integer so that
        # duplicates can be detected with a single uniqueness check.
        J, K = len(major.levels), len(minor.levels)
        key = major.labels * K + minor.labels

        if len(unique(key)) == len(key):
            # Fast path: every (index, column) pair occurs exactly once.
            sorter, _ = lib.groupsort_indexer(com._ensure_int64(key), J * K)
            sorter = com._ensure_platform_int(sorter)

            # the data need to be sorted
            sorted_values = values.take(sorter, axis=0)
            major_labels = major.labels.take(sorter)
            minor_labels = minor.labels.take(sorter)

            block = block2d_to_block3d(sorted_values, fields, (J, K),
                                       major_labels, minor_labels)

            mgr = BlockManager([block],
                               [block.ref_items, major.levels, minor.levels])
            wp = Panel(mgr)
        else:
            # Slow path: at least one (index, column) pair is repeated.
            if not self._quiet:  # pragma: no cover
                print('Duplicate entries in table, taking most recently '
                      'appended')

            # reconstruct
            long_index = MultiIndex.from_arrays([index, columns])
            lp = DataFrame(values, index=long_index, columns=fields)

            # need a better algorithm
            tuple_index = long_index.get_tuple_index()

            unique_tuples = lib.fast_unique(tuple_index)
            unique_tuples = _asarray_tuplesafe(unique_tuples)

            # NOTE(review): which occurrence of a duplicated tuple is kept
            # depends on ``match``; the message above says the most
            # recently appended one -- confirm against ``match``.
            indexer = match(unique_tuples, tuple_index)
            indexer = com._ensure_platform_int(indexer)

            new_index = long_index.take(indexer)
            new_values = lp.values.take(indexer, axis=0)

            lp = DataFrame(new_values, index=new_index, columns=lp.columns)
            wp = lp.to_panel()

        if sel.column_filter:
            new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
            wp = wp.reindex(minor=new_minor)
        return wp
Exemple #4
0
    def _read_panel_table(self, group, where=None):
        """
        Read the long-format table under ``group`` back into a Panel.

        Parameters
        ----------
        group : object
            Node exposing a ``table`` child whose ``_v_attrs`` carry
            ``fields``, ``index_kind`` and ``columns_kind``.
        where : optional
            Selection criteria, forwarded unchanged to ``Selection``.

        Returns
        -------
        wp : Panel
            Panel built from the selected rows. If the selection has a
            column filter, the minor axis is reindexed to the sorted
            intersection of the minor axis and that filter.
        """
        table = group.table

        # create the selection
        sel = Selection(table, where, table._v_attrs.index_kind)
        sel.select()
        fields = table._v_attrs.fields

        columns = _maybe_convert(sel.values['column'],
                                 table._v_attrs.columns_kind)
        index = _maybe_convert(sel.values['index'], table._v_attrs.index_kind)
        values = sel.values['values']

        major = Factor.from_array(index)
        minor = Factor.from_array(columns)

        # A single integer per (major, minor) label pair makes duplicate
        # detection a plain uniqueness check.
        J, K = len(major.levels), len(minor.levels)
        key = major.labels * K + minor.labels

        if len(unique(key)) == len(key):
            # No repeated (index, column) pair: build the block directly.
            sorter, _ = lib.groupsort_indexer(com._ensure_int64(key), J * K)
            sorter = com._ensure_platform_int(sorter)

            # the data need to be sorted
            sorted_values = values.take(sorter, axis=0)
            major_labels = major.labels.take(sorter)
            minor_labels = minor.labels.take(sorter)

            block = block2d_to_block3d(sorted_values, fields, (J, K),
                                       major_labels, minor_labels)

            mgr = BlockManager([block], [block.ref_items,
                                         major.levels, minor.levels])
            wp = Panel(mgr)
        else:
            # Repeated (index, column) pairs: de-duplicate via a long frame.
            if not self._quiet:  # pragma: no cover
                print('Duplicate entries in table, taking most recently '
                      'appended')

            # reconstruct
            long_index = MultiIndex.from_arrays([index, columns])
            lp = DataFrame(values, index=long_index, columns=fields)

            # need a better algorithm
            tuple_index = long_index.get_tuple_index()

            unique_tuples = lib.fast_unique(tuple_index)
            unique_tuples = _asarray_tuplesafe(unique_tuples)

            # NOTE(review): the occurrence retained for a duplicated tuple
            # is decided by ``match``; the printed message claims the most
            # recently appended one -- confirm against ``match``.
            indexer = match(unique_tuples, tuple_index)
            indexer = com._ensure_platform_int(indexer)

            new_index = long_index.take(indexer)
            new_values = lp.values.take(indexer, axis=0)

            lp = DataFrame(new_values, index=new_index, columns=lp.columns)
            wp = lp.to_panel()

        if sel.column_filter:
            new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
            wp = wp.reindex(minor=new_minor)
        return wp
Exemple #5
0
    def test_constructor_unsortable(self):
        # Smoke test: an object array mixing integers with a datetime
        # (the "unsortable" input of the test name) must be accepted.
        mixed = np.array([1, 2, 3, datetime.now()], dtype='O')

        # Only construction is exercised; the result is not inspected.
        Factor.from_array(mixed)
Exemple #6
0
 def setUp(self):
     # Shared fixture: a factor over the strings 'a', 'b', 'c' with repeats.
     values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
     self.factor = Factor.from_array(values)
Exemple #7
0
    def test_constructor_unsortable(self):
        # Object-dtype input holding ints alongside a datetime: the test
        # passes as long as building the Factor does not raise.
        unsortable = np.array([1, 2, 3, datetime.now()], dtype='O')

        # The constructed Factor itself is deliberately not checked.
        Factor.from_array(unsortable)
Exemple #8
0
 def setUp(self):
     # Fixture used by the tests: eight string observations, three
     # distinct values.
     observations = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
     self.factor = Factor.from_array(observations)
Exemple #9
0
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """
    Build the MultiIndex for a keyed concatenation.

    The result has one or more outer levels derived from ``keys``,
    followed by the level(s) of the concatenated ``indexes``.

    Parameters
    ----------
    indexes : sequence of Index
        The index of each piece being concatenated.
    keys : sequence
        One key per piece. Tuple keys (or more than one entry in
        ``levels``) produce several outer levels.
    levels : sequence, optional
        Explicit values for the outer level(s). When omitted they are
        derived from ``keys``.
    names : list, optional
        Names for the resulting levels. Defaults to ``None`` for each outer
        level.

    Returns
    -------
    MultiIndex

    Raises
    ------
    ValueError
        If a key is not present in its corresponding level.
    """
    if ((levels is None and isinstance(keys[0], tuple))
            or (levels is not None and len(levels) > 1)):
        # Materialize the transposed keys: ``zipped`` is measured with
        # len() and iterated more than once below, neither of which works
        # on the one-shot iterator that ``zip`` returns on Python 3.
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s' %
                                     (str(key), str(level)))

                # The key's position is repeated once per row of its piece.
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        if len(names) == len(levels):
            names = list(names)
        else:
            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    # All pieces share the same index: the inner labels can simply be tiled.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        # -1 from get_indexer is treated as "key not found in the level".
        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s' %
                             str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
Exemple #10
0
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """
    Construct the MultiIndex used when concatenating pieces under keys.

    Outer level(s) come from ``keys`` (or the explicit ``levels``); the
    level(s) of the concatenated ``indexes`` are appended after them.

    Parameters
    ----------
    indexes : sequence of Index
        The index of each piece being concatenated.
    keys : sequence
        One key per piece. Tuple keys (or more than one entry in
        ``levels``) produce several outer levels.
    levels : sequence, optional
        Explicit values for the outer level(s). When omitted they are
        derived from ``keys``.
    names : list, optional
        Names for the resulting levels. Defaults to ``None`` for each outer
        level.

    Returns
    -------
    MultiIndex

    Raises
    ------
    ValueError
        If a key is not present in its corresponding level.
    """
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        # ``zip`` returns a one-shot iterator on Python 3; it has no len()
        # and would be exhausted after its first pass, so build a list
        # before it is measured and looped over repeatedly.
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s'
                                     % (str(key), str(level)))

                # One copy of the key's position for each row of its piece.
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        if len(names) == len(levels):
            names = list(names)
        else:
            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    # Every piece has the same index, so inner labels are tiled per piece.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        # A -1 entry from get_indexer is treated as a key missing from
        # the level.
        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s'
                             % str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
Exemple #11
0
def _bins_to_cuts(x,
                  bins,
                  right=True,
                  labels=None,
                  retbins=False,
                  precision=3,
                  name=None):
    if name is None and isinstance(x, Series):
        name = x.name
    x = np.asarray(x)

    side = 'left' if right else 'right'
    ids = bins.searchsorted(x, side=side)

    na_mask = com.notnull(x)
    above = na_mask & (ids == len(bins))
    below = na_mask & (ids == 0)

    if above.any():
        raise ValueError('Values fall past last bin: %s' % str(x[above]))

    if below.any():
        raise ValueError('Values fall before first bin: %s' % str(x[below]))

    mask = com.isnull(x)
    has_nas = mask.any()

    if labels is not False:
        if labels is None:
            fmt = lambda v: _format_label(v, precision=precision)
            if right:
                levels = [
                    '(%s, %s]' % (fmt(a), fmt(b))
                    for a, b in zip(bins, bins[1:])
                ]
            else:
                levels = [
                    '[%s, %s)' % (fmt(a), fmt(b))
                    for a, b in zip(bins, bins[1:])
                ]
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError('Bin labels must be one fewer than '
                                 'the number of bin edges')
            levels = labels

        levels = np.asarray(levels, dtype=object)

        if has_nas:
            np.putmask(ids, mask, 0)

        fac = Factor(ids - 1, levels, name=name)
    else:
        fac = ids - 1
        if has_nas:
            fac = ids.astype(np.float64)
            np.putmask(fac, mask, np.nan)

    if not retbins:
        return fac

    return fac, bins