Ejemplo n.º 1
0
    def test_booleanindex(self):
        # Selecting with a boolean mask must give an index whose cached
        # indexMap equals a location map built from scratch.
        mask = np.repeat(True, len(self.strIndex)).astype(bool)
        mask[5:30:2] = False

        # Exercise both mask flavours: the ndarray itself, then a plain list.
        for selector in (mask, list(mask)):
            picked = self.strIndex[selector]
            tm.assert_dict_equal(tseries.map_indices_object(picked),
                                 picked.indexMap)
Ejemplo n.º 2
0
    def test_booleanindex(self):
        # All-True mask with every other position in [5, 30) switched off.
        keep = np.repeat(True, len(self.strIndex)).astype(bool)
        keep[5:30:2] = False

        def check(selector):
            # The sub-index's indexMap must match a freshly computed map.
            chosen = self.strIndex[selector]
            rebuilt = tseries.map_indices_object(chosen)
            tm.assert_dict_equal(rebuilt, chosen.indexMap)

        check(keep)        # ndarray mask
        check(list(keep))  # same mask as a plain Python list
Ejemplo n.º 3
0
    def indexMap(self):
        """Return the cached ``{label -> location}`` mapping.

        The mapping is built with ``lib.map_indices_object`` on first access
        and stored on ``self._indexMap``. At build time ``self._integrity``
        records whether the mapping has as many entries as the index itself.

        Raises
        ------
        Exception
            If ``self._integrity`` is false (the error message attributes
            this to duplicate values in the index).
        """
        mapping = self._indexMap
        if mapping is None:
            mapping = lib.map_indices_object(self)
            self._indexMap = mapping
            # Fewer map entries than labels means some labels collided.
            self._integrity = len(mapping) == len(self)

        if self._integrity:
            return self._indexMap
        raise Exception("Index cannot contain duplicate values!")
Ejemplo n.º 4
0
    def _read_panel_table(self, group, where=None):
        """Load the table stored on ``group`` and return it in wide form.

        Parameters
        ----------
        group : object exposing a ``table`` attribute
        where : optional
            Passed through unchanged to ``Selection``.

        Returns
        -------
        The result of ``LongPanel.to_wide()``; when the selection carries a
        column filter, the result is reindexed along the minor axis to the
        sorted intersection of its minor axis and that filter.

        Notes
        -----
        If the long panel is not ``consistent`` (the message printed below
        describes this as duplicate entries), rows are re-selected through
        ``lib.map_indices_object`` / ``lib.merge_indexer_object`` on the
        (index, column) tuples before widening.
        """
        from pandas.core.common import _asarray_tuplesafe

        table = group.table

        # run the query against the table
        sel = Selection(table, where)
        sel.select()
        attrs = table._v_attrs
        fields = attrs.fields

        minor_labels = _maybe_convert(sel.values['column'],
                                      attrs.columns_kind)
        major_labels = _maybe_convert(sel.values['index'], attrs.index_kind)

        # rebuild the long-format panel from the selected rows
        long_index = MultiIndex.from_arrays([major_labels, minor_labels])
        long_panel = LongPanel(sel.values['values'], index=long_index,
                               columns=fields)

        if long_panel.consistent:
            wide = long_panel.sortlevel(level=0).to_wide()
        else:
            if not self._quiet:  # pragma: no cover
                message = ('Duplicate entries in table, taking most recently '
                           'appended')
                print(message)

            # need a better algorithm
            tuples = long_index.get_tuple_index()
            locations = lib.map_indices_object(tuples)
            distinct = _asarray_tuplesafe(lib.fast_unique(tuples))

            # one row position per distinct (index, column) tuple
            keep = lib.merge_indexer_object(distinct, locations)

            kept_index = long_index.take(keep)
            kept_values = long_panel.values.take(keep, axis=0)

            deduped = LongPanel(kept_values, index=kept_index,
                                columns=long_panel.columns)
            wide = deduped.to_wide()

        if not sel.column_filter:
            return wide

        wanted_minor = sorted(set(wide.minor_axis) & sel.column_filter)
        return wide.reindex(minor=wanted_minor)
Ejemplo n.º 5
0
    def _read_panel_table(self, group, where=None):
        """Read ``group.table`` through a ``Selection`` and widen the result.

        ``where`` is forwarded to ``Selection`` untouched. The selected
        ``'index'`` and ``'column'`` values are converted with
        ``_maybe_convert`` using the kinds stored in the table attributes,
        combined into a ``MultiIndex`` and wrapped in a ``LongPanel``.

        A consistent long panel is sorted on level 0 and widened directly.
        Otherwise (reported to the user as duplicate entries unless
        ``self._quiet`` is set) rows are first re-selected by unique
        (index, column) tuple.

        Returns the widened panel, restricted to the sorted intersection of
        its minor axis with ``sel.column_filter`` when that filter is truthy.
        """
        from pandas.core.common import _asarray_tuplesafe

        table = group.table

        # build and execute the selection
        sel = Selection(table, where)
        sel.select()
        fields = table._v_attrs.fields

        col_kind_source = sel.values['column']
        columns = _maybe_convert(col_kind_source, table._v_attrs.columns_kind)
        idx_kind_source = sel.values['index']
        index = _maybe_convert(idx_kind_source, table._v_attrs.index_kind)

        # reconstruct the long representation
        long_index = MultiIndex.from_arrays([index, columns])
        panel_long = LongPanel(sel.values['values'], index=long_index,
                               columns=fields)

        if not panel_long.consistent:
            if not self._quiet:  # pragma: no cover
                print('Duplicate entries in table, taking most recently '
                      'appended')

            # need a better algorithm
            as_tuples = long_index.get_tuple_index()
            tuple_to_loc = lib.map_indices_object(as_tuples)

            uniques = lib.fast_unique(as_tuples)
            uniques = _asarray_tuplesafe(uniques)

            positions = lib.merge_indexer_object(uniques, tuple_to_loc)

            trimmed_index = long_index.take(positions)
            trimmed_values = panel_long.values.take(positions, axis=0)

            panel_long = LongPanel(trimmed_values, index=trimmed_index,
                                   columns=panel_long.columns)
            result = panel_long.to_wide()
        else:
            panel_long = panel_long.sortlevel(level=0)
            result = panel_long.to_wide()

        if sel.column_filter:
            minor_subset = sorted(set(result.minor_axis) & sel.column_filter)
            result = result.reindex(minor=minor_subset)
        return result
Ejemplo n.º 6
0
def test_pad_backfill_object_segfault():
    # Pad/backfill between an empty object array and a one-element one must
    # return well-formed int32 indexers (the test name says these inputs
    # used to segfault).
    from datetime import datetime
    empty = np.array([], dtype='O')
    single = np.array([datetime(2010, 12, 31)], dtype='O')

    # (source, target, expected indexer) in the order they are exercised
    cases = [(empty, single, [-1]),
             (single, empty, [])]

    for func_name in ('pad_object', 'backfill_object'):
        for source, target, wanted in cases:
            fill = getattr(lib, func_name)
            result = fill(source, target, lib.map_indices_object(source),
                          lib.map_indices_object(target))
            expected = np.array(wanted, dtype='i4')
            assert np.array_equal(result, expected)
Ejemplo n.º 7
0
def test_pad_backfill_object_segfault():
    # Runs lib.pad_object / lib.backfill_object in both directions between
    # an empty object array and a single-datetime one and checks the
    # resulting indexers.
    from datetime import datetime
    no_labels = np.array([], dtype='O')
    one_label = np.array([datetime(2010, 12, 31)], dtype='O')

    def check(fill, source, target, wanted):
        # Each side gets its own location map, as the fill routines expect.
        got = fill(source, target, lib.map_indices_object(source),
                   lib.map_indices_object(target))
        assert np.array_equal(got, np.array(wanted, dtype='i4'))

    check(lib.pad_object, no_labels, one_label, [-1])
    check(lib.pad_object, one_label, no_labels, [])
    check(lib.backfill_object, no_labels, one_label, [-1])
    check(lib.backfill_object, one_label, no_labels, [])
Ejemplo n.º 8
0
    def _read_panel_table(self, group, where=None):
        """Read ``group.table`` through a ``Selection`` and return a panel.

        Parameters
        ----------
        group : object exposing a ``table`` attribute
        where : optional
            Forwarded unchanged to ``Selection`` together with the table's
            stored ``index_kind``.

        Returns
        -------
        A ``Panel`` built directly from a ``BlockManager`` when every
        (index, column) pair in the selection is unique, otherwise the
        result of ``DataFrame.to_panel()`` on re-selected rows. When
        ``sel.column_filter`` is truthy the result is reindexed along the
        minor axis to the sorted intersection of its minor axis and the
        filter.
        """
        from pandas.core.index import unique_int64, Factor
        from pandas.core.common import _asarray_tuplesafe
        from pandas.core.internals import BlockManager
        from pandas.core.reshape import block2d_to_block3d

        table = group.table

        # create the selection
        sel = Selection(table, where, table._v_attrs.index_kind)
        sel.select()
        fields = table._v_attrs.fields

        columns = _maybe_convert(sel.values['column'],
                                 table._v_attrs.columns_kind)
        index = _maybe_convert(sel.values['index'],
                               table._v_attrs.index_kind)
        values = sel.values['values']

        major = Factor(index)
        minor = Factor(columns)

        # Collapse each (major, minor) label pair into one integer key so
        # that duplicates can be detected and rows sorted in a single pass.
        J, K = len(major.levels), len(minor.levels)
        key = major.labels * K + minor.labels

        if len(unique_int64(key)) == len(key):
            sorter, _ = lib.groupsort_indexer(key, J * K)

            # the data need to be sorted
            sorted_values = values.take(sorter, axis=0)
            major_labels = major.labels.take(sorter)
            minor_labels = minor.labels.take(sorter)

            block = block2d_to_block3d(sorted_values, fields, (J, K),
                                       major_labels, minor_labels)

            mgr = BlockManager([block], [block.items,
                                         major.levels, minor.levels])
            wp = Panel(mgr)
        else:
            if not self._quiet:  # pragma: no cover
                print('Duplicate entries in table, taking most recently '
                      'appended')

            # reconstruct
            long_index = MultiIndex.from_arrays([index, columns])
            lp = DataFrame(values, index=long_index, columns=fields)

            # need a better algorithm
            tuple_index = long_index.get_tuple_index()

            unique_tuples = lib.fast_unique(tuple_index)
            unique_tuples = _asarray_tuplesafe(unique_tuples)

            # One row position per distinct tuple.
            # NOTE(review): which duplicate wins depends on ``match``;
            # the message above says the most recently appended -- confirm.
            indexer = match(unique_tuples, tuple_index)

            new_index = long_index.take(indexer)
            new_values = lp.values.take(indexer, axis=0)

            lp = DataFrame(new_values, index=new_index, columns=lp.columns)
            wp = lp.to_panel()

        if sel.column_filter:
            new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
            wp = wp.reindex(minor=new_minor)
        return wp
Ejemplo n.º 9
0
def lookup_python(values):
    """Time ``lib.merge_indexer_object`` on ``values`` against its own map.

    The location map is built once, outside the timed callable; the return
    value is whatever ``_timeit`` returns.
    """
    location_map = lib.map_indices_object(values)

    def run_lookup():
        return lib.merge_indexer_object(values, location_map)

    return _timeit(run_lookup)
Ejemplo n.º 10
0
def map_locations_python_object():
    """Time ``lib.map_indices_object`` over ``string_test_data(N)``.

    The test data is generated before timing starts; the return value is
    whatever ``_timeit`` returns.
    """
    data = string_test_data(N)

    def build_map():
        return lib.map_indices_object(data)

    return _timeit(build_map)
Ejemplo n.º 11
0
def lookup_python(values):
    """Benchmark looking ``values`` up in a map built from ``values``.

    Only the ``lib.merge_indexer_object`` call is handed to ``_timeit``;
    building the map with ``lib.map_indices_object`` happens beforehand.
    """
    mapping = lib.map_indices_object(values)

    def timed():
        return lib.merge_indexer_object(values, mapping)

    result = _timeit(timed)
    return result
Ejemplo n.º 12
0
def map_locations_python_object():
    """Benchmark ``lib.map_indices_object`` on ``string_test_data(N)``.

    Returns the value produced by ``_timeit`` for that single call.
    """
    sample = string_test_data(N)

    def timed():
        return lib.map_indices_object(sample)

    result = _timeit(timed)
    return result
Ejemplo n.º 13
0
    def _read_panel_table(self, group, where=None):
        """Read ``group.table`` through a ``Selection`` and return a panel.

        ``where`` is forwarded to ``Selection`` untouched. The selected
        index/column values are converted with ``_maybe_convert`` and
        factorized; each (major, minor) label pair is collapsed into a
        single integer key.

        * If all keys are unique, rows are sorted by key and a ``Panel`` is
          assembled directly from a ``BlockManager``.
        * Otherwise (reported as duplicate entries unless ``self._quiet``
          is set) rows are re-selected by unique (index, column) tuple and
          converted with ``DataFrame.to_panel()``.

        When ``sel.column_filter`` is truthy, the result is reindexed along
        the minor axis to the sorted intersection of its minor axis and the
        filter.
        """
        from pandas.core.index import unique_int64, Factor
        from pandas.core.common import _asarray_tuplesafe
        from pandas.core.internals import BlockManager
        from pandas.core.reshape import block2d_to_block3d

        table = group.table

        # run the query against the table
        sel = Selection(table, where)
        sel.select()
        attrs = table._v_attrs
        fields = attrs.fields

        minor_raw = _maybe_convert(sel.values['column'], attrs.columns_kind)
        major_raw = _maybe_convert(sel.values['index'], attrs.index_kind)
        data = sel.values['values']

        major = Factor(major_raw)
        minor = Factor(minor_raw)

        n_major = len(major.levels)
        n_minor = len(minor.levels)
        # one integer per (major, minor) label pair
        flat_key = major.labels * n_minor + minor.labels

        has_duplicates = len(unique_int64(flat_key)) != len(flat_key)
        if not has_duplicates:
            order, _ = lib.groupsort_indexer(flat_key, n_major * n_minor)

            # the data need to be sorted
            ordered_data = data.take(order, axis=0)
            ordered_major = major.labels.take(order)
            ordered_minor = minor.labels.take(order)

            block = block2d_to_block3d(ordered_data, fields,
                                       (n_major, n_minor),
                                       ordered_major, ordered_minor)

            axes = [block.items, major.levels, minor.levels]
            wide = Panel(BlockManager([block], axes))
        else:
            if not self._quiet:  # pragma: no cover
                message = ('Duplicate entries in table, taking most recently '
                           'appended')
                print(message)

            # reconstruct the long-format frame
            long_index = MultiIndex.from_arrays([major_raw, minor_raw])
            frame = DataFrame(data, index=long_index, columns=fields)

            # need a better algorithm
            tuples = long_index.get_tuple_index()
            locations = lib.map_indices_object(tuples)
            distinct = _asarray_tuplesafe(lib.fast_unique(tuples))

            # one row position per distinct (index, column) tuple
            keep = lib.merge_indexer_object(distinct, locations)

            kept_index = long_index.take(keep)
            kept_values = frame.values.take(keep, axis=0)

            frame = DataFrame(kept_values, index=kept_index,
                              columns=frame.columns)
            wide = frame.to_panel()

        if not sel.column_filter:
            return wide

        wanted_minor = sorted(set(wide.minor_axis) & sel.column_filter)
        return wide.reindex(minor=wanted_minor)