def test_strings(self):
    """Exercise ``algos.match`` on lists of strings.

    Covers the default call (unmatched entries expected as -1) and the
    call with ``np.nan`` as the third argument (unmatched entries expected
    as NaN once wrapped in a Series).
    """
    haystack = ["foo", "bar", "baz"]
    needles = ["bar", "foo", "qux", "foo", "bar", "baz", "qux"]

    # "qux" does not occur in ``haystack``.
    positions = algos.match(needles, haystack)
    self.assert_numpy_array_equal(
        positions, np.array([1, 0, -1, 0, 1, 2, -1])
    )

    # Same lookup, but with np.nan supplied as the third argument.
    positions_with_nan = Series(algos.match(needles, haystack, np.nan))
    tm.assert_series_equal(
        positions_with_nan,
        Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan])),
    )
def test_strings(self):
    """Check ``algos.match`` on string inputs.

    The first assertion covers the default call, where entries of
    ``to_match`` that are absent from ``values`` are expected as -1.  The
    second passes ``np.nan`` as the third argument and expects NaN in
    those slots instead.
    """
    values = ['foo', 'bar', 'baz']
    to_match = ['bar', 'foo', 'qux', 'foo', 'bar', 'baz', 'qux']

    # 'qux' is not present in ``values``.
    result = algos.match(to_match, values)
    expected = np.array([1, 0, -1, 0, 1, 2, -1])
    self.assertTrue(np.array_equal(result, expected))

    # With np.nan as the third argument the result is compared as a Series.
    result = Series(algos.match(to_match, values, np.nan))
    expected = Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan]))
    tm.assert_series_equal(result, expected)
def test_strings(self):
    """Check ``algos.match`` on string inputs using the default call.

    Entries of ``to_match`` that are absent from ``values`` ("qux" here)
    are expected as -1; all others as their position in ``values``.
    """
    values = ["foo", "bar", "baz"]
    to_match = ["bar", "foo", "qux", "foo", "bar", "baz", "qux"]

    result = algos.match(to_match, values)
    expected = np.array([1, 0, -1, 0, 1, 2, -1])
    self.assertTrue(np.array_equal(result, expected))
def test_ints(self):
    """Check ``algos.match`` on integer ndarrays using the default call.

    The value 3 in ``to_match`` does not occur in ``values`` and is
    expected as -1; all others as their position in ``values``.
    """
    values = np.array([0, 2, 1])
    to_match = np.array([0, 1, 2, 2, 0, 1, 3, 0])

    result = algos.match(to_match, values)
    expected = np.array([0, 2, 1, 1, 0, 2, -1, 0])
    self.assertTrue(np.array_equal(result, expected))
def match(*args, **kwargs):
    """Deprecated entry point that forwards to ``pandas.core.algorithms.match``.

    Emits a ``FutureWarning`` attributed to the caller (``stacklevel=2``)
    and then passes every positional and keyword argument through
    unchanged, returning whatever the underlying function returns.
    """
    import warnings

    deprecation_note = (
        "pd.match() is deprecated and will be removed in a future version"
    )
    warnings.warn(deprecation_note, FutureWarning, stacklevel=2)

    # Imported only after the warning has been issued, and under a private
    # alias so it does not read like a recursive call.
    from pandas.core.algorithms import match as _match_impl

    return _match_impl(*args, **kwargs)
def test_ints(self):
    """Check ``algos.match`` on integer arrays and on a float32 Series.

    Each input pair is tested twice: once with the default call, where
    unmatched entries are expected as -1, and once with ``np.nan`` as the
    third argument, where they are expected as NaN.
    """
    values = np.array([0, 2, 1])
    to_match = np.array([0, 1, 2, 2, 0, 1, 3, 0])

    # The value 3 does not occur in ``values``.
    result = algos.match(to_match, values)
    expected = np.array([0, 2, 1, 1, 0, 2, -1, 0])
    self.assertTrue(np.array_equal(result, expected))

    result = Series(algos.match(to_match, values, np.nan))
    expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0]))
    tm.assert_series_equal(result, expected)

    # float32 Series matched against a plain list of ints.
    s = pd.Series(np.arange(5), dtype=np.float32)
    result = algos.match(s, [2, 4])
    expected = np.array([-1, -1, 0, -1, 1])
    self.assertTrue(np.array_equal(result, expected))

    result = Series(algos.match(s, [2, 4], np.nan))
    expected = Series(np.array([np.nan, np.nan, 0, np.nan, 1]))
    tm.assert_series_equal(result, expected)
def test_ints(self):
    """Exercise ``algos.match`` on integer arrays and on a float32 Series.

    Both input pairs are checked with the default call (unmatched entries
    expected as -1) and with ``np.nan`` as the third argument (unmatched
    entries expected as NaN once wrapped in a Series).
    """
    haystack = np.array([0, 2, 1])
    needles = np.array([0, 1, 2, 2, 0, 1, 3, 0])

    # The value 3 does not occur in ``haystack``.
    positions = algos.match(needles, haystack)
    self.assert_numpy_array_equal(
        positions, np.array([0, 2, 1, 1, 0, 2, -1, 0])
    )

    positions_with_nan = Series(algos.match(needles, haystack, np.nan))
    tm.assert_series_equal(
        positions_with_nan,
        Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0])),
    )

    # float32 Series matched against a plain list of ints.
    float_series = pd.Series(np.arange(5), dtype=np.float32)

    positions = algos.match(float_series, [2, 4])
    self.assert_numpy_array_equal(positions, np.array([-1, -1, 0, -1, 1]))

    positions_with_nan = Series(algos.match(float_series, [2, 4], np.nan))
    tm.assert_series_equal(
        positions_with_nan,
        Series(np.array([np.nan, np.nan, 0, np.nan, 1])),
    )
def _read_panel_table(self, group, where=None):
    """Read the long-format table under ``group`` back into a panel.

    Parameters
    ----------
    group : object
        Node exposing a ``table`` attribute.  The table's ``_v_attrs`` are
        read for ``fields``, ``index_kind`` and ``columns_kind``.
    where : optional
        Passed unchanged to ``Selection`` together with the table.

    Returns
    -------
    The object built by ``Panel(mgr)`` when no (index, column) pair is
    repeated, otherwise the result of ``DataFrame.to_panel()``.  If the
    selection has a truthy ``column_filter`` the result is reindexed so
    its minor axis is the sorted intersection with that filter.
    """
    table = group.table

    # create the selection
    sel = Selection(table, where, table._v_attrs.index_kind)
    sel.select()
    fields = table._v_attrs.fields

    columns = _maybe_convert(sel.values['column'],
                             table._v_attrs.columns_kind)
    index = _maybe_convert(sel.values['index'], table._v_attrs.index_kind)
    values = sel.values['values']

    major = Factor.from_array(index)
    minor = Factor.from_array(columns)

    J, K = len(major.levels), len(minor.levels)
    # Collapse each (major, minor) label pair into one integer key; if all
    # keys are distinct the table holds no duplicate entries.
    key = major.labels * K + minor.labels

    if len(unique(key)) == len(key):
        sorter, _ = lib.groupsort_indexer(com._ensure_int64(key), J * K)
        sorter = com._ensure_platform_int(sorter)

        # the data need to be sorted
        sorted_values = values.take(sorter, axis=0)
        major_labels = major.labels.take(sorter)
        minor_labels = minor.labels.take(sorter)

        block = block2d_to_block3d(sorted_values, fields, (J, K),
                                   major_labels, minor_labels)

        mgr = BlockManager([block], [block.ref_items,
                                     major.levels, minor.levels])
        wp = Panel(mgr)
    else:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # reconstruct
        long_index = MultiIndex.from_arrays([index, columns])
        lp = DataFrame(values, index=long_index, columns=fields)

        # need a better algorithm
        tuple_index = long_index._tuple_index

        unique_tuples = lib.fast_unique(tuple_index)
        unique_tuples = _asarray_tuplesafe(unique_tuples)

        # One row position per unique (index, column) tuple.
        indexer = match(unique_tuples, tuple_index)
        indexer = com._ensure_platform_int(indexer)

        new_index = long_index.take(indexer)
        new_values = lp.values.take(indexer, axis=0)

        lp = DataFrame(new_values, index=new_index, columns=lp.columns)
        wp = lp.to_panel()

    if sel.column_filter:
        new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
        wp = wp.reindex(minor=new_minor)
    return wp
def _read_panel_table(self, group, where=None):
    """Read the long-format table under ``group`` back into a panel.

    Parameters
    ----------
    group : object
        Node exposing a ``table`` attribute.  The table's ``_v_attrs`` are
        read for ``fields``, ``index_kind`` and ``columns_kind``.
    where : optional
        Passed unchanged to ``Selection`` together with the table.

    Returns
    -------
    The object built by ``Panel(mgr)`` when no (index, column) pair is
    repeated, otherwise the result of ``DataFrame.to_panel()``.  If the
    selection has a truthy ``column_filter`` the result is reindexed so
    its minor axis is the sorted intersection with that filter.
    """
    table = group.table

    # create the selection
    sel = Selection(table, where, table._v_attrs.index_kind)
    sel.select()
    fields = table._v_attrs.fields

    columns = _maybe_convert(sel.values['column'],
                             table._v_attrs.columns_kind)
    index = _maybe_convert(sel.values['index'], table._v_attrs.index_kind)
    values = sel.values['values']

    major = Factor(index)
    minor = Factor(columns)

    J, K = len(major.levels), len(minor.levels)
    # Collapse each (major, minor) label pair into one integer key; if all
    # keys are distinct the table holds no duplicate entries.
    key = major.labels * K + minor.labels

    if len(unique(key)) == len(key):
        sorter, _ = lib.groupsort_indexer(key, J * K)

        # the data need to be sorted
        sorted_values = values.take(sorter, axis=0)
        major_labels = major.labels.take(sorter)
        minor_labels = minor.labels.take(sorter)

        block = block2d_to_block3d(sorted_values, fields, (J, K),
                                   major_labels, minor_labels)

        mgr = BlockManager([block], [block.items,
                                     major.levels, minor.levels])
        wp = Panel(mgr)
    else:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # reconstruct
        long_index = MultiIndex.from_arrays([index, columns])
        lp = DataFrame(values, index=long_index, columns=fields)

        # need a better algorithm
        tuple_index = long_index.get_tuple_index()

        unique_tuples = lib.fast_unique(tuple_index)
        unique_tuples = _asarray_tuplesafe(unique_tuples)

        # One row position per unique (index, column) tuple.
        indexer = match(unique_tuples, tuple_index)

        new_index = long_index.take(indexer)
        new_values = lp.values.take(indexer, axis=0)

        lp = DataFrame(new_values, index=new_index, columns=lp.columns)
        wp = lp.to_panel()

    if sel.column_filter:
        new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
        wp = wp.reindex(minor=new_minor)
    return wp