def test_value_counts_bins(self):
    s = [1, 2, 3, 4]
    result = algos.value_counts(s, bins=1)
    self.assertEqual(result.tolist(), [4])
    # pd.cut pads the lowest edge by 0.1% of the range (1 - 0.003 = 0.997)
    self.assertEqual(result.index[0], 0.997)

    result = algos.value_counts(s, bins=2, sort=False)
    self.assertEqual(result.tolist(), [2, 2])
    self.assertEqual(result.index[0], 0.997)
    self.assertEqual(result.index[1], 2.5)
def test_value_counts(self):
    from pandas.tools.tile import cut

    arr = np.random.randn(4)
    factor = cut(arr, 4)
    tm.assert_isinstance(factor, Categorical)

    result = algos.value_counts(factor)
    expected = algos.value_counts(np.asarray(factor))
    tm.assert_series_equal(result, expected)
def test_value_counts_dtypes(self):
    result = algos.value_counts([1, 1.])
    self.assertEqual(len(result), 1)

    result = algos.value_counts([1, 1.], bins=1)
    self.assertEqual(len(result), 1)

    result = algos.value_counts(Series([1, 1., '1']))  # object
    self.assertEqual(len(result), 2)

    self.assertRaises(TypeError,
                      lambda s: algos.value_counts(s, bins=1),
                      ['1', 1])
def test_value_counts_nat(self):
    td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]')
    dt = pd.to_datetime(['NaT', '2014-01-01'])

    for s in [td, dt]:
        vc = algos.value_counts(s)
        vc_with_na = algos.value_counts(s, dropna=False)
        self.assertEqual(len(vc), 1)
        self.assertEqual(len(vc_with_na), 2)

    exp_dt = pd.Series({pd.Timestamp('2014-01-01 00:00:00'): 1})
    tm.assert_series_equal(algos.value_counts(dt), exp_dt)
def value_counts(self, normalize=False, sort=True, ascending=False,
                 bins=None, dropna=True):
    """
    Returns object containing counts of unique values.

    The resulting object will be in descending order so that the
    first element is the most frequently-occurring element.
    Excludes NA values by default.

    Parameters
    ----------
    normalize : boolean, default False
        If True then the object returned will contain the relative
        frequencies of the unique values.
    sort : boolean, default True
        Sort by values.
    ascending : boolean, default False
        Sort in ascending order.
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        a convenience for pd.cut, only works with numeric data.
    dropna : boolean, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series
    """
    from pandas.core.algorithms import value_counts
    result = value_counts(self, sort=sort, ascending=ascending,
                          normalize=normalize, bins=bins, dropna=dropna)
    return result
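# A minimal pure-Python sketch of what the delegated
# pandas.core.algorithms.value_counts does conceptually (hash-based counting,
# then normalize/sort). This is an illustration, not pandas' actual
# implementation, which counts in Cython and also handles NA, bins and
# extension dtypes; it assumes a flat sequence of hashable, non-NA values.
import collections

import pandas as pd


def value_counts_sketch(values, normalize=False, sort=True, ascending=False):
    # Hash-table counting, mirroring the hashtable pass pandas performs
    result = pd.Series(collections.Counter(values))
    if normalize:
        # Relative frequencies: divide each count by the total
        result = result / result.sum()
    if sort:
        result = result.sort_values(ascending=ascending)
    return result


# e.g. value_counts_sketch(['a', 'b', 'a']) -> a: 2, b: 1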
def value_counts(self, dropna=False):
    from pandas import Series, PeriodIndex

    if dropna:
        values = self[~self.isna()]._data
    else:
        values = self._data

    cls = type(self)

    result = algos.value_counts(values, sort=False)
    index = PeriodIndex(cls(result.index, freq=self.freq),
                        name=result.index.name)
    return Series(result.values, index=index, name=result.name)
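# Usage sketch for the period-aware variant above. Two things worth noting:
# it defaults to dropna=False, unlike the Series method, and it re-wraps the
# result index as a PeriodIndex so the frequency is not lost in the counting
# round-trip. The example assumes a pandas version where PeriodIndex exposes
# value_counts this way.
import pandas as pd

idx = pd.PeriodIndex(['2020-01', '2020-01', '2020-02', 'NaT'], freq='M')
counts = idx.value_counts(dropna=True)   # NaT filtered before counting
assert counts.index.freqstr == 'M'       # freq survives the round-trip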
def test_value_counts(self):
    np.random.seed(1234)
    from pandas.tools.tile import cut

    arr = np.random.randn(4)
    factor = cut(arr, 4)

    tm.assertIsInstance(factor, Categorical)
    result = algos.value_counts(factor)
    cats = ['(-1.194, -0.535]', '(-0.535, 0.121]',
            '(0.121, 0.777]', '(0.777, 1.433]']
    expected_index = CategoricalIndex(cats, cats, ordered=True)
    expected = Series([1, 1, 1, 1], index=expected_index)
    tm.assert_series_equal(result.sort_index(), expected.sort_index())
def value_counts(self, normalize=False, sort=True, ascending=False,
                 bins=None, dropna=True):
    """
    Returns object containing counts of unique values.

    The resulting object will be in descending order so that the
    first element is the most frequently-occurring element.
    Excludes NA values by default.

    Parameters
    ----------
    normalize : boolean, default False
        If True then the object returned will contain the relative
        frequencies of the unique values.
    sort : boolean, default True
        Sort by values.
    ascending : boolean, default False
        Sort in ascending order.
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        a convenience for pd.cut, only works with numeric data.
    dropna : boolean, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series
    """
    from pandas.core.algorithms import value_counts
    from pandas.tseries.api import DatetimeIndex, PeriodIndex
    from pandas import Index

    result = value_counts(self, sort=sort, ascending=ascending,
                          normalize=normalize, bins=bins, dropna=dropna)

    if isinstance(self, PeriodIndex):
        # preserve freq
        result.index = self._simple_new(result.index.values,
                                        freq=self.freq)
    elif isinstance(self, DatetimeIndex):
        result.index = self._simple_new(result.index.values,
                                        tz=getattr(self, "tz", None))

    if isinstance(self, Index):
        return result
    else:
        return self._constructor(result)
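# A round-trip illustrating the DatetimeIndex branch above: rebuilding the
# result index via _simple_new preserves timezone info that would otherwise
# be dropped when the counts come back keyed by raw values. Hedged example,
# assuming a tz-aware index:
import pandas as pd

dti = pd.DatetimeIndex(['2019-01-01', '2019-01-01', '2019-01-02'],
                       tz='US/Eastern')
counts = dti.value_counts()
assert str(counts.index.tz) == 'US/Eastern'   # tz survives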
def value_counts(self, dropna=True):
    """
    Returns a Series containing counts of each interval.

    Parameters
    ----------
    dropna : boolean, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts
    """
    # TODO: implement this in a non-naive way!
    from pandas.core.algorithms import value_counts
    return value_counts(np.asarray(self), dropna=dropna)
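# Hedged sketch of what the TODO above might become: count on the interval's
# left/right edge arrays instead of boxing every element into an Interval
# scalar via np.asarray. Everything here is illustrative, not pandas'
# implementation; it only handles the dropna=True path.
import collections

import pandas as pd


def interval_value_counts_sketch(ii):
    # Key each non-missing interval by its (left, right) edge pair so the
    # counting loop touches plain numeric values, not boxed Interval objects
    mask = ii.isna()
    counter = collections.Counter(
        (left, right)
        for left, right, missing in zip(ii.left, ii.right, mask)
        if not missing
    )
    lefts = [k[0] for k in counter]
    rights = [k[1] for k in counter]
    index = pd.IntervalIndex.from_arrays(lefts, rights, closed=ii.closed)
    return pd.Series(list(counter.values()), index=index)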
def value_counts(self, normalize=False, sort=True, ascending=False,
                 bins=None, dropna=True):
    """
    Returns object containing counts of unique values.

    The resulting object will be in descending order so that the
    first element is the most frequently-occurring element.
    Excludes NA values by default.

    Parameters
    ----------
    normalize : boolean, default False
        If True then the object returned will contain the relative
        frequencies of the unique values.
    sort : boolean, default True
        Sort by values.
    ascending : boolean, default False
        Sort in ascending order.
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        a convenience for pd.cut, only works with numeric data.
    dropna : boolean, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series
    """
    from pandas.core.algorithms import value_counts
    from pandas.tseries.api import DatetimeIndex, PeriodIndex

    result = value_counts(self, sort=sort, ascending=ascending,
                          normalize=normalize, bins=bins, dropna=dropna)

    if isinstance(self, PeriodIndex):
        # preserve freq
        result.index = self._simple_new(result.index.values,
                                        freq=self.freq)
    elif isinstance(self, DatetimeIndex):
        result.index = self._simple_new(result.index.values,
                                        tz=getattr(self, 'tz', None))
    return result
def value_counts(self, normalize=False, sort=True, ascending=False,
                 bins=None, dropna=True):
    """
    Returns object containing counts of unique values.

    The resulting object will be in descending order so that the
    first element is the most frequently-occurring element.
    Excludes NA values by default.

    Parameters
    ----------
    normalize : boolean, default False
        If True then the object returned will contain the relative
        frequencies of the unique values.
    sort : boolean, default True
        Sort by values.
    ascending : boolean, default False
        Sort in ascending order.
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        a convenience for pd.cut, only works with numeric data.
    dropna : boolean, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series
    """
    from pandas.core.algorithms import value_counts
    return value_counts(self.values, sort=sort, ascending=ascending,
                        normalize=normalize, bins=bins, dropna=dropna)
def value_counts(self, normalize=False, sort=True, ascending=False,
                 bins=None, dropna=True):
    """
    Return a Series containing counts of unique values.

    The resulting object will be in descending order so that the
    first element is the most frequently-occurring element.
    Excludes NA values by default.

    Parameters
    ----------
    normalize : boolean, default False
        If True then the object returned will contain the relative
        frequencies of the unique values.
    sort : boolean, default True
        Sort by frequencies.
    ascending : boolean, default False
        Sort in ascending order.
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        a convenience for ``pd.cut``, only works with numeric data.
    dropna : boolean, default True
        Don't include counts of NaN.

    Returns
    -------
    Series

    See Also
    --------
    Series.count: Number of non-NA elements in a Series.
    DataFrame.count: Number of non-NA elements in a DataFrame.

    Examples
    --------
    >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
    >>> index.value_counts()
    3.0    2
    4.0    1
    2.0    1
    1.0    1
    dtype: int64

    With `normalize` set to `True`, returns the relative frequency by
    dividing all values by the sum of values.

    >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
    >>> s.value_counts(normalize=True)
    3.0    0.4
    4.0    0.2
    2.0    0.2
    1.0    0.2
    dtype: float64

    **bins**

    Bins can be useful for going from a continuous variable to a
    categorical variable; instead of counting unique
    apparitions of values, divide the index in the specified
    number of half-open bins.

    >>> s.value_counts(bins=3)
    (2.0, 3.0]      2
    (0.996, 2.0]    2
    (3.0, 4.0]      1
    dtype: int64

    **dropna**

    With `dropna` set to `False` we can also see NaN index values.

    >>> s.value_counts(dropna=False)
    3.0    2
    NaN    1
    4.0    1
    2.0    1
    1.0    1
    dtype: int64
    """
    from pandas.core.algorithms import value_counts
    result = value_counts(self, sort=sort, ascending=ascending,
                          normalize=normalize, bins=bins, dropna=dropna)
    return result
def value_counts(self, dropna: bool = True):
    from pandas.core.algorithms import value_counts

    return value_counts(self.to_numpy(), dropna=dropna)
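# The ExtensionArray variant above delegates wholesale through to_numpy().
# Hedged illustration of the delegated behavior, assuming a pandas version
# where pandas.core.algorithms.value_counts is importable (as in the
# snippets above):
import numpy as np
from pandas.core.algorithms import value_counts

data = np.array([1.0, 1.0, np.nan])
print(value_counts(data))                # NaN dropped by default: {1.0: 2}
print(value_counts(data, dropna=False))  # NaN kept: {1.0: 2, nan: 1}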
def value_counts(self, normalize=False, sort=True, ascending=False,
                 bins=None, dropna=True):
    """
    Return a Series containing counts of unique values.

    The resulting object will be in descending order so that the
    first element is the most frequently-occurring element.
    Excludes NA values by default.

    Parameters
    ----------
    normalize : boolean, default False
        If True then the object returned will contain the relative
        frequencies of the unique values.
    sort : boolean, default True
        Sort by values.
    ascending : boolean, default False
        Sort in ascending order.
    bins : integer, optional
        Rather than count values, group them into half-open bins,
        a convenience for ``pd.cut``, only works with numeric data.
    dropna : boolean, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.count: number of non-NA elements in a Series
    DataFrame.count: number of non-NA elements in a DataFrame

    Examples
    --------
    >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
    >>> index.value_counts()
    3.0    2
    4.0    1
    2.0    1
    1.0    1
    dtype: int64

    With `normalize` set to `True`, returns the relative frequency by
    dividing all values by the sum of values.

    >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
    >>> s.value_counts(normalize=True)
    3.0    0.4
    4.0    0.2
    2.0    0.2
    1.0    0.2
    dtype: float64

    **bins**

    Bins can be useful for going from a continuous variable to a
    categorical variable; instead of counting unique
    apparitions of values, divide the index in the specified
    number of half-open bins.

    >>> s.value_counts(bins=3)
    (2.0, 3.0]      2
    (0.996, 2.0]    2
    (3.0, 4.0]      1
    dtype: int64

    **dropna**

    With `dropna` set to `False` we can also see NaN index values.

    >>> s.value_counts(dropna=False)
    3.0    2
    NaN    1
    4.0    1
    2.0    1
    1.0    1
    dtype: int64
    """
    from pandas.core.algorithms import value_counts
    result = value_counts(self, sort=sort, ascending=ascending,
                          normalize=normalize, bins=bins, dropna=dropna)
    return result