def test_nansem(self):
    """Check ``nanops.nansem`` against ``scipy.stats.sem``.

    Guarded by ``tm.skip_if_no_package('scipy', min_version='0.17.0')``.
    The comparison itself is delegated to ``self.check_funs_ddof`` with
    the complex/str/date/tdelta allow-flags set to ``False`` and
    ``allow_obj='convert'``.
    """
    tm.skip_if_no_package('scipy', min_version='0.17.0')
    from scipy.stats import sem

    input_flags = dict(allow_complex=False, allow_str=False,
                       allow_date=False, allow_tdelta=False,
                       allow_obj='convert')
    # Run the comparison with NumPy "invalid" floating-point errors ignored.
    with np.errstate(invalid='ignore'):
        self.check_funs_ddof(nanops.nansem, sem, **input_flags)
def test_nankurt(self):
    """Check ``nanops.nankurt`` against ``scipy.stats.kurtosis``.

    The reference is ``kurtosis`` with ``fisher=True``, routed through
    ``self._skew_kurt_wrap`` before being handed to ``self.check_funs``.
    """
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import kurtosis

    fisher_kurtosis = partial(kurtosis, fisher=True)
    reference = partial(self._skew_kurt_wrap, func=fisher_kurtosis)
    input_flags = dict(allow_complex=False, allow_str=False,
                       allow_date=False)
    self.check_funs(nanops.nankurt, reference, **input_flags)
def test_nansem(self):
    """Check ``nanops.nansem`` against ``scipy.stats.sem``.

    Two guards run first: ``tm.skip_if_no_package('scipy.stats')`` and
    ``tm._skip_if_scipy_0_17()``.  The comparison is delegated to
    ``self.check_funs_ddof`` with ``allow_tdelta=True`` and
    ``allow_obj='convert'``.
    """
    tm.skip_if_no_package('scipy.stats')
    tm._skip_if_scipy_0_17()
    from scipy.stats import sem

    input_flags = dict(allow_complex=False, allow_str=False,
                       allow_date=False, allow_tdelta=True,
                       allow_obj='convert')
    self.check_funs_ddof(nanops.nansem, sem, **input_flags)
def test_nanskew(self):
    """Check ``nanops.nanskew`` against ``scipy.stats.skew``.

    ``skew`` is routed through ``self._skew_kurt_wrap`` and the wrapped
    callable is used as the reference in ``self.check_funs``.  Two skip
    guards (``skip_if_no_package`` and ``_skip_if_scipy_0_17``) run first.
    """
    tm.skip_if_no_package('scipy.stats')
    tm._skip_if_scipy_0_17()
    from scipy.stats import skew

    reference = partial(self._skew_kurt_wrap, func=skew)
    input_flags = dict(allow_complex=False, allow_str=False,
                       allow_date=False, allow_tdelta=False)
    self.check_funs(nanops.nanskew, reference, **input_flags)
def test_rank_methods_frame(self):
    """Compare ``DataFrame.rank`` with ``scipy.stats.rankdata``.

    A random 100x26 grid of values in [-1, 1] (step 0.1) is ranked as-is,
    shifted by 1e6 and scaled by 1e-6, along both axes and for every
    tie-breaking method.  pandas' ``'first'`` is compared against scipy's
    ``'ordinal'``.
    """
    tm.skip_if_no_package('scipy', min_version='0.13',
                          app='scipy.stats.rankdata')
    import scipy
    from scipy.stats import rankdata

    base = np.random.randint(0, 21, (100, 26))
    base = (base - 10.0) / 10.0
    labels = [chr(ord('z') - k) for k in range(base.shape[1])]
    rank_methods = ['average', 'min', 'max', 'first', 'dense']

    # The version test does not depend on the loop variables; evaluate once.
    cast_expected = LooseVersion(scipy.__version__) >= '0.17.0'

    for data in (base, base + 1e6, base * 1e-6):
        frame = DataFrame(data, columns=labels)

        for axis in (0, 1):
            for method in rank_methods:
                result = frame.rank(axis=axis, method=method)

                scipy_method = 'ordinal' if method == 'first' else method
                ranks = np.apply_along_axis(
                    rankdata, axis, data, scipy_method).astype(np.float64)
                expected = DataFrame(ranks, columns=labels)

                if cast_expected:
                    expected = expected.astype('float64')
                tm.assert_frame_equal(result, expected)
def test_nanskew(self):
    """Check ``nanops.nanskew`` against ``scipy.stats.skew``.

    Guarded by ``tm.skip_if_no_package('scipy', min_version='0.17.0')``.
    ``skew`` is wrapped via ``self._skew_kurt_wrap`` and compared through
    ``self.check_funs`` inside an ``np.errstate(invalid='ignore')`` block.
    """
    tm.skip_if_no_package('scipy', min_version='0.17.0')
    from scipy.stats import skew

    reference = partial(self._skew_kurt_wrap, func=skew)
    input_flags = dict(allow_complex=False, allow_str=False,
                       allow_date=False, allow_tdelta=False)
    # Run the comparison with NumPy "invalid" floating-point errors ignored.
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nanskew, reference, **input_flags)
def test_nankurt(self):
    """Check ``nanops.nankurt`` against ``scipy.stats.kurtosis``.

    Guarded by ``tm.skip_if_no_package('scipy', min_version='0.17.0')``.
    The reference is ``kurtosis`` with ``fisher=True`` wrapped via
    ``self._skew_kurt_wrap``; the comparison runs through
    ``self.check_funs`` inside ``np.errstate(invalid='ignore')``.
    """
    tm.skip_if_no_package('scipy', min_version='0.17.0')
    from scipy.stats import kurtosis

    fisher_kurtosis = partial(kurtosis, fisher=True)
    reference = partial(self._skew_kurt_wrap, func=fisher_kurtosis)
    input_flags = dict(allow_complex=False, allow_str=False,
                       allow_date=False, allow_tdelta=False)
    # Run the comparison with NumPy "invalid" floating-point errors ignored.
    with np.errstate(invalid='ignore'):
        self.check_funs(nanops.nankurt, reference, **input_flags)
def test_nancorr_kendall(self):
    """Check ``nanops.nancorr(method="kendall")`` against scipy.

    For the 2-d and then the 1-d float fixtures, two targets are computed
    with ``scipy.stats.kendalltau`` (first element of its result): one from
    the arrays themselves and one from their ``.flat`` iterators.  Both are
    handed to the matching ``check_nancorr_nancov_*`` helper.
    """
    tm.skip_if_no_package("scipy.stats")
    from scipy.stats import kendalltau

    # 2-d fixtures
    left_2d, right_2d = self.arr_float_2d, self.arr_float1_2d
    tau_full = kendalltau(left_2d, right_2d)[0]
    tau_flat = kendalltau(left_2d.flat, right_2d.flat)[0]
    self.check_nancorr_nancov_2d(nanops.nancorr, tau_full, tau_flat,
                                 method="kendall")

    # 1-d fixtures
    left_1d, right_1d = self.arr_float_1d, self.arr_float1_1d
    tau_full = kendalltau(left_1d, right_1d)[0]
    tau_flat = kendalltau(left_1d.flat, right_1d.flat)[0]
    self.check_nancorr_nancov_1d(nanops.nancorr, tau_full, tau_flat,
                                 method="kendall")
def test_nancorr_spearman(self):
    """Check ``nanops.nancorr(method="spearman")`` against scipy.

    For the 2-d and then the 1-d float fixtures, two targets are computed
    with ``scipy.stats.spearmanr`` (first element of its result): one from
    the arrays themselves and one from their ``.flat`` iterators.  Both are
    handed to the matching ``check_nancorr_nancov_*`` helper.
    """
    tm.skip_if_no_package("scipy.stats")
    from scipy.stats import spearmanr

    # 2-d fixtures
    left_2d, right_2d = self.arr_float_2d, self.arr_float1_2d
    rho_full = spearmanr(left_2d, right_2d)[0]
    rho_flat = spearmanr(left_2d.flat, right_2d.flat)[0]
    self.check_nancorr_nancov_2d(nanops.nancorr, rho_full, rho_flat,
                                 method="spearman")

    # 1-d fixtures
    left_1d, right_1d = self.arr_float_1d, self.arr_float1_1d
    rho_full = spearmanr(left_1d, right_1d)[0]
    rho_flat = spearmanr(left_1d.flat, right_1d.flat)[0]
    self.check_nancorr_nancov_1d(nanops.nancorr, rho_full, rho_flat,
                                 method="spearman")
def test_put_compression_blosc(self):
    """Exercise ``store.put`` with ``compression='blosc'``.

    Putting with ``table=False`` is expected to raise ``ValueError``;
    putting with ``table=True`` is expected to round-trip the frame.
    """
    tm.skip_if_no_package('tables', '2.2', app='blosc support')

    frame = tm.makeTimeDataFrame()
    store = self.store

    # can't compress if table=False
    self.assertRaises(ValueError, store.put, 'b', frame,
                      table=False, compression='blosc')

    store.put('c', frame, table=True, compression='blosc')
    tm.assert_frame_equal(store['c'], frame)
def test_spline_extrapolate(self):
    """Check that ``ext`` is honoured by order-1 spline interpolation.

    With ``ext=3`` the trailing NaN is expected to become 6.0; with
    ``ext=0`` it is expected to become 7.0.  The interior NaN is expected
    to become 5.0 in both cases.
    """
    # Arguments are kept positional, exactly as the helper is called here.
    tm.skip_if_no_package(
        'scipy', '0.15',
        'setting ext on scipy.interpolate.UnivariateSpline')

    ser = Series([1, 2, 3, 4, np.nan, 6, np.nan])

    held_result = ser.interpolate(method='spline', order=1, ext=3)
    held_expected = Series([1., 2., 3., 4., 5., 6., 6.])
    assert_series_equal(held_result, held_expected)

    extended_result = ser.interpolate(method='spline', order=1, ext=0)
    extended_expected = Series([1., 2., 3., 4., 5., 6., 7.])
    assert_series_equal(extended_result, extended_expected)
def test_nansem(self):
    """Check ``nanops.nansem`` against ``scipy.stats.sem``.

    Guarded by ``tm.skip_if_no_package("scipy.stats")``.  The comparison is
    delegated to ``self.check_funs_ddof`` with ``allow_tdelta=True`` and
    ``allow_obj="convert"``.
    """
    tm.skip_if_no_package("scipy.stats")
    from scipy.stats import sem

    input_flags = {
        "allow_complex": False,
        "allow_str": False,
        "allow_date": False,
        "allow_tdelta": True,
        "allow_obj": "convert",
    }
    self.check_funs_ddof(nanops.nansem, sem, **input_flags)
def test_spline_extrapolate(self):
    """Check that ``ext`` is honoured by order-1 spline interpolation.

    With ``ext=3`` the trailing NaN is expected to become 6.0; with
    ``ext=0`` it is expected to become 7.0.  The interior NaN is expected
    to become 5.0 in both cases.
    """
    tm.skip_if_no_package(
        'scipy', min_version='0.15',
        app='setting ext on scipy.interpolate.UnivariateSpline')

    ser = Series([1, 2, 3, 4, np.nan, 6, np.nan])

    held_result = ser.interpolate(method='spline', order=1, ext=3)
    held_expected = Series([1., 2., 3., 4., 5., 6., 6.])
    assert_series_equal(held_result, held_expected)

    extended_result = ser.interpolate(method='spline', order=1, ext=0)
    extended_expected = Series([1., 2., 3., 4., 5., 6., 7.])
    assert_series_equal(extended_result, extended_expected)
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a scipy sparse matrix through ``pd.SparseDataFrame``.

    ``spmatrix`` is called on a 3x3 identity array (with one extra
    off-diagonal entry) of the given ``dtype``; ``index``, ``columns`` and
    ``fill_value`` are forwarded to the ``SparseDataFrame`` constructor.
    The arguments are presumably supplied by fixtures or parametrization
    defined elsewhere -- confirm against the test module.
    """
    # GH 4343
    tm.skip_if_no_package('scipy')

    # A single dense array, and the sparse matrix built from it, feed both
    # the frame construction and the comparisons below.
    dense = np.eye(3, dtype=dtype)
    # GH 16179
    dense[0, 1] = dtype(2)
    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and
        # dtype, the combination is treated as unsupported and the rest
        # of the test is not run.
        return

    sdf = pd.SparseDataFrame(spm, index=index, columns=columns,
                             default_fill_value=fill_value)

    # Building the expected frame for every dtype/fill_value combination
    # is awkward; cast to object and compare on that basis.
    as_object = dense.astype(object)
    as_object[dense == 0] = np.nan
    fill_with = np.nan if fill_value is None else fill_value
    expected = pd.SparseDataFrame(as_object, index=index, columns=columns)
    expected = expected.fillna(fill_with)

    # The frame matches the expectation, sparse and dense.
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # The sparse matrices hold the same entries.
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # dtype is preserved where possible.
    was_upcast = ((fill_value is None or is_float(fill_value)) and
                  not (is_object_dtype(dtype) or is_float_dtype(dtype)))
    if is_bool_dtype(dtype):
        res_dtype = bool
    elif was_upcast:
        res_dtype = float
    else:
        res_dtype = dtype
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # Adding a str column, however, results in an upcast to object.
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
def test_nancorr_spearman(self):
    """Check ``nanops.nancorr(method='spearman')`` against scipy.

    The 2-d fixtures are checked first, then the 1-d ones.  For each pair
    two targets come from ``scipy.stats.spearmanr`` (first element of its
    result): one from the arrays and one from their ``.flat`` iterators.
    """
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import spearmanr

    cases = (
        (self.check_nancorr_nancov_2d,
         self.arr_float_2d, self.arr_float1_2d),
        (self.check_nancorr_nancov_1d,
         self.arr_float_1d, self.arr_float1_1d),
    )
    for check, left, right in cases:
        targ0 = spearmanr(left, right)[0]
        targ1 = spearmanr(left.flat, right.flat)[0]
        check(nanops.nancorr, targ0, targ1, method='spearman')
def test_nancorr_kendall(self):
    """Check ``nanops.nancorr(method='kendall')`` against scipy.

    The 2-d fixtures are checked first, then the 1-d ones.  For each pair
    two targets come from ``scipy.stats.kendalltau`` (first element of its
    result): one from the arrays and one from their ``.flat`` iterators.
    """
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import kendalltau

    cases = (
        (self.check_nancorr_nancov_2d,
         self.arr_float_2d, self.arr_float1_2d),
        (self.check_nancorr_nancov_1d,
         self.arr_float_1d, self.arr_float1_1d),
    )
    for check, left, right in cases:
        targ0 = kendalltau(left, right)[0]
        targ1 = kendalltau(left.flat, right.flat)[0]
        check(nanops.nancorr, targ0, targ1, method='kendall')
def test_rank_methods_series(self):
    """Compare ``Series.rank`` with ``scipy.stats.rankdata``.

    A shuffled random vector containing duplicates is ranked as-is,
    shifted by 1e6 and scaled by 1e-6, for every tie-breaking method.
    pandas' ``'first'`` is compared against scipy's ``'ordinal'``.
    """
    tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata')
    from scipy.stats import rankdata

    xs = np.random.randn(9)
    xs = np.concatenate([xs[i:] for i in range(0, 9, 2)])  # add duplicates
    np.random.shuffle(xs)

    index = [chr(ord('a') + i) for i in range(len(xs))]

    for vals in [xs, xs + 1e6, xs * 1e-6]:
        ts = Series(vals, index=index)

        for m in ['average', 'min', 'max', 'first', 'dense']:
            # Pass the method by keyword: where ``rank`` takes ``axis`` as
            # its first positional parameter, ``ts.rank(m)`` would send the
            # method name to the wrong argument.
            result = ts.rank(method=m)
            sprank = rankdata(vals, m if m != 'first' else 'ordinal')
            tm.assert_series_equal(result, Series(sprank, index=index))
def test_rank_methods_series(self):
    """Compare ``Series.rank`` with ``scipy.stats.rankdata``.

    A shuffled random vector containing duplicates is ranked as-is,
    shifted by 1e6 and scaled by 1e-6, for every tie-breaking method.
    pandas' ``'first'`` is compared against scipy's ``'ordinal'``.
    """
    tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata')
    from scipy.stats import rankdata

    draws = np.random.randn(9)
    # Concatenating overlapping tails introduces duplicate values.
    base = np.concatenate([draws[start:] for start in range(0, 9, 2)])
    np.random.shuffle(base)

    labels = [chr(ord('a') + pos) for pos in range(len(base))]
    rank_methods = ['average', 'min', 'max', 'first', 'dense']

    for data in (base, base + 1e6, base * 1e-6):
        ser = Series(data, index=labels)

        for method in rank_methods:
            result = ser.rank(method=method)

            scipy_method = 'ordinal' if method == 'first' else method
            expected = Series(rankdata(data, scipy_method), index=labels)
            tm.assert_series_equal(result, expected)
def test_from_scipy_correct_ordering(spmatrix):
    """Build a ``SparseDataFrame`` from a sparse 2x2 matrix and compare.

    The frame built from ``spmatrix(arr)`` must equal the one built
    directly from the dense ``arr`` holding the values 1..4, in both
    sparse and dense form.
    """
    # GH 16179
    tm.skip_if_no_package('scipy')

    dense = np.arange(1, 5).reshape(2, 2)
    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and
        # dtype, the combination is treated as unsupported and the rest
        # of the test is not run.
        return

    from_sparse = pd.SparseDataFrame(spm)
    from_dense = pd.SparseDataFrame(dense)
    tm.assert_sp_frame_equal(from_sparse, from_dense)
    tm.assert_frame_equal(from_sparse.to_dense(), from_dense.to_dense())
def test_rank_methods_frame(self):
    """Compare ``DataFrame.rank`` with ``scipy.stats.rankdata``.

    A random 100x26 grid of values in [-1, 1] (step 0.1) is ranked as-is,
    shifted by 1e6 and scaled by 1e-6, along both axes and for every
    tie-breaking method.  pandas' ``'first'`` is compared against scipy's
    ``'ordinal'``.
    """
    tm.skip_if_no_package('scipy', '0.13', 'scipy.stats.rankdata')
    from scipy.stats import rankdata

    base = np.random.randint(0, 21, (100, 26))
    base = (base - 10.0) / 10.0
    labels = [chr(ord('z') - k) for k in range(base.shape[1])]
    rank_methods = ['average', 'min', 'max', 'first', 'dense']

    for data in (base, base + 1e6, base * 1e-6):
        frame = DataFrame(data, columns=labels)

        for axis in (0, 1):
            for method in rank_methods:
                result = frame.rank(axis=axis, method=method)

                scipy_method = 'ordinal' if method == 'first' else method
                ranks = np.apply_along_axis(
                    rankdata, axis, data, scipy_method)
                tm.assert_frame_equal(
                    result, DataFrame(ranks, columns=labels))
def test_interp_various(self):
    """Check several scipy-backed ``DataFrame.interpolate`` methods.

    Column ``A`` (indexed by ``C``) has NaNs at index labels 3 and 13.
    Each case interpolates with the given keyword arguments, writes the
    two expected fill values into ``expected`` and compares the frames.
    """
    tm.skip_if_no_package('scipy', max_version='0.19.0')

    df = DataFrame({
        'A': [1, 2, np.nan, 4, 5, np.nan, 7],
        'C': [1, 2, 3, 5, 8, 13, 21]
    }).set_index('C')
    expected = df.copy()

    # (interpolate kwargs, value at label 3, value at label 13, check_dtype)
    # 'quadratic' appears twice, mirroring the original sequence of checks.
    cases = [
        (dict(method='polynomial', order=1), 2.66666667, 5.76923076, True),
        (dict(method='cubic'), 2.81621174, 5.64146581, True),
        (dict(method='nearest'), 2, 5, False),
        (dict(method='quadratic'), 2.82533638, 6.02817974, True),
        (dict(method='slinear'), 2.66666667, 5.76923077, True),
        (dict(method='zero'), 2., 5, False),
        (dict(method='quadratic'), 2.82533638, 6.02817974, True),
    ]

    for interp_kwargs, at_3, at_13, check_dtype in cases:
        result = df.interpolate(**interp_kwargs)
        expected.A.loc[3] = at_3
        expected.A.loc[13] = at_13
        assert_frame_equal(result, expected, check_dtype=check_dtype)
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype sparse matrix through SparseDataFrame.

    ``spmatrix`` is called on a 2x2 object-dtype identity array and
    ``fill_value`` is forwarded as ``default_fill_value``.  Both are
    presumably supplied by fixtures or parametrization defined elsewhere
    -- confirm against the test module.
    """
    # GH 4343
    tm.skip_if_no_package('scipy', max_version='0.19.0')

    row_labels = ['a', 'b']
    col_labels = ['c', 'd']

    # A single dense array, and the sparse matrix built from it, feed both
    # the frame construction and the comparisons below.
    dense = np.eye(2, dtype=object)
    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and
        # dtype, the combination is treated as unsupported and the rest
        # of the test is not run.
        return

    sdf = pd.SparseDataFrame(spm, index=row_labels, columns=col_labels,
                             default_fill_value=fill_value)

    # Build the expectation from an object copy with zeros turned to NaN.
    as_object = dense.astype(object)
    as_object[dense == 0] = np.nan
    fill_with = np.nan if fill_value is None else fill_value
    expected = pd.SparseDataFrame(as_object, index=row_labels,
                                  columns=col_labels)
    expected = expected.fillna(fill_with)

    # The frame matches the expectation, sparse and dense.
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # The sparse matrices hold the same entries.
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # dtype stays object throughout.
    tm.assert_contains_all(sdf.dtypes, {np.dtype(object)})
    assert sdf.to_coo().dtype == object
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype sparse matrix through SparseDataFrame.

    ``spmatrix`` is called on a 2x2 object-dtype identity array and
    ``fill_value`` is forwarded as ``default_fill_value``.  Both are
    presumably supplied by fixtures or parametrization defined elsewhere
    -- confirm against the test module.
    """
    # GH 4343
    tm.skip_if_no_package('scipy', max_version='0.19.0')

    row_labels = ['a', 'b']
    col_labels = ['c', 'd']

    # A single dense array, and the sparse matrix built from it, feed both
    # the frame construction and the comparisons below.
    dense = np.eye(2, dtype=object)
    try:
        spm = spmatrix(dense)
        assert spm.dtype == dense.dtype
    except (TypeError, AssertionError):
        # If conversion to sparse fails for this spmatrix type and
        # dtype, the combination is treated as unsupported and the rest
        # of the test is not run.
        return

    sdf = pd.SparseDataFrame(spm, index=row_labels, columns=col_labels,
                             default_fill_value=fill_value)

    # Build the expectation from an object copy with zeros turned to NaN.
    as_object = dense.astype(object)
    as_object[dense == 0] = np.nan
    fill_with = np.nan if fill_value is None else fill_value
    expected = pd.SparseDataFrame(as_object, index=row_labels,
                                  columns=col_labels)
    expected = expected.fillna(fill_with)

    # The frame matches the expectation, sparse and dense.
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # The sparse matrices hold the same entries.
    tm.assert_equal(dict(sdf.to_coo().todok()), dict(spm.todok()))

    # dtype stays object throughout.
    tm.assert_contains_all(sdf.dtypes, {np.dtype(object)})
    tm.assert_equal(sdf.to_coo().dtype, object)
def test_rank_methods_series(self):
    """Compare ``Series.rank`` with ``scipy.stats.rankdata``.

    A shuffled random vector containing duplicates is ranked as-is,
    shifted by 1e6 and scaled by 1e-6, for every tie-breaking method.
    pandas' ``'first'`` is compared against scipy's ``'ordinal'``.  When
    scipy is at least 0.17.0 the expected Series is cast to float64.
    """
    tm.skip_if_no_package('scipy', min_version='0.13',
                          app='scipy.stats.rankdata')
    import scipy
    from scipy.stats import rankdata

    draws = np.random.randn(9)
    # Concatenating overlapping tails introduces duplicate values.
    base = np.concatenate([draws[start:] for start in range(0, 9, 2)])
    np.random.shuffle(base)

    labels = [chr(ord('a') + pos) for pos in range(len(base))]
    rank_methods = ['average', 'min', 'max', 'first', 'dense']

    # The version test does not depend on the loop variables; evaluate once.
    cast_expected = LooseVersion(scipy.__version__) >= '0.17.0'

    for data in (base, base + 1e6, base * 1e-6):
        ser = Series(data, index=labels)

        for method in rank_methods:
            result = ser.rank(method=method)

            scipy_method = 'ordinal' if method == 'first' else method
            expected = Series(rankdata(data, scipy_method), index=labels)

            if cast_expected:
                expected = expected.astype('float64')
            tm.assert_series_equal(result, expected)
def test_interp_scipy_basic(self):
    """Check scipy-backed ``Series.interpolate`` methods on a short series.

    Covers slinear, nearest, zero, quadratic and cubic.  All but cubic are
    also run with ``downcast='infer'``.
    """
    tm.skip_if_no_package('scipy', max_version='0.19.0')

    ser = Series([1, 3, np.nan, 12, np.nan, 25])

    # slinear
    linear_expected = Series([1., 3., 7.5, 12., 18.5, 25.])
    assert_series_equal(ser.interpolate(method='slinear'), linear_expected)
    assert_series_equal(
        ser.interpolate(method='slinear', downcast='infer'),
        linear_expected)

    # nearest and zero share the same integer-valued expectation: the plain
    # call is compared as float, the downcast call against the ints.
    for step_method in ('nearest', 'zero'):
        step_expected = Series([1, 3, 3, 12, 12, 25])
        assert_series_equal(ser.interpolate(method=step_method),
                            step_expected.astype('float'))
        assert_series_equal(
            ser.interpolate(method=step_method, downcast='infer'),
            step_expected)

    # quadratic
    quad_expected = Series([1, 3., 6.769231, 12., 18.230769, 25.])
    assert_series_equal(ser.interpolate(method='quadratic'), quad_expected)
    assert_series_equal(
        ser.interpolate(method='quadratic', downcast='infer'),
        quad_expected)

    # cubic
    cubic_expected = Series([1., 3., 6.8, 12., 18.2, 25.])
    assert_series_equal(ser.interpolate(method='cubic'), cubic_expected)
def test_nansem(self):
    """Check ``nanops.nansem`` against ``scipy.stats.sem``.

    Guarded by ``tm.skip_if_no_package('scipy.stats')``.  The comparison is
    delegated to ``self.check_funs_ddof`` with ``allow_complex=False`` and
    ``allow_date=False``.
    """
    tm.skip_if_no_package('scipy.stats')
    # Use the standard error of the mean as the reference.  The previous
    # reference, ``np.var``, computes a variance, which is a different
    # quantity, and left the scipy guard above without any scipy use.
    from scipy.stats import sem

    # NOTE(review): the other allow_* flags are left at their defaults as
    # before; confirm ``sem`` copes with every input kind that enables.
    self.check_funs_ddof(nanops.nansem, sem,
                         allow_complex=False, allow_date=False)
def test_nansem(self):
    """Check ``nanops.nansem`` against ``scipy.stats.sem``.

    Guarded by ``tm.skip_if_no_package('scipy', min_version='0.17.0')``.
    The comparison is delegated to ``self.check_funs_ddof`` with
    ``allow_tdelta=True`` and ``allow_obj='convert'``.
    """
    tm.skip_if_no_package('scipy', min_version='0.17.0')
    from scipy.stats import sem

    input_flags = dict(allow_complex=False, allow_str=False,
                       allow_date=False, allow_tdelta=True,
                       allow_obj='convert')
    self.check_funs_ddof(nanops.nansem, sem, **input_flags)
def test_nanskew(self):
    """Check ``nanops.nanskew`` against ``scipy.stats.skew``.

    ``skew`` is routed through ``self._skew_kurt_wrap`` and the wrapped
    callable is used as the reference in ``self.check_funs``.
    """
    tm.skip_if_no_package('scipy.stats')
    from scipy.stats import skew

    reference = partial(self._skew_kurt_wrap, func=skew)
    input_flags = dict(allow_complex=False, allow_str=False,
                       allow_date=False)
    self.check_funs(nanops.nanskew, reference, **input_flags)