def test_dropna_empty(self): s = Series([]) self.assertEqual(len(s.dropna()), 0) s.dropna(inplace=True) self.assertEqual(len(s), 0) # invalid axis self.assertRaises(ValueError, s.dropna, axis=1)
def test_dropna_empty(self):
    """Check ``dropna`` on an empty Series, both copying and in place."""
    empty = Series([])

    assert len(empty.dropna()) == 0
    empty.dropna(inplace=True)
    assert len(empty) == 0

    # invalid axis
    with pytest.raises(ValueError):
        empty.dropna(axis=1)
def pd_02():
    """Print a short walkthrough of missing-data handling in pandas.

    Shows ``isnull``/``dropna`` on a string Series, then ``dropna`` and
    ``fillna`` (scalar, per-column dict and in-place) on a random DataFrame.
    Everything is written to stdout; nothing is returned.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    string_data = Series(['a', 'b', 'c', np.nan, 'e', None])
    print(string_data)
    print(string_data.isnull())
    print(string_data.dropna())

    df = DataFrame(np.random.randn(7, 3))
    # `.ix` was removed from pandas; with the default integer labels the
    # label-based `.loc` slices below select the same (inclusive) rows.
    df.loc[:4, 1] = np.nan
    df.loc[:2, 2] = np.nan
    print(df)
    print(df.dropna())
    print(df.fillna(0))
    print(df.fillna({1: 0.5, 3: -1}))
    print(df)  # the calls above returned copies; df itself is unchanged
    df.fillna(0, inplace=True)
    print(df)
class Dropna(object):
    """Benchmark ``Series.dropna`` on an integer and on a datetime Series."""

    params = ['int', 'datetime']
    param_names = ['dtype']

    def setup(self, dtype):
        """Build the Series for ``dtype``; raises KeyError for unknown names."""
        N = 10**6
        # Build only the array that is requested instead of both of them.
        # The lowercase 's' alias replaces the deprecated uppercase 'S'.
        builders = {
            'int': lambda: np.random.randint(1, 10, N),
            'datetime': lambda: date_range('2000-01-01', freq='s', periods=N),
        }
        self.s = Series(builders[dtype]())
        if dtype == 'datetime':
            # Punch up to 100 random holes so dropna has something to remove.
            self.s[np.random.randint(1, N, 100)] = NaT

    def time_dropna(self, dtype):
        """Timed body: drop the missing values."""
        self.s.dropna()
def kama(x, n=10, pow1=2, pow2=30):
    """KAMA: Kaufmans Adaptive Moving Average.

    Params:
        x (Series): Time series data such as close prices.
        n (int): number of periods for the Efficiency Ratio (ER).
        pow1 (int): number of periods for the fastest EMA constant.
        pow2 (int): number of periods for the slowest EMA constant.

    Returns:
        Series: Kaufmans adaptive moving average of x, on x's index. Missing
        inputs are dropped before the calculation and the result is padded
        with the same number of leading NaNs, so the alignment is only exact
        when the missing values are at the start of ``x``.
    """
    original_index = x.index
    prices = x.dropna().values
    nan_count = len(x) - len(prices)

    # Work on a positional (0..m-1) index. Rebuilding the stripped values on
    # the original index raised a length mismatch whenever x contained NaNs.
    clean = Series(prices, name=x.name)

    change = (clean - clean.shift(n)).abs()
    volatility = (clean - clean.shift(1)).abs().rolling(window=n).sum()
    er = change / volatility
    fastest = 2.0 / (pow1 + 1.0)
    slowest = 2.0 / (pow2 + 1.0)
    sc = ((er * (fastest - slowest) + slowest) ** 2.0).values

    smoothed = [np.nan] * len(sc)
    seeded = False
    for i, (weight, price) in enumerate(zip(sc, prices)):
        if pd.isnull(weight):
            continue
        if not seeded:
            # The first usable smoothing constant seeds KAMA with the price.
            smoothed[i] = price
            seeded = True
        else:
            previous = smoothed[i - 1]
            smoothed[i] = previous + weight * (price - previous)

    return Series(data=[np.nan] * nan_count + smoothed,
                  name="kama(%d,%d,%d)" % (n, pow1, pow2),
                  index=original_index)
def test_dropna_intervals(self): s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays( [np.nan, 0, 1, 2], [np.nan, 1, 2, 3])) result = s.dropna() expected = s.iloc[1:] assert_series_equal(result, expected)
def test_bins_unequal_len():
    """Grouping by bins shorter than the Series must raise ValueError."""
    # GH3011
    series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
    non_null_values = series.dropna().values
    bins = pd.cut(non_null_values, 4)

    # len(bins) != len(series) here
    with pytest.raises(ValueError):
        series.groupby(bins).mean()
def contains_op(cls, series: pd.Series) -> bool: if not pdt.is_datetime64_any_dtype(series): return False temp_series = series.dropna().dt time_val_map = {"hour": 0, "minute": 0, "second": 0} return all( getattr(temp_series, time_part).eq(val).all() for time_part, val in time_val_map.items())
def test_isnull_for_inf(self):
    """With ``mode.use_inf_as_null`` on, inf is reported and dropped like NaN."""
    s = Series(['a', np.inf, np.nan, 1.0])
    expected_mask = Series([False, True, True, False])
    expected_kept = Series(['a', 1.0], index=[0, 3])

    with pd.option_context('mode.use_inf_as_null', True):
        mask = s.isnull()
        kept = s.dropna()

    tm.assert_series_equal(mask, expected_mask)
    tm.assert_series_equal(kept, expected_kept)
def test_bins_unequal_len():
    """Grouping by bins shorter than the Series must raise a length-mismatch error."""
    # GH3011
    series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
    non_null_values = series.dropna().values
    bins = pd.cut(non_null_values, 4)

    # len(bins) != len(series) here
    msg = r"Length of grouper \(8\) and axis \(10\) must be same length"
    with pytest.raises(ValueError, match=msg):
        series.groupby(bins).mean()
def test_isna_for_inf(self):
    """With ``mode.use_inf_as_na`` on, inf is treated like NaN and ``pd.NA``."""
    s = Series(["a", np.inf, np.nan, pd.NA, 1.0])
    expected_mask = Series([False, True, True, True, False])
    expected_kept = Series(["a", 1.0], index=[0, 4])

    with pd.option_context("mode.use_inf_as_na", True):
        mask = s.isna()
        kept = s.dropna()

    tm.assert_series_equal(mask, expected_mask)
    tm.assert_series_equal(kept, expected_kept)
def get_common_names(
    names: pd.Series,
    token: str,
    add_supplied_names: bool = False,
    add_source: bool = False,
    expand: bool = True,
):
    """
    Gets common names for multiple species using the IUCN API.

    Parameters
    ----------
    names : list, Series or str
        Scientific name(s) to get results for.
    token : str
        IUCN API authentication token.
    add_supplied_names : bool
        Add supplied scientific names column to the resulting DataFrame.
    add_source : bool
        Add source column to the resulting DataFrame.
    expand : bool
        Whether to expand result rows to match `names` size. If False,
        the number of rows will correspond to the number of unique names
        in `names`.

    Returns
    -------
    DataFrame
        DataFrame with common names: one column per language, each cell
        holding the list of names returned for that language.
    """
    if isinstance(names, (list, str)):
        names = pd.Series(names)

    endpoint = urljoin(API_URL, "species/common_names/")
    unique_names = names.dropna().unique()

    # Collect one row per name and build the frame once. DataFrame.append
    # was removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for name in unique_names:
        response = _request(urljoin(endpoint, name), token)
        items = response.json().get("result")  # decode the payload only once
        if items:
            by_language = defaultdict(list)
            for item in items:
                by_language[item["language"]].append(item["taxonname"])
            rows.append(pd.Series(by_language))
        else:
            # Keep an empty row so rows stay aligned with `unique_names`.
            rows.append(pd.Series([], dtype="object"))
    df = pd.DataFrame(rows).reset_index(drop=True)

    if add_supplied_names:
        df["supplied_name"] = unique_names
    if add_source:
        df["source"] = "IUCN"
    if expand:
        df = expand_result(df, names)

    return df
def __init__(self, data: Series):
    """
    Create a new Count distribution.

    Missing values are dropped and the rest cast to int; the categories are
    every integer from the observed minimum to the observed maximum.

    :param data: pandas Series.
    """
    counts = data.dropna().astype(int)
    self._data: Series = counts
    lowest, highest = self._data.min(), self._data.max()
    self._categories = list(range(lowest, highest + 1))
def test_groupby_bins_unequal_len(self): # GH3011 series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) bins = pd.cut(series.dropna().values, 4) # len(bins) != len(series) here def f(): series.groupby(bins).mean() self.assertRaises(ValueError, f)
def unique(x: pd.Series) -> Any:
    """Return the single distinct non-null value of ``x``.

    Gives NaN when ``x`` has no non-null values and raises AggregationError
    when more than one distinct value is present.
    """
    present = x.dropna()
    if present.empty:
        return np.nan

    distinct = present.unique()
    if distinct.size != 1:
        raise AggregationError("Not unique.")
    return distinct[0]
def inner(
    config: Settings, series: pd.Series, state: dict, *args, **kwargs
) -> bool:
    """Call ``fn`` on the non-null part of ``series``.

    A series that is empty once its missing values are removed
    short-circuits to False without calling ``fn``.
    """
    if not series.hasnans:
        return fn(config, series, state, *args, **kwargs)

    non_null = series.dropna()
    if non_null.empty:
        return False
    return fn(config, non_null, state, *args, **kwargs)
def test_dropna_intervals(self): ser = Series( [np.nan, 1, 2, 3], IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]), ) result = ser.dropna() expected = ser.iloc[1:] tm.assert_series_equal(result, expected)
def sanitize_data(self, series: pd.Series) -> Optional[pd.Series]:
    """Drop missing values, sort by index and average duplicate index entries.

    Returns None when fewer than two points remain after dropping missing
    values (the check runs before duplicates are merged).
    """
    cleaned = series.dropna().sort_index()

    # Less than 2 points are not visible
    if len(cleaned) < 2:
        return None

    # Make timestamp unique and use mean of values on duplicates
    return cleaned.groupby(level=0).mean()
def _validate_data(self, data: Series):
    """Raise ValueError listing every non-null value missing from ``self.category_names``."""
    errors = [
        f'"{value}" is not in categories for "{self.name}".'
        for value in data.dropna().unique()
        if value not in self.category_names
    ]
    if errors:
        raise ValueError('\n'.join(errors))
def test_datetime64_tz_dropna(self): # DatetimeBlock s = Series([Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-01-03 10:00"), pd.NaT]) result = s.dropna() expected = Series([Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")], index=[0, 2]) self.assert_series_equal(result, expected) # DatetimeBlockTZ idx = pd.DatetimeIndex(["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz="Asia/Tokyo") s = pd.Series(idx) self.assertEqual(s.dtype, "datetime64[ns, Asia/Tokyo]") result = s.dropna() expected = Series( [Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), Timestamp("2011-01-03 10:00", tz="Asia/Tokyo")], index=[0, 2], ) self.assertEqual(result.dtype, "datetime64[ns, Asia/Tokyo]") self.assert_series_equal(result, expected)
class Dropna(object):
    """Benchmark ``Series.dropna`` on an integer and on a datetime Series."""

    goal_time = 0.2

    params = ['int', 'datetime']
    param_names = ['dtype']

    def setup(self, dtype):
        """Build the Series for ``dtype``; raises KeyError for unknown names."""
        N = 10**6
        # Build only the array that is requested instead of both of them.
        # The lowercase 's' alias replaces the deprecated uppercase 'S'.
        builders = {
            'int': lambda: np.random.randint(1, 10, N),
            'datetime': lambda: date_range('2000-01-01', freq='s', periods=N),
        }
        self.s = Series(builders[dtype]())
        if dtype == 'datetime':
            # Punch up to 100 random holes so dropna has something to remove.
            self.s[np.random.randint(1, N, 100)] = NaT

    def time_dropna(self, dtype):
        """Timed body: drop the missing values."""
        self.s.dropna()
def test_groupby_bins_unequal_len(self):
    """Grouping by bins shorter than the Series must raise ValueError."""
    # GH3011
    series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
    bins = pd.cut(series.dropna().values, 4)

    # len(bins) != len(series) here
    with pytest.raises(ValueError):
        series.groupby(bins).mean()
def __init__(self, data: Series):
    """
    Create a new Ordinal distribution.

    Missing values are dropped; the categories are taken from the
    categorical dtype of the remaining data.

    :param data: Categorical pandas Series.
    """
    self._data: Series = data.dropna()
    category_index = self._data.cat.categories
    self._categories: List[str] = category_index.to_list()
def get_problem_type(y: Series):
    """ Identifies which type of prediction problem we are interested in (if user has not specified).
        Ie. binary classification, multi-class classification, or regression.

        Heuristics, applied to the non-missing labels in this order:
        exactly two unique values -> BINARY; object dtype -> MULTICLASS;
        float/int dtype -> MULTICLASS when the unique values are few (both
        relative to the row count and in absolute number) and, for floats,
        integer-valued; otherwise REGRESSION.

        Raises ValueError when there are no (non-missing) labels and
        NotImplementedError for any other label dtype.
    """
    if len(y) == 0:
        raise ValueError("provided labels cannot have length = 0")
    y = y.dropna()  # Remove missing values from y (there should not be any though as they were removed in Learner.general_data_processing())
    num_rows = len(y)
    if num_rows == 0:
        # All labels were missing. Without this guard the unique-ratio below
        # divides by zero for numeric labels.
        raise ValueError("provided labels cannot have length = 0")

    unique_values = y.unique()
    unique_count = len(unique_values)
    logger.log(20, f'Here are the first 10 unique label values in your data: {unique_values[:10]}')

    MULTICLASS_LIMIT = 1000  # if numeric and class count would be above this amount, assume it is regression
    if num_rows > 1000:
        REGRESS_THRESHOLD = 0.05  # if the unique-ratio is less than this, we assume multiclass classification, even when labels are integers
    else:
        REGRESS_THRESHOLD = 0.1

    def _few_unique_values():
        # Few classes both relative to the data size and in absolute terms.
        unique_ratio = unique_count / float(num_rows)
        return (unique_ratio <= REGRESS_THRESHOLD) and (unique_count <= MULTICLASS_LIMIT)

    if unique_count == 2:
        problem_type = BINARY
        reason = "only two unique label-values observed"
    elif unique_values.dtype == 'object':
        problem_type = MULTICLASS
        reason = "dtype of label-column == object"
    elif np.issubdtype(unique_values.dtype, np.floating):
        if _few_unique_values():
            try:
                can_convert_to_int = np.array_equal(y, y.astype(int))
            except (ValueError, TypeError, OverflowError):
                # e.g. infinite labels cannot be cast to int (was a bare except).
                can_convert_to_int = False
            if can_convert_to_int:
                problem_type = MULTICLASS
                reason = "dtype of label-column == float, but few unique label-values observed and label-values can be converted to int"
            else:
                problem_type = REGRESSION
                reason = "dtype of label-column == float and label-values can't be converted to int"
        else:
            problem_type = REGRESSION
            reason = "dtype of label-column == float and many unique label-values observed"
    elif np.issubdtype(unique_values.dtype, np.integer):
        if _few_unique_values():
            problem_type = MULTICLASS  # TODO: Check if integers are from 0 to n-1 for n unique values, if they have a wide spread, it could still be regression
            reason = "dtype of label-column == int, but few unique label-values observed"
        else:
            problem_type = REGRESSION
            reason = "dtype of label-column == int and many unique label-values observed"
    else:
        raise NotImplementedError('label dtype', unique_values.dtype, 'not supported!')

    logger.log(25, f"AutoGluon infers your prediction problem is: {problem_type}  (because {reason}).")
    logger.log(25, f"If this is wrong, please specify `problem_type` argument in fit() instead "
                   f"(You may specify problem_type as one of: [{BINARY, MULTICLASS, REGRESSION}])\n")
    return problem_type
def inner(series: pd.Series, state: dict, *args, **kwargs) -> bool:
    """Call ``fn`` on the non-null part of ``series``.

    The ``hasnans`` flag is computed once and cached in ``state``. A series
    that is empty after dropping missing values yields False without
    calling ``fn``.
    """
    if "hasnans" not in state:
        state["hasnans"] = series.hasnans

    if not state["hasnans"]:
        return fn(series, state, *args, **kwargs)

    non_null = series.dropna()
    if non_null.empty:
        return False
    return fn(non_null, state, *args, **kwargs)
def reducer(x: pd.Series) -> float:
    """Reduces the (sum, count) tuple series to get the mean of each variable.

    Entries that cannot be unpacked (e.g. NaN) trigger a retry on the
    non-null entries only; None is returned when every entry is null.
    """
    def _mean(pairs):
        # Column-wise totals: totals[0] is the summed sums, totals[1] the summed counts.
        totals = [sum(column) for column in zip(*pairs)]
        return round(totals[0] / totals[1], 5)

    try:
        return _mean(x)
    except TypeError:
        if not x.notnull().any():
            return None
        return _mean(x.dropna())
def compare_delta_metrics(s: pd.Series, thresholds: list):
    """Return percentage breakdowns of the non-null delays in ``s``.

    ``thresholds[0]`` and ``thresholds[1]`` are the cut-offs used for the
    on-time/gap/late and late/very-late buckets respectively. Every value is
    a percentage of the non-null observations.
    """
    no_nan = s.dropna()

    def share(mask):
        # Percentage of non-null observations selected by `mask`.
        return len(no_nan[mask]) / len(no_nan) * 100

    return {
        f"on-time rate (at most {thresholds[0]} minutes late)":
            share((no_nan <= thresholds[0]) & (no_nan >= 0)),
        "early rate":
            share(no_nan < 0),
        f"gap percentage (more than {thresholds[0]} minutes late)":
            share(no_nan > thresholds[0]),
        f"late percentage (between {thresholds[0]} and {thresholds[1]} minutes late)":
            share((no_nan > thresholds[0]) & (no_nan <= thresholds[1])),
        f"very late percentage (more than {thresholds[1]} minutes late)":
            share(no_nan > thresholds[1]),
    }
def problem_type(labels: pd.Series) -> ProblemType:
    """
    :returns: problem type according to heuristics on the labels. So far only
        binary classification is supported: exactly two distinct non-null
        labels give BINARY, anything else OTHER.
    """
    # TODO: add other problem types
    distinct_count = labels.dropna().unique().size
    return ProblemType.BINARY if distinct_count == 2 else ProblemType.OTHER
def profile_named_entity(column: pd.Series) -> typing.List[str]:
    """Profile a column that is marked as a named-entities column.

    Args:
        column: pandas Series column.

    Returns:
        The distinct non-null values of the column, as strings, in order of
        first appearance.
    """
    distinct_values = column.dropna().unique()
    as_text = distinct_values.astype(str)
    return as_text.tolist()
def contains_op(cls, series: pd.Series) -> bool:
    """Return True when an object-dtype series holds only ``str`` values.

    Missing values are ignored; a series consisting only of missing values
    gives False.
    """
    # TODO: without the object check this passes string categories... is there a better way?
    if not pdt.is_object_dtype(series):
        return False

    if series.hasnans:
        series = series.dropna()
        if series.empty:
            return False

    for value in series:
        if not isinstance(value, str):
            return False
    return True
def concat(series: pd.Series):
    """Join the non-null values of ``series``, as strings, with ``|``.

    Args:
        series (pd.Series): values to join.

    Returns:
        The joined string, or None when no non-null value exists.
    """
    as_text = series.dropna().astype(str)
    if as_text.empty:
        return None
    return "|".join(as_text)
def test_dropna_pos_args_deprecation(self):
    """Passing ``axis`` positionally to ``Series.dropna`` emits a FutureWarning."""
    # https://github.com/pandas-dev/pandas/issues/41485
    ser = Series([1, 2, 3])
    expected = Series([1, 2, 3])
    msg = (
        r"In a future version of pandas all arguments of Series\.dropna "
        r"will be keyword-only")

    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.dropna(0)

    tm.assert_series_equal(result, expected)
def concat_uniques(series: pd.Series):
    """An aggregation custom function to be applied to each column of a groupby.

    Joins the distinct non-null values, as strings, with ``|``.

    Args:
        series (pd.Series): values to join.

    Returns:
        The joined string, or None when no non-null value exists.
    """
    as_text = series.dropna().astype(str)
    if as_text.empty:
        return None
    return "|".join(as_text.unique())
def make_plot(
        series: pd.Series,
        axes,  #: matplotlib.axes.Axes
        title: str):
    """Draw a probability plot of the non-null values of ``series`` on ``axes``.

    The points are drawn as ``|`` markers, coloured by the series dtype
    (falling back to '#9b59b6'), together with the fitted straight line.
    """
    # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.probplot.html
    (theoretical, ordered), (slope, intercept, r) = sp.stats.probplot(series.dropna())

    marker_color = palette_dtypes.get(series.dtype, '#9b59b6')
    axes.plot(theoretical, ordered, color=marker_color, marker='|', linestyle='')

    fitted = slope * theoretical + intercept
    axes.plot(theoretical, fitted, color='#4b4b4b', linestyle='-', linewidth=.9)

    axes.set_title(title, fontsize=10)
def _plot_qq(data: pd.Series = None, dist=stats.norm) -> go.Figure:
    """Build a Q-Q plot of the non-null values of ``data`` against ``dist``.

    :param data: series to plot; missing values are dropped first.
    :param dist: reference distribution handed to ``sm.qqplot``.
    :return: the matplotlib figure converted with ``tls.mpl_to_plotly``.
    """
    figure, axis = plt.subplots(figsize=(8, 5))
    sample = data.dropna()
    qq_figure = sm.qqplot(sample, dist, fit=True, line="45", ax=axis)
    return tls.mpl_to_plotly(qq_figure)
def squeeze(
    daily: pd.Series,
    rate: pd.Series,
    day_shift: int,
    population: pd.Series,
    cross_variant_immunity: float,
    escape_variant_prevalence: pd.Series,
    vaccine_coverage: pd.DataFrame,
    ceiling: float = CEILING,
) -> pd.Series:
    """Scale ``rate`` so that implied immunity stays under a population ceiling.

    Infections are estimated as ``daily / rate`` and shifted back by
    ``day_shift`` days. First-time infections plus effective vaccinations
    form the non-susceptible count per location. Where its maximum exceeds
    ``population * ceiling``, ``rate`` is divided by a per-location scaling
    factor.

    NOTE(review): the inputs appear to be indexed by
    ('location_id', 'date'), as the ``set_index`` call below suggests -
    confirm against the callers.

    Returns:
        The adjusted rate with missing values dropped.
    """
    daily_infections = (daily / rate).dropna().rename('infections')
    # NOTE(review): one infection is added to every row; the reason is not
    # visible in this block.
    daily_infections += 1
    # Shift the dates back by `day_shift` days, then restore the index.
    daily_infections = daily_infections.reset_index()
    daily_infections['date'] -= pd.Timedelta(days=day_shift)
    daily_infections = daily_infections.set_index(['location_id', 'date']).loc[:, 'infections']

    # Align the prevalence with the infections index, treating gaps as 0.
    escape_variant_prevalence = (pd.concat(
        [daily_infections, escape_variant_prevalence], axis=1))
    escape_variant_prevalence = escape_variant_prevalence.fillna(0)
    escape_variant_prevalence = (
        escape_variant_prevalence.loc[daily_infections.index, 'escape_variant_prevalence'])

    non_ev_infections = daily_infections * (1 - escape_variant_prevalence)
    ev_infections = daily_infections * escape_variant_prevalence
    # NOTE(review): the cumsum below is not grouped by location, unlike the
    # later ones - confirm this is intended.
    repeat_infections = (1 - cross_variant_immunity) * (
        non_ev_infections.cumsum() / population).clip(0, 1) * ev_infections
    first_infections = daily_infections - repeat_infections

    # The unused `cumul_infections` local was removed; it had no effect.
    seroprevalence = first_infections.dropna().groupby(level=0).cumsum()

    vaccinations = vaccine_coverage.join(
        daily, how='right')['cumulative_all_effective'].fillna(0)
    # Cumulative -> daily; the first row per group keeps its cumulative value.
    daily_vaccinations = vaccinations.groupby(
        level=0).diff().fillna(vaccinations)
    # Only the share given to people not yet counted in seroprevalence.
    eff_daily_vaccinations = daily_vaccinations * (
        1 - seroprevalence / population).clip(0, 1)
    eff_vaccinations = eff_daily_vaccinations.groupby(level=0).cumsum()

    non_suscept = seroprevalence + eff_vaccinations
    max_non_suscept = non_suscept.groupby(level=0).max()
    max_sero = seroprevalence.groupby(level=0).max()

    limits = population * ceiling
    excess_non_suscept = (max_non_suscept - limits).clip(0, np.inf)
    excess_scaling_factor = (max_sero - excess_non_suscept).clip(
        0, np.inf) / max_sero

    # Where no scaling factor could be computed, keep the original rate.
    rate = (rate / excess_scaling_factor).fillna(rate)

    return rate.dropna()
def main():
    """
    Handling of not applicable values

    Prints a walkthrough of detecting, dropping and filling missing values
    on Series and DataFrame objects. Nothing is returned.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
    print(string_data)
    print(string_data.isnull())
    string_data[0] = None
    print(string_data.isnull())
    print('%s %s' % (None is np.nan, None == np.nan))  # not same

    # Exclude N/A
    print(' ')
    NA = np.nan
    data = Series([1, NA, 3.5, NA, 7])
    print(data.dropna())
    print(data[data.notnull()])

    data = DataFrame([
        [1., 6.5, 3.],
        [1., NA, NA],
        [NA, NA, NA],
        [NA, 6.5, 3.]
    ])
    cleaned = data.dropna()  # only rows without any NA
    print(data)
    print(cleaned)
    print(data.dropna(how='all'))
    data[4] = None
    print(data.dropna(axis=1, how='all'))
    print(data.dropna(thresh=2))  # rows with at least 2 non-NA values

    # Fill NA
    print(' ')
    print(data.fillna(0))
    print(data.fillna({1: 0.5, 2: -1}))
    _ = data.fillna(0, inplace=True)
    print(data)

    print(' ')
    # Columns 1 and 2 receive NaN below, so make them float up front
    # rather than relying on an implicit int -> float upcast.
    df = DataFrame(np.arange(18).reshape((6, 3))).astype({1: float, 2: float})
    # `.ix` was removed from pandas; with the default integer labels `.loc`
    # selects the same rows.
    df.loc[2:, 1] = NA
    df.loc[4:, 2] = NA
    print(df)
    # fillna(method='ffill') is deprecated; ffill() is the direct equivalent.
    print(df.ffill())
    print(df.ffill(limit=2))
    data = Series([1., NA, 3.5, NA, 7])
    print(data.fillna(data.mean()))
def describe_dc_as_dataframe(dc: pd.Series, ds_md: dict) -> pd.DataFrame:
    """
    Describes the profile criteria for a column.

    The column is coerced to numeric first; values that cannot be parsed
    count as missing.

    Args:
        dc: the Series to create Profile for
        ds_md: the Metadata dictionary of the DataFrame that is to be profiled

    Returns:
        A DataFrame indexed by criterion name ("Kriterien") with a single
        "Ergebnis" column holding the calculated description values.
    """
    dc = pd.to_numeric(dc, errors='coerce')
    row_count = len(dc)
    null_values = dc.isna().sum()
    non_null = dc.dropna()  # computed once; min/max rows only need its length
    has_values = len(non_null) > 0
    unique_values = len(non_null.unique()) / row_count
    # constancy defined as amount of most frequent value divided by amount of
    # (non-missing) numbers in column
    constancy = dc.value_counts(normalize=True).max()

    dc_stats = [
        ["Metadaten spezifisch für Spalte", column_metadata(dc.name, ds_md)],
        ["Anzahl an Zeilen", row_count],
        ["Anzahl an fehlenden Werten", null_values],
        ["Fehlende Werte (Prozent)", (null_values / row_count) * TO_PERCENT],
        ["Distinkte Werte (Prozent)", unique_values * TO_PERCENT],
        ["Konstanz (Prozent)", constancy * TO_PERCENT],
        ["Mittelwert", format(dc.mean(), 'f')],
        [
            "Minimumwert (Jahr, Wert)",
            # idxmin() is only meaningful when at least one value is present.
            ({dc.idxmin().date(): format(dc.min(), 'f')} if has_values else "")
        ],
        [
            "Maximumwert (Jahr, Wert)",
            ({dc.idxmax().date(): format(dc.max(), 'f')} if has_values else "")
        ],
        ["Datenpunkte vorhanden für", check_is_consecutive(dc)]
    ]
    profile = pd.DataFrame(data=dc_stats, columns=["Kriterien", "Ergebnis"])
    profile.set_index("Kriterien", inplace=True)
    return profile
def calculate_daily_scaling_factors(
    *,
    forecasted_daily_tests: pandas.Series,
    sparse_reported_totals: pandas.Series
) -> pandas.DataFrame:
    """ Scale the daily test counts per region coming from the Prophet forecast
    by the test count report from OurWorldInData, which is available before
    the real daily test counts are known.

    Parameters
    ----------
    forecasted_daily_tests: pandas.Series
        Series from the Prophet forecast containing the confirmed daily test
        counts sent from RKI privately as well as predicted test counts.
        Both data are scaled by the total reported tests by OurWorldInData (OWID).
    sparse_reported_totals : pandas.Series
        Series from OWID containing total test counts summarized for a period
        of time (mostly one week) for all of Germany. It is expected to
        contain NaN gaps in the data. The differences between this report and
        the forecast data will be used to make sure the total number of tests
        in the forecast matches the OWID data.

    Returns
    -------
    correction_factor: pandas.DataFrame
        Columns "sum_predicted", "diff_reported" and "scaling_factor" for all
        dates of the forecast, including the future.
    """
    assert isinstance(forecasted_daily_tests, pandas.Series)
    assert isinstance(sparse_reported_totals, pandas.Series)

    df_factors = pandas.DataFrame(
        index=forecasted_daily_tests.index,
        columns=["sum_predicted", "diff_reported", "scaling_factor"]
    )
    one_day = pandas.Timedelta("1D")
    report_dates = list(sparse_reported_totals.dropna().index)

    # Walk over consecutive pairs of report dates.
    for previous_date, current_date in zip(report_dates, report_dates[1:]):
        # The window starts the day after the previous report and ends on
        # the current report date.
        window = slice(previous_date + one_day, current_date)

        # sum over the predictions in this interval
        df_factors.loc[window, ["sum_predicted"]] = forecasted_daily_tests.loc[window].sum()

        # diff of the reports
        previous_total = float(sparse_reported_totals.loc[previous_date])
        current_total = float(sparse_reported_totals.loc[current_date])
        df_factors.loc[window, ["diff_reported"]] = current_total - previous_total

    df_factors["scaling_factor"] = df_factors.diff_reported / df_factors.sum_predicted

    # extrapolate backwards at the beginning
    earliest = df_factors.dropna().iloc[0]
    df_factors.loc[:earliest.name, "scaling_factor"] = earliest.scaling_factor

    # continue into the future with the last known scaling factor
    latest = df_factors.dropna().iloc[-1]
    df_factors.loc[latest.name:, "scaling_factor"] = latest.scaling_factor

    return df_factors
def check_if_regex_feature(X: Series, regex: str) -> bool:
    """Return whether every (sampled) non-null value of ``X`` matches ``regex``.

    Only 'category' and 'object' type families are considered; anything else
    gives False. At most 100 values are checked.
    """
    type_family = get_type_family_raw(X.dtype)
    if type_family not in ['category', 'object']:
        return False

    candidates = X.dropna()
    if len(candidates) > 100:
        # Sample to speed-up type inference
        candidates = candidates.sample(n=100, random_state=0)
    return candidates.str.match(regex).all()
def consecutive_wins_losses(self):
    '''
    Calculates the positive and negative runs in the trade series.

    Trades are ordered by exit. Returns a pair of Series holding the
    lengths of the winning runs and of the losing runs.
    '''
    def run_lengths(flags):
        # Pad with a 0 on both sides so every run has a rising and a
        # falling edge, then subtract the start positions from the ends.
        padded = Series(hstack(([0], (flags * 1).values, [0])))
        starts = padded.where(padded.diff() > 0)
        starts = Series(starts.dropna().index.tolist())
        ends = padded.where(padded.diff() < 0)
        ends = Series(ends.dropna().index.tolist())
        return ends - starts

    trade_df = self.as_dataframe().sort_values(by = 'exit')
    win_loss = sign(trade_df.base_return)

    positive_runs = run_lengths(win_loss > 0)
    negative_runs = run_lengths(win_loss < 0)
    return (positive_runs, negative_runs)
def test_datetime64_tz_dropna(self): # DatetimeBlock s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp( '2011-01-03 10:00'), pd.NaT]) result = s.dropna() expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-03 10:00')], index=[0, 2]) self.assert_series_equal(result, expected) # DatetimeBlockTZ idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT, '2011-01-03 10:00', pd.NaT], tz='Asia/Tokyo') s = pd.Series(idx) self.assertEqual(s.dtype, 'datetime64[ns, Asia/Tokyo]') result = s.dropna() expected = Series([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-03 10:00', tz='Asia/Tokyo')], index=[0, 2]) self.assertEqual(result.dtype, 'datetime64[ns, Asia/Tokyo]') self.assert_series_equal(result, expected)
def daily_returns(self):
    '''
    Returns an unsorted and unordered list of daily returns for all trades.
    Used for calculating daily or annualised statistics.

    The result is computed on first use and cached in ``self._daily_returns``.
    '''
    if self._daily_returns is not None:
        return self._daily_returns

    daily = self.trade_frame(compacted = False, cumulative = False)
    # Flatten column by column, then discard the missing entries.
    flattened = [value for col in daily for value in daily[col].tolist()]
    self._daily_returns = Series(flattened).dropna()
    return self._daily_returns
def test_isnull_for_inf_deprecated(self):
    """Setting ``mode.use_inf_as_null`` warns but still treats inf as missing."""
    # gh-17115
    s = Series(['a', np.inf, np.nan, 1.0])
    expected_mask = Series([False, True, True, False])
    expected_kept = Series(['a', 1.0], index=[0, 3])

    with tm.assert_produces_warning(DeprecationWarning,
                                    check_stacklevel=False):
        pd.set_option('mode.use_inf_as_null', True)
        mask = s.isna()
        kept = s.dropna()
        pd.reset_option('mode.use_inf_as_null')

    tm.assert_series_equal(mask, expected_mask)
    tm.assert_series_equal(kept, expected_kept)
def regression(self, data_ser, instructions_ser: pd.Series):
    """Fit a linear trend to the open prices and derive a position change.

    Args:
        data_ser: series of bar objects; the ``open`` attribute of each
            element is used as the price.
        instructions_ser: series of instruction objects; the ``risk`` of the
            non-null entries is summed to get the current position.

    Returns:
        Tuple ``(new_weight, stop_price, b, a, slope)`` where ``b`` and ``a``
        are the fitted intercept and slope, ``slope`` is the sign-flipped
        slope scaled by ``self.lookback``, and ``new_weight`` / ``stop_price``
        stay NaN when no rule fires.
    """
    prices = data_ser.apply(lambda x: x.open)
    X = range(len(prices))
    A = sm.add_constant(X)
    sd = prices.std()
    Y = prices.values
    profittake = 1.96

    # Run regression y = ax + b
    results = sm.OLS(Y, A).fit()
    (b, a) = results.params

    # Slope relative to the intercept, scaled by the lookback window.
    # NOTE(review): the sign is flipped before the rules below use it, and
    # the "up"/"down" branches request Short/Long stops - confirm intent.
    true_slope = (a / b) * self.lookback
    slope = -true_slope

    # Currently how far away from regression line?
    delta = Y - (np.dot(a, X) + b)

    # Don't trade if the slope is near flat
    slope_min = 0.063  # 0.252

    # np.NaN was removed in NumPy 2.0; np.nan is the supported spelling.
    new_weight = np.nan
    stop_price = np.nan

    current_position = instructions_ser.dropna().apply(lambda x: x.risk).sum()

    # Long but slope turns down, then exit or Short but slope turns upward, then exit
    if (current_position > 0 and slope < 0) or (current_position < 0 and 0 < slope):
        new_weight = -current_position

    # Trend is up
    if slope > slope_min:
        # Price crosses the regression line
        if delta[-1] > 0 and delta[-2] < 0 and current_position == 0:
            stop_price = self.calculate_stop(data_ser, Direction.Short)
            new_weight = (-slope/10)
        # Profit take, reaches the top of 95% bollinger band
        if delta[-1] > profittake * sd and current_position > 0:
            new_weight = -current_position

    # Trend is down
    if slope < -slope_min:
        # Price crosses the regression line
        if delta[-1] < 0 and delta[-2] > 0 and current_position == 0:
            stop_price = self.calculate_stop(data_ser, Direction.Long)
            new_weight = (-slope/10)
        # Profit take, reaches the bottom of 95% bollinger band
        if delta[-1] < - profittake * sd and current_position < 0:
            new_weight = -current_position

    return (new_weight, stop_price, b, a, slope)
def test_value_counts(self):
    """``value_counts`` orders by frequency, ignores NaN and handles empty input."""
    s = Series(['a', 'b', 'b', 'b', 'b', 'a', 'c', 'd', 'd', 'a'])
    assert_series_equal(s.value_counts(),
                        Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c']))

    # handle NA's properly
    s[5:7] = np.nan
    assert_series_equal(s.value_counts(), s.dropna().value_counts())

    # an empty Series gives an empty result
    empty = Series({})
    assert_series_equal(empty.value_counts(), Series([]))
def Main():
    """Search GitHub repositories, print language shares and plot stars vs forks.

    Prompts for search keywords (defaulting to 'javascript'), prints the
    percentage of each language on the first result page and saves a log-log
    scatter plot to reports/APIs/github/search_repositories.png.
    """
    client = github_helpers.authenticate()

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    keywords = read_line("Please, enter keywords to search repositories: ")
    # Compare by value: `is ''` tested identity and is not reliable.
    if keywords == '':
        keywords = 'javascript'
        print('No keywords provided. It will use the keyword: ' + keywords)

    search = client.search_repositories(keywords)
    first_page = search.get_page(0)

    languages = Series(r.language for r in first_page)
    languages = languages.dropna()
    # Series.sort() no longer exists; sort_values() returns the sorted copy.
    languages = languages.sort_values()

    percentages = (100.0 * languages.value_counts() / len(languages)).map('{:,.2f} %'.format)
    print('Languages percentage:')
    print(percentages)

    # Create plot
    # Add one to every value for logarithmic scale
    x = [int(r.stargazers_count) + 1 for r in first_page]
    y = [int(r.forks) + 1 for r in first_page]
    area = [100 for r in first_page]
    names = [r.name for r in first_page]
    colors = np.random.rand(len(first_page))

    pl.scatter(x, y, s=area, c=colors, alpha=0.5)
    for name, stars, forks in zip(names, x, y):
        pl.annotate(name, (stars, forks), fontsize=2)

    pl.title("All values are with addition of 1 (for the logarithmic scale)")
    pl.xlabel("Stars")
    pl.xscale("log")
    pl.yscale("log")
    pl.ylabel("Forks")
    pl.tight_layout()

    filepath = 'reports/APIs/github'
    if not os.path.isdir(filepath):
        os.makedirs(filepath)
    filepath += '/search_repositories.png'
    # `figsize` is not a savefig option (older matplotlib ignored it, newer
    # versions reject it), so only the dpi is passed.
    pl.savefig(filepath, dpi=300)
    pl.close()
    print('A chart with high resolution and small font size (to minimize overlaps) was created at ' + filepath)
def track(frames):
    """Track the orientation of a wire through many frames.

    Parameters
    ----------
    frames : an iterable, such as a list of images or a mr.Video object;
        it must expose a ``count`` attribute with the number of frames

    Returns
    -------
    Series of angles in degrees, indexed by frame (starting at 1)
    """
    count = frames.count
    data = Series(index=range(1, count + 1))
    for frame_no, img in enumerate(frames, start=1):
        data[frame_no] = analyze(img)
    # Discard unused rows.
    return data.dropna()
def ema(arg, window):
    """EMA: Exponential Moving Average.

    Params:
        arg (Series): Time series data such as close prices.
        window (int): Moving average window size.

    Returns:
        Series: Exponential moving average of arg, on arg's index. Missing
        inputs are dropped before the calculation and the result is padded
        with the same number of leading NaNs, so the alignment is only exact
        when the missing values are at the start of ``arg``.
    """
    # Work on the raw non-null values. Rebuilding them on the original index
    # raised a length mismatch whenever arg contained NaNs, and integer
    # lookups on a labelled index are not positional.
    values = arg.dropna().values
    nan_count = len(arg) - len(values)

    w = 2.0 / (window + 1)
    smoothed = []
    for value in values:
        if not smoothed:
            smoothed.append(value)  # the first observation seeds the average
        else:
            smoothed.append(value * w + smoothed[-1] * (1.0 - w))

    return Series(data = [np.nan] * nan_count + smoothed,
                  name = "ema" + str(window),
                  index = arg.index)
def test_comparison_operators_with_nas(self):
    """Comparisons on an object Series with NaNs match the NaN-free result.

    Positions holding NaN must compare False, except for ``ne`` where they
    compare True.
    """
    ser = Series(bdate_range('1/1/2000', periods=10), dtype=object)
    ser[::2] = np.nan

    # test that comparisons work
    for op in ['lt', 'le', 'gt', 'ge', 'eq', 'ne']:
        val = ser[5]
        compare = getattr(operator, op)

        result = compare(ser, val)
        expected = compare(ser.dropna(), val).reindex(ser.index)
        expected = expected.fillna(op == 'ne').astype(bool)

        assert_series_equal(result, expected)
def test_dropEmptyRows(self): N = len(self.frame.index) mat = random.randn(N) mat[:5] = nan frame = DataFrame({'foo': mat}, index=self.frame.index) original = Series(mat, index=self.frame.index, name='foo') expected = original.dropna() inplace_frame1, inplace_frame2 = frame.copy(), frame.copy() smaller_frame = frame.dropna(how='all') # check that original was preserved assert_series_equal(frame['foo'], original) inplace_frame1.dropna(how='all', inplace=True) assert_series_equal(smaller_frame['foo'], expected) assert_series_equal(inplace_frame1['foo'], expected) smaller_frame = frame.dropna(how='all', subset=['foo']) inplace_frame2.dropna(how='all', subset=['foo'], inplace=True) assert_series_equal(smaller_frame['foo'], expected) assert_series_equal(inplace_frame2['foo'], expected)
def test_comparison_operators_with_nas(self):
    """Comparison and boolean operators on an object Series containing NaNs.

    Comparisons must match the NaN-free result, with NaN positions False
    (True for ``ne``). Boolean operators on the comparison masks must match
    the result on a filled Series with the NaN positions forced to False.
    """
    s = Series(bdate_range('1/1/2000', periods=10), dtype=object)
    s[::2] = np.nan

    # test that comparisons work
    for op in ['lt', 'le', 'gt', 'ge', 'eq', 'ne']:
        val = s[5]
        compare = getattr(operator, op)

        result = compare(s, val)
        expected = compare(s.dropna(), val).reindex(s.index)
        expected = expected.fillna(op == 'ne').astype(bool)

        assert_series_equal(result, expected)

        # The reflected form (val op s) is not checked here.

    # boolean &, |, ^ should work with object arrays and propagate NAs
    mask = s.isnull()
    for bool_op in ['and_', 'or_', 'xor']:
        combine = getattr(operator, bool_op)

        filled = s.fillna(s[0])

        result = combine(s < s[9], s > s[3])

        expected = combine(filled < filled[9], filled > filled[3])
        expected[mask] = False
        assert_series_equal(result, expected)
def tile(s, bins, labels=False, retbins=True, infinite=True):
    """Group ``s`` by the range each value falls into.

    Args:
        s: Series to bin and group.
        bins: a bin count (passed to ``cut``) or an increasing sequence of
            bin edges.
        labels, retbins: forwarded to ``cut`` when ``bins`` is not iterable.
        infinite: when False, values in the two open-ended outer ranges are
            masked out instead of being grouped.

    Returns:
        The groupby of ``s`` keyed by ``NumRange`` objects.

    Raises:
        ValueError: if the supplied bin edges are not increasing.
    """
    if not np.iterable(bins):
        ind, label = cut(s, bins, retbins=retbins, labels=labels)
        # for now, pandas base cut doesn't support infinite ranges
        # so it bases first bin at 0 where we base on 1, and 0 is
        # [-inf, first] for us
        ind = ind + 1
    else:
        bins = np.asarray(bins)
        if (np.diff(bins) < 0).any():
            raise ValueError('bins must increase monotonically.')
        ind, label = inf_bins_to_cuts(s, bins)

    # build out ranges: (-inf, first], one per consecutive pair, (last, inf)
    ranges = [NumRange(-inf, label[0])]
    for x in range(len(label) - 1):
        ranges.append(NumRange(label[x], label[x + 1]))
    ranges.append(NumRange(label[-1], inf))

    if not infinite:
        na_mask = (ind == 0) | (ind == len(bins))
        np.putmask(ind, na_mask, np.nan)

    # redo the intindex as range index
    new_index = ind.astype(object)
    ind = Series(ind)
    # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
    for k, v in ind.dropna().astype(int).items():
        new_index[k] = ranges[v]

    grouped = s.groupby(new_index, sort=True)
    return grouped
## drop(labels) drop elements with the selected labels from a Series. s1 = Series(arange(1.0,6),index=["a","a","b","c","d"]) s1 s1.drop("a") ################ dropna() is similar to drop() except that it only drops null values – NaN or similar. s1 = Series(arange(1.0,4.0),index=["a","b","c"]) s2 = Series(arange(1.0,4.0),index=["c","d","e"]) s3 = s1 + s2 s3 s3.dropna() ############################################################################## #### fillna ## fillna(value) fills all null values in a series with a specific value. s1 = Series(arange(1.0,4.0),index=["a","b","c"]) s2 = Series(arange(1.0,4.0),index=["c","d","e"]) s3 = s1 + s2 s3.fillna(1.0) ################
def test_axis_alias(self): s = Series([1, 2, np.nan]) assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index')) assert s.dropna().sum('rows') == 3 assert s._get_axis_number('rows') == 0 assert s._get_axis_name('rows') == 'index'
# Series indexed by tuples.
s1 = Series([7.3, -2.5, 3.4, 1.5], index=[(1,2), (2,3), (3,4), (4,5)])
s1
# Series indexed by string labels.
s2 = Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])
# String Series with one missing entry: isnull() flags it, dropna() removes it.
string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
string_data
string_data.isnull()
string_data.dropna()
# 3 ways of doing pairwise correlation between two data frames
# using pandas/python
# NOTE(review): these imports come after the first uses of Series and np
# above - presumably two separate snippets were joined here; confirm.
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
# df2 supplies the two index levels (letters and digits) shared by df1 and df3.
df2 = DataFrame([list("aabbb"), list("12123")]).T
# 5x3 and 5x4 frames of random normals on that two-level index.
# NOTE(review): .ix was deprecated and later removed from pandas - confirm
# the pandas version this script targets.
df1 = DataFrame(np.random.randn(15).reshape(5,3), index=[df2.ix[:, 0], df2.ix[:, 1]], columns=list("def"))
df3 = DataFrame(np.random.randn(20).reshape(5,4), index=[df2.ix[:, 0], df2.ix[:, 1]], columns=list("ghij"))
type(df3)
def pairwise_corr(df1, df2):
    """
    Pairwise correlation between columns of two data frames

    :param df1:
    :type df1: pandas.core.frame.DataFrame
    :param df2:
    :type df2: pandas.core.frame.DataFrame
    :return:
    :rtype: pandas.core.frame.DataFrame
    """
    # NOTE(review): the body is cut off after this line in this excerpt.
    res = []
class TestMoments(tm.TestCase):
    """Tests for the function-style moving-window API exposed by ``mom``.

    Exercises the ``rolling_*``, ``ewm*`` and ``expanding_*`` functions on
    ndarray, Series and DataFrame inputs.  ``setUp`` builds a random array
    with a block of NaNs at ``_nan_locs`` plus a Series and a DataFrame on a
    ``bdate_range`` index; the ``_check_*`` helpers hold the assertions that
    are shared between the individual tests.
    """

    _multiprocess_can_split_ = True

    # Positions of ``self.arr`` that setUp overwrites with NaN.
    _nan_locs = np.arange(20, 40)
    _inf_locs = np.array([])

    def setUp(self):
        """Build the shared ndarray / Series / DataFrame fixtures."""
        arr = randn(N)
        arr[self._nan_locs] = np.NaN

        self.arr = arr
        self.rng = bdate_range(datetime(2009, 1, 1), periods=N)

        self.series = Series(arr.copy(), index=self.rng)

        self.frame = DataFrame(randn(N, K), index=self.rng,
                               columns=np.arange(K))

    def test_centered_axis_validation(self):
        """center=True accepts valid axes and raises ValueError otherwise."""
        # ok
        mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0)
        # bad axis
        self.assertRaises(ValueError, mom.rolling_mean,
                          Series(np.ones(10)), 3, center=True, axis=1)

        # ok ok
        mom.rolling_mean(DataFrame(np.ones((10, 10))), 3, center=True,
                         axis=0)
        mom.rolling_mean(DataFrame(np.ones((10, 10))), 3, center=True,
                         axis=1)
        # bad axis
        self.assertRaises(ValueError, mom.rolling_mean,
                          DataFrame(np.ones((10, 10))), 3, center=True,
                          axis=2)

    def test_rolling_sum(self):
        """rolling_sum agrees with np.sum over the window."""
        self._check_moment_func(mom.rolling_sum, np.sum)

    def test_rolling_count(self):
        """rolling_count agrees with a count of the finite values."""
        counter = lambda x: np.isfinite(x).astype(float).sum()
        self._check_moment_func(mom.rolling_count, counter,
                                has_min_periods=False,
                                preserve_nan=False,
                                fill_value=0)

    def test_rolling_mean(self):
        """rolling_mean agrees with np.mean over the window."""
        self._check_moment_func(mom.rolling_mean, np.mean)

    def test_cmov_mean(self):
        """Centered rolling_mean matches scikits.timeseries cmov_mean."""
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_mean
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        vals = np.random.randn(10)
        xp = cmov_mean(vals, 5)

        rs = mom.rolling_mean(vals, 5, center=True)
        assert_almost_equal(xp.compressed(), rs[2:-2])
        assert_almost_equal(xp.mask, np.isnan(rs))

        xp = Series(rs)
        rs = mom.rolling_mean(Series(vals), 5, center=True)
        assert_series_equal(xp, rs)

    def test_cmov_window(self):
        """Centered boxcar rolling_window matches cmov_window."""
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        vals = np.random.randn(10)
        xp = cmov_window(vals, 5, 'boxcar')

        rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
        assert_almost_equal(xp.compressed(), rs[2:-2])
        assert_almost_equal(xp.mask, np.isnan(rs))

        xp = Series(rs)
        rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True)
        assert_series_equal(xp, rs)

    def test_cmov_window_corner(self):
        """rolling_window on all-NaN, empty and too-short inputs."""
        tm._skip_if_no_scipy()
        try:
            # Imported only as a guard: skip unless scikits.timeseries exists.
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        # all nan
        vals = np.empty(10, dtype=float)
        vals.fill(np.nan)
        rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
        self.assertTrue(np.isnan(rs).all())

        # empty
        vals = np.array([])
        rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
        self.assertEqual(len(rs), 0)

        # shorter than window
        vals = np.random.randn(5)
        rs = mom.rolling_window(vals, 10, 'boxcar')
        self.assertTrue(np.isnan(rs).all())
        self.assertEqual(len(rs), 5)

    def test_cmov_window_frame(self):
        """Centered boxcar rolling_window on a DataFrame matches cmov_window."""
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        # DataFrame
        vals = np.random.randn(10, 2)
        xp = cmov_window(vals, 5, 'boxcar')
        rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True)
        assert_frame_equal(DataFrame(xp), rs)

    def test_cmov_window_na_min_periods(self):
        """Boxcar rolling_window honours min_periods like rolling_mean."""
        tm._skip_if_no_scipy()
        try:
            # Imported only as a guard: skip unless scikits.timeseries exists.
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        # min_periods
        vals = Series(np.random.randn(10))
        vals[4] = np.nan
        vals[8] = np.nan

        xp = mom.rolling_mean(vals, 5, min_periods=4, center=True)
        rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4,
                                center=True)

        assert_series_equal(xp, rs)

    def test_cmov_window_regular(self):
        """Parameter-free window types match cmov_window."""
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
                     'blackmanharris', 'nuttall', 'barthann']
        for wt in win_types:
            vals = np.random.randn(10)
            xp = cmov_window(vals, 5, wt)

            rs = mom.rolling_window(Series(vals), 5, wt, center=True)
            assert_series_equal(Series(xp), rs)

    def test_cmov_window_special(self):
        """Window types that take extra parameters match cmov_window."""
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
        kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
                {'width': 0.5}]

        for wt, k in zip(win_types, kwds):
            vals = np.random.randn(10)
            xp = cmov_window(vals, 5, (wt,) + tuple(k.values()))

            rs = mom.rolling_window(Series(vals), 5, wt, center=True,
                                    **k)
            assert_series_equal(Series(xp), rs)

    def test_rolling_median(self):
        """rolling_median agrees with np.median over the window."""
        self._check_moment_func(mom.rolling_median, np.median)

    def test_rolling_min(self):
        """rolling_min agrees with np.min; min_periods > window raises."""
        self._check_moment_func(mom.rolling_min, np.min)

        a = np.array([1, 2, 3, 4, 5])
        b = mom.rolling_min(a, window=100, min_periods=1)
        assert_almost_equal(b, np.ones(len(a)))

        self.assertRaises(ValueError, mom.rolling_min,
                          np.array([1, 2, 3]), window=3, min_periods=5)

    def test_rolling_max(self):
        """rolling_max agrees with np.max; min_periods > window raises."""
        self._check_moment_func(mom.rolling_max, np.max)

        a = np.array([1, 2, 3, 4, 5])
        b = mom.rolling_max(a, window=100, min_periods=1)
        assert_almost_equal(a, b)

        self.assertRaises(ValueError, mom.rolling_max,
                          np.array([1, 2, 3]), window=3, min_periods=5)

    def test_rolling_quantile(self):
        """rolling_quantile agrees with a sort-and-index reference."""
        qs = [.1, .5, .9]

        def scoreatpercentile(a, per):
            values = np.sort(a, axis=0)

            idx = per / 1. * (values.shape[0] - 1)
            return values[int(idx)]

        for q in qs:
            def f(x, window, min_periods=None, freq=None, center=False):
                return mom.rolling_quantile(x, window, q,
                                            min_periods=min_periods,
                                            freq=freq,
                                            center=center)

            def alt(x):
                return scoreatpercentile(x, q)

            self._check_moment_func(f, alt)

    def test_rolling_apply(self):
        """rolling_apply on an empty Series and with a NaN-skipping mean."""
        ser = Series([])
        assert_series_equal(
            ser, mom.rolling_apply(ser, 10, lambda x: x.mean()))

        def roll_mean(x, window, min_periods=None, freq=None, center=False):
            return mom.rolling_apply(x, window,
                                     lambda x: x[np.isfinite(x)].mean(),
                                     min_periods=min_periods,
                                     freq=freq,
                                     center=center)
        self._check_moment_func(roll_mean, np.mean)

    def test_rolling_apply_out_of_bounds(self):
        """rolling_apply with a window longer than the input (#1850)."""
        # #1850
        arr = np.arange(4)

        # it works!
        result = mom.rolling_apply(arr, 10, np.sum)
        self.assertTrue(isnull(result).all())

        # With min_periods=1 every window holds all values seen so far, so
        # the result is the running sum of 0, 1, 2, 3.  (The original
        # compared ``result`` with itself, which can never fail.)
        result = mom.rolling_apply(arr, 10, np.sum, min_periods=1)
        assert_almost_equal(result, np.array([0., 1., 3., 6.]))

    def test_rolling_std(self):
        """rolling_std agrees with np.std for ddof=1 (default) and ddof=0."""
        self._check_moment_func(mom.rolling_std,
                                lambda x: np.std(x, ddof=1))
        self._check_moment_func(functools.partial(mom.rolling_std, ddof=0),
                                lambda x: np.std(x, ddof=0))

    def test_rolling_std_1obs(self):
        """rolling_std with a one-element window and with sparse windows."""
        result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]),
                                 1, min_periods=1)
        expected = np.zeros(5)

        assert_almost_equal(result, expected)

        result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]),
                                 3, min_periods=2)
        self.assertTrue(np.isnan(result[2]))

    def test_rolling_std_neg_sqrt(self):
        """rolling_std / ewmstd stay finite on nearly-constant input."""
        # unit test from Bottleneck

        # Test move_nanstd for neg sqrt.

        a = np.array([0.0011448196318903589,
                      0.00028718669878572767,
                      0.00028718669878572767,
                      0.00028718669878572767,
                      0.00028718669878572767])
        b = mom.rolling_std(a, window=3)
        self.assertTrue(np.isfinite(b[2:]).all())

        b = mom.ewmstd(a, span=3)
        self.assertTrue(np.isfinite(b[2:]).all())

    def test_rolling_var(self):
        """rolling_var agrees with np.var for ddof=1 (default) and ddof=0."""
        self._check_moment_func(mom.rolling_var,
                                lambda x: np.var(x, ddof=1),
                                test_stable=True)
        self._check_moment_func(functools.partial(mom.rolling_var, ddof=0),
                                lambda x: np.var(x, ddof=0))

    def test_rolling_skew(self):
        """rolling_skew agrees with scipy.stats.skew(bias=False)."""
        try:
            from scipy.stats import skew
        except ImportError:
            raise nose.SkipTest('no scipy')
        self._check_moment_func(mom.rolling_skew,
                                lambda x: skew(x, bias=False))

    def test_rolling_kurt(self):
        """rolling_kurt agrees with scipy.stats.kurtosis(bias=False)."""
        try:
            from scipy.stats import kurtosis
        except ImportError:
            raise nose.SkipTest('no scipy')
        self._check_moment_func(mom.rolling_kurt,
                                lambda x: kurtosis(x, bias=False))

    def test_fperr_robustness(self):
        """Rolling sum/mean/var stay non-negative on tiny inputs (#2114)."""
        # TODO: remove this once python 2.5 out of picture
        if PY3:
            raise nose.SkipTest("doesn't work on python 3")

        # #2114
        # Raw little-endian float64 values, decoded with np.frombuffer below.
        data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f<UUUUUU\x13@q\x1c\xc7q\x1c\xc7\xf9?\xf6\x12\xdaKh/\xe1?\xf2\xc3"e\xe0\xe9\xc6?\xed\xaf\x831+\x8d\xae?\xf3\x1f\xad\xcb\x1c^\x94?\x15\x1e\xdd\xbd>\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7<pj\xa0>m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>'

        arr = np.frombuffer(data, dtype='<f8')
        if sys.byteorder != "little":
            arr = arr.byteswap().newbyteorder()

        result = mom.rolling_sum(arr, 2)
        self.assertTrue((result[1:] >= 0).all())

        result = mom.rolling_mean(arr, 2)
        self.assertTrue((result[1:] >= 0).all())

        result = mom.rolling_var(arr, 2)
        self.assertTrue((result[1:] >= 0).all())

        # #2527, ugh
        arr = np.array([0.00012456, 0.0003, 0])
        result = mom.rolling_mean(arr, 1)
        self.assertTrue(result[-1] >= 0)

        result = mom.rolling_mean(-arr, 1)
        self.assertTrue(result[-1] <= 0)

    def _check_moment_func(self, func, static_comp, window=50,
                           has_min_periods=True,
                           has_center=True,
                           has_time_rule=True,
                           preserve_nan=True,
                           fill_value=None,
                           test_stable=False):
        """Run the ndarray and the Series/DataFrame checks for ``func``.

        ``func`` is the rolling function under test and ``static_comp`` the
        reference reduction applied to a plain window.  The ``has_*`` flags
        say which keyword arguments ``func`` accepts; ``fill_value`` is the
        value expected where a centered result has no data.
        """
        self._check_ndarray(func, static_comp, window=window,
                            has_min_periods=has_min_periods,
                            preserve_nan=preserve_nan,
                            has_center=has_center,
                            fill_value=fill_value,
                            test_stable=test_stable)

        self._check_structures(func, static_comp,
                               has_min_periods=has_min_periods,
                               has_time_rule=has_time_rule,
                               fill_value=fill_value,
                               has_center=has_center)

    def _check_ndarray(self, func, static_comp, window=50,
                       has_min_periods=True,
                       preserve_nan=True,
                       has_center=True,
                       fill_value=None,
                       test_stable=False,
                       test_window=True):
        """Check ``func`` on plain ndarrays against ``static_comp``.

        Covers the last-window value, NaN preservation, ``min_periods``,
        ``center=True``, numerical stability under a large offset
        (``test_stable``) and windows longer than the array (``test_window``).
        """
        result = func(self.arr, window)
        assert_almost_equal(result[-1],
                            static_comp(self.arr[-50:]))

        if preserve_nan:
            assert(np.isnan(result[self._nan_locs]).all())

        # excluding NaNs correctly
        arr = randn(50)
        arr[:10] = np.NaN
        arr[-10:] = np.NaN

        if has_min_periods:
            result = func(arr, 50, min_periods=30)
            assert_almost_equal(result[-1], static_comp(arr[10:-10]))

            # min_periods is working correctly
            result = func(arr, 20, min_periods=15)
            self.assertTrue(np.isnan(result[23]))
            self.assertFalse(np.isnan(result[24]))

            self.assertFalse(np.isnan(result[-6]))
            self.assertTrue(np.isnan(result[-5]))

            arr2 = randn(20)
            result = func(arr2, 10, min_periods=5)
            self.assertTrue(isnull(result[3]))
            self.assertTrue(notnull(result[4]))

            # min_periods=0
            result0 = func(arr, 20, min_periods=0)
            result1 = func(arr, 20, min_periods=1)
            assert_almost_equal(result0, result1)
        else:
            result = func(arr, 50)
            assert_almost_equal(result[-1], static_comp(arr[10:-10]))

        if has_center:
            if has_min_periods:
                result = func(arr, 20, min_periods=15, center=True)
                expected = func(arr, 20, min_periods=15)
            else:
                result = func(arr, 20, center=True)
                expected = func(arr, 20)

            assert_almost_equal(result[1], expected[10])
            if fill_value is None:
                self.assertTrue(np.isnan(result[-9:]).all())
            else:
                # Compare with the caller's fill_value instead of a
                # hard-coded 0 (the only value passed so far is 0).
                self.assertTrue((result[-9:] == fill_value).all())
            if has_min_periods:
                self.assertTrue(np.isnan(expected[23]))
                self.assertTrue(np.isnan(result[14]))
                self.assertTrue(np.isnan(expected[-5]))
                self.assertTrue(np.isnan(result[-14]))

        if test_stable:
            result = func(self.arr + 1e9, window)
            assert_almost_equal(result[-1],
                                static_comp(self.arr[-50:] + 1e9))

        # Test window larger than array, #7297
        if test_window:
            if has_min_periods:
                for minp in (0, len(self.arr) - 1, len(self.arr)):
                    result = func(self.arr, len(self.arr) + 1,
                                  min_periods=minp)
                    expected = func(self.arr, len(self.arr),
                                    min_periods=minp)
                    nan_mask = np.isnan(result)
                    self.assertTrue(np.array_equal(nan_mask,
                                                   np.isnan(expected)))
                    nan_mask = ~nan_mask
                    assert_almost_equal(result[nan_mask],
                                        expected[nan_mask])
            else:
                result = func(self.arr, len(self.arr) + 1)
                expected = func(self.arr, len(self.arr))
                nan_mask = np.isnan(result)
                self.assertTrue(np.array_equal(nan_mask,
                                               np.isnan(expected)))
                nan_mask = ~nan_mask
                assert_almost_equal(result[nan_mask], expected[nan_mask])

    def _check_structures(self, func, static_comp,
                          has_min_periods=True, has_time_rule=True,
                          has_center=True,
                          fill_value=None):
        """Check ``func`` on the Series and DataFrame fixtures.

        Verifies the return types, the ``freq='B'`` path against a truncated
        static computation, and that ``center=True`` equals the uncentered
        result shifted back by 12 positions (for a window of 25).
        """
        series_result = func(self.series, 50)
        tm.assert_isinstance(series_result, Series)

        frame_result = func(self.frame, 50)
        self.assertEqual(type(frame_result), DataFrame)

        # check time_rule works
        if has_time_rule:
            win = 25
            minp = 10

            if has_min_periods:
                series_result = func(self.series[::2], win, min_periods=minp,
                                     freq='B')
                frame_result = func(self.frame[::2], win, min_periods=minp,
                                    freq='B')
            else:
                series_result = func(self.series[::2], win, freq='B')
                frame_result = func(self.frame[::2], win, freq='B')

            last_date = series_result.index[-1]
            prev_date = last_date - 24 * datetools.bday

            trunc_series = self.series[::2].truncate(prev_date, last_date)
            trunc_frame = self.frame[::2].truncate(prev_date, last_date)

            assert_almost_equal(series_result[-1], static_comp(trunc_series))

            assert_almost_equal(frame_result.xs(last_date),
                                trunc_frame.apply(static_comp))

        if has_center:
            if has_min_periods:
                minp = 10
                series_xp = func(self.series, 25,
                                 min_periods=minp).shift(-12)
                frame_xp = func(self.frame, 25, min_periods=minp).shift(-12)

                series_rs = func(self.series, 25, min_periods=minp,
                                 center=True)
                frame_rs = func(self.frame, 25, min_periods=minp,
                                center=True)
            else:
                series_xp = func(self.series, 25).shift(-12)
                frame_xp = func(self.frame, 25).shift(-12)

                series_rs = func(self.series, 25, center=True)
                frame_rs = func(self.frame, 25, center=True)

            if fill_value is not None:
                series_xp = series_xp.fillna(fill_value)
                frame_xp = frame_xp.fillna(fill_value)
            assert_series_equal(series_xp, series_rs)
            assert_frame_equal(frame_xp, frame_rs)

    def test_ewma(self):
        """ewma smoke checks plus fixed expected values for com=2."""
        self._check_ew(mom.ewma)

        arr = np.zeros(1000)
        arr[5] = 1
        result = mom.ewma(arr, span=100, adjust=False).sum()
        self.assertTrue(np.abs(result - 1) < 1e-2)

        s = Series([1.0, 2.0, 4.0, 8.0])

        expected = Series([1.0, 1.6, 2.736842, 4.923077])
        for f in [lambda s: mom.ewma(s, com=2.0, adjust=True),
                  lambda s: mom.ewma(s, com=2.0, adjust=True,
                                     ignore_na=False),
                  lambda s: mom.ewma(s, com=2.0, adjust=True,
                                     ignore_na=True),
                  ]:
            result = f(s)
            assert_series_equal(result, expected)

        expected = Series([1.0, 1.333333, 2.222222, 4.148148])
        for f in [lambda s: mom.ewma(s, com=2.0, adjust=False),
                  lambda s: mom.ewma(s, com=2.0, adjust=False,
                                     ignore_na=False),
                  lambda s: mom.ewma(s, com=2.0, adjust=False,
                                     ignore_na=True),
                  ]:
            result = f(s)
            assert_series_equal(result, expected)

    def test_ewma_nan_handling(self):
        """ewma with NaNs, for each adjust / ignore_na combination."""
        s = Series([1.] + [np.nan] * 5 + [1.])
        result = mom.ewma(s, com=5)
        assert_almost_equal(result, [1.] * len(s))

        s = Series([np.nan] * 2 + [1.] + [np.nan] * 2 + [1.])
        result = mom.ewma(s, com=5)
        assert_almost_equal(result, [np.nan] * 2 + [1.] * 4)

        # GH 7603
        s0 = Series([np.nan, 1., 101.])
        s1 = Series([1., np.nan, 101.])
        s2 = Series([np.nan, 1., np.nan, np.nan, 101., np.nan])
        com = 2.
        alpha = 1. / (1. + com)

        def simple_wma(s, w):
            # Reference: explicitly weighted running mean, forward-filled.
            return (s.multiply(w).cumsum() / w.cumsum()).fillna(method='ffill')

        for (s, adjust, ignore_na, w) in [
                (s0, True, False, [np.nan, (1.0 - alpha), 1.]),
                (s0, True, True, [np.nan, (1.0 - alpha), 1.]),
                (s0, False, False, [np.nan, (1.0 - alpha), alpha]),
                (s0, False, True, [np.nan, (1.0 - alpha), alpha]),
                (s1, True, False, [(1.0 - alpha)**2, np.nan, 1.]),
                (s1, True, True, [(1.0 - alpha), np.nan, 1.]),
                (s1, False, False, [(1.0 - alpha)**2, np.nan, alpha]),
                (s1, False, True, [(1.0 - alpha), np.nan, alpha]),
                (s2, True, False,
                 [np.nan, (1.0 - alpha)**3, np.nan, np.nan, 1., np.nan]),
                (s2, True, True,
                 [np.nan, (1.0 - alpha), np.nan, np.nan, 1., np.nan]),
                (s2, False, False,
                 [np.nan, (1.0 - alpha)**3, np.nan, np.nan, alpha, np.nan]),
                (s2, False, True,
                 [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]),
                ]:
            expected = simple_wma(s, Series(w))
            result = mom.ewma(s, com=com, adjust=adjust, ignore_na=ignore_na)
            assert_series_equal(result, expected)
            if ignore_na is False:
                # check that ignore_na defaults to False
                result = mom.ewma(s, com=com, adjust=adjust)
                assert_series_equal(result, expected)

    def test_ewmvar(self):
        """Smoke checks for ewmvar."""
        self._check_ew(mom.ewmvar)

    def test_ewmvol(self):
        """Smoke checks for ewmvol."""
        self._check_ew(mom.ewmvol)

    def test_ewma_span_com_args(self):
        """com=9.5 equals span=20; both or neither raises."""
        A = mom.ewma(self.arr, com=9.5)
        B = mom.ewma(self.arr, span=20)
        assert_almost_equal(A, B)

        self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20)
        self.assertRaises(Exception, mom.ewma, self.arr)

    def test_ewma_halflife_arg(self):
        """halflife=10 equals the matching com; mixing decay args raises."""
        A = mom.ewma(self.arr, com=13.932726172912965)
        B = mom.ewma(self.arr, halflife=10.0)
        assert_almost_equal(A, B)

        self.assertRaises(Exception, mom.ewma, self.arr, span=20,
                          halflife=50)
        self.assertRaises(Exception, mom.ewma, self.arr, com=9.5,
                          halflife=50)
        self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20,
                          halflife=50)
        self.assertRaises(Exception, mom.ewma, self.arr)

    def test_ew_empty_arrays(self):
        """The ewm functions return an empty result for empty input."""
        arr = np.array([], dtype=np.float64)

        funcs = [mom.ewma, mom.ewmvol, mom.ewmvar]
        for f in funcs:
            result = f(arr, 3)
            assert_almost_equal(result, arr)

    def _check_ew(self, func):
        """Run the ndarray and the structure checks for an ewm function."""
        self._check_ew_ndarray(func)
        self._check_ew_structures(func)

    def _check_ew_ndarray(self, func, preserve_nan=False):
        """Check an ewm function on ndarrays (NaN handling, int input)."""
        result = func(self.arr, com=10)
        if preserve_nan:
            assert(np.isnan(result[self._nan_locs]).all())

        # excluding NaNs correctly
        # NOTE(review): ``arr`` is built but never checked, see the marker
        # below.
        arr = randn(50)
        arr[:10] = np.NaN
        arr[-10:] = np.NaN

        # ??? check something

        # pass in ints
        result2 = func(np.arange(50), span=10)
        self.assertEqual(result2.dtype, np.float_)

    def _check_ew_structures(self, func):
        """An ewm function returns a Series / DataFrame for such input."""
        series_result = func(self.series, com=10)
        tm.assert_isinstance(series_result, Series)
        frame_result = func(self.frame, com=10)
        self.assertEqual(type(frame_result), DataFrame)

    # binary moments
    def test_rolling_cov(self):
        """The last rolling_cov value agrees with np.cov on the last window."""
        A = self.series
        B = A + randn(len(A))

        result = mom.rolling_cov(A, B, 50, min_periods=25)
        assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1])

    def test_rolling_cov_pairwise(self):
        """Pairwise rolling_cov on a frame agrees with the column-wise call."""
        self._check_pairwise_moment(mom.rolling_cov, 10, min_periods=5)

    def test_rolling_corr(self):
        """rolling_corr agrees with np.corrcoef and with Series.corr."""
        A = self.series
        B = A + randn(len(A))

        result = mom.rolling_corr(A, B, 50, min_periods=25)
        assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])

        # test for correct bias correction
        a = tm.makeTimeSeries()
        b = tm.makeTimeSeries()
        a[:5] = np.nan
        b[:10] = np.nan

        result = mom.rolling_corr(a, b, len(a), min_periods=1)
        assert_almost_equal(result[-1], a.corr(b))

    def test_rolling_corr_pairwise(self):
        """Pairwise rolling_corr on a frame agrees with the column-wise call."""
        self._check_pairwise_moment(mom.rolling_corr, 10, min_periods=5)

    def _check_pairwise_moment(self, func, *args, **kwargs):
        """The (1, 5) slice of the pairwise result equals func(col1, col5)."""
        panel = func(self.frame, *args, **kwargs)

        actual = panel.ix[:, 1, 5]
        expected = func(self.frame[1], self.frame[5],
                        *args, **kwargs)
        tm.assert_series_equal(actual, expected)

    def test_flex_binary_moment(self):
        """_flex_binary_moment raises TypeError on scalar operands."""
        # GH3155
        # don't blow the stack
        self.assertRaises(TypeError, mom._flex_binary_moment, 5, 6, None)

    def test_corr_sanity(self):
        """Centered rolling_corr values never exceed 1 in magnitude."""
        # GH 3155
        df = DataFrame(
            np.array(
                [[0.87024726, 0.18505595],
                 [0.64355431, 0.3091617],
                 [0.92372966, 0.50552513],
                 [0.00203756, 0.04520709],
                 [0.84780328, 0.33394331],
                 [0.78369152, 0.63919667]])
        )

        res = mom.rolling_corr(df[0], df[1], 5, center=True)
        self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res]))

        # and some fuzzing
        for i in range(10):
            df = DataFrame(np.random.rand(30, 2))
            res = mom.rolling_corr(df[0], df[1], 5, center=True)
            try:
                self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1
                                     for x in res]))
            except AssertionError:
                # Show the offending series, then let the failure surface.
                # The original bare ``except`` swallowed it, so the fuzz
                # loop could never fail.
                print(res)
                raise

    def test_flex_binary_frame(self):
        """Series/frame and frame/frame binary moments match per-column calls."""
        def _check(method):
            series = self.frame[1]

            res = method(series, self.frame, 10)
            res2 = method(self.frame, series, 10)
            exp = self.frame.apply(lambda x: method(series, x, 10))

            tm.assert_frame_equal(res, exp)
            tm.assert_frame_equal(res2, exp)

            frame2 = self.frame.copy()
            frame2.values[:] = np.random.randn(*frame2.shape)

            res3 = method(self.frame, frame2, 10)
            exp = DataFrame(dict((k, method(self.frame[k], frame2[k], 10))
                                 for k in self.frame))
            tm.assert_frame_equal(res3, exp)

        methods = [mom.rolling_corr, mom.rolling_cov]
        for meth in methods:
            _check(meth)

    def test_ewmcov(self):
        """Binary checks for ewmcov."""
        self._check_binary_ew(mom.ewmcov)

    def test_ewmcov_pairwise(self):
        """Pairwise ewmcov on a frame agrees with the column-wise call."""
        self._check_pairwise_moment(mom.ewmcov, span=10, min_periods=5)

    def test_ewmcorr(self):
        """Binary checks for ewmcorr."""
        self._check_binary_ew(mom.ewmcorr)

    def test_ewmcorr_pairwise(self):
        """Pairwise ewmcorr on a frame agrees with the column-wise call."""
        self._check_pairwise_moment(mom.ewmcorr, span=10, min_periods=5)

    def _check_binary_ew(self, func):
        """Check NaN placement of a binary ewm function; ndarray arg raises."""
        A = Series(randn(50), index=np.arange(50))
        B = A[2:] + randn(48)

        A[:10] = np.NaN
        B[-10:] = np.NaN

        result = func(A, B, 20, min_periods=5)

        self.assertTrue(np.isnan(result.values[:15]).all())
        self.assertFalse(np.isnan(result.values[15:]).any())

        self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5)

    def test_expanding_apply(self):
        """expanding_apply on an empty Series and as an expanding mean."""
        ser = Series([])
        assert_series_equal(ser,
                            mom.expanding_apply(ser, lambda x: x.mean()))

        def expanding_mean(x, min_periods=1, freq=None):
            return mom.expanding_apply(x,
                                       lambda x: x.mean(),
                                       min_periods=min_periods,
                                       freq=freq)
        self._check_expanding(expanding_mean, np.mean)

    def test_expanding_apply_args_kwargs(self):
        """expanding_apply forwards ``args`` and ``kwargs`` to the function."""
        def mean_w_arg(x, const):
            return np.mean(x) + const

        df = DataFrame(np.random.rand(20, 3))

        expected = mom.expanding_apply(df, np.mean) + 20.

        assert_frame_equal(mom.expanding_apply(df, mean_w_arg, args=(20,)),
                           expected)
        assert_frame_equal(mom.expanding_apply(df, mean_w_arg,
                                               kwargs={'const': 20}),
                           expected)

    def test_expanding_corr(self):
        """expanding_corr equals rolling_corr over the full length."""
        A = self.series.dropna()
        B = (A + randn(len(A)))[:-5]

        result = mom.expanding_corr(A, B)

        rolling_result = mom.rolling_corr(A, B, len(A), min_periods=1)

        assert_almost_equal(rolling_result, result)

    def test_expanding_count(self):
        """expanding_count equals rolling_count over the full length."""
        result = mom.expanding_count(self.series)
        assert_almost_equal(result, mom.rolling_count(self.series,
                                                      len(self.series)))

    def test_expanding_quantile(self):
        """expanding_quantile equals rolling_quantile over the full length."""
        result = mom.expanding_quantile(self.series, 0.5)

        rolling_result = mom.rolling_quantile(self.series,
                                              len(self.series),
                                              0.5, min_periods=1)

        assert_almost_equal(result, rolling_result)

    def test_expanding_cov(self):
        """expanding_cov equals rolling_cov over the full length."""
        A = self.series
        B = (A + randn(len(A)))[:-5]

        result = mom.expanding_cov(A, B)

        rolling_result = mom.rolling_cov(A, B, len(A), min_periods=1)

        assert_almost_equal(rolling_result, result)

    def test_expanding_max(self):
        """expanding_max agrees with np.max."""
        self._check_expanding(mom.expanding_max, np.max, preserve_nan=False)

    def test_expanding_cov_pairwise(self):
        """Pairwise expanding_cov equals pairwise full-length rolling_cov."""
        result = mom.expanding_cov(self.frame)

        rolling_result = mom.rolling_cov(self.frame, len(self.frame),
                                         min_periods=1)

        for i in result.items:
            assert_almost_equal(result[i], rolling_result[i])

    def test_expanding_corr_pairwise(self):
        """Pairwise expanding_corr equals pairwise full-length rolling_corr."""
        result = mom.expanding_corr(self.frame)

        rolling_result = mom.rolling_corr(self.frame, len(self.frame),
                                          min_periods=1)

        for i in result.items:
            assert_almost_equal(result[i], rolling_result[i])

    def test_expanding_cov_diff_index(self):
        """expanding_cov with differently indexed operands (GH 7512)."""
        # GH 7512
        s1 = Series([1, 2, 3], index=[0, 1, 2])
        s2 = Series([1, 3], index=[0, 2])
        result = mom.expanding_cov(s1, s2)
        expected = Series([None, None, 2.0])
        assert_series_equal(result, expected)

        s2a = Series([1, None, 3], index=[0, 1, 2])
        result = mom.expanding_cov(s1, s2a)
        assert_series_equal(result, expected)

        s1 = Series([7, 8, 10], index=[0, 1, 3])
        s2 = Series([7, 9, 10], index=[0, 2, 3])
        result = mom.expanding_cov(s1, s2)
        expected = Series([None, None, None, 4.5])
        assert_series_equal(result, expected)

    def test_expanding_corr_diff_index(self):
        """expanding_corr with differently indexed operands (GH 7512)."""
        # GH 7512
        s1 = Series([1, 2, 3], index=[0, 1, 2])
        s2 = Series([1, 3], index=[0, 2])
        result = mom.expanding_corr(s1, s2)
        expected = Series([None, None, 1.0])
        assert_series_equal(result, expected)

        s2a = Series([1, None, 3], index=[0, 1, 2])
        result = mom.expanding_corr(s1, s2a)
        assert_series_equal(result, expected)

        s1 = Series([7, 8, 10], index=[0, 1, 3])
        s2 = Series([7, 9, 10], index=[0, 2, 3])
        result = mom.expanding_corr(s1, s2)
        expected = Series([None, None, None, 1.])
        assert_series_equal(result, expected)

    def test_rolling_cov_diff_length(self):
        """rolling_cov with operands of different length (GH 7512)."""
        # GH 7512
        s1 = Series([1, 2, 3], index=[0, 1, 2])
        s2 = Series([1, 3], index=[0, 2])
        result = mom.rolling_cov(s1, s2, window=3, min_periods=2)
        expected = Series([None, None, 2.0])
        assert_series_equal(result, expected)

        s2a = Series([1, None, 3], index=[0, 1, 2])
        result = mom.rolling_cov(s1, s2a, window=3, min_periods=2)
        assert_series_equal(result, expected)

    def test_rolling_corr_diff_length(self):
        """rolling_corr with operands of different length (GH 7512)."""
        # GH 7512
        s1 = Series([1, 2, 3], index=[0, 1, 2])
        s2 = Series([1, 3], index=[0, 2])
        result = mom.rolling_corr(s1, s2, window=3, min_periods=2)
        expected = Series([None, None, 1.0])
        assert_series_equal(result, expected)

        s2a = Series([1, None, 3], index=[0, 1, 2])
        result = mom.rolling_corr(s1, s2a, window=3, min_periods=2)
        assert_series_equal(result, expected)

    def test_rolling_functions_window_non_shrinkage(self):
        """A window longer than the data yields all-NaN results (GH 7764)."""
        # GH 7764
        s = Series(range(4))
        s_expected = Series(np.nan, index=s.index)
        df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
        df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
        df_expected_panel = Panel(items=df.index, major_axis=df.columns,
                                  minor_axis=df.columns)

        functions = [
            lambda x: mom.rolling_cov(x, x, pairwise=False, window=10,
                                      min_periods=5),
            lambda x: mom.rolling_corr(x, x, pairwise=False, window=10,
                                       min_periods=5),
            lambda x: mom.rolling_max(x, window=10, min_periods=5),
            lambda x: mom.rolling_min(x, window=10, min_periods=5),
            lambda x: mom.rolling_sum(x, window=10, min_periods=5),
            lambda x: mom.rolling_mean(x, window=10, min_periods=5),
            lambda x: mom.rolling_std(x, window=10, min_periods=5),
            lambda x: mom.rolling_var(x, window=10, min_periods=5),
            lambda x: mom.rolling_skew(x, window=10, min_periods=5),
            lambda x: mom.rolling_kurt(x, window=10, min_periods=5),
            lambda x: mom.rolling_quantile(x, quantile=0.5, window=10,
                                           min_periods=5),
            lambda x: mom.rolling_median(x, window=10, min_periods=5),
            lambda x: mom.rolling_apply(x, func=sum, window=10,
                                        min_periods=5),
            lambda x: mom.rolling_window(x, win_type='boxcar', window=10,
                                         min_periods=5),
        ]
        for f in functions:
            try:
                s_result = f(s)
                assert_series_equal(s_result, s_expected)

                df_result = f(df)
                assert_frame_equal(df_result, df_expected)
            except (ImportError):
                # scipy needed for rolling_window
                continue

        functions = [
            lambda x: mom.rolling_cov(x, x, pairwise=True, window=10,
                                      min_periods=5),
            lambda x: mom.rolling_corr(x, x, pairwise=True, window=10,
                                       min_periods=5),
            # rolling_corr_pairwise is deprecated, so the following line
            # should be deleted when rolling_corr_pairwise is removed.
            lambda x: mom.rolling_corr_pairwise(x, x, window=10,
                                                min_periods=5),
        ]
        for f in functions:
            df_result_panel = f(df)
            assert_panel_equal(df_result_panel, df_expected_panel)

    def test_expanding_cov_pairwise_diff_length(self):
        """Pairwise expanding_cov with differently indexed frames (GH 7512)."""
        # GH 7512
        df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=['A', 'B'])
        df1a = DataFrame([[1, 5], [3, 9]], index=[0, 2], columns=['A', 'B'])
        df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y'])
        df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y'])
        result1 = mom.expanding_cov(df1, df2, pairwise=True)[2]
        result2 = mom.expanding_cov(df1, df2a, pairwise=True)[2]
        result3 = mom.expanding_cov(df1a, df2, pairwise=True)[2]
        result4 = mom.expanding_cov(df1a, df2a, pairwise=True)[2]
        expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A', 'B'],
                             columns=['X', 'Y'])
        assert_frame_equal(result1, expected)
        assert_frame_equal(result2, expected)
        assert_frame_equal(result3, expected)
        assert_frame_equal(result4, expected)

    def test_expanding_corr_pairwise_diff_length(self):
        """Pairwise expanding_corr with differently indexed frames (GH 7512)."""
        # GH 7512
        df1 = DataFrame([[1, 2], [3, 2], [3, 4]], columns=['A', 'B'])
        df1a = DataFrame([[1, 2], [3, 4]], index=[0, 2], columns=['A', 'B'])
        df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y'])
        df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y'])
        result1 = mom.expanding_corr(df1, df2, pairwise=True)[2]
        result2 = mom.expanding_corr(df1, df2a, pairwise=True)[2]
        result3 = mom.expanding_corr(df1a, df2, pairwise=True)[2]
        result4 = mom.expanding_corr(df1a, df2a, pairwise=True)[2]
        expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A', 'B'],
                             columns=['X', 'Y'])
        assert_frame_equal(result1, expected)
        assert_frame_equal(result2, expected)
        assert_frame_equal(result3, expected)
        assert_frame_equal(result4, expected)

    def test_rolling_skew_edge_cases(self):
        """rolling_skew on constant data, a too-small window, known values."""
        all_nan = Series([np.NaN] * 5)

        # yields all NaN (0 variance)
        d = Series([1] * 5)
        x = mom.rolling_skew(d, window=5)
        assert_series_equal(all_nan, x)

        # yields all NaN (window too small)
        d = Series(np.random.randn(5))
        x = mom.rolling_skew(d, window=2)
        assert_series_equal(all_nan, x)

        # yields [NaN, NaN, NaN, 0.177994, 1.548824]
        d = Series([-1.50837035, -0.1297039, 0.19501095,
                    1.73508164, 0.41941401])
        expected = Series([np.NaN, np.NaN, np.NaN,
                           0.177994, 1.548824])
        x = mom.rolling_skew(d, window=4)
        assert_series_equal(expected, x)

    def test_rolling_kurt_edge_cases(self):
        """rolling_kurt on constant data, a too-small window, known values."""
        all_nan = Series([np.NaN] * 5)

        # yields all NaN (0 variance)
        d = Series([1] * 5)
        x = mom.rolling_kurt(d, window=5)
        assert_series_equal(all_nan, x)

        # yields all NaN (window too small)
        d = Series(np.random.randn(5))
        x = mom.rolling_kurt(d, window=3)
        assert_series_equal(all_nan, x)

        # yields [NaN, NaN, NaN, 1.224307, 2.671499]
        d = Series([-1.50837035, -0.1297039, 0.19501095,
                    1.73508164, 0.41941401])
        expected = Series([np.NaN, np.NaN, np.NaN,
                           1.224307, 2.671499])
        x = mom.rolling_kurt(d, window=4)
        assert_series_equal(expected, x)

    def _check_expanding_ndarray(self, func, static_comp,
                                 has_min_periods=True,
                                 has_time_rule=True, preserve_nan=True):
        """Check an expanding function on ndarrays against ``static_comp``.

        ``has_time_rule`` is accepted for signature parity with the rolling
        helpers but is not used in the body.
        """
        result = func(self.arr)

        assert_almost_equal(result[10],
                            static_comp(self.arr[:11]))

        if preserve_nan:
            assert(np.isnan(result[self._nan_locs]).all())

        arr = randn(50)

        if has_min_periods:
            result = func(arr, min_periods=30)
            assert(np.isnan(result[:29]).all())
            assert_almost_equal(result[-1], static_comp(arr[:50]))

            # min_periods is working correctly
            result = func(arr, min_periods=15)
            self.assertTrue(np.isnan(result[13]))
            self.assertFalse(np.isnan(result[14]))

            arr2 = randn(20)
            result = func(arr2, min_periods=5)
            self.assertTrue(isnull(result[3]))
            self.assertTrue(notnull(result[4]))

            # min_periods=0
            result0 = func(arr, min_periods=0)
            result1 = func(arr, min_periods=1)
            assert_almost_equal(result0, result1)
        else:
            result = func(arr)
            assert_almost_equal(result[-1], static_comp(arr[:50]))

    def _check_expanding_structures(self, func):
        """An expanding function returns a Series / DataFrame for such input."""
        series_result = func(self.series)
        tm.assert_isinstance(series_result, Series)
        frame_result = func(self.frame)
        self.assertEqual(type(frame_result), DataFrame)

    def _check_expanding(self, func, static_comp, has_min_periods=True,
                         has_time_rule=True,
                         preserve_nan=True):
        """Run the ndarray and the structure checks for an expanding function."""
        self._check_expanding_ndarray(func, static_comp,
                                      has_min_periods=has_min_periods,
                                      has_time_rule=has_time_rule,
                                      preserve_nan=preserve_nan)
        self._check_expanding_structures(func)

    def test_rolling_max_gh6297(self):
        """Replicate result expected in GH #6297"""
        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 2 datapoints on one of the days
        indices.append(datetime(1975, 1, 3, 6, 0))
        series = Series(range(1, 7), index=indices)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        expected = Series([1.0, 2.0, 6.0, 4.0, 5.0],
                          index=[datetime(1975, 1, i, 0)
                                 for i in range(1, 6)])
        x = mom.rolling_max(series, window=1, freq='D')
        assert_series_equal(expected, x)

    def test_rolling_max_how_resample(self):
        """rolling_max with freq='D': default, 'median' and 'mean' ``how``."""
        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        indices.append(datetime(1975, 1, 5, 1))
        indices.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=indices)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        # Default how should be max
        expected = Series([0.0, 1.0, 2.0, 3.0, 20.0],
                          index=[datetime(1975, 1, i, 0)
                                 for i in range(1, 6)])
        x = mom.rolling_max(series, window=1, freq='D')
        assert_series_equal(expected, x)

        # Now specify median (10.0)
        expected = Series([0.0, 1.0, 2.0, 3.0, 10.0],
                          index=[datetime(1975, 1, i, 0)
                                 for i in range(1, 6)])
        x = mom.rolling_max(series, window=1, freq='D', how='median')
        assert_series_equal(expected, x)

        # Now specify mean (4+10+20)/3
        v = (4.0 + 10.0 + 20.0) / 3.0
        expected = Series([0.0, 1.0, 2.0, 3.0, v],
                          index=[datetime(1975, 1, i, 0)
                                 for i in range(1, 6)])
        x = mom.rolling_max(series, window=1, freq='D', how='mean')
        assert_series_equal(expected, x)

    def test_rolling_min_how_resample(self):
        """rolling_min with freq='D' resamples with min by default."""
        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        indices.append(datetime(1975, 1, 5, 1))
        indices.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=indices)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        # Default how should be min
        expected = Series([0.0, 1.0, 2.0, 3.0, 4.0],
                          index=[datetime(1975, 1, i, 0)
                                 for i in range(1, 6)])
        x = mom.rolling_min(series, window=1, freq='D')
        assert_series_equal(expected, x)

    def test_rolling_median_how_resample(self):
        """rolling_median with freq='D' resamples with median by default."""
        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        indices.append(datetime(1975, 1, 5, 1))
        indices.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=indices)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        # Default how should be median
        expected = Series([0.0, 1.0, 2.0, 3.0, 10],
                          index=[datetime(1975, 1, i, 0)
                                 for i in range(1, 6)])
        x = mom.rolling_median(series, window=1, freq='D')
        assert_series_equal(expected, x)
def test_dropna(self): # GH 13737 s = Series([pd.Period('2011-01', freq='M'), pd.Period('NaT', freq='M')]) tm.assert_series_equal(s.dropna(), Series([pd.Period('2011-01', freq='M')]))
class MySeries:
    """Thin wrapper around a pandas ``Series`` with chainable delegates.

    Every public method forwards its arguments unchanged to the matching
    pandas function or ``Series`` method and wraps whatever comes back in a
    new ``MySeries``.

    Attributes set by ``__init__``:
        x: the wrapped ``Series``.
        values: ``x.values``, captured at construction time.
        index: ``x.index``, captured at construction time.
    """

    def __init__(self, *args, **kwargs):
        # All arguments go straight to the Series constructor.
        self.x = Series(*args, **kwargs)
        self.values = self.x.values
        self.index = self.x.index

    # ------------------------------------------------------------------
    # Moving-window statistics: call the module-level ``pd.rolling_*``
    # functions with the wrapped Series as first argument.
    # NOTE(review): these top-level functions presumably only exist in
    # older pandas releases -- confirm the pandas version targeted here.
    # ------------------------------------------------------------------
    def rolling_mean(self, *args, **kwargs):
        return MySeries(pd.rolling_mean(self.x, *args, **kwargs))

    def rolling_count(self, *args, **kwargs):
        return MySeries(pd.rolling_count(self.x, *args, **kwargs))

    def rolling_sum(self, *args, **kwargs):
        return MySeries(pd.rolling_sum(self.x, *args, **kwargs))

    def rolling_median(self, *args, **kwargs):
        return MySeries(pd.rolling_median(self.x, *args, **kwargs))

    def rolling_min(self, *args, **kwargs):
        return MySeries(pd.rolling_min(self.x, *args, **kwargs))

    def rolling_max(self, *args, **kwargs):
        return MySeries(pd.rolling_max(self.x, *args, **kwargs))

    def rolling_std(self, *args, **kwargs):
        return MySeries(pd.rolling_std(self.x, *args, **kwargs))

    def rolling_var(self, *args, **kwargs):
        return MySeries(pd.rolling_var(self.x, *args, **kwargs))

    def rolling_skew(self, *args, **kwargs):
        return MySeries(pd.rolling_skew(self.x, *args, **kwargs))

    def rolling_kurtosis(self, *args, **kwargs):
        """Rolling kurtosis of the wrapped Series, wrapped in a MySeries."""
        # The pandas function is spelled ``rolling_kurt``; there is no
        # ``pd.rolling_kurtosis``, so looking that name up raised
        # AttributeError on every call.
        return MySeries(pd.rolling_kurt(self.x, *args, **kwargs))

    def rolling_window(self, *args, **kwargs):
        return MySeries(pd.rolling_window(self.x, *args, **kwargs))

    # ------------------------------------------------------------------
    # Delegates to methods of the wrapped Series.
    # ------------------------------------------------------------------
    def cumprod(self, *args, **kwargs):
        return MySeries(self.x.cumprod(*args, **kwargs))

    def cumsum(self, *args, **kwargs):
        return MySeries(self.x.cumsum(*args, **kwargs))

    def diff(self, *args, **kwargs):
        return MySeries(self.x.diff(*args, **kwargs))

    def div(self, *args, **kwargs):
        return MySeries(self.x.div(*args, **kwargs))

    def mul(self, *args, **kwargs):
        return MySeries(self.x.mul(*args, **kwargs))

    def add(self, *args, **kwargs):
        return MySeries(self.x.add(*args, **kwargs))

    def dropna(self, *args, **kwargs):
        return MySeries(self.x.dropna(*args, **kwargs))

    def fillna(self, *args, **kwargs):
        return MySeries(self.x.fillna(*args, **kwargs))

    def floordiv(self, *args, **kwargs):
        return MySeries(self.x.floordiv(*args, **kwargs))

    def mod(self, *args, **kwargs):
        return MySeries(self.x.mod(*args, **kwargs))

    def nlargest(self, *args, **kwargs):
        return MySeries(self.x.nlargest(*args, **kwargs))

    def nonzero(self, *args, **kwargs):
        # NOTE(review): the result of Series.nonzero is handed to the Series
        # constructor as-is -- verify that is the intended shape.
        return MySeries(self.x.nonzero(*args, **kwargs))

    def nsmallest(self, *args, **kwargs):
        return MySeries(self.x.nsmallest(*args, **kwargs))

    def pow(self, *args, **kwargs):
        return MySeries(self.x.pow(*args, **kwargs))

    def rank(self, *args, **kwargs):
        return MySeries(self.x.rank(*args, **kwargs))

    def round(self, *args, **kwargs):
        return MySeries(self.x.round(*args, **kwargs))

    def shift(self, *args, **kwargs):
        return MySeries(self.x.shift(*args, **kwargs))

    def sub(self, *args, **kwargs):
        return MySeries(self.x.sub(*args, **kwargs))

    def abs(self, *args, **kwargs):
        return MySeries(self.x.abs(*args, **kwargs))

    def clip(self, *args, **kwargs):
        return MySeries(self.x.clip(*args, **kwargs))

    def clip_lower(self, *args, **kwargs):
        return MySeries(self.x.clip_lower(*args, **kwargs))

    def clip_upper(self, *args, **kwargs):
        return MySeries(self.x.clip_upper(*args, **kwargs))

    def interpolate(self, *args, **kwargs):
        return MySeries(self.x.interpolate(*args, **kwargs))

    def resample(self, *args, **kwargs):
        # NOTE(review): whatever Series.resample returns is handed to the
        # Series constructor -- confirm this matches the pandas version used.
        return MySeries(self.x.resample(*args, **kwargs))

    def replace(self, *args, **kwargs):
        return MySeries(self.x.replace(*args, **kwargs))