def test_dropna_empty(self):
        """Check that dropna on an empty Series yields an empty Series.

        Covers both the copying and the in-place form, and verifies that
        an unsupported axis is rejected with ValueError.
        """
        empty = Series([])
        self.assertEqual(len(empty.dropna()), 0)
        empty.dropna(inplace=True)
        self.assertEqual(len(empty), 0)

        # axis=1 is not accepted here; the call must raise
        self.assertRaises(ValueError, empty.dropna, axis=1)
Example #2
0
    def test_dropna_empty(self):
        """Check that dropna on an empty Series yields an empty Series.

        Covers both the copying and the in-place form, and verifies that
        an unsupported axis is rejected with ValueError.
        """
        empty = Series([])
        assert len(empty.dropna()) == 0
        empty.dropna(inplace=True)
        assert len(empty) == 0

        # axis=1 is not accepted here; the call must raise
        with pytest.raises(ValueError):
            empty.dropna(axis=1)
Example #3
0
def pd_02():
    """Print a walkthrough of missing-value handling on a Series and a DataFrame.

    Written with Python 2 ``print`` statements. Takes no arguments and
    returns nothing; all results go to standard output.
    """
    # The sample data contains both np.nan and None as missing markers.
    string_data=Series(['a','b','c',np.nan,'e',None])
    print string_data
    print string_data.isnull()
    print string_data.dropna()
    # 7x3 frame of random values with NaN written into parts of columns 1 and 2.
    df=DataFrame(np.random.randn(7,3))
    df.ix[:4,1]=np.nan
    df.ix[:2,2]=np.nan
    print df
    print df.dropna()
    print df.fillna(0)
    # Per-column fill values; key 3 has no matching column in this 3-column frame.
    print df.fillna({1:0.5,3:-1})
    # None of the fillna calls above used inplace=True, so df is printed unchanged.
    print df
    df.fillna(0,inplace=True)
    print df
Example #4
0
class Dropna(object):
    """Benchmark ``Series.dropna`` on integer and datetime data.

    ``params`` / ``param_names`` declare the ``dtype`` parameter that is
    passed to ``setup`` and ``time_dropna``.
    """

    params = ['int', 'datetime']
    param_names = ['dtype']

    def setup(self, dtype):
        """Build ``self.s``, a one-million-element Series of the given dtype.

        For ``'datetime'`` 100 randomly chosen labels are overwritten with
        NaT so that dropna has something to remove.
        """
        N = 10**6
        # 's' is the current spelling of the one-second offset alias;
        # the upper-case 'S' form is deprecated in recent pandas releases.
        data = {'int': np.random.randint(1, 10, N),
                'datetime': date_range('2000-01-01', freq='s', periods=N)}
        self.s = Series(data[dtype])
        if dtype == 'datetime':
            self.s[np.random.randint(1, N, 100)] = NaT

    def time_dropna(self, dtype):
        """Timed body: drop missing values from the prepared Series."""
        self.s.dropna()
Example #5
0
def kama(x, n=10, pow1=2, pow2=30):
    """KAMA: Kaufmans Adaptive Moving Average.

    Missing values in ``x`` are removed before the calculation and the
    same number of NaN entries is placed at the front of the result, so
    the output always has the length and index of ``x``. (This matches
    the usual case of missing values at the start of the series.)

    Params:
        x (Series): Time series data such as close prices.

        n (int): number of periods for the Efficiency Ratio (ER).

        pow1 (int): number of periods for the fastest EMA constant.

        pow2 (int): number of periods for the slowest EMA constant.

    Returns:
        Series: Kaufmans adaptive moving average of x.
    """
    original_index = x.index

    # Work on a positionally indexed copy of the non-missing values.
    # Re-attaching the full-length index here would fail as soon as a
    # value had been dropped, and positional arrays avoid label lookups
    # with integer keys on non-integer indexes.
    clean = Series(x.dropna().values)
    nan_count = len(original_index) - len(clean)

    change = (clean - clean.shift(n)).abs()
    volatility = (clean - clean.shift(1)).abs().rolling(window=n).sum()
    er = change / volatility
    fastest = 2.0 / (pow1 + 1.0)
    slowest = 2.0 / (pow2 + 1.0)
    sc = ((er * (fastest - slowest) + slowest) ** 2.0).values
    prices = clean.values

    smoothed = [np.nan] * len(prices)
    seeded = False
    for i, constant in enumerate(sc):
        if pd.isnull(constant):
            continue
        if not seeded:
            # The first defined smoothing constant seeds the average
            # with the raw value at that position.
            smoothed[i] = prices[i]
            seeded = True
        else:
            smoothed[i] = smoothed[i - 1] + constant * (prices[i] - smoothed[i - 1])

    return Series(data=[np.nan] * nan_count + smoothed,
                  name="kama(%d,%d,%d)" % (n, pow1, pow2),
                  index=original_index)
Example #6
0
    def test_dropna_intervals(self):
        """dropna removes the NaN value of a Series indexed by an IntervalIndex."""
        interval_index = IntervalIndex.from_arrays(
            [np.nan, 0, 1, 2],
            [np.nan, 1, 2, 3])
        ser = Series([np.nan, 1, 2, 3], interval_index)

        # Only the first value is missing, so everything after it must remain.
        assert_series_equal(ser.dropna(), ser.iloc[1:])
def test_bins_unequal_len():
    """Grouping by bins of a different length than the Series must raise."""
    # GH3011
    values = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
    # The bins are cut from the NaN-free values only, so the grouper
    # is shorter than the Series it is applied to.
    grouper = pd.cut(values.dropna().values, 4)

    with pytest.raises(ValueError):
        values.groupby(grouper).mean()
Example #8
0
    def contains_op(cls, series: pd.Series) -> bool:
        if not pdt.is_datetime64_any_dtype(series):
            return False

        temp_series = series.dropna().dt
        time_val_map = {"hour": 0, "minute": 0, "second": 0}
        return all(
            getattr(temp_series, time_part).eq(val).all()
            for time_part, val in time_val_map.items())
 def test_isnull_for_inf(self):
     """With 'mode.use_inf_as_null' enabled, inf is reported and dropped like NaN."""
     ser = Series(['a', np.inf, np.nan, 1.0])
     with pd.option_context('mode.use_inf_as_null', True):
         null_mask = ser.isnull()
         dropped = ser.dropna()
     tm.assert_series_equal(null_mask, Series([False, True, True, False]))
     tm.assert_series_equal(dropped, Series(['a', 1.0], index=[0, 3]))
Example #10
0
def test_bins_unequal_len():
    """Grouping by bins of a different length than the Series must raise."""
    # GH3011
    values = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
    # The bins are cut from the NaN-free values only, so the grouper
    # is shorter than the Series it is applied to.
    grouper = pd.cut(values.dropna().values, 4)

    expected_message = r"Length of grouper \(8\) and axis \(10\) must be same length"
    with pytest.raises(ValueError, match=expected_message):
        values.groupby(grouper).mean()
Example #11
0
 def test_isna_for_inf(self):
     """With "mode.use_inf_as_na" enabled, inf is reported and dropped like NaN/NA."""
     ser = Series(["a", np.inf, np.nan, pd.NA, 1.0])
     with pd.option_context("mode.use_inf_as_na", True):
         na_mask = ser.isna()
         dropped = ser.dropna()
     tm.assert_series_equal(na_mask, Series([False, True, True, True, False]))
     tm.assert_series_equal(dropped, Series(["a", 1.0], index=[0, 4]))
Example #12
0
def get_common_names(
    names: pd.Series,
    token: str,
    add_supplied_names: bool = False,
    add_source: bool = False,
    expand: bool = True,
):
    """
    Gets common names for multiple species using the IUCN API.

    One request is made per unique, non-missing name. Each response
    becomes one row whose columns are languages and whose cells are the
    lists of common names in that language; a name with no result
    yields an empty row.

    Parameters
    ----------
    names : list, Series or str
        Scientific name(s) to get results for.
    token : str
        IUCN API authentication token.
    add_supplied_names : bool
        Add supplied scientific names column to the resulting DataFrame.
    add_source : bool
        Add source column to the resulting DataFrame.
    expand : bool
        Whether to expand result rows to match `names` size. If False,
        the number of rows will correspond to the number of unique names
        in `names`.

    Returns
    -------
    DataFrame
        DataFrame with common names.

    """
    if isinstance(names, (list, str)):
        names = pd.Series(names)

    endpoint = urljoin(API_URL, "species/common_names/")

    unique_names = names.dropna().unique()
    # Collect the rows first and build the frame once: growing a frame
    # row by row copies it on every step, and DataFrame.append no longer
    # exists in pandas 2.x.
    rows = []
    for name in unique_names:
        # Parse the response body a single time.
        items = _request(urljoin(endpoint, name), token).json().get("result")
        if items:
            by_language = defaultdict(list)
            for item in items:
                by_language[item["language"]].append(item["taxonname"])
            rows.append(pd.Series(by_language))
        else:
            rows.append(pd.Series([], dtype="object"))

    if rows:
        df = pd.DataFrame(rows).reset_index(drop=True)
    else:
        df = pd.DataFrame()

    if add_supplied_names:
        df["supplied_name"] = unique_names
    if add_source:
        df["source"] = "IUCN"
    if expand:
        df = expand_result(df, names)

    return df
Example #13
0
    def __init__(self, data: Series):
        """
        Create a new Count distribution.

        Missing values are discarded and the remaining values are cast to
        int. The categories are every integer from the smallest to the
        largest observed value, inclusive.

        :param data: pandas Series.
        """
        counts = data.dropna().astype(int)
        self._data: Series = counts
        lowest, highest = counts.min(), counts.max()
        self._categories = list(range(lowest, highest + 1))
Example #14
0
    def test_groupby_bins_unequal_len(self):
        """Grouping by bins of a different length than the Series must raise."""
        # GH3011
        values = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
        # The bins are cut from the NaN-free values only, so the grouper
        # is shorter than the Series it is applied to.
        grouper = pd.cut(values.dropna().values, 4)

        self.assertRaises(ValueError, lambda: values.groupby(grouper).mean())
Example #15
0
def unique(x: pd.Series) -> Any:
    """Collapse a Series to its one distinct non-missing value.

    Returns NaN when nothing is left after dropping missing values, and
    raises AggregationError when more than one distinct value remains.
    """
    present = x.dropna()
    if present.empty:
        return np.nan
    distinct = present.unique()
    if distinct.size != 1:
        raise AggregationError("Not unique.")
    return distinct[0]
Example #16
0
    def inner(
        config: Settings, series: pd.Series, state: dict, *args, **kwargs
    ) -> bool:
        """Strip missing values from ``series`` before delegating to ``fn``.

        Returns False without calling ``fn`` when the series held missing
        values and nothing remains once they are removed.
        """
        had_missing = series.hasnans
        if had_missing:
            series = series.dropna()
        if had_missing and series.empty:
            return False

        return fn(config, series, state, *args, **kwargs)
Example #17
0
 def test_isnull_for_inf(self):
     """With 'mode.use_inf_as_null' enabled, inf is reported and dropped like NaN."""
     data = Series(['a', np.inf, np.nan, 1.0])
     with pd.option_context('mode.use_inf_as_null', True):
         flagged = data.isnull()
         remaining = data.dropna()
     tm.assert_series_equal(flagged, Series([False, True, True, False]))
     tm.assert_series_equal(remaining, Series(['a', 1.0], index=[0, 3]))
Example #18
0
    def test_dropna_intervals(self):
        """dropna removes the NaN value of a Series indexed by an IntervalIndex."""
        interval_index = IntervalIndex.from_arrays(
            [np.nan, 0, 1, 2], [np.nan, 1, 2, 3]
        )
        data = Series([np.nan, 1, 2, 3], interval_index)

        # Only the first value is missing, so everything after it must remain.
        tm.assert_series_equal(data.dropna(), data.iloc[1:])
Example #19
0
 def sanitize_data(self, series: pd.Series) -> Optional[pd.Series]:
     """Clean a series: drop missing values, sort by index, average duplicates.

     Returns None when fewer than two points remain after dropping
     missing values.
     """
     cleaned = series.dropna().sort_index()
     # Less than 2 points are not visible
     if len(cleaned) < 2:
         return None
     # Collapse entries sharing an index label into their mean so that
     # every label appears exactly once.
     return cleaned.groupby(level=0).mean()
Example #20
0
    def _validate_data(self, data: Series):
        """Raise ValueError if ``data`` holds a value outside ``self.category_names``.

        Missing values are ignored. All offending values are reported
        together, one per line, in a single exception.
        """
        unknown = [
            value for value in data.dropna().unique()
            if value not in self.category_names
        ]
        if not unknown:
            return
        raise ValueError('\n'.join(
            f'"{value}" is not in categories for "{self.name}".'
            for value in unknown
        ))
Example #21
0
    def test_datetime64_tz_dropna(self):
        """dropna removes NaT from naive and tz-aware datetime Series, keeping dtype."""
        first, third = "2011-01-01 10:00", "2011-01-03 10:00"

        # DatetimeBlock
        naive = Series([Timestamp(first), pd.NaT, Timestamp(third), pd.NaT])
        self.assert_series_equal(
            naive.dropna(),
            Series([Timestamp(first), Timestamp(third)], index=[0, 2]),
        )

        # DatetimeBlockTZ
        zone = "Asia/Tokyo"
        aware = pd.Series(pd.DatetimeIndex([first, pd.NaT, third, pd.NaT], tz=zone))
        self.assertEqual(aware.dtype, "datetime64[ns, Asia/Tokyo]")
        kept = aware.dropna()
        wanted = Series(
            [Timestamp(first, tz=zone), Timestamp(third, tz=zone)],
            index=[0, 2],
        )
        self.assertEqual(kept.dtype, "datetime64[ns, Asia/Tokyo]")
        self.assert_series_equal(kept, wanted)
Example #22
0
class Dropna(object):
    """Benchmark ``Series.dropna`` on integer and datetime data.

    ``params`` / ``param_names`` declare the ``dtype`` parameter that is
    passed to ``setup`` and ``time_dropna``.
    """

    goal_time = 0.2
    params = ['int', 'datetime']
    param_names = ['dtype']

    def setup(self, dtype):
        """Build ``self.s``, a one-million-element Series of the given dtype.

        For ``'datetime'`` 100 randomly chosen labels are overwritten with
        NaT so that dropna has something to remove.
        """
        N = 10**6
        # 's' is the current spelling of the one-second offset alias;
        # the upper-case 'S' form is deprecated in recent pandas releases.
        data = {
            'int': np.random.randint(1, 10, N),
            'datetime': date_range('2000-01-01', freq='s', periods=N)
        }
        self.s = Series(data[dtype])
        if dtype == 'datetime':
            self.s[np.random.randint(1, N, 100)] = NaT

    def time_dropna(self, dtype):
        """Timed body: drop missing values from the prepared Series."""
        self.s.dropna()
Example #23
0
    def test_groupby_bins_unequal_len(self):
        """Grouping by bins of a different length than the Series must raise."""
        # GH3011
        values = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
        # The bins are cut from the NaN-free values only, so the grouper
        # is shorter than the Series it is applied to.
        grouper = pd.cut(values.dropna().values, 4)

        with pytest.raises(ValueError):
            values.groupby(grouper).mean()
Example #24
0
    def __init__(self, data: Series):
        """
        Create a new Ordinal distribution.

        Missing values are discarded; the category list is read from the
        categorical accessor of the remaining data.

        :param data: Categorical pandas Series.
        """
        observed = data.dropna()
        self._data: Series = observed
        self._categories: List[str] = observed.cat.categories.to_list()
Example #25
0
    def get_problem_type(y: Series):
        """ Identifies which type of prediction problem we are interested in (if user has not specified).
            Ie. binary classification, multi-class classification, or regression.

            Decision order: exactly two unique labels -> BINARY; object dtype -> MULTICLASS;
            float or integer dtype -> MULTICLASS when there are few unique values relative to the
            number of rows (and, for floats, every value equals its integer cast), otherwise REGRESSION.

            Raises ValueError for an empty ``y`` and NotImplementedError for any other label dtype.
        """
        if len(y) == 0:
            raise ValueError("provided labels cannot have length = 0")
        y = y.dropna()  # Remove missing values from y (there should not be any though as they were removed in Learner.general_data_processing())
        num_rows = len(y)

        unique_values = y.unique()
        unique_count = len(unique_values)
        logger.log(20, f'Here are the first 10 unique label values in your data:  {unique_values[:10]}')

        MULTICLASS_LIMIT = 1000  # if numeric and class count would be above this amount, assume it is regression
        # if the unique-ratio is less than this, we assume multiclass classification, even when labels are integers
        REGRESS_THRESHOLD = 0.05 if num_rows > 1000 else 0.1

        def has_few_unique_values():
            # Shared by the float and integer branches below.
            unique_ratio = unique_count / float(num_rows)
            return (unique_ratio <= REGRESS_THRESHOLD) and (unique_count <= MULTICLASS_LIMIT)

        if unique_count == 2:
            problem_type = BINARY
            reason = "only two unique label-values observed"
        elif unique_values.dtype == 'object':
            problem_type = MULTICLASS
            reason = "dtype of label-column == object"
        elif np.issubdtype(unique_values.dtype, np.floating):
            if has_few_unique_values():
                try:
                    can_convert_to_int = np.array_equal(y, y.astype(int))
                except Exception:
                    # A failed integer cast means the labels are not integral.
                    # Catching Exception rather than using a bare except lets
                    # KeyboardInterrupt / SystemExit propagate.
                    can_convert_to_int = False
                if can_convert_to_int:
                    problem_type = MULTICLASS
                    reason = "dtype of label-column == float, but few unique label-values observed and label-values can be converted to int"
                else:
                    problem_type = REGRESSION
                    reason = "dtype of label-column == float and label-values can't be converted to int"
            else:
                problem_type = REGRESSION
                reason = "dtype of label-column == float and many unique label-values observed"
        elif np.issubdtype(unique_values.dtype, np.integer):
            if has_few_unique_values():
                problem_type = MULTICLASS  # TODO: Check if integers are from 0 to n-1 for n unique values, if they have a wide spread, it could still be regression
                reason = "dtype of label-column == int, but few unique label-values observed"
            else:
                problem_type = REGRESSION
                reason = "dtype of label-column == int and many unique label-values observed"
        else:
            raise NotImplementedError('label dtype', unique_values.dtype, 'not supported!')
        logger.log(25, f"AutoGluon infers your prediction problem is: {problem_type}  (because {reason}).")
        logger.log(25, f"If this is wrong, please specify `problem_type` argument in fit() instead "
                       f"(You may specify problem_type as one of: [{BINARY, MULTICLASS, REGRESSION}])\n")
        return problem_type
Example #26
0
    def inner(series: pd.Series, state: dict, *args, **kwargs) -> bool:
        """Strip missing values from ``series`` before delegating to ``fn``.

        Whether the series has missing values is looked up once and
        remembered in ``state["hasnans"]``. Returns False without calling
        ``fn`` when nothing remains after the missing values are removed.
        """
        if "hasnans" not in state:
            state["hasnans"] = series.hasnans

        has_missing = state["hasnans"]
        if has_missing:
            series = series.dropna()
        if has_missing and series.empty:
            return False

        return fn(series, state, *args, **kwargs)
Example #27
0
def reducer(x: pd.Series) -> float:
    """Reduce a series of (sum, count) tuples to an overall mean.

    The sums and the counts are totalled separately and divided, with the
    result rounded to 5 decimals. If an entry cannot be unpacked
    (TypeError), the calculation is retried on the non-missing entries
    only; when every entry is missing the function returns None.
    """
    def _mean_of(pairs):
        # zip(*pairs) transposes the tuples into (all sums, all counts).
        totals = [sum(column) for column in zip(*pairs)]
        return round(totals[0] / totals[1], 5)

    try:
        return _mean_of(x)
    except TypeError:
        if not x.notnull().any():
            return None
        return _mean_of(x.dropna())
Example #28
0
def compare_delta_metrics(s: pd.Series, thresholds: list):
    """Summarise a series of delays as percentage shares per lateness bucket.

    Missing values are ignored. ``thresholds[0]`` and ``thresholds[1]``
    are the two cut-off values used in the bucket definitions and in the
    keys of the returned dict. Each value is a percentage of the
    non-missing entries.
    """
    delays = s.dropna()

    def share(mask):
        # Percentage of the non-missing entries selected by ``mask``.
        return len(delays[mask]) / len(delays) * 100

    metrics = {}
    metrics[f"on-time rate (at most {thresholds[0]} minutes late)"] = share(
        (delays <= thresholds[0]) & (delays >= 0))
    metrics["early rate"] = share(delays < 0)
    metrics[f"gap percentage (more than {thresholds[0]} minutes late)"] = share(
        delays > thresholds[0])
    metrics[f"late percentage (between {thresholds[0]} and {thresholds[1]} minutes late)"] = share(
        (delays > thresholds[0]) & (delays <= thresholds[1]))
    metrics[f"very late percentage (more than {thresholds[1]} minutes late)"] = share(
        delays > thresholds[1])
    return metrics
def problem_type(labels: pd.Series) -> ProblemType:
    """
    Classify the prediction problem from its labels.

    :returns: ProblemType.BINARY when the non-missing labels take exactly two distinct values,
              ProblemType.OTHER otherwise. So far only binary classification is supported.
    """
    # TODO: add other problem types
    distinct = labels.dropna().unique()
    return ProblemType.BINARY if distinct.size == 2 else ProblemType.OTHER
Example #30
0
    def profile_named_entity(column: pd.Series) -> typing.List[str]:
        """Collect the distinct named entities of a column as strings.

        Use when the column is marked as a named-entities column.

        Args:
            column: pandas Series column.

        Returns:
            list of the distinct non-missing values, each converted to str
        """
        distinct = column.dropna().unique()
        as_text = distinct.astype(str)
        return as_text.tolist()
Example #31
0
    def contains_op(cls, series: pd.Series) -> bool:
        """Return True when an object-dtype series holds only ``str`` values.

        Missing values are ignored; a series consisting solely of missing
        values is rejected.
        """
        # TODO: without the object check this passes string categories... is there a better way?
        if not pdt.is_object_dtype(series):
            return False
        if series.hasnans:
            series = series.dropna()
            if series.empty:
                return False

        for value in series:
            if not isinstance(value, str):
                return False
        return True
Example #32
0
def concat(series: pd.Series):
    """Join the non-missing values of a series into one "|"-separated string.

    Args:
        series (pd.Series): values to join; each is converted to str.

    Returns:
        The joined string, or None when no non-missing value exists.
    """
    parts = series.dropna().astype(str)
    return None if parts.empty else "|".join(parts)
Example #33
0
 def test_dropna_pos_args_deprecation(self):
     """Passing the axis to Series.dropna positionally emits a FutureWarning."""
     # https://github.com/pandas-dev/pandas/issues/41485
     data = Series([1, 2, 3])
     warning_pattern = (
         r"In a future version of pandas all arguments of Series\.dropna "
         r"will be keyword-only")
     with tm.assert_produces_warning(FutureWarning, match=warning_pattern):
         dropped = data.dropna(0)
     # Nothing is missing, so the data must come back unchanged.
     tm.assert_series_equal(dropped, Series([1, 2, 3]))
Example #34
0
def concat_uniques(series: pd.Series):
    """Join the distinct non-missing values of a series with "|".

    Intended as a custom aggregation applied to each column of a groupby.

    Args:
        series (pd.Series): values to join; each is converted to str.

    Returns:
        The joined string, or None when no non-missing value exists.
    """
    as_text = series.dropna().astype(str)
    return None if as_text.empty else "|".join(as_text.unique())
Example #35
0
 def make_plot(
         series: pd.Series,
         axes,  #: matplotlib.axes.Axes
         title: str):
     """Draw a probability plot of ``series`` (missing values removed) on ``axes``.

     The points are drawn as '|' markers in a dtype-specific colour and
     the fitted line is overlaid in dark grey.
     """
     # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.probplot.html
     (quantiles, ordered), (slope, intercept, _r) = sp.stats.probplot(series.dropna())
     point_color = palette_dtypes.get(series.dtype, '#9b59b6')
     axes.plot(quantiles, ordered, color=point_color, marker='|', linestyle='')
     fitted = slope * quantiles + intercept
     axes.plot(quantiles, fitted, color='#4b4b4b', linestyle='-', linewidth=.9)
     axes.set_title(title, fontsize=10)
def _plot_qq(data: pd.Series = None, dist=stats.norm) -> go.Figure:
    """
    Build a Q-Q plot of ``data`` against ``dist`` and convert it to plotly.

    :param data: series to plot; missing values are dropped first
    :param dist: reference distribution passed to ``sm.qqplot``
    :return: the result of ``tls.mpl_to_plotly`` for the drawn figure
    """
    _, axis = plt.subplots(figsize=(8, 5))
    qq_figure = sm.qqplot(data.dropna(), dist, fit=True, line="45", ax=axis)
    return tls.mpl_to_plotly(qq_figure)
Example #37
0
def squeeze(
    daily: pd.Series,
    rate: pd.Series,
    day_shift: int,
    population: pd.Series,
    cross_variant_immunity: float,
    escape_variant_prevalence: pd.Series,
    vaccine_coverage: pd.DataFrame,
    ceiling: float = CEILING,
) -> pd.Series:
    """Scale ``rate`` up where implied immunity would exceed a population ceiling.

    Infections are derived as ``daily / rate``, shifted back by
    ``day_shift`` days, split into first and repeat infections using the
    escape-variant prevalence, and combined with effective vaccinations.
    Where the peak non-susceptible count per location exceeds
    ``population * ceiling``, ``rate`` is divided by a scaling factor below one.

    NOTE(review): the code reads a 'date' column after ``reset_index`` and
    re-indexes on ['location_id', 'date'], so the inputs presumably carry a
    (location_id, date) MultiIndex -- confirm against callers.

    Returns:
        The adjusted ``rate`` with missing values dropped.
    """
    # Implied infections; the +1 offset is applied to every entry.
    daily_infections = (daily / rate).dropna().rename('infections')
    daily_infections += 1
    # Move each observation back by day_shift days, then restore the index.
    daily_infections = daily_infections.reset_index()
    daily_infections['date'] -= pd.Timedelta(days=day_shift)
    daily_infections = daily_infections.set_index(['location_id',
                                                   'date']).loc[:,
                                                                'infections']

    # Align prevalence to the infection index; entries absent from either input become 0.
    escape_variant_prevalence = (pd.concat(
        [daily_infections, escape_variant_prevalence], axis=1))
    escape_variant_prevalence = escape_variant_prevalence.fillna(0)
    escape_variant_prevalence = (
        escape_variant_prevalence.loc[daily_infections.index,
                                      'escape_variant_prevalence'])

    # Split infections by variant and estimate how many escape-variant
    # infections are repeat infections.
    non_ev_infections = daily_infections * (1 - escape_variant_prevalence)
    ev_infections = daily_infections * escape_variant_prevalence
    repeat_infections = (1 - cross_variant_immunity) * (
        non_ev_infections.cumsum() / population).clip(0, 1) * ev_infections
    first_infections = daily_infections - repeat_infections

    # Running totals per first index level.
    # NOTE(review): cumul_infections is not used again within this function.
    cumul_infections = daily_infections.dropna().groupby(level=0).cumsum()
    seroprevalence = first_infections.dropna().groupby(level=0).cumsum()

    # Cumulative vaccinations aligned to ``daily``; diff() recovers daily
    # counts, and the first entry of each group falls back to the cumulative value.
    vaccinations = vaccine_coverage.join(
        daily, how='right')['cumulative_all_effective'].fillna(0)
    daily_vaccinations = vaccinations.groupby(
        level=0).diff().fillna(vaccinations)
    # Discount vaccinations by the share of the population already counted
    # in seroprevalence.
    eff_daily_vaccinations = daily_vaccinations * (
        1 - seroprevalence / population).clip(0, 1)
    eff_vaccinations = eff_daily_vaccinations.groupby(level=0).cumsum()

    non_suscept = seroprevalence + eff_vaccinations
    max_non_suscept = non_suscept.groupby(level=0).max()
    max_sero = seroprevalence.groupby(level=0).max()

    limits = population * ceiling

    # Amount by which the peak non-susceptible count overshoots the limit
    # (0 where it does not), expressed as a factor relative to peak seroprevalence.
    excess_non_suscept = (max_non_suscept - limits).clip(0, np.inf)
    excess_scaling_factor = (max_sero - excess_non_suscept).clip(
        0, np.inf) / max_sero

    # Keep the original rate wherever the scaled rate is undefined.
    rate = (rate / excess_scaling_factor).fillna(rate)

    return rate.dropna()
Example #38
0
def main():
    """
    Demonstrate handling of missing (N/A) values on Series and DataFrames.

    Written with Python 2 ``print`` statements; every result is printed to
    standard output and nothing is returned.
    """

    string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
    print string_data
    print string_data.isnull()
    string_data[0] = None
    print string_data.isnull()
    print None is np.nan, None == np.nan # None and np.nan are different objects and do not compare equal

    # Excluding missing values
    print '',''
    NA = np.nan
    data = Series([1, NA, 3.5, NA, 7])
    print data.dropna()
    print data[data.notnull()]

    data = DataFrame([
        [1., 6.5, 3.],
        [1., NA, NA],
        [NA, NA, NA],
        [NA, 6.5, 3.]
    ])
    cleaned = data.dropna() # default dropna: rows containing any NA are removed
    print data
    print cleaned
    print data.dropna(how='all')
    data[4] = None
    print data.dropna(axis=1, how='all')
    print data.dropna(thresh=2) # keep rows with at least 2 non-NA values

    # Filling missing values
    print '',''
    print data.fillna(0)
    print data.fillna({1: 0.5, 2: -1})
    _ = data.fillna(0, inplace=True)
    print data
    print '',''
    df = DataFrame(np.arange(18).reshape((6, 3)))
    df.ix[2:, 1] = NA; df.ix[4:, 2] = NA
    print df
    print df.fillna(method='ffill')
    print df.fillna(method='ffill', limit=2)
    data = Series([1., NA, 3.5, NA, 7])
    print data.fillna(data.mean())
def describe_dc_as_dataframe(dc: pd.Series, ds_md: dict) -> pd.DataFrame:
    """Build the profile table for a single column.

    The column is coerced to numeric first; values that cannot be parsed
    become missing.

    Args:
        dc: the Series to create Profile for
        ds_md: the Metadata dictionary of the DataFrame that is to be profiled
    Returns:
        A DataFrame indexed by "Kriterien" with one "Ergebnis" column
        holding the calculated description values.
    """
    dc = pd.to_numeric(dc, errors='coerce')
    row_count = len(dc)
    non_null = dc.dropna()
    # The min/max entries are only meaningful when at least one value exists.
    has_values = len(non_null) > 0
    null_values = dc.isna().sum()
    # Share of distinct values; NaN for an empty column instead of a
    # ZeroDivisionError.
    distinct_share = (len(non_null.unique()) / row_count
                      if row_count else float("nan"))
    # constancy defined as amount of most frequent value divided by amount of numbers in column
    constancy = dc.value_counts(normalize=True).max()

    # NOTE(review): idxmin()/idxmax() results are used via .date(), so the
    # index is presumably datetime-like -- confirm against callers.
    dc_stats = [
        ["Metadaten spezifisch für Spalte", column_metadata(dc.name, ds_md)],
        ["Anzahl an Zeilen", row_count],
        ["Anzahl an fehlenden Werten", null_values],
        ["Fehlende Werte (Prozent)", (null_values / len(dc)) * TO_PERCENT],
        ["Distinkte Werte (Prozent)", distinct_share * TO_PERCENT],
        ["Konstanz (Prozent)", constancy * TO_PERCENT],
        ["Mittelwert", format(dc.mean(), 'f')],
        [
            "Minimumwert (Jahr, Wert)",
            ({
                dc.idxmin().date(): format(dc.min(), 'f')
            } if has_values else "")
        ],
        [
            "Maximumwert (Jahr, Wert)",
            ({
                dc.idxmax().date(): format(dc.max(), 'f')
            } if has_values else "")
        ],
        ["Datenpunkte vorhanden für", check_is_consecutive(dc)],
    ]

    profile = pd.DataFrame(data=dc_stats, columns=["Kriterien", "Ergebnis"])
    profile.set_index("Kriterien", inplace=True)

    return profile
Example #40
0
def calculate_daily_scaling_factors(
    *,
    forecasted_daily_tests: pandas.Series,
    sparse_reported_totals: pandas.Series
) -> pandas.DataFrame:
    """ Scale the daily test counts per region coming from the Prophet forecast by the test count report
    from OurWorldInData, which is available before the real daily test counts are known.

    Parameters
    ----------
    forecasted_daily_tests: pandas.Series
        Series from the Prophet forecast containing the confirmed daily test counts
        sent from RKI privately as well as predicted test counts.
        Both data are scaled by the total reported tests by OurWorldInData (OWID).
    sparse_reported_totals : pandas.Series
        Series from OWID containing total test counts summarized for a period of time
        (mostly one week) for all of Germany. It is expected to contain NaN gaps in the data.
        The differences between this report and the forecast data will be used to make sure
        the total number of tests in the forecast matches the OWID data.

    Returns
    -------
    correction_factor: pandas.DataFrame
        Indexed like ``forecasted_daily_tests`` with the columns "sum_predicted",
        "diff_reported" and "scaling_factor"; the scaling factor is filled for
        all dates including the future.
    """
    assert isinstance(forecasted_daily_tests, pandas.Series)
    assert isinstance(sparse_reported_totals, pandas.Series)

    one_day = pandas.Timedelta("1D")
    factors = pandas.DataFrame(
        index=forecasted_daily_tests.index,
        columns=["sum_predicted", "diff_reported", "scaling_factor"]
    )
    report_dates = list(sparse_reported_totals.dropna().index)
    # Walk over consecutive pairs of report dates; each window covers the
    # day after the earlier report up to and including the later report.
    for previous_date, next_date in zip(report_dates[:-1], report_dates[1:]):
        window = slice(previous_date + one_day, next_date)
        # sum over the predictions in this interval
        factors.loc[window, ["sum_predicted"]] = forecasted_daily_tests.loc[window].sum()

        # diff of the reports
        earlier_total = float(sparse_reported_totals.loc[previous_date])
        later_total = float(sparse_reported_totals.loc[next_date])
        factors.loc[window, ["diff_reported"]] = later_total - earlier_total

    factors["scaling_factor"] = factors.diff_reported / factors.sum_predicted
    # extrapolate backwards at the beginning
    earliest = factors.dropna().iloc[0]
    factors.loc[:earliest.name, "scaling_factor"] = earliest.scaling_factor
    # continue into the future with the last known scaling factor
    latest = factors.dropna().iloc[-1]
    factors.loc[latest.name:, "scaling_factor"] = latest.scaling_factor
    return factors
def check_if_regex_feature(X: Series, regex: str) -> bool:
    """Return whether every sampled non-missing value of ``X`` matches ``regex``.

    Only columns whose raw type family is 'category' or 'object' are
    considered; anything else yields False.
    """
    if get_type_family_raw(X.dtype) not in ['category', 'object']:
        return False

    non_null = X.dropna()
    if len(non_null) > 100:
        # Sample to speed-up type inference
        candidates = non_null.sample(n=100, random_state=0)
    else:
        candidates = non_null
    return candidates.str.match(regex).all()
 def consecutive_wins_losses(self):
     '''
     Calculates the positive and negative runs in the trade series.

     Trades are ordered by their 'exit' column. Returns a tuple
     (positive_runs, negative_runs) holding the length of each run.
     '''
     trades = self.as_dataframe().sort_values(by = 'exit')
     outcome = sign(trades.base_return)

     def run_lengths(flags):
         # Pad the 0/1 flags with a zero at both ends so that every run
         # has a rising and a falling edge, then measure edge to edge.
         padded = Series(hstack(([0], (flags * 1).values, [0])))
         step = padded.diff()
         begins = Series(padded.where(step > 0).dropna().index.tolist())
         finishes = Series(padded.where(step < 0).dropna().index.tolist())
         return finishes - begins

     return (run_lengths(outcome > 0), run_lengths(outcome < 0))
    def test_datetime64_tz_dropna(self):
        """dropna removes NaT from naive and tz-aware datetime Series, keeping dtype."""
        first, third = '2011-01-01 10:00', '2011-01-03 10:00'

        # DatetimeBlock
        naive = Series([Timestamp(first), pd.NaT, Timestamp(third), pd.NaT])
        self.assert_series_equal(
            naive.dropna(),
            Series([Timestamp(first), Timestamp(third)], index=[0, 2]))

        # DatetimeBlockTZ
        zone = 'Asia/Tokyo'
        aware = pd.Series(
            pd.DatetimeIndex([first, pd.NaT, third, pd.NaT], tz=zone))
        self.assertEqual(aware.dtype, 'datetime64[ns, Asia/Tokyo]')
        kept = aware.dropna()
        wanted = Series([Timestamp(first, tz=zone),
                         Timestamp(third, tz=zone)],
                        index=[0, 2])
        self.assertEqual(kept.dtype, 'datetime64[ns, Asia/Tokyo]')
        self.assert_series_equal(kept, wanted)
 def daily_returns(self):
     '''
     Returns an unsorted and unordered Series of daily returns for all trades.
     Used for calculating daily or annualised statistics.

     The result is computed on first use and cached in self._daily_returns.
     '''
     if self._daily_returns is not None:
         return self._daily_returns

     frame = self.trade_frame(compacted = False, cumulative = False)
     # Flatten the frame column by column, then discard the missing entries.
     flattened = [value for column in frame for value in frame[column].tolist()]
     self._daily_returns = Series(flattened).dropna()
     return self._daily_returns
Example #45
0
    def test_isnull_for_inf_deprecated(self):
        """Setting 'mode.use_inf_as_null' warns, and inf is then treated as missing."""
        # gh-17115
        data = Series(['a', np.inf, np.nan, 1.0])
        with tm.assert_produces_warning(DeprecationWarning,
                                        check_stacklevel=False):
            pd.set_option('mode.use_inf_as_null', True)
            na_mask = data.isna()
            remaining = data.dropna()
            pd.reset_option('mode.use_inf_as_null')

        tm.assert_series_equal(na_mask, Series([False, True, True, False]))
        tm.assert_series_equal(remaining, Series(['a', 1.0], index=[0, 3]))
    def regression(self, data_ser, instructions_ser: pd.Series):
        """Derive a position change from a linear regression of open prices.

        Fits ``price = a * x + b`` over the bar positions, turns the slope
        into a sign-flipped, lookback-scaled value and applies the entry,
        exit and profit-take rules below. Later rules overwrite the
        ``new_weight`` chosen by earlier ones.

        NOTE(review): ``sm`` is presumably statsmodels.api -- confirm.

        Returns:
            Tuple ``(new_weight, stop_price, b, a, slope)``; ``new_weight``
            and ``stop_price`` stay NaN when no rule fires.
        """
        prices = data_ser.apply(lambda x: x.open)
        days_in_year = 252  # only referenced by the commented-out slope formula below
        X = range(len(prices))
        A = sm.add_constant(X)
        sd = prices.std()
        Y = prices.values
        profittake = 1.96  # multiple of the price standard deviation used for profit taking
        # Run regression y = ax + b
        results = sm.OLS(Y, A).fit()
        (b, a) = results.params
        # Normalized slope
        # slope = (a / b) * days_in_year  # Daily return regression * 1 year
        true_slope = (a / b) * self.lookback  # slope relative to the intercept, scaled by self.lookback
        slope = -true_slope # sign-flipped value that all rules below compare against
        # Currently how far away from regression line?
        delta = Y - (np.dot(a, X) + b)
        # Don't trade if the slope is near flat
        slope_min = 0.063 #0.252
        # Defaults: no weight change and no stop price
        new_weight = np.NaN
        stop_price = np.NaN
        # Net current position: sum of the risk of all non-missing instructions
        current_position = instructions_ser.dropna().apply(lambda x: x.risk).sum()
        # Position and slope have opposite signs: close the position
        if (current_position > 0 and slope < 0) or (current_position < 0 and 0 < slope):
            new_weight = -current_position

        # slope above the positive threshold
        if slope > slope_min:
            # Price crosses the regression line from below while flat
            if delta[-1] > 0 and delta[-2] < 0 and current_position == 0:
                stop_price = self.calculate_stop(data_ser, Direction.Short)
                new_weight = (-slope/10)
            # Profit take: price is more than profittake * sd above the line
            if delta[-1] > profittake * sd and current_position > 0:
                new_weight = -current_position

        # slope below the negative threshold
        if slope < -slope_min:
            # Price crosses the regression line from above while flat
            if delta[-1] < 0 and delta[-2] > 0 and current_position == 0:
                stop_price = self.calculate_stop(data_ser, Direction.Long)
                new_weight = (-slope/10)

            # Profit take: price is more than profittake * sd below the line
            if delta[-1] < - profittake * sd and current_position < 0:
                new_weight = -current_position

        return (new_weight, stop_price, b, a, slope)
Example #47
0
    def test_value_counts(self):
        """value_counts orders by frequency, ignores NaN and handles empty input."""
        letters = Series(['a', 'b', 'b', 'b', 'b', 'a', 'c', 'd', 'd', 'a'])
        assert_series_equal(letters.value_counts(),
                            Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c']))

        # handle NA's properly
        letters[5:7] = np.nan
        assert_series_equal(letters.value_counts(),
                            letters.dropna().value_counts())

        # an empty Series yields an empty result
        empty = Series({})
        assert_series_equal(empty.value_counts(), Series([]))
Example #48
0
def Main():
  """Search GitHub repositories by keyword and report on the first result page.

  Prompts for keywords (falling back to 'javascript' when nothing is typed),
  prints the percentage of each repository language on the first page, and
  saves a log-log scatter plot of stars against forks to
  reports/APIs/github/search_repositories.png.
  """
  client = github_helpers.authenticate()
  keywords = raw_input("Please, enter keywords to search repositories: ")
  # Test emptiness by value: `keywords is ''` compared object identity, which
  # only works by accident of string interning.
  if not keywords:
    keywords = 'javascript'
    print('No keywords provided. It will use the keyword: ' + keywords)
  search = client.search_repositories(keywords)
  first_page = search.get_page(0)

  languages = Series(r.language for r in first_page)
  languages = languages.dropna()
  languages.sort()

  percentages = (100.0 * languages.value_counts() / len(languages)).map('{:,.2f} %'.format)

  print('Languages percentage:')
  print(percentages)

  # Create plot. One is added to every value so that zero counts remain
  # representable on the logarithmic axes.
  x = [int(r.stargazers_count) + 1 for r in first_page]
  y = [int(r.forks) + 1 for r in first_page]

  area = [100 for r in first_page]
  names = [r.name for r in first_page]
  colors = np.random.rand(len(first_page))
  pl.scatter(x, y, s=area, c=colors, alpha=0.5)
  for name, stars, forks in zip(names, x, y):
    pl.annotate(name, (stars, forks), fontsize=2)
  pl.title("All values are with addition of 1 (for the logarithmic scale)")
  pl.xlabel("Stars")
  pl.xscale("log")
  pl.yscale("log")
  pl.ylabel("Forks")
  pl.tight_layout()
  filepath = 'reports/APIs/github'
  if not os.path.isdir(filepath):
    os.makedirs(filepath)
  filepath += '/search_repositories.png'
  # NOTE(review): `figsize` does not look like a savefig option (figure size is
  # normally set on the figure itself) - confirm against the matplotlib version in use.
  pl.savefig(filepath, figsize=(1020, 1020), dpi=300)
  pl.close()
  print('A chart with high resolution and small font size (to minimize overlaps) was created at ' +
    filepath)
Example #49
0
def track(frames):
    """Track the orientation of a wire through many frames.

    Parameters
    ----------
    frames : an iterable, such as a list of images or a mr.Video
        object

    Returns
    -------
    Series of angles in degrees, indexed by frame (1-based). Frames for
    which ``analyze`` yields a null value are dropped.
    """
    count = getattr(frames, 'count', None)
    if count is None or callable(count):
        # Plain sequences either lack `count` or expose it as a method
        # (list.count), which cannot be used as a frame total; use the length.
        count = len(frames)
    data = Series(index=range(1, count + 1))
    for frame_no, img in enumerate(frames, 1):
        data[frame_no] = analyze(img)
    return data.dropna()  # Discard unused rows.
Example #50
0
def ema(arg, window):
    """EMA: Exponential Moving Average.

    Missing values are dropped before smoothing, so the result is indexed by
    the labels of the non-null observations only.

    Params:
        arg (Series): Time series data such as close prices.

        window (int): Moving average window size.

    Returns:
        Series: Exponential moving average of arg, named "ema<window>".
        Empty when arg holds no non-null values.
    """
    # Keep each surviving value attached to its own label; re-attaching the
    # full original index to the shortened data raised a length mismatch.
    clean = arg.dropna()
    w = 2.0 / (window + 1)
    smoothed = []
    # Iterate over raw values so the recursion is positional and does not
    # depend on what the index labels happen to be.
    for value in clean.values:
        if smoothed:
            smoothed.append(value * w + smoothed[-1] * (1.0 - w))
        else:
            smoothed.append(value)  # seed with the first observation

    return Series(data = smoothed, name = "ema" + str(window), index = clean.index)
Example #51
0
    def test_comparison_operators_with_nas(self):
        """Comparing an object Series holding NaNs matches the NaN-free result.

        Missing slots must compare as True for 'ne' and False for every
        other operator.
        """
        ser = Series(bdate_range('1/1/2000', periods=10), dtype=object)
        ser[::2] = np.nan

        # test that comparisons work
        for op in ('lt', 'le', 'gt', 'ge', 'eq', 'ne'):
            compare = getattr(operator, op)
            val = ser[5]
            result = compare(ser, val)

            # Compare only the non-null values, then stretch back over the
            # full index and fill the gaps with the operator's NaN outcome.
            na_outcome = (op == 'ne')
            dense = compare(ser.dropna(), val)
            expected = dense.reindex(ser.index).fillna(na_outcome).astype(bool)

            assert_series_equal(result, expected)
Example #52
0
    def test_dropEmptyRows(self):
        """DataFrame.dropna(how='all') drops all-NaN rows, in place or as a copy.

        Builds a single-column frame whose first five rows are NaN and checks
        that the result matches Series.dropna on the same data, both with and
        without an explicit ``subset``.
        """
        N = len(self.frame.index)
        mat = random.randn(N)
        mat[:5] = nan

        frame = DataFrame({'foo': mat}, index=self.frame.index)
        original = Series(mat, index=self.frame.index, name='foo')
        expected = original.dropna()
        # Separate copies so each in-place call starts from untouched data.
        inplace_frame1, inplace_frame2 = frame.copy(), frame.copy()

        smaller_frame = frame.dropna(how='all')
        # check that original was preserved
        assert_series_equal(frame['foo'], original)
        inplace_frame1.dropna(how='all', inplace=True)
        assert_series_equal(smaller_frame['foo'], expected)
        assert_series_equal(inplace_frame1['foo'], expected)

        # Same checks with the column named explicitly through `subset`.
        smaller_frame = frame.dropna(how='all', subset=['foo'])
        inplace_frame2.dropna(how='all', subset=['foo'], inplace=True)
        assert_series_equal(smaller_frame['foo'], expected)
        assert_series_equal(inplace_frame2['foo'], expected)
Example #53
0
    def test_comparison_operators_with_nas(self):
        """Comparison and boolean operators on an object Series containing NaNs.

        Part one checks the six rich comparisons against the result computed
        on the NaN-free data; part two checks that and/or/xor of two
        comparison results are False wherever the input was NaN.
        """
        s = Series(bdate_range('1/1/2000', periods=10), dtype=object)
        s[::2] = np.nan

        # test that comparisons work
        ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne']
        for op in ops:
            val = s[5]

            f = getattr(operator, op)
            result = f(s, val)

            expected = f(s.dropna(), val).reindex(s.index)

            # Missing slots count as "not equal" and fail every other comparison.
            if op == 'ne':
                expected = expected.fillna(True).astype(bool)
            else:
                expected = expected.fillna(False).astype(bool)

            assert_series_equal(result, expected)

            # TODO: the reflected form (scalar on the left) is disabled below.
            # result = f(val, s)
            # expected = f(val, s.dropna()).reindex(s.index)
            # assert_series_equal(result, expected)

            # boolean &, |, ^ should work with object arrays and propagate NAs

        ops = ['and_', 'or_', 'xor']
        mask = s.isnull()
        for bool_op in ops:
            f = getattr(operator, bool_op)

            # Fill NaNs with the first element so the reference comparisons
            # are well defined; the NaN positions are reset to False below.
            filled = s.fillna(s[0])

            result = f(s < s[9], s > s[3])

            expected = f(filled < filled[9], filled > filled[3])
            expected[mask] = False
            assert_series_equal(result, expected)
Example #54
0
def tile(s, bins, labels=False, retbins=True, infinite=True):
    """Group ``s`` by the numeric range each value falls into.

    ``bins`` is either a non-iterable value handed to ``cut`` or an iterable
    of increasing bin edges handed to ``inf_bins_to_cuts``. The resulting
    edges are expanded into ``NumRange`` objects, including an open range
    below the first edge and one above the last, and ``s`` is grouped by
    those ranges.

    Returns the result of ``s.groupby(...)`` keyed by ``NumRange``.
    Raises ValueError if an iterable ``bins`` is not increasing.
    """
    # 
    if not np.iterable(bins):
        # NOTE(review): the two-value unpacking presumably needs retbins=True - confirm.
        ind, label = cut(s, bins, retbins=retbins, labels=labels)
        # for now, pandas base cut doesn't support infinite ranges
        # so it bases first bin at 0 where we base on 1, and 0 is 
        # [-inf, first] for us
        ind = ind + 1
    else:
        bins = np.asarray(bins)
        if (np.diff(bins) < 0).any():
            raise ValueError('bins must increase monotonically.')
        ind, label = inf_bins_to_cuts(s, bins)
    

    # build out ranges
    # ranges[0] is (-inf, first edge]; ranges[-1] is (last edge, inf).
    ranges = []
    ranges.append(NumRange(-inf, label[0]))
    for x in range(len(label)-1):
       nr = NumRange(label[x], label[x+1]) 
       ranges.append(nr)
    ranges.append(NumRange(label[-1], inf))

    if not infinite:
        # Blank out values that landed in either open-ended range.
        # NOTE(review): len(bins) fails when bins is not iterable, and writing
        # NaN assumes `ind` has a float dtype - verify both.
        na_mask = (ind == 0) | (ind == len(bins))
        np.putmask(ind, na_mask, np.nan)

    # redo the intindex as range index
    new_index = ind.astype(object)
    ind = Series(ind)


    # Swap each integer bin position for its NumRange; NaN slots are skipped.
    for k, v in ind.dropna().astype(int).iteritems():
        newr = ranges[v]
        new_index[k] = newr

    grouped = s.groupby(new_index, sort=True)
    return grouped
	
## drop(labels) drops the elements with the selected labels from a Series.
# The index deliberately contains the label "a" twice.
s1 = Series(arange(1.0,6),index=["a","a","b","c","d"])
s1
# Ask for every element labelled "a" to be dropped.
s1.drop("a")
	################

## dropna() is similar to drop(), except that it drops only null values (NaN or similar).

	
# Two Series whose indexes share only the label "c".
s1 = Series(arange(1.0,4.0),index=["a","b","c"])
s2 = Series(arange(1.0,4.0),index=["c","d","e"])
# Adding them produces the Series whose null entries are dropped below.
s3 = s1 + s2
s3
s3.dropna()

##############################################################################

#### fillna

##  fillna(value) fills all null values in a series with a specific value.


# Same setup as the dropna example: the indexes overlap only on "c".
s1 = Series(arange(1.0,4.0),index=["a","b","c"])
s2 = Series(arange(1.0,4.0),index=["c","d","e"])
s3 = s1 + s2
# Replace every null entry of the sum with 1.0 instead of dropping it.
s3.fillna(1.0)

################
Example #56
0
 def test_axis_alias(self):
     """The axis name 'rows' is accepted wherever 'index' is, and maps to axis 0."""
     s = Series([1, 2, np.nan])
     assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))
     # 1 + 2 once the NaN has been dropped.
     assert s.dropna().sum('rows') == 3
     assert s._get_axis_number('rows') == 0
     assert s._get_axis_name('rows') == 'index'
Example #57
0
# A Series indexed by tuples.
s1 = Series([7.3, -2.5, 3.4, 1.5], index=[(1,2), (2,3), (3,4), (4,5)])
s1
# A Series indexed by strings.
s2 = Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])

# String data with one missing entry, used to demonstrate isnull() and dropna().
string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
string_data
string_data.isnull()
string_data.dropna()

# 3 ways of doing pairwise correlation between two data frames
# using pandas/python

import pandas as pd
import numpy as np
from pandas import Series, DataFrame
# df2 holds two label columns (letters and digits) that together form the
# two-level row index shared by df1 and df3.
df2 = DataFrame([list("aabbb"), list("12123")]).T
# Columns are selected by position with .iloc; the .ix indexer used here
# before is no longer available in current pandas.
df1 = DataFrame(np.random.randn(15).reshape(5,3), index=[df2.iloc[:, 0], df2.iloc[:, 1]], columns=list("def"))
df3 = DataFrame(np.random.randn(20).reshape(5,4), index=[df2.iloc[:, 0], df2.iloc[:, 1]], columns=list("ghij"))
type(df3)

def pairwise_corr(df1, df2):
    """
    Pairwise correlation between columns of two data frames

    NOTE(review): only the start of the body is visible here - it creates an
    empty accumulator and returns nothing - so the documented return type
    below is the stated intent, not behaviour shown by this code.

    :param df1: first data frame
    :type df1: pandas.core.frame.DataFrame
    :param df2: second data frame
    :type df2: pandas.core.frame.DataFrame
    :return: presumably the pairwise correlations - confirm against the full implementation
    :rtype: pandas.core.frame.DataFrame
    """
    # Accumulator for results; nothing is appended in the visible code.
    res = []
Example #58
0
class TestMoments(tm.TestCase):

    _multiprocess_can_split_ = True

    _nan_locs = np.arange(20, 40)
    _inf_locs = np.array([])

    def setUp(self):
        """Build the random ndarray, Series and DataFrame fixtures.

        ``self.arr`` holds N random values with NaN written at ``_nan_locs``;
        ``self.series`` wraps a copy of it on a business-day index, and
        ``self.frame`` is an N x K random frame on the same index.
        """
        arr = randn(N)
        # np.nan is the canonical spelling; the np.NaN alias is gone in NumPy 2.0.
        arr[self._nan_locs] = np.nan

        self.arr = arr
        self.rng = bdate_range(datetime(2009, 1, 1), periods=N)

        # Copy so that changes to the Series never leak into self.arr.
        self.series = Series(arr.copy(), index=self.rng)

        self.frame = DataFrame(randn(N, K), index=self.rng,
                               columns=np.arange(K))

    def test_centered_axis_validation(self):
        """Centered rolling_mean accepts valid axes and rejects invalid ones."""
        ones_1d = Series(np.ones(10))
        ones_2d = DataFrame(np.ones((10, 10)))

        # Series: axis 0 is the only valid axis.
        mom.rolling_mean(ones_1d, 3, center=True, axis=0)
        self.assertRaises(ValueError, mom.rolling_mean, ones_1d, 3,
                          center=True, axis=1)

        # DataFrame: axes 0 and 1 are valid, axis 2 is not.
        for valid_axis in (0, 1):
            mom.rolling_mean(ones_2d, 3, center=True, axis=valid_axis)
        self.assertRaises(ValueError, mom.rolling_mean, ones_2d, 3,
                          center=True, axis=2)

    def test_rolling_sum(self):
        """rolling_sum agrees with np.sum applied to each window."""
        self._check_moment_func(mom.rolling_sum, np.sum)

    def test_rolling_count(self):
        """rolling_count agrees with a count of the finite values per window."""
        def count_finite(window_values):
            return np.isfinite(window_values).astype(float).sum()

        # The checks are run without min_periods, without expecting NaNs to
        # survive in the output, and with 0 as the fill value.
        self._check_moment_func(mom.rolling_count, count_finite,
                                has_min_periods=False, preserve_nan=False,
                                fill_value=0)

    def test_rolling_mean(self):
        """rolling_mean agrees with np.mean applied to each window."""
        self._check_moment_func(mom.rolling_mean, np.mean)

    def test_cmov_mean(self):
        """Centered rolling_mean matches scikits.timeseries cmov_mean.

        Skipped when scipy or scikits.timeseries is unavailable.
        """
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_mean
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        vals = np.random.randn(10)
        xp = cmov_mean(vals, 5)

        rs = mom.rolling_mean(vals, 5, center=True)
        # The reference is a masked array: its unmasked values line up with
        # rs[2:-2] and its mask with the NaNs at both ends of rs.
        assert_almost_equal(xp.compressed(), rs[2:-2])
        assert_almost_equal(xp.mask, np.isnan(rs))

        # Series input must give the same values as ndarray input.
        xp = Series(rs)
        rs = mom.rolling_mean(Series(vals), 5, center=True)
        assert_series_equal(xp, rs)

    def test_cmov_window(self):
        """Centered boxcar rolling_window matches scikits.timeseries cmov_window.

        Skipped when scipy or scikits.timeseries is unavailable.
        """
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        vals = np.random.randn(10)
        xp = cmov_window(vals, 5, 'boxcar')

        rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
        # The reference is a masked array: its unmasked values line up with
        # rs[2:-2] and its mask with the NaNs at both ends of rs.
        assert_almost_equal(xp.compressed(), rs[2:-2])
        assert_almost_equal(xp.mask, np.isnan(rs))

        # Series input must give the same values as ndarray input.
        xp = Series(rs)
        rs = mom.rolling_window(Series(vals), 5, 'boxcar', center=True)
        assert_series_equal(xp, rs)

    def test_cmov_window_corner(self):
        """rolling_window corner cases: all-NaN, empty, and too-short input."""
        tm._skip_if_no_scipy()
        # NOTE(review): cmov_window is imported but not used in this test, so
        # the import only acts as a skip condition.
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        # all nan
        vals = np.empty(10, dtype=float)
        vals.fill(np.nan)
        rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
        self.assertTrue(np.isnan(rs).all())

        # empty
        vals = np.array([])
        rs = mom.rolling_window(vals, 5, 'boxcar', center=True)
        self.assertEqual(len(rs), 0)

        # shorter than window
        vals = np.random.randn(5)
        rs = mom.rolling_window(vals, 10, 'boxcar')
        self.assertTrue(np.isnan(rs).all())
        self.assertEqual(len(rs), 5)

    def test_cmov_window_frame(self):
        """Centered boxcar rolling_window on a DataFrame matches cmov_window.

        Skipped when scipy or scikits.timeseries is unavailable.
        """
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        # DataFrame
        vals = np.random.randn(10, 2)
        xp = cmov_window(vals, 5, 'boxcar')
        rs = mom.rolling_window(DataFrame(vals), 5, 'boxcar', center=True)
        assert_frame_equal(DataFrame(xp), rs)

    def test_cmov_window_na_min_periods(self):
        """With NaNs and min_periods, boxcar rolling_window equals rolling_mean."""
        tm._skip_if_no_scipy()
        # NOTE(review): cmov_window is imported but not used in this test, so
        # the import only acts as a skip condition.
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        # min_periods
        vals = Series(np.random.randn(10))
        vals[4] = np.nan
        vals[8] = np.nan

        xp = mom.rolling_mean(vals, 5, min_periods=4, center=True)
        rs = mom.rolling_window(vals, 5, 'boxcar', min_periods=4, center=True)

        assert_series_equal(xp, rs)

    def test_cmov_window_regular(self):
        """Parameter-free window types match scikits.timeseries cmov_window.

        Skipped when scipy or scikits.timeseries is unavailable.
        """
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        for window_name in ('triang', 'blackman', 'hamming', 'bartlett',
                            'bohman', 'blackmanharris', 'nuttall',
                            'barthann'):
            # Fresh random sample for every window type.
            sample = np.random.randn(10)
            reference = Series(cmov_window(sample, 5, window_name))
            computed = mom.rolling_window(Series(sample), 5, window_name,
                                          center=True)
            assert_series_equal(reference, computed)

    def test_cmov_window_special(self):
        """Parameterised window types match scikits.timeseries cmov_window.

        Skipped when scipy or scikits.timeseries is unavailable.
        """
        tm._skip_if_no_scipy()
        try:
            from scikits.timeseries.lib import cmov_window
        except ImportError:
            raise nose.SkipTest("no scikits.timeseries")

        # win_types[i] takes the keyword parameters listed in kwds[i].
        win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian']
        kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.},
                {'width': 0.5}]

        for wt, k in zip(win_types, kwds):
            vals = np.random.randn(10)
            # The reference takes the parameters positionally in a tuple.
            # NOTE(review): this relies on the order of k.values(); harmless
            # here because the only two-key dict has equal values.
            xp = cmov_window(vals, 5, (wt,) + tuple(k.values()))

            rs = mom.rolling_window(Series(vals), 5, wt, center=True,
                                    **k)
            assert_series_equal(Series(xp), rs)

    def test_rolling_median(self):
        """rolling_median agrees with np.median applied to each window."""
        self._check_moment_func(mom.rolling_median, np.median)

    def test_rolling_min(self):
        """rolling_min matches np.min and validates min_periods against window."""
        self._check_moment_func(mom.rolling_min, np.min)

        # A window wider than the data gives the running minimum, which is 1
        # everywhere for an increasing sequence that starts at 1.
        increasing = np.array([1, 2, 3, 4, 5])
        running_min = mom.rolling_min(increasing, window=100, min_periods=1)
        assert_almost_equal(running_min, np.ones(len(increasing)))

        # min_periods larger than the window is rejected.
        short = np.array([1, 2, 3])
        self.assertRaises(ValueError, mom.rolling_min, short,
                          window=3, min_periods=5)

    def test_rolling_max(self):
        """rolling_max matches np.max and validates min_periods against window."""
        self._check_moment_func(mom.rolling_max, np.max)

        # A window wider than the data gives the running maximum, which for
        # an increasing sequence is the sequence itself.
        increasing = np.array([1, 2, 3, 4, 5])
        running_max = mom.rolling_max(increasing, window=100, min_periods=1)
        assert_almost_equal(increasing, running_max)

        # min_periods larger than the window is rejected.
        short = np.array([1, 2, 3])
        self.assertRaises(ValueError, mom.rolling_max, short,
                          window=3, min_periods=5)

    def test_rolling_quantile(self):
        """rolling_quantile matches a simple sort-and-index reference for several q."""
        qs = [.1, .5, .9]

        def scoreatpercentile(a, per):
            # Reference quantile: sort, then take the element at the
            # truncated position per * (n - 1), with no interpolation.
            values = np.sort(a, axis=0)

            idx = per / 1. * (values.shape[0] - 1)
            return values[int(idx)]

        for q in qs:
            # f and alt close over the loop variable q; that is safe because
            # both are used within the same iteration that defines them.
            def f(x, window, min_periods=None, freq=None, center=False):
                return mom.rolling_quantile(x, window, q,
                                            min_periods=min_periods,
                                            freq=freq,
                                            center=center)

            def alt(x):
                return scoreatpercentile(x, q)

            self._check_moment_func(f, alt)

    def test_rolling_apply(self):
        """rolling_apply handles empty input and reproduces a rolling mean."""
        # An empty Series comes back unchanged.
        ser = Series([])
        assert_series_equal(
            ser, mom.rolling_apply(ser, 10, lambda x: x.mean()))

        def roll_mean(x, window, min_periods=None, freq=None, center=False):
            # Rolling mean built on rolling_apply; the applied function
            # averages only the finite values in each window.
            return mom.rolling_apply(x, window,
                                     lambda x: x[np.isfinite(x)].mean(),
                                     min_periods=min_periods,
                                     freq=freq,
                                     center=center)
        self._check_moment_func(roll_mean, np.mean)

    def test_rolling_apply_out_of_bounds(self):
        """rolling_apply works when the window is longer than the data (#1850)."""
        # #1850
        arr = np.arange(4)

        # it works!
        # With the default min_periods no window is ever complete: all NaN.
        result = mom.rolling_apply(arr, 10, np.sum)
        self.assertTrue(isnull(result).all())

        # With min_periods=1 every window spans from the start of the data to
        # the current element, so the rolling sum is the cumulative sum. The
        # previous check compared `result` with itself and could never fail.
        result = mom.rolling_apply(arr, 10, np.sum, min_periods=1)
        expected = arr.cumsum().astype(float)
        assert_almost_equal(result, expected)

    def test_rolling_std(self):
        """rolling_std matches np.std for the default ddof=1 and for ddof=0."""
        self._check_moment_func(mom.rolling_std,
                                lambda x: np.std(x, ddof=1))
        self._check_moment_func(functools.partial(mom.rolling_std, ddof=0),
                                lambda x: np.std(x, ddof=0))

    def test_rolling_std_1obs(self):
        """rolling_std with a single valid observation in the window."""
        # Window of one observation: the expected result is all zeros.
        result = mom.rolling_std(np.array([1., 2., 3., 4., 5.]),
                                 1, min_periods=1)
        expected = np.zeros(5)

        assert_almost_equal(result, expected)

        # At position 2 only one value in the window is non-NaN, which is
        # below min_periods=2, so the result there must be NaN.
        result = mom.rolling_std(np.array([np.nan, np.nan, 3., 4., 5.]),
                                 3, min_periods=2)
        self.assertTrue(np.isnan(result[2]))

    def test_rolling_std_neg_sqrt(self):
        """rolling_std and ewmstd stay finite on near-constant input."""
        # unit test from Bottleneck

        # Test move_nanstd for neg sqrt.

        # One larger value followed by four identical small ones.
        a = np.array([0.0011448196318903589,
                      0.00028718669878572767,
                      0.00028718669878572767,
                      0.00028718669878572767,
                      0.00028718669878572767])
        b = mom.rolling_std(a, window=3)
        self.assertTrue(np.isfinite(b[2:]).all())

        b = mom.ewmstd(a, span=3)
        self.assertTrue(np.isfinite(b[2:]).all())

    def test_rolling_var(self):
        """rolling_var matches np.var for the default ddof=1 and for ddof=0.

        The ddof=1 case also runs the stability check (data shifted by 1e9).
        """
        self._check_moment_func(mom.rolling_var,
                                lambda x: np.var(x, ddof=1),
                                test_stable=True)
        self._check_moment_func(functools.partial(mom.rolling_var, ddof=0),
                                lambda x: np.var(x, ddof=0))

    def test_rolling_skew(self):
        """rolling_skew matches scipy.stats.skew with bias=False; skipped without scipy."""
        try:
            from scipy.stats import skew
        except ImportError:
            raise nose.SkipTest('no scipy')
        self._check_moment_func(mom.rolling_skew,
                                lambda x: skew(x, bias=False))

    def test_rolling_kurt(self):
        """rolling_kurt matches scipy.stats.kurtosis with bias=False; skipped without scipy."""
        try:
            from scipy.stats import kurtosis
        except ImportError:
            raise nose.SkipTest('no scipy')
        self._check_moment_func(mom.rolling_kurt,
                                lambda x: kurtosis(x, bias=False))

    def test_fperr_robustness(self):
        """Rolling sum/mean/var never go negative through floating-point error.

        Uses a fixed binary dump of little-endian float64 values (#2114) plus
        a small hand-written array (#2527). Skipped on Python 3.
        """
        # TODO: remove this once python 2.5 out of picture
        if PY3:
            raise nose.SkipTest("doesn't work on python 3")

        # #2114
        data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f<UUUUUU\x13@q\x1c\xc7q\x1c\xc7\xf9?\xf6\x12\xdaKh/\xe1?\xf2\xc3"e\xe0\xe9\xc6?\xed\xaf\x831+\x8d\xae?\xf3\x1f\xad\xcb\x1c^\x94?\x15\x1e\xdd\xbd>\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7<pj\xa0>m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>'

        # Interpret the bytes as little-endian float64 and convert to native
        # byte order on big-endian hosts.
        arr = np.frombuffer(data, dtype='<f8')
        if sys.byteorder != "little":
            arr = arr.byteswap().newbyteorder()

        result = mom.rolling_sum(arr, 2)
        self.assertTrue((result[1:] >= 0).all())

        result = mom.rolling_mean(arr, 2)
        self.assertTrue((result[1:] >= 0).all())

        result = mom.rolling_var(arr, 2)
        self.assertTrue((result[1:] >= 0).all())

        # #2527, ugh
        # The last element is exactly zero: the rolling mean there must not
        # come out with the wrong sign.
        arr = np.array([0.00012456, 0.0003, 0])
        result = mom.rolling_mean(arr, 1)
        self.assertTrue(result[-1] >= 0)

        result = mom.rolling_mean(-arr, 1)
        self.assertTrue(result[-1] <= 0)

    def _check_moment_func(self, func, static_comp, window=50,
                           has_min_periods=True,
                           has_center=True,
                           has_time_rule=True,
                           preserve_nan=True,
                           fill_value=None,
                           test_stable=False):
        """Run the ndarray checks and the Series/DataFrame checks for ``func``.

        ``func`` is the rolling function under test and ``static_comp`` the
        plain reduction it is compared against. ``window``, ``preserve_nan``
        and ``test_stable`` are forwarded only to ``_check_ndarray``;
        ``has_time_rule`` only to ``_check_structures``; the remaining flags
        go to both.
        """

        self._check_ndarray(func, static_comp, window=window,
                            has_min_periods=has_min_periods,
                            preserve_nan=preserve_nan,
                            has_center=has_center,
                            fill_value=fill_value,
                            test_stable=test_stable)

        self._check_structures(func, static_comp,
                               has_min_periods=has_min_periods,
                               has_time_rule=has_time_rule,
                               fill_value=fill_value,
                               has_center=has_center)

    def _check_ndarray(self, func, static_comp, window=50,
                       has_min_periods=True,
                       preserve_nan=True,
                       has_center=True,
                       fill_value=None,
                       test_stable=False,
                       test_window=True):
        """Check a rolling function against ``static_comp`` on plain ndarrays.

        Parameters
        ----------
        func : rolling function called as ``func(arr, window, **kwargs)``
        static_comp : reduction applied to one window's worth of data
        window : window length used for the checks on ``self.arr``
        has_min_periods : whether ``func`` accepts ``min_periods``
        preserve_nan : whether NaNs at ``self._nan_locs`` must stay NaN
        has_center : whether ``func`` accepts ``center``
        fill_value : value expected in the trailing gap of a centered
            result, or None when that gap is expected to be NaN
        test_stable : also run on data shifted by 1e9
        test_window : also check a window longer than the array (#7297)
        """

        result = func(self.arr, window)
        # The last output must equal the reduction over the last `window`
        # values (this slice used to be hard-coded to 50).
        assert_almost_equal(result[-1],
                            static_comp(self.arr[-window:]))

        if preserve_nan:
            self.assertTrue(np.isnan(result[self._nan_locs]).all())

        # excluding NaNs correctly
        arr = randn(50)
        arr[:10] = np.nan
        arr[-10:] = np.nan

        if has_min_periods:
            result = func(arr, 50, min_periods=30)
            assert_almost_equal(result[-1], static_comp(arr[10:-10]))

            # min_periods is working correctly
            result = func(arr, 20, min_periods=15)
            self.assertTrue(np.isnan(result[23]))
            self.assertFalse(np.isnan(result[24]))

            self.assertFalse(np.isnan(result[-6]))
            self.assertTrue(np.isnan(result[-5]))

            arr2 = randn(20)
            result = func(arr2, 10, min_periods=5)
            self.assertTrue(isnull(result[3]))
            self.assertTrue(notnull(result[4]))

            # min_periods=0
            result0 = func(arr, 20, min_periods=0)
            result1 = func(arr, 20, min_periods=1)
            assert_almost_equal(result0, result1)
        else:
            result = func(arr, 50)
            assert_almost_equal(result[-1], static_comp(arr[10:-10]))

        if has_center:
            if has_min_periods:
                result = func(arr, 20, min_periods=15, center=True)
                expected = func(arr, 20, min_periods=15)
            else:
                result = func(arr, 20, center=True)
                expected = func(arr, 20)

            # Centering a 20-wide window shifts the output back by 9 slots.
            assert_almost_equal(result[1], expected[10])
            if fill_value is None:
                self.assertTrue(np.isnan(result[-9:]).all())
            else:
                # Compare with the caller's fill value instead of a literal 0.
                self.assertTrue((result[-9:] == fill_value).all())
            if has_min_periods:
                self.assertTrue(np.isnan(expected[23]))
                self.assertTrue(np.isnan(result[14]))
                self.assertTrue(np.isnan(expected[-5]))
                self.assertTrue(np.isnan(result[-14]))

        if test_stable:
            result = func(self.arr + 1e9, window)
            assert_almost_equal(result[-1],
                                static_comp(self.arr[-window:] + 1e9))

        # Test window larger than array, #7297
        if test_window:
            if has_min_periods:
                for minp in (0, len(self.arr)-1, len(self.arr)):
                    result = func(self.arr, len(self.arr)+1, min_periods=minp)
                    expected = func(self.arr, len(self.arr), min_periods=minp)
                    nan_mask = np.isnan(result)
                    self.assertTrue(np.array_equal(nan_mask,
                                                   np.isnan(expected)))
                    nan_mask = ~nan_mask
                    assert_almost_equal(result[nan_mask], expected[nan_mask])
            else:
                result = func(self.arr, len(self.arr)+1)
                expected = func(self.arr, len(self.arr))
                nan_mask = np.isnan(result)
                self.assertTrue(np.array_equal(nan_mask, np.isnan(expected)))
                nan_mask = ~nan_mask
                assert_almost_equal(result[nan_mask], expected[nan_mask])




    def _check_structures(self, func, static_comp,
                          has_min_periods=True, has_time_rule=True,
                          has_center=True,
                          fill_value=None):
        """Check a rolling function on the Series and DataFrame fixtures.

        Verifies the return types, optionally the ``freq`` handling
        (``has_time_rule``), and optionally that ``center=True`` equals the
        uncentered result shifted back by half a window (``has_center``).
        """

        series_result = func(self.series, 50)
        tm.assert_isinstance(series_result, Series)

        frame_result = func(self.frame, 50)
        self.assertEqual(type(frame_result), DataFrame)

        # check time_rule works
        if has_time_rule:
            win = 25
            minp = 10

            # Feed every other observation and let freq='B' resample it.
            if has_min_periods:
                series_result = func(self.series[::2], win, min_periods=minp,
                                     freq='B')
                frame_result = func(self.frame[::2], win, min_periods=minp,
                                    freq='B')
            else:
                series_result = func(self.series[::2], win, freq='B')
                frame_result = func(self.frame[::2], win, freq='B')

            # The last window covers the final 25 business days.
            last_date = series_result.index[-1]
            prev_date = last_date - 24 * datetools.bday

            trunc_series = self.series[::2].truncate(prev_date, last_date)
            trunc_frame = self.frame[::2].truncate(prev_date, last_date)

            assert_almost_equal(series_result[-1], static_comp(trunc_series))

            assert_almost_equal(frame_result.xs(last_date),
                                trunc_frame.apply(static_comp))

        if has_center:
            # Expected centered output: the plain result shifted back 12 slots
            # (half of the 25-wide window).
            if has_min_periods:
                minp = 10
                series_xp = func(self.series, 25, min_periods=minp).shift(-12)
                frame_xp = func(self.frame, 25, min_periods=minp).shift(-12)

                series_rs = func(self.series, 25, min_periods=minp,
                                 center=True)
                frame_rs = func(self.frame, 25, min_periods=minp,
                                center=True)

            else:
                series_xp = func(self.series, 25).shift(-12)
                frame_xp = func(self.frame, 25).shift(-12)

                series_rs = func(self.series, 25, center=True)
                frame_rs = func(self.frame, 25, center=True)

            # shift() leaves NaN in the vacated slots; replace them when the
            # function under test is expected to fill with a value instead.
            if fill_value is not None:
                series_xp = series_xp.fillna(fill_value)
                frame_xp = frame_xp.fillna(fill_value)
            assert_series_equal(series_xp, series_rs)
            assert_frame_equal(frame_xp, frame_rs)

    def test_ewma(self):
        """ewma: generic checks, unit-impulse mass, and adjust/ignore_na values."""
        self._check_ew(mom.ewma)

        # A single unit impulse: the unadjusted weights should sum to about 1.
        arr = np.zeros(1000)
        arr[5] = 1
        result = mom.ewma(arr, span=100, adjust=False).sum()
        self.assertTrue(np.abs(result - 1) < 1e-2)

        s = Series([1.0, 2.0, 4.0, 8.0])
        
        # With no NaNs in the input, ignore_na must make no difference.
        expected = Series([1.0, 1.6, 2.736842, 4.923077])
        for f in [lambda s: mom.ewma(s, com=2.0, adjust=True),
                  lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=False),
                  lambda s: mom.ewma(s, com=2.0, adjust=True, ignore_na=True),
                 ]:
            result = f(s)
            assert_series_equal(result, expected)

        expected = Series([1.0, 1.333333, 2.222222, 4.148148])
        for f in [lambda s: mom.ewma(s, com=2.0, adjust=False),
                  lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=False),
                  lambda s: mom.ewma(s, com=2.0, adjust=False, ignore_na=True),
                 ]:
            result = f(s)
            assert_series_equal(result, expected)

    def test_ewma_nan_handling(self):
        """ewma with NaNs: constant input stays constant; weights follow GH 7603.

        Each case lists the weight every observation carries in the final
        average, for all combinations of ``adjust`` and ``ignore_na``.
        """
        s = Series([1.] + [np.nan] * 5 + [1.])
        result = mom.ewma(s, com=5)
        assert_almost_equal(result, [1.] * len(s))

        # Leading NaNs stay NaN; later gaps carry the last value forward.
        s = Series([np.nan] * 2 + [1.] + [np.nan] * 2 + [1.])
        result = mom.ewma(s, com=5)
        assert_almost_equal(result, [np.nan] * 2 + [1.] * 4)

        # GH 7603
        s0 = Series([np.nan, 1., 101.])
        s1 = Series([1., np.nan, 101.])
        s2 = Series([np.nan, 1., np.nan, np.nan, 101., np.nan])
        com = 2.
        alpha = 1. / (1. + com)

        def simple_wma(s, w):
            # Reference: running weighted mean with explicit weights, carried
            # forward over NaN positions.
            return (s.multiply(w).cumsum() / w.cumsum()).fillna(method='ffill')

        # Tuples are (series, adjust, ignore_na, expected weights).
        for (s, adjust, ignore_na, w) in [
                (s0, True, False, [np.nan, (1.0 - alpha), 1.]),
                (s0, True, True, [np.nan, (1.0 - alpha), 1.]),
                (s0, False, False, [np.nan, (1.0 - alpha), alpha]),
                (s0, False, True, [np.nan, (1.0 - alpha), alpha]),
                (s1, True, False, [(1.0 - alpha)**2, np.nan, 1.]),
                (s1, True, True, [(1.0 - alpha), np.nan, 1.]),
                (s1, False, False, [(1.0 - alpha)**2, np.nan, alpha]),
                (s1, False, True, [(1.0 - alpha), np.nan, alpha]),
                (s2, True, False, [np.nan, (1.0 - alpha)**3, np.nan, np.nan, 1., np.nan]),
                (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1., np.nan]),
                (s2, False, False, [np.nan, (1.0 - alpha)**3, np.nan, np.nan, alpha, np.nan]),
                (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]),
                ]:
            expected = simple_wma(s, Series(w))
            result = mom.ewma(s, com=com, adjust=adjust, ignore_na=ignore_na)
            assert_series_equal(result, expected)
            if ignore_na is False:
                # check that ignore_na defaults to False
                result = mom.ewma(s, com=com, adjust=adjust)
                assert_series_equal(result, expected)

    def test_ewmvar(self):
        """Run the generic exponentially-weighted checks on ewmvar."""
        self._check_ew(mom.ewmvar)

    def test_ewmvol(self):
        """Run the generic exponentially-weighted checks on ewmvol."""
        self._check_ew(mom.ewmvol)

    def test_ewma_span_com_args(self):
        """com=9.5 and span=20 are equivalent; giving both or neither raises."""
        A = mom.ewma(self.arr, com=9.5)
        B = mom.ewma(self.arr, span=20)
        assert_almost_equal(A, B)

        # NOTE(review): asserting on the broad Exception type would also pass
        # for unrelated failures.
        self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20)
        self.assertRaises(Exception, mom.ewma, self.arr)

    def test_ewma_halflife_arg(self):
        """halflife=10 matches the equivalent com; combining decay arguments raises."""
        A = mom.ewma(self.arr, com=13.932726172912965)
        B = mom.ewma(self.arr, halflife=10.0)
        assert_almost_equal(A, B)

        # Any combination of com/span/halflife, or none of them, is an error.
        self.assertRaises(Exception, mom.ewma, self.arr, span=20, halflife=50)
        self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, halflife=50)
        self.assertRaises(Exception, mom.ewma, self.arr, com=9.5, span=20, halflife=50)
        self.assertRaises(Exception, mom.ewma, self.arr)

    def test_ew_empty_arrays(self):
        # Each function must hand back an empty result for empty input.
        empty = np.array([], dtype=np.float64)

        for ew_func in (mom.ewma, mom.ewmvol, mom.ewmvar):
            assert_almost_equal(ew_func(empty, 3), empty)

    def _check_ew(self, func):
        """Run the ndarray checks, then the Series/DataFrame checks, on ``func``."""
        for check in (self._check_ew_ndarray, self._check_ew_structures):
            check(func)

    def _check_ew_ndarray(self, func, preserve_nan=False):
        """Check ``func`` on plain ndarrays.

        Calls ``func`` on ``self.arr`` with ``com=10`` and, when
        ``preserve_nan`` is true, requires the positions in
        ``self._nan_locs`` to be NaN in the result.  Also verifies that an
        integer input yields a float result.
        """
        result = func(self.arr, com=10)
        if preserve_nan:
            nan_slots = result[self._nan_locs]
            assert np.isnan(nan_slots).all()

        # excluding NaNs correctly
        # NOTE(review): this array is built but never passed to ``func``;
        # the original author left the check unwritten.
        arr = randn(50)
        arr[:10] = np.nan
        arr[-10:] = np.nan

        # ??? check something

        # pass in ints
        int_result = func(np.arange(50), span=10)
        self.assertEqual(int_result.dtype, np.float_)

    def _check_ew_structures(self, func):
        """Check that ``func`` returns a Series for Series input and a DataFrame for DataFrame input."""
        from_series = func(self.series, com=10)
        tm.assert_isinstance(from_series, Series)

        from_frame = func(self.frame, com=10)
        self.assertEqual(type(from_frame), DataFrame)

    # binary moments
    def test_rolling_cov(self):
        lhs = self.series
        rhs = lhs + randn(len(lhs))

        rolled = mom.rolling_cov(lhs, rhs, 50, min_periods=25)

        # The final window is compared with numpy's covariance of the last
        # 50 observations.
        expected_last = np.cov(lhs[-50:], rhs[-50:])[0, 1]
        assert_almost_equal(rolled[-1], expected_last)

    def test_rolling_cov_pairwise(self):
        # Frame-only call is checked against the explicit two-column call.
        pairwise_func = mom.rolling_cov
        self._check_pairwise_moment(pairwise_func, 10, min_periods=5)

    def test_rolling_corr(self):
        lhs = self.series
        rhs = lhs + randn(len(lhs))

        rolled = mom.rolling_corr(lhs, rhs, 50, min_periods=25)
        expected_last = np.corrcoef(lhs[-50:], rhs[-50:])[0, 1]
        assert_almost_equal(rolled[-1], expected_last)

        # test for correct bias correction
        left = tm.makeTimeSeries()
        right = tm.makeTimeSeries()
        left[:5] = np.nan
        right[:10] = np.nan

        # A window spanning the whole series must agree with Series.corr.
        full_window = mom.rolling_corr(left, right, len(left), min_periods=1)
        assert_almost_equal(full_window[-1], left.corr(right))

    def test_rolling_corr_pairwise(self):
        # Frame-only call is checked against the explicit two-column call.
        pairwise_func = mom.rolling_corr
        self._check_pairwise_moment(pairwise_func, 10, min_periods=5)

    def _check_pairwise_moment(self, func, *args, **kwargs):
        """Compare ``func`` applied to the whole frame with the two-column call.

        ``func(self.frame, ...)`` is sliced at columns 1 and 5 and must equal
        ``func(self.frame[1], self.frame[5], ...)``.
        """
        first, second = 1, 5
        pairwise = func(self.frame, *args, **kwargs)

        actual = pairwise.ix[:, first, second]
        expected = func(self.frame[first], self.frame[second],
                        *args, **kwargs)
        tm.assert_series_equal(actual, expected)

    def test_flex_binary_moment(self):
        # GH3155
        # don't blow the stack
        non_pandas_args = (5, 6, None)
        self.assertRaises(TypeError, mom._flex_binary_moment,
                          *non_pandas_args)

    def test_corr_sanity(self):
        # GH 3155
        # Every rolling correlation must lie in [-1, 1]; NaN entries are
        # mapped to 0 by nan_to_num before the comparison.
        def assert_bounded(res):
            in_range = all([np.abs(np.nan_to_num(x)) <= 1 for x in res])
            if not in_range:
                # Include the offending values in the failure instead of
                # printing them and letting the test pass.
                self.fail("rolling_corr outside [-1, 1]:\n%s" % (res,))

        df = DataFrame(
            np.array(
                    [[ 0.87024726,  0.18505595],
                      [ 0.64355431,  0.3091617 ],
                      [ 0.92372966,  0.50552513],
                      [ 0.00203756,  0.04520709],
                      [ 0.84780328,  0.33394331],
                      [ 0.78369152,  0.63919667]])
            )

        res = mom.rolling_corr(df[0], df[1], 5, center=True)
        assert_bounded(res)

        # and some fuzzing
        for i in range(10):
            df = DataFrame(np.random.rand(30, 2))
            res = mom.rolling_corr(df[0], df[1], 5, center=True)
            assert_bounded(res)


    def test_flex_binary_frame(self):
        def _verify(moment):
            column = self.frame[1]

            # Series-vs-frame in either argument order must equal applying
            # the moment column by column.
            series_first = moment(column, self.frame, 10)
            frame_first = moment(self.frame, column, 10)
            per_column = self.frame.apply(lambda col: moment(column, col, 10))

            tm.assert_frame_equal(series_first, per_column)
            tm.assert_frame_equal(frame_first, per_column)

            other = self.frame.copy()
            other.values[:] = np.random.randn(*other.shape)

            # Frame-vs-frame must equal pairing up same-named columns.
            frame_frame = moment(self.frame, other, 10)
            matched = DataFrame(dict((key, moment(self.frame[key],
                                                  other[key], 10))
                                     for key in self.frame))
            tm.assert_frame_equal(frame_frame, matched)

        for moment in (mom.rolling_corr, mom.rolling_cov):
            _verify(moment)

    def test_ewmcov(self):
        # Run the shared binary exponentially-weighted checks with ewmcov.
        binary_func = mom.ewmcov
        self._check_binary_ew(binary_func)

    def test_ewmcov_pairwise(self):
        # Frame-only call is checked against the explicit two-column call.
        ew_kwargs = {'span': 10, 'min_periods': 5}
        self._check_pairwise_moment(mom.ewmcov, **ew_kwargs)

    def test_ewmcorr(self):
        # Run the shared binary exponentially-weighted checks with ewmcorr.
        binary_func = mom.ewmcorr
        self._check_binary_ew(binary_func)

    def test_ewmcorr_pairwise(self):
        # Frame-only call is checked against the explicit two-column call.
        ew_kwargs = {'span': 10, 'min_periods': 5}
        self._check_pairwise_moment(mom.ewmcorr, **ew_kwargs)

    def _check_binary_ew(self, func):
        """Check a two-input exponentially-weighted function.

        Builds two partially overlapping series with NaNs at opposite ends,
        then requires the first 15 results to be NaN and the rest to be
        non-NaN.  Passing a bare ndarray as the second input must raise.
        """
        lhs = Series(randn(50), index=np.arange(50))
        rhs = lhs[2:] + randn(48)

        lhs[:10] = np.nan
        rhs[-10:] = np.nan

        result = func(lhs, rhs, 20, min_periods=5)

        head = result.values[:15]
        tail = result.values[15:]
        self.assertTrue(np.isnan(head).all())
        self.assertFalse(np.isnan(tail).any())

        self.assertRaises(Exception, func, lhs, randn(50), 20, min_periods=5)

    def test_expanding_apply(self):
        # An empty Series must come back unchanged.
        empty = Series([])
        applied = mom.expanding_apply(empty, lambda x: x.mean())
        assert_series_equal(empty, applied)

        # Wrap expanding_apply so it can go through the generic expanding
        # checks with np.mean as the reference.
        def expanding_mean(x, min_periods=1, freq=None):
            mean_of_window = lambda x: x.mean()
            return mom.expanding_apply(x, mean_of_window,
                                       min_periods=min_periods, freq=freq)

        self._check_expanding(expanding_mean, np.mean)

    def test_expanding_apply_args_kwargs(self):
        def mean_w_arg(x, const):
            return np.mean(x) + const

        df = DataFrame(np.random.rand(20, 3))

        expected = mom.expanding_apply(df, np.mean) + 20.

        # The extra argument may arrive positionally or by keyword.
        via_args = mom.expanding_apply(df, mean_w_arg, args=(20,))
        assert_frame_equal(via_args, expected)

        via_kwargs = mom.expanding_apply(df, mean_w_arg, kwargs={'const' : 20})
        assert_frame_equal(via_kwargs, expected)


    def test_expanding_corr(self):
        lhs = self.series.dropna()
        noisy = lhs + randn(len(lhs))
        rhs = noisy[:-5]

        # Expanding must match a rolling window as long as the series.
        expanding = mom.expanding_corr(lhs, rhs)
        rolling = mom.rolling_corr(lhs, rhs, len(lhs), min_periods=1)

        assert_almost_equal(rolling, expanding)

    def test_expanding_count(self):
        # Expanding must match a rolling window as long as the series.
        expanding = mom.expanding_count(self.series)
        rolling = mom.rolling_count(self.series, len(self.series))
        assert_almost_equal(expanding, rolling)

    def test_expanding_quantile(self):
        # Expanding must match a rolling window as long as the series.
        expanding = mom.expanding_quantile(self.series, 0.5)

        n_obs = len(self.series)
        rolling = mom.rolling_quantile(self.series, n_obs, 0.5, min_periods=1)

        assert_almost_equal(expanding, rolling)

    def test_expanding_cov(self):
        lhs = self.series
        noisy = lhs + randn(len(lhs))
        rhs = noisy[:-5]

        # Expanding must match a rolling window as long as the series.
        expanding = mom.expanding_cov(lhs, rhs)
        rolling = mom.rolling_cov(lhs, rhs, len(lhs), min_periods=1)

        assert_almost_equal(rolling, expanding)

    def test_expanding_max(self):
        # np.max is the reference; NaN positions are not required to
        # survive.
        check_kwargs = {'preserve_nan': False}
        self._check_expanding(mom.expanding_max, np.max, **check_kwargs)

    def test_expanding_cov_pairwise(self):
        expanding = mom.expanding_cov(self.frame)

        n_rows = len(self.frame)
        rolling = mom.rolling_cov(self.frame, n_rows, min_periods=1)

        # Compare item by item.
        for item in expanding.items:
            assert_almost_equal(expanding[item], rolling[item])

    def test_expanding_corr_pairwise(self):
        expanding = mom.expanding_corr(self.frame)

        n_rows = len(self.frame)
        rolling = mom.rolling_corr(self.frame, n_rows, min_periods=1)

        # Compare item by item.
        for item in expanding.items:
            assert_almost_equal(expanding[item], rolling[item])

    def test_expanding_cov_diff_index(self):
        # GH 7512
        full = Series([1, 2, 3], index=[0, 1, 2])
        expected = Series([None, None, 2.0])

        # A missing label and an explicit None at that label must give the
        # same answer.
        shorter = Series([1, 3], index=[0, 2])
        assert_series_equal(mom.expanding_cov(full, shorter), expected)

        with_gap = Series([1, None, 3], index=[0, 1, 2])
        assert_series_equal(mom.expanding_cov(full, with_gap), expected)

        # Indexes that only partly overlap.
        left = Series([7, 8, 10], index=[0, 1, 3])
        right = Series([7, 9, 10], index=[0, 2, 3])
        expected = Series([None, None, None, 4.5])
        assert_series_equal(mom.expanding_cov(left, right), expected)

    def test_expanding_corr_diff_index(self):
        # GH 7512
        full = Series([1, 2, 3], index=[0, 1, 2])
        expected = Series([None, None, 1.0])

        # A missing label and an explicit None at that label must give the
        # same answer.
        shorter = Series([1, 3], index=[0, 2])
        assert_series_equal(mom.expanding_corr(full, shorter), expected)

        with_gap = Series([1, None, 3], index=[0, 1, 2])
        assert_series_equal(mom.expanding_corr(full, with_gap), expected)

        # Indexes that only partly overlap.
        left = Series([7, 8, 10], index=[0, 1, 3])
        right = Series([7, 9, 10], index=[0, 2, 3])
        expected = Series([None, None, None, 1.])
        assert_series_equal(mom.expanding_corr(left, right), expected)

    def test_rolling_cov_diff_length(self):
        # GH 7512
        full = Series([1, 2, 3], index=[0, 1, 2])
        expected = Series([None, None, 2.0])
        roll_kwargs = {'window': 3, 'min_periods': 2}

        # A missing label and an explicit None at that label must give the
        # same answer.
        shorter = Series([1, 3], index=[0, 2])
        result = mom.rolling_cov(full, shorter, **roll_kwargs)
        assert_series_equal(result, expected)

        with_gap = Series([1, None, 3], index=[0, 1, 2])
        result = mom.rolling_cov(full, with_gap, **roll_kwargs)
        assert_series_equal(result, expected)

    def test_rolling_corr_diff_length(self):
        # GH 7512
        full = Series([1, 2, 3], index=[0, 1, 2])
        expected = Series([None, None, 1.0])
        roll_kwargs = {'window': 3, 'min_periods': 2}

        # A missing label and an explicit None at that label must give the
        # same answer.
        shorter = Series([1, 3], index=[0, 2])
        result = mom.rolling_corr(full, shorter, **roll_kwargs)
        assert_series_equal(result, expected)

        with_gap = Series([1, None, 3], index=[0, 1, 2])
        result = mom.rolling_corr(full, with_gap, **roll_kwargs)
        assert_series_equal(result, expected)

    def test_rolling_functions_window_non_shrinkage(self):
        # GH 7764
        # The inputs have 4 rows while min_periods is 5, so every result is
        # expected to be all-NaN with the input's labels.
        ser = Series(range(4))
        ser_expected = Series(np.nan, index=ser.index)
        frame = DataFrame([[1,5], [3, 2], [3,9], [-1,0]], columns=['A','B'])
        frame_expected = DataFrame(np.nan, index=frame.index,
                                   columns=frame.columns)
        panel_expected = Panel(items=frame.index, major_axis=frame.columns,
                               minor_axis=frame.columns)

        win = dict(window=10, min_periods=5)

        same_shape_funcs = [
            lambda x: mom.rolling_cov(x, x, pairwise=False, **win),
            lambda x: mom.rolling_corr(x, x, pairwise=False, **win),
            lambda x: mom.rolling_max(x, **win),
            lambda x: mom.rolling_min(x, **win),
            lambda x: mom.rolling_sum(x, **win),
            lambda x: mom.rolling_mean(x, **win),
            lambda x: mom.rolling_std(x, **win),
            lambda x: mom.rolling_var(x, **win),
            lambda x: mom.rolling_skew(x, **win),
            lambda x: mom.rolling_kurt(x, **win),
            lambda x: mom.rolling_quantile(x, quantile=0.5, **win),
            lambda x: mom.rolling_median(x, **win),
            lambda x: mom.rolling_apply(x, func=sum, **win),
            lambda x: mom.rolling_window(x, win_type='boxcar', **win),
        ]
        for roll in same_shape_funcs:
            try:
                assert_series_equal(roll(ser), ser_expected)
                assert_frame_equal(roll(frame), frame_expected)
            except ImportError:
                # scipy needed for rolling_window
                continue

        pairwise_funcs = [
            lambda x: mom.rolling_cov(x, x, pairwise=True, **win),
            lambda x: mom.rolling_corr(x, x, pairwise=True, **win),
            # rolling_corr_pairwise is deprecated, so the following line
            # should be deleted when rolling_corr_pairwise is removed.
            lambda x: mom.rolling_corr_pairwise(x, x, **win),
        ]
        for roll in pairwise_funcs:
            assert_panel_equal(roll(frame), panel_expected)

    def test_expanding_cov_pairwise_diff_length(self):
        # GH 7512
        # The "a" variants drop row 1 instead of carrying it (df1a) or
        # holding None there (df2a).
        df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B'])
        df1a = DataFrame([[1,5], [3,9]], index=[0,2], columns=['A','B'])
        df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
        df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y'])

        pairs = [(df1, df2), (df1, df2a), (df1a, df2), (df1a, df2a)]
        results = [mom.expanding_cov(left, right, pairwise=True)[2]
                   for left, right in pairs]

        expected = DataFrame([[-3., -5.], [-6., -10.]],
                             index=['A','B'], columns=['X','Y'])
        for result in results:
            assert_frame_equal(result, expected)

    def test_expanding_corr_pairwise_diff_length(self):
        # GH 7512
        # The "a" variants drop row 1 instead of carrying it (df1a) or
        # holding None there (df2a).
        df1 = DataFrame([[1,2], [3, 2], [3,4]], columns=['A','B'])
        df1a = DataFrame([[1,2], [3,4]], index=[0,2], columns=['A','B'])
        df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y'])
        df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y'])

        pairs = [(df1, df2), (df1, df2a), (df1a, df2), (df1a, df2a)]
        results = [mom.expanding_corr(left, right, pairwise=True)[2]
                   for left, right in pairs]

        expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]],
                             index=['A','B'], columns=['X','Y'])
        for result in results:
            assert_frame_equal(result, expected)

    def test_rolling_skew_edge_cases(self):
        nan_series = Series([np.nan] * 5)

        # yields all NaN (0 variance)
        constant = Series([1] * 5)
        assert_series_equal(nan_series, mom.rolling_skew(constant, window=5))

        # yields all NaN (window too small)
        noise = Series(np.random.randn(5))
        assert_series_equal(nan_series, mom.rolling_skew(noise, window=2))

        # yields [NaN, NaN, NaN, 0.177994, 1.548824]
        data = Series([-1.50837035, -0.1297039, 0.19501095,
                       1.73508164, 0.41941401])
        expected = Series([np.nan, np.nan, np.nan, 0.177994, 1.548824])
        assert_series_equal(expected, mom.rolling_skew(data, window=4))

    def test_rolling_kurt_edge_cases(self):
        nan_series = Series([np.nan] * 5)

        # yields all NaN (0 variance)
        constant = Series([1] * 5)
        assert_series_equal(nan_series, mom.rolling_kurt(constant, window=5))

        # yields all NaN (window too small)
        noise = Series(np.random.randn(5))
        assert_series_equal(nan_series, mom.rolling_kurt(noise, window=3))

        # yields [NaN, NaN, NaN, 1.224307, 2.671499]
        data = Series([-1.50837035, -0.1297039, 0.19501095,
                       1.73508164, 0.41941401])
        expected = Series([np.nan, np.nan, np.nan, 1.224307, 2.671499])
        assert_series_equal(expected, mom.rolling_kurt(data, window=4))

    def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True,
                                 has_time_rule=True, preserve_nan=True):
        """Check an expanding function against ``static_comp`` on ndarrays.

        ``func`` is the expanding function under test and ``static_comp`` the
        reducer applied to a prefix of the data for comparison.
        ``has_min_periods`` selects whether the ``min_periods`` keyword is
        exercised; ``preserve_nan`` whether the positions in
        ``self._nan_locs`` must be NaN in the result.  ``has_time_rule`` is
        accepted but not used in this method.
        """
        result = func(self.arr)

        # Position 10 is compared with the reducer over the first 11 values.
        assert_almost_equal(result[10], static_comp(self.arr[:11]))

        if preserve_nan:
            assert np.isnan(result[self._nan_locs]).all()

        data = randn(50)

        if not has_min_periods:
            result = func(data)
            assert_almost_equal(result[-1], static_comp(data[:50]))
            return

        result = func(data, min_periods=30)
        assert np.isnan(result[:29]).all()
        assert_almost_equal(result[-1], static_comp(data[:50]))

        # min_periods is working correctly
        result = func(data, min_periods=15)
        self.assertTrue(np.isnan(result[13]))
        self.assertFalse(np.isnan(result[14]))

        shorter = randn(20)
        result = func(shorter, min_periods=5)
        self.assertTrue(isnull(result[3]))
        self.assertTrue(notnull(result[4]))

        # min_periods=0
        with_zero = func(data, min_periods=0)
        with_one = func(data, min_periods=1)
        assert_almost_equal(with_zero, with_one)

    def _check_expanding_structures(self, func):
        """Check that ``func`` returns a Series for Series input and a DataFrame for DataFrame input."""
        from_series = func(self.series)
        tm.assert_isinstance(from_series, Series)

        from_frame = func(self.frame)
        self.assertEqual(type(from_frame), DataFrame)

    def _check_expanding(self, func, static_comp, has_min_periods=True,
                         has_time_rule=True,
                         preserve_nan=True):
        """Run the ndarray checks, then the Series/DataFrame checks, on ``func``.

        All keyword options are forwarded to the ndarray checks only.
        """
        ndarray_options = {
            'has_min_periods': has_min_periods,
            'has_time_rule': has_time_rule,
            'preserve_nan': preserve_nan,
        }
        self._check_expanding_ndarray(func, static_comp, **ndarray_options)
        self._check_expanding_structures(func)

    def test_rolling_max_gh6297(self):
        """Replicate result expected in GH #6297"""

        stamps = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 2 datapoints on one of the days
        stamps.append(datetime(1975, 1, 3, 6, 0))
        series = Series(range(1, 7), index=stamps)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        daily = [datetime(1975, 1, i, 0) for i in range(1, 6)]
        expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=daily)
        result = mom.rolling_max(series, window=1, freq='D')
        assert_series_equal(expected, result)

    def test_rolling_max_how_resample(self):

        stamps = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        stamps.append(datetime(1975, 1, 5, 1))
        stamps.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=stamps)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        daily = [datetime(1975, 1, i, 0) for i in range(1, 6)]

        # Each case: extra keyword arguments and the value expected for the
        # last day.
        cases = [
            # Default how should be max
            ({}, 20.0),
            # Now specify median (10.0)
            ({'how': 'median'}, 10.0),
            # Now specify mean (4+10+20)/3
            ({'how': 'mean'}, (4.0+10.0+20.0)/3.0),
        ]
        for extra, last_day in cases:
            expected = Series([0.0, 1.0, 2.0, 3.0, last_day], index=daily)
            result = mom.rolling_max(series, window=1, freq='D', **extra)
            assert_series_equal(expected, result)


    def test_rolling_min_how_resample(self):

        stamps = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        stamps.append(datetime(1975, 1, 5, 1))
        stamps.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=stamps)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        # Default how should be min
        daily = [datetime(1975, 1, i, 0) for i in range(1, 6)]
        expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=daily)
        result = mom.rolling_min(series, window=1, freq='D')
        assert_series_equal(expected, result)

    def test_rolling_median_how_resample(self):

        stamps = [datetime(1975, 1, i) for i in range(1, 6)]
        # So that we can have 3 datapoints on last day (4, 10, and 20)
        stamps.append(datetime(1975, 1, 5, 1))
        stamps.append(datetime(1975, 1, 5, 2))
        series = Series(list(range(0, 5)) + [10, 20], index=stamps)
        # Use floats instead of ints as values
        series = series.map(lambda x: float(x))
        # Sort chronologically
        series = series.sort_index()

        # Default how should be median
        daily = [datetime(1975, 1, i, 0) for i in range(1, 6)]
        expected = Series([0.0, 1.0, 2.0, 3.0, 10], index=daily)
        result = mom.rolling_median(series, window=1, freq='D')
        assert_series_equal(expected, result)
Example #59
0
 def test_dropna(self):
     # GH 13737
     # dropna on a Period series must remove the NaT entry.
     periods = [pd.Period('2011-01', freq='M'),
                pd.Period('NaT', freq='M')]
     s = Series(periods)
     expected = Series([pd.Period('2011-01', freq='M')])
     tm.assert_series_equal(s.dropna(), expected)
class MySeries:
    """Thin wrapper around a pandas ``Series``.

    The wrapped Series is stored in ``x``; its ``values`` and ``index`` are
    mirrored as attributes at construction time.  Every method forwards its
    arguments to the matching pandas function or Series method and wraps
    the result in a new ``MySeries``.
    """

    def __init__(self, *args, **kwargs):
        # All arguments are handed straight to the Series constructor.
        self.x = Series(*args, **kwargs)
        self.values = self.x.values
        self.index = self.x.index

    # --- module-level pandas rolling functions ---

    def rolling_mean(self, *args, **kwargs):
        return MySeries(pd.rolling_mean(self.x, *args, **kwargs))

    def rolling_count(self, *args, **kwargs):
        return MySeries(pd.rolling_count(self.x, *args, **kwargs))

    def rolling_sum(self, *args, **kwargs):
        return MySeries(pd.rolling_sum(self.x, *args, **kwargs))

    def rolling_median(self, *args, **kwargs):
        return MySeries(pd.rolling_median(self.x, *args, **kwargs))

    def rolling_min(self, *args, **kwargs):
        return MySeries(pd.rolling_min(self.x, *args, **kwargs))

    def rolling_max(self, *args, **kwargs):
        return MySeries(pd.rolling_max(self.x, *args, **kwargs))

    def rolling_std(self, *args, **kwargs):
        return MySeries(pd.rolling_std(self.x, *args, **kwargs))

    def rolling_var(self, *args, **kwargs):
        return MySeries(pd.rolling_var(self.x, *args, **kwargs))

    def rolling_skew(self, *args, **kwargs):
        return MySeries(pd.rolling_skew(self.x, *args, **kwargs))

    def rolling_kurtosis(self, *args, **kwargs):
        # The pandas function is named ``rolling_kurt``; the previous call
        # to ``pd.rolling_kurtosis`` referenced a name pandas does not
        # define.
        return MySeries(pd.rolling_kurt(self.x, *args, **kwargs))

    def rolling_window(self, *args, **kwargs):
        return MySeries(pd.rolling_window(self.x, *args, **kwargs))

    # --- Series methods ---

    def cumprod(self, *args, **kwargs):
        return MySeries(self.x.cumprod(*args, **kwargs))

    def cumsum(self, *args, **kwargs):
        return MySeries(self.x.cumsum(*args, **kwargs))

    def diff(self, *args, **kwargs):
        return MySeries(self.x.diff(*args, **kwargs))

    def div(self, *args, **kwargs):
        return MySeries(self.x.div(*args, **kwargs))

    def mul(self, *args, **kwargs):
        return MySeries(self.x.mul(*args, **kwargs))

    def add(self, *args, **kwargs):
        return MySeries(self.x.add(*args, **kwargs))

    def dropna(self, *args, **kwargs):
        return MySeries(self.x.dropna(*args, **kwargs))

    def fillna(self, *args, **kwargs):
        return MySeries(self.x.fillna(*args, **kwargs))

    def floordiv(self, *args, **kwargs):
        return MySeries(self.x.floordiv(*args, **kwargs))

    def mod(self, *args, **kwargs):
        return MySeries(self.x.mod(*args, **kwargs))

    def nlargest(self, *args, **kwargs):
        return MySeries(self.x.nlargest(*args, **kwargs))

    def nonzero(self, *args, **kwargs):
        return MySeries(self.x.nonzero(*args, **kwargs))

    def nsmallest(self, *args, **kwargs):
        return MySeries(self.x.nsmallest(*args, **kwargs))

    def pow(self, *args, **kwargs):
        return MySeries(self.x.pow(*args, **kwargs))

    def rank(self, *args, **kwargs):
        return MySeries(self.x.rank(*args, **kwargs))

    def round(self, *args, **kwargs):
        return MySeries(self.x.round(*args, **kwargs))

    def shift(self, *args, **kwargs):
        return MySeries(self.x.shift(*args, **kwargs))

    def sub(self, *args, **kwargs):
        return MySeries(self.x.sub(*args, **kwargs))

    def abs(self, *args, **kwargs):
        return MySeries(self.x.abs(*args, **kwargs))

    def clip(self, *args, **kwargs):
        return MySeries(self.x.clip(*args, **kwargs))

    def clip_lower(self, *args, **kwargs):
        return MySeries(self.x.clip_lower(*args, **kwargs))

    def clip_upper(self, *args, **kwargs):
        return MySeries(self.x.clip_upper(*args, **kwargs))

    def interpolate(self, *args, **kwargs):
        return MySeries(self.x.interpolate(*args, **kwargs))

    def resample(self, *args, **kwargs):
        return MySeries(self.x.resample(*args, **kwargs))

    def replace(self, *args, **kwargs):
        return MySeries(self.x.replace(*args, **kwargs))