Beispiel #1
0
def brar(open_,
         high,
         low,
         close,
         length=None,
         scalar=None,
         drift=None,
         offset=None,
         **kwargs):
    """Indicator: BRAR (BRAR)

    Builds two ratio series from rolling sums over ``length`` rows:

    * AR = scalar * sum(high - open) / sum(open - low)
    * BR = scalar * sum(max(high - previous close, 0))
                  / sum(max(previous close - low, 0))

    where "previous close" is ``close.shift(drift)``. The differences come
    from ``non_zero_range`` (presumably a subtraction that avoids exact
    zeros -- confirm against that helper).

    Args:
        open_, high, low, close: Price series, each passed through
            ``verify_series``.
        length: Rolling window size; 26 when missing or not positive.
        scalar: Multiplier applied to both ratios; 100 when missing or zero.
        drift: Resolved by ``get_drift``; the shift used for the previous
            close.
        offset: Resolved by ``get_offset``; when non-zero both results are
            shifted by this amount.
        **kwargs: ``fillna`` (value for ``Series.fillna``) and
            ``fill_method`` (passed as ``Series.fillna(method=...)``).

    Returns:
        DataFrame with the columns ``AR_{length}`` and ``BR_{length}``.
    """
    # Validate Arguments
    open_ = verify_series(open_)
    high = verify_series(high)
    low = verify_series(low)
    close = verify_series(close)
    length = int(length) if length and length > 0 else 26
    scalar = float(scalar) if scalar else 100
    high_open_range = non_zero_range(high, open_)
    open_low_range = non_zero_range(open_, low)
    drift = get_drift(drift)
    offset = get_offset(offset)

    # Calculate Result
    hcy = non_zero_range(high, close.shift(drift))
    cyl = non_zero_range(close.shift(drift), low)

    hcy[hcy < 0] = 0  # Zero negative values
    cyl[cyl < 0] = 0  # ""

    ar = scalar * high_open_range.rolling(length).sum()
    ar /= open_low_range.rolling(length).sum()

    br = scalar * hcy.rolling(length).sum()
    br /= cyl.rolling(length).sum()

    # Offset
    if offset != 0:
        ar = ar.shift(offset)
        # Shift BR itself; shifting `ar` here would replace BR with a
        # double-shifted copy of AR.
        br = br.shift(offset)

    # Handle fills
    if 'fillna' in kwargs:
        ar.fillna(kwargs['fillna'], inplace=True)
        br.fillna(kwargs['fillna'], inplace=True)
    if 'fill_method' in kwargs:
        ar.fillna(method=kwargs['fill_method'], inplace=True)
        br.fillna(method=kwargs['fill_method'], inplace=True)

    # Name and Categorize it
    _props = f"_{length}"
    ar.name = f"AR{_props}"
    br.name = f"BR{_props}"
    ar.category = br.category = 'momentum'

    # Prepare DataFrame to return
    brardf = DataFrame({ar.name: ar, br.name: br})
    brardf.name = f"BRAR{_props}"
    brardf.category = 'momentum'

    return brardf
Beispiel #2
0
def test_cythonized_aggers(op_name):
    """Compare the groupby aggregation ``op_name`` with per-group results.

    The expected values are built by calling the same method on column "C"
    of every group; the two-key comparison only runs for "sum" and "prod".
    """
    frame = DataFrame(
        {
            "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan],
            "B": ["A", "B"] * 6,
            "C": np.random.randn(12),
        }
    )
    frame.loc[2:10:2, "C"] = np.nan

    def apply_op(obj):
        return getattr(obj, op_name)()

    # one grouping key
    by_a = frame.drop(["B"], axis=1).groupby("A")
    per_group = {}
    for key, chunk in by_a:
        per_group[key] = apply_op(chunk["C"])
    expected_frame = DataFrame({"C": per_group})
    expected_frame.index.name = "A"
    tm.assert_frame_equal(apply_op(by_a), expected_frame)

    # two grouping keys
    by_a_b = frame.groupby(["A", "B"])
    nested = {}
    for (outer, inner), chunk in by_a_b:
        nested.setdefault(outer, {})[inner] = apply_op(chunk["C"])
    expected_series = DataFrame(nested).T.stack(dropna=False)
    expected_series.index.names = ["A", "B"]
    expected_series.name = "C"

    actual = apply_op(by_a_b)["C"]
    if op_name in ["sum", "prod"]:
        tm.assert_series_equal(actual, expected_series)
Beispiel #3
0
        def _testit(name):
            """Check the groupby method ``name`` against per-group results.

            Uses ``df`` from the enclosing scope; the two-key comparison is
            skipped when ``tm._incompat_bottleneck_version(name)`` is true.
            """

            def apply_op(obj):
                return getattr(obj, name)()

            # one grouping key
            by_a = df.drop(['B'], axis=1).groupby('A')
            per_group = {key: apply_op(chunk['C']) for key, chunk in by_a}
            expected_frame = DataFrame({'C': per_group})
            expected_frame.index.name = 'A'
            assert_frame_equal(apply_op(by_a), expected_frame)

            # two grouping keys
            by_a_b = df.groupby(['A', 'B'])
            nested = {}
            for (outer, inner), chunk in by_a_b:
                nested.setdefault(outer, {})[inner] = apply_op(chunk['C'])
            expected_series = DataFrame(nested).T.stack(dropna=False)
            expected_series.index.names = ['A', 'B']
            expected_series.name = 'C'

            actual = apply_op(by_a_b)['C']
            if not tm._incompat_bottleneck_version(name):
                assert_series_equal(actual, expected_series)
Beispiel #4
0
def join_closest_index(df: pd.DataFrame,
                       other: pd.DataFrame,
                       other_name: str = 'other') -> pd.DataFrame:
    """
    Join `df` with the closest row (by index) of `other`.

    Both frames are mapped onto a shared integer index: the index of `df`
    is divided by the step of `other` (difference between its first two
    index values) and floored, while the rows of `other` are renumbered
    consecutively starting at ``int(min(other.index / step)) - 1``. The
    frames are joined on that integer index and the original index of `df`
    is put back on the result. Neither input frame is modified.

    :param df: index in time,
    :param other: index in time, constant intervals; needs at least two
        rows, otherwise reading the step raises IndexError
    :param other_name: name of the joined columns; used as the suffix for
        overlapping column names when `other` has more than one column
    :return: df
    """
    original_index = df.index
    round_index = other.index.values[1] - other.index.values[0]
    # Work on a copy: assigning to `df.index` directly would leave the
    # caller's frame carrying the integer index after this call.
    df = df.copy()
    df.index = np.floor(df.index / round_index).astype(int)
    other_offset = (other.index / round_index).astype(int).min() - 1
    other = other.reset_index(
        drop=True)  # make sure whole int span is exactly covered
    other.index = other.index + other_offset

    other.name = other_name
    if len(other.columns) == 1:
        df = df.join(other)
    else:
        df = df.join(other, rsuffix=f'_{other_name}')
    df.index = original_index
    return df
Beispiel #5
0
def true_range(high, low, close, drift=None, offset=None, **kwargs):
    """Indicator: True Range

    Row-wise maximum of the absolute values of three series: the high/low
    range from ``non_zero_range``, ``high - close.shift(drift)`` and
    ``close.shift(drift) - low``. The result is optionally shifted by
    ``offset`` and filled via the ``fillna`` / ``fill_method`` kwargs, then
    named ``TRUERANGE_{drift}``.
    """
    # Validate arguments
    high = verify_series(high)
    low = verify_series(low)
    close = verify_series(close)
    hl_span = non_zero_range(high, low)
    drift = get_drift(drift)
    offset = get_offset(offset)

    # Calculate Result
    shifted_close = close.shift(drift)
    candidates = DataFrame([hl_span, high - shifted_close, shifted_close - low])
    tr = candidates.T.abs().max(axis=1)

    # Offset
    if offset != 0:
        tr = tr.shift(offset)

    # Handle fills
    if 'fillna' in kwargs:
        tr.fillna(kwargs['fillna'], inplace=True)
    if 'fill_method' in kwargs:
        tr.fillna(method=kwargs['fill_method'], inplace=True)

    # Name and Categorize it
    tr.name = f"TRUERANGE_{drift}"
    tr.category = 'volatility'

    return tr
Beispiel #6
0
def test_cythonized_aggers(op_name):
    """Verify that groupby ``op_name`` matches the same method per group.

    Expected values come from applying the method to column 'C' of each
    group; the two-key series is only asserted for 'sum' and 'prod'.
    """
    raw = {'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., np.nan, np.nan],
           'B': ['A', 'B'] * 6,
           'C': np.random.randn(12)}
    table = DataFrame(raw)
    table.loc[2:10:2, 'C'] = np.nan

    def run(target):
        return getattr(target, op_name)()

    # grouped by 'A' only
    single = table.drop(['B'], axis=1).groupby('A')
    wanted = {}
    for label, part in single:
        wanted[label] = run(part['C'])
    wanted = DataFrame({'C': wanted})
    wanted.index.name = 'A'
    got = run(single)
    tm.assert_frame_equal(got, wanted)

    # grouped by 'A' and 'B'
    double = table.groupby(['A', 'B'])
    by_first = {}
    for (first, second), part in double:
        by_first.setdefault(first, {})[second] = run(part['C'])
    wanted = DataFrame(by_first).T.stack(dropna=False)
    wanted.index.names = ['A', 'B']
    wanted.name = 'C'

    got = run(double)['C']
    if op_name in ['sum', 'prod']:
        tm.assert_series_equal(got, wanted)
Beispiel #7
0
def ppo(close,
        fast=None,
        slow=None,
        signal=None,
        scalar=None,
        offset=None,
        **kwargs):
    """Indicator: Percentage Price Oscillator (PPO)

    PPO is ``scalar * (sma(close, fast) - sma(close, slow)) / sma(close,
    slow)``; the signal line is ``ema`` of PPO over ``signal`` and the
    histogram is PPO minus the signal line. Defaults are fast=12, slow=26,
    signal=9 and scalar=100; ``fast`` and ``slow`` are swapped when
    ``slow < fast``.

    Returns:
        DataFrame with the columns ``PPO_*``, ``PPOh_*`` and ``PPOs_*``.
    """
    # Validate Arguments
    close = verify_series(close)
    fast = int(fast) if fast and fast > 0 else 12
    slow = int(slow) if slow and slow > 0 else 26
    signal = int(signal) if signal and signal > 0 else 9
    scalar = float(scalar) if scalar else 100
    if slow < fast:
        fast, slow = slow, fast
    # Not used further down; the int() conversion is kept so an invalid
    # "min_periods" kwarg still raises here.
    requested_min_periods = kwargs.get("min_periods")
    min_periods = fast if requested_min_periods is None else int(requested_min_periods)
    offset = get_offset(offset)

    # Calculate Result
    fast_avg = sma(close, length=fast)
    slow_avg = sma(close, length=slow)
    osc = scalar * (fast_avg - slow_avg) / slow_avg

    signal_line = ema(osc, length=signal)
    hist = osc - signal_line

    # Offset
    if offset != 0:
        osc = osc.shift(offset)
        hist = hist.shift(offset)
        signal_line = signal_line.shift(offset)

    outputs = (osc, hist, signal_line)

    # Handle fills
    if "fillna" in kwargs:
        for series in outputs:
            series.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for series in outputs:
            series.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    _props = f"_{fast}_{slow}_{signal}"
    osc.name = f"PPO{_props}"
    hist.name = f"PPOh{_props}"
    signal_line.name = f"PPOs{_props}"
    for series in outputs:
        series.category = "momentum"

    # Prepare DataFrame to return
    df = DataFrame({series.name: series for series in outputs})
    df.name = f"PPO{_props}"
    df.category = osc.category

    return df
Beispiel #8
0
    def test_factor_weights(self,
                            factor_vals,
                            tickers,
                            groups,
                            demeaned,
                            group_adjust,
                            equal_weight,
                            expected_vals):
        """Check ``factor_weights`` against ``expected_vals``.

        Builds a (date, asset)-indexed factor series from ``factor_vals``
        and ``tickers``, attaches each asset's group from ``groups``, and
        compares the computed weights with the expected series.
        """
        dates = date_range('1/12/2000', periods=len(factor_vals))
        wide = DataFrame(index=dates, columns=tickers, data=factor_vals)
        factor = wide.stack()
        factor.index = factor.index.set_names(['date', 'asset'])
        factor.name = 'factor'

        factor_data = DataFrame()
        factor_data['factor'] = factor
        group_of_asset = Series(groups)
        assets = factor.index.get_level_values('asset')
        factor_data['group'] = Series(index=factor.index,
                                      data=group_of_asset[assets].values)

        weights = factor_weights(factor_data,
                                 demeaned,
                                 group_adjust,
                                 equal_weight)

        expected = Series(data=expected_vals,
                          index=factor_data.index,
                          name='factor')

        assert_series_equal(weights, expected)
Beispiel #9
0
def accbands(high,
             low,
             close,
             length=None,
             c=None,
             drift=None,
             mamode=None,
             offset=None,
             **kwargs):
    """Indicator: Acceleration Bands (ACCBANDS)

    With ``ratio = c * range(high, low) / (high + low)``, the lower band is
    the moving average of ``low * (1 - ratio)``, the upper band the moving
    average of ``high * (1 + ratio)`` and the middle band the moving
    average of ``close``. All three use ``ma(mamode, ..., length)``.
    Defaults are length=20, c=4 and mamode="sma".

    Returns:
        DataFrame with the columns ``ACCBL_{length}``, ``ACCBM_{length}``
        and ``ACCBU_{length}``.
    """
    # Validate arguments
    high = verify_series(high)
    low = verify_series(low)
    close = verify_series(close)
    hl_span = non_zero_range(high, low)
    length = int(length) if length and length > 0 else 20
    c = float(c) if c and c > 0 else 4
    if not isinstance(mamode, str):
        mamode = "sma"
    drift = get_drift(drift)
    offset = get_offset(offset)

    # Calculate Result
    scaled_ratio = (hl_span / (high + low)) * c
    raw_lower = low * (1 - scaled_ratio)
    raw_upper = high * (1 + scaled_ratio)

    lower = ma(mamode, raw_lower, length=length)
    mid = ma(mamode, close, length=length)
    upper = ma(mamode, raw_upper, length=length)

    # Offset
    if offset != 0:
        lower = lower.shift(offset)
        mid = mid.shift(offset)
        upper = upper.shift(offset)

    bands = (lower, mid, upper)

    # Handle fills
    if "fillna" in kwargs:
        for band in bands:
            band.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for band in bands:
            band.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    lower.name = f"ACCBL_{length}"
    mid.name = f"ACCBM_{length}"
    upper.name = f"ACCBU_{length}"
    for band in bands:
        band.category = "volatility"

    # Prepare DataFrame to return
    accbandsdf = DataFrame({band.name: band for band in bands})
    accbandsdf.name = f"ACCBANDS_{length}"
    accbandsdf.category = mid.category

    return accbandsdf
Beispiel #10
0
def kc(high, low, close, length=None, scalar=None, mamode=None, offset=None, **kwargs):
    """Indicator: Keltner Channels (KC)

    The basis is a moving average of ``close`` and the band a moving
    average of the bar range; the channels are ``basis -/+ scalar * band``.

    Args:
        high, low, close: Price series, each passed through
            ``verify_series``.
        length: Moving-average length; 20 when missing or not positive.
        scalar: Band multiplier; 2 when missing or not positive.
        mamode: "sma" (case-insensitive) selects ``sma``. Every other value
            (None, "ema", an unrecognised name or a non-string) uses
            ``ema``.
        offset: Resolved by ``get_offset``; when non-zero all three outputs
            are shifted by it.
        **kwargs: ``tr`` (default True) chooses ``true_range`` over
            ``high_low_range`` for the bar range; ``fillna`` and
            ``fill_method`` are forwarded to ``Series.fillna``.

    Returns:
        DataFrame with the columns ``KCL*``, ``KCB*`` and ``KCU*``; the
        name suffix is ``s_{length}_{scalar}`` for "sma" and
        ``_{length}_{scalar}`` otherwise.
    """
    # Validate arguments
    high = verify_series(high)
    low = verify_series(low)
    close = verify_series(close)
    length = int(length) if length and length > 0 else 20
    scalar = float(scalar) if scalar and scalar > 0 else 2
    # Only strings can name a mode; anything else selects the EMA default
    # instead of failing on `.lower()`.
    mamode = mamode.lower() if isinstance(mamode, str) else None
    offset = get_offset(offset)

    # Calculate Result
    use_tr = kwargs.pop("tr", True)
    if use_tr:
        range_ = true_range(high, low, close)
    else:
        range_ = high_low_range(high, low)

    if mamode == "sma":
        basis = sma(close, length)
        band = sma(range_, length=length)
        _mode = "s"
    else:
        # EMA is the default and also the fallback for unrecognised modes,
        # so `basis` and `band` are always bound below.
        basis = ema(close, length=length)
        band = ema(range_, length=length)
        _mode = ""

    lower = basis - scalar * band
    upper = basis + scalar * band

    # Offset
    if offset != 0:
        lower = lower.shift(offset)
        basis = basis.shift(offset)
        upper = upper.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        lower.fillna(kwargs["fillna"], inplace=True)
        basis.fillna(kwargs["fillna"], inplace=True)
        upper.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        lower.fillna(method=kwargs["fill_method"], inplace=True)
        basis.fillna(method=kwargs["fill_method"], inplace=True)
        upper.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    _props = f"{_mode}_{length}_{scalar}"
    lower.name = f"KCL{_props}"
    basis.name = f"KCB{_props}"
    upper.name = f"KCU{_props}"
    basis.category = upper.category = lower.category = "volatility"

    # Prepare DataFrame to return
    data = {lower.name: lower, basis.name: basis, upper.name: upper}
    kcdf = DataFrame(data)
    kcdf.name = f"KC{_props}"
    kcdf.category = basis.category

    return kcdf
Beispiel #11
0
def test_cythonized_aggers(op_name):
    """Groupby ``op_name`` must agree with the method applied per group.

    The reference values are computed on column 'C' of each group; the
    two-key result is only compared for 'sum' and 'prod'.
    """
    source = DataFrame({
        'A': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1., np.nan, np.nan],
        'B': ['A', 'B'] * 6,
        'C': np.random.randn(12)
    })
    source.loc[2:10:2, 'C'] = np.nan

    def aggregate(obj):
        return getattr(obj, op_name)()

    # single column
    groups_a = source.drop(['B'], axis=1).groupby('A')
    reference = DataFrame(
        {'C': {name: aggregate(rows['C']) for name, rows in groups_a}}
    )
    reference.index.name = 'A'
    outcome = aggregate(groups_a)
    tm.assert_frame_equal(outcome, reference)

    # multiple columns
    groups_ab = source.groupby(['A', 'B'])
    table = {}
    for (key_a, key_b), rows in groups_ab:
        table.setdefault(key_a, {})[key_b] = aggregate(rows['C'])
    reference = DataFrame(table).T.stack(dropna=False)
    reference.index.names = ['A', 'B']
    reference.name = 'C'

    outcome = aggregate(groups_ab)['C']
    if op_name in ['sum', 'prod']:
        tm.assert_series_equal(outcome, reference)
    def test_factor_weights(
        self,
        factor_vals,
        tickers,
        groups,
        demeaned,
        group_adjust,
        equal_weight,
        expected_vals,
    ):
        """Compare ``factor_weights`` output with ``expected_vals``.

        The input frame holds a stacked (date, asset) factor series plus a
        "group" column looked up from ``groups`` by asset.
        """
        n_days = len(factor_vals)
        calendar = date_range("1/12/2000", periods=n_days)
        factor = DataFrame(index=calendar, columns=tickers, data=factor_vals).stack()
        factor.index = factor.index.set_names(["date", "asset"])
        factor.name = "factor"

        factor_data = DataFrame()
        factor_data["factor"] = factor
        group_lookup = Series(groups)
        asset_labels = factor.index.get_level_values("asset")
        group_values = group_lookup[asset_labels].values
        factor_data["group"] = Series(index=factor.index, data=group_values)

        weights = factor_weights(factor_data, demeaned, group_adjust, equal_weight)

        expected = Series(data=expected_vals, index=factor_data.index, name="factor")

        assert_series_equal(weights, expected)
Beispiel #13
0
def adx(high, low, close, length=None, scalar=None, drift=None, offset=None, **kwargs):
    """Indicator: ADX

    Computes the directional movement lines DMP and DMN as
    ``(scalar / atr) * rma(...)`` of the positive and negative moves, and
    ADX as ``rma`` of ``scalar * |DMP - DMN| / (DMP + DMN)``. Defaults are
    length=14 and scalar=100.

    Returns:
        DataFrame with the columns ``ADX_{length}``, ``DMP_{length}`` and
        ``DMN_{length}``.
    """
    # Validate Arguments
    high = verify_series(high)
    low = verify_series(low)
    close = verify_series(close)
    if not (length and length > 0):
        length = 14
    scalar = float(scalar) if scalar else 100
    drift = get_drift(drift)
    offset = get_offset(offset)

    # Calculate Result
    average_tr = atr(high=high, low=low, close=close, length=length)

    rise = high - high.shift(drift)
    fall = low.shift(drift) - low

    # Keep a move only when it is the larger of the two and positive.
    plus_move = ((rise > fall) & (rise > 0)) * rise
    minus_move = ((fall > rise) & (fall > 0)) * fall

    plus_move = plus_move.apply(zero)
    minus_move = minus_move.apply(zero)

    factor = scalar / average_tr
    dmp = factor * rma(close=plus_move, length=length)
    dmn = factor * rma(close=minus_move, length=length)

    directional_index = scalar * (dmp - dmn).abs() / (dmp + dmn)
    adx_line = rma(close=directional_index, length=length)

    # Offset
    if offset != 0:
        dmp = dmp.shift(offset)
        dmn = dmn.shift(offset)
        adx_line = adx_line.shift(offset)

    outputs = (adx_line, dmp, dmn)

    # Handle fills
    if "fillna" in kwargs:
        for series in outputs:
            series.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for series in outputs:
            series.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    adx_line.name = f"ADX_{length}"
    dmp.name = f"DMP_{length}"
    dmn.name = f"DMN_{length}"
    for series in outputs:
        series.category = "trend"

    # Prepare DataFrame to return
    adxdf = DataFrame({series.name: series for series in outputs})
    adxdf.name = f"ADX_{length}"
    adxdf.category = "trend"

    return adxdf
    def get_productForm(self):
        """Run the product and prop queries and rebuild ``self.productForm``.

        The rows of the product query become the frame's data and the first
        column of every prop row becomes a column name. Missing values are
        replaced with 0 before the frame is stored.

        The frame is built, stored and reported only after both queries
        have succeeded. Any exception from the cursor propagates unchanged
        and leaves ``self.productForm`` as it was.
        """
        # NOTE(review): both statements are produced with str.format from
        # instance attributes. If those values can carry untrusted text, the
        # templates (defined outside this block) should be turned into
        # parameterized queries.
        product_sql = SQL_PRODUCT.format({'product': self.product})
        prop_sql = SQL_PROP.format({'prop': self.prop})

        CUR.execute(product_sql)
        products_list = list(CUR.fetchall())

        CUR.execute(prop_sql)
        prop_list = [row[0] for row in CUR.fetchall()]

        # Use the values of the prop table as the column names of the
        # product table.
        frame = DataFrame(products_list, columns=prop_list)
        frame.name = self.product + "_productForm"
        frame.index.names = ['product id']
        frame.columns.names = ['attribute']
        self.productForm = frame.fillna(0)
        print("Info: The {0[product]}'productForm is up to date.".format(
            {'product': self.product}))
0
def factor_alpha_beta(
        factor_data: pd.DataFrame,
        returns: pd.DataFrame = None,
        demeaned: bool = True,
        group_adjust: bool = False,
        equal_weight: bool = False,
):
    """
    Compute the annualized alpha and the beta of a factor.

    For every return column, the factor returns are regressed (OLS with a
    constant) on the mean forward return per "datetime" index level.

    Parameters
    ----------
    :param factor_data: frame with a "datetime" index level whose columns
        include the forward returns picked by `get_forward_returns_columns`
    :param returns: factor returns; when None they are computed with
        `factor_returns`. A Series is renamed to the first forward-return
        column and wrapped in a DataFrame
    :param demeaned: forwarded to `factor_returns`
    :param group_adjust: forwarded to `factor_returns`
    :param equal_weight: forwarded to `factor_returns`

    Returns
    -------
    DataFrame with the rows "Ann. alpha" and "beta" and one column per
    period; both are NaN when the fit does not yield exactly two parameters.
    """
    if returns is None:
        returns = factor_returns(
            factor_data, demeaned, group_adjust, equal_weight
        )

    per_date = factor_data.groupby(level="datetime")
    forward_columns = get_forward_returns_columns(factor_data.columns)
    universe_ret = per_date[forward_columns].mean().loc[returns.index]

    if isinstance(returns, pd.Series):
        returns.name = universe_ret.columns.values[0]
        returns = pd.DataFrame(returns)

    alpha_beta = pd.DataFrame()
    for period in returns.columns.values:
        market = universe_ret[period].values
        target = returns[period].values
        design = add_constant(market)

        fitted = OLS(target, design).fit()
        try:
            alpha, beta = fitted.params
        except ValueError:
            alpha_beta.loc["Ann. alpha", period] = np.nan
            alpha_beta.loc["beta", period] = np.nan
            continue

        # Number of periods per year, used to annualize alpha.
        year_span = pd.Timedelta(days=DAYS_PER_YEAR)
        period_span = pd.Timedelta(
            utils.get_period(period.replace("period_", ""))
        )
        freq_adjust = year_span / period_span
        alpha_beta.loc["Ann. alpha", period] = (1 + alpha)**freq_adjust - 1.0
        alpha_beta.loc["beta", period] = beta
    return alpha_beta
Beispiel #16
0
        def _testit(name):
            """Check the groupby method ``name`` against per-group results.

            Uses ``df`` from the enclosing scope; the two-key comparison
            only runs for 'sum' and 'prod'.
            """

            def run(target):
                return getattr(target, name)()

            # grouped by 'A' only
            single = df.drop(['B'], axis=1).groupby('A')
            wanted = {label: run(part['C']) for label, part in single}
            wanted = DataFrame({'C': wanted})
            wanted.index.name = 'A'
            got = run(single)
            assert_frame_equal(got, wanted)

            # grouped by 'A' and 'B'
            double = df.groupby(['A', 'B'])
            by_first = {}
            for (first, second), part in double:
                by_first.setdefault(first, {})[second] = run(part['C'])
            wanted = DataFrame(by_first).T.stack(dropna=False)
            wanted.index.names = ['A', 'B']
            wanted.name = 'C'

            got = run(double)['C']
            if name in ['sum', 'prod']:
                assert_series_equal(got, wanted)
Beispiel #17
0
def aroon(high, low, length=None, scalar=None, talib=None, offset=None, **kwargs):
    """Indicator: Aroon & Aroon Oscillator

    Uses TA-Lib's ``AROON`` / ``AROONOSC`` when ``Imports["talib"]`` is set
    and ``talib`` is not ``False``. Otherwise each line is
    ``scalar * (1 - periods / length)``, where ``periods`` comes from
    applying ``recent_maximum_index`` (highs) or ``recent_minimum_index``
    (lows) to a rolling window of ``length + 1`` rows; the oscillator is up
    minus down. Fills are applied before the offset shift.

    Returns:
        DataFrame with the columns ``AROOND_{length}``, ``AROONU_{length}``
        and ``AROONOSC_{length}``, or ``None`` when ``verify_series``
        returns ``None`` for either input.
    """
    # Validate Arguments
    if not (length and length > 0):
        length = 14
    scalar = float(scalar) if scalar else 100
    high = verify_series(high, length)
    low = verify_series(low, length)
    offset = get_offset(offset)
    use_talib = talib if isinstance(talib, bool) else True

    if high is None or low is None:
        return

    # Calculate Result
    if Imports["talib"] and use_talib:
        from talib import AROON, AROONOSC
        aroon_down, aroon_up = AROON(high, low, length)
        aroon_osc = AROONOSC(high, low, length)
    else:
        window = length + 1
        periods_since_high = high.rolling(window).apply(recent_maximum_index, raw=True)
        periods_since_low = low.rolling(window).apply(recent_minimum_index, raw=True)

        aroon_up = scalar * (1 - (periods_since_high / length))
        aroon_down = scalar * (1 - (periods_since_low / length))
        aroon_osc = aroon_up - aroon_down

    # Handle fills
    if "fillna" in kwargs:
        for line in (aroon_up, aroon_down, aroon_osc):
            line.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for line in (aroon_up, aroon_down, aroon_osc):
            line.fillna(method=kwargs["fill_method"], inplace=True)

    # Offset
    if offset != 0:
        aroon_up = aroon_up.shift(offset)
        aroon_down = aroon_down.shift(offset)
        aroon_osc = aroon_osc.shift(offset)

    # Name and Categorize it
    aroon_up.name = f"AROONU_{length}"
    aroon_down.name = f"AROOND_{length}"
    aroon_osc.name = f"AROONOSC_{length}"

    for line in (aroon_down, aroon_up, aroon_osc):
        line.category = "trend"

    # Prepare DataFrame to return
    aroondf = DataFrame({
        aroon_down.name: aroon_down,
        aroon_up.name: aroon_up,
        aroon_osc.name: aroon_osc,
    })
    aroondf.name = f"AROON_{length}"
    aroondf.category = aroon_down.category

    return aroondf
Beispiel #18
0
def bbands(close, length=None, std=None, mamode=None, offset=None, **kwargs):
    """Indicator: Bollinger Bands (BBANDS)

    The middle band is ``ma(mamode, close, length)``; the lower and upper
    bands sit ``std * stdev(close, length)`` below and above it, and the
    bandwidth is ``100 * (upper - lower) / mid``. Defaults are length=5,
    std=2.0 and mamode="sma".

    Returns:
        DataFrame with the columns ``BBL_*``, ``BBM_*``, ``BBU_*`` and
        ``BBB_*``, each suffixed with ``{length}_{std}``.
    """
    # Validate arguments
    close = verify_series(close)
    length = int(length) if length and length > 0 else 5
    std = float(std) if std and std > 0 else 2.0
    if not isinstance(mamode, str):
        mamode = "sma"
    offset = get_offset(offset)

    # Calculate Result
    half_width = std * stdev(close=close, length=length)

    mid = ma(mamode, close, length=length, **kwargs)

    lower, upper = mid - half_width, mid + half_width

    bandwidth = 100 * (upper - lower) / mid

    # Offset
    if offset != 0:
        lower = lower.shift(offset)
        mid = mid.shift(offset)
        upper = upper.shift(offset)
        bandwidth = bandwidth.shift(offset)

    outputs = (lower, mid, upper, bandwidth)

    # Handle fills
    if "fillna" in kwargs:
        for series in outputs:
            series.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for series in outputs:
            series.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    _suffix = f"_{length}_{std}"
    lower.name = f"BBL{_suffix}"
    mid.name = f"BBM{_suffix}"
    upper.name = f"BBU{_suffix}"
    bandwidth.name = f"BBB{_suffix}"
    for series in outputs:
        series.category = "volatility"

    # Prepare DataFrame to return
    bbandsdf = DataFrame({series.name: series for series in outputs})
    bbandsdf.name = f"BBANDS{_suffix}"
    bbandsdf.category = mid.category

    return bbandsdf
Beispiel #19
0
def kvo(high,
        low,
        close,
        volume,
        fast=None,
        slow=None,
        signal=None,
        mamode=None,
        drift=None,
        offset=None,
        **kwargs):
    """Indicator: Klinger Volume Oscillator (KVO)

    Signed volume is ``volume * signed_series(hlc3(high, low, close), 1)``.
    KVO is the ``fast`` moving average of it minus the ``slow`` one, and
    the signal line is the ``signal`` moving average of KVO; both inputs
    are cut to start at their first valid index. Defaults are fast=34,
    slow=55, signal=13 and mamode="ema".

    Returns:
        DataFrame with the columns ``KVO_*`` and ``KVOs_*``, or ``None``
        when ``verify_series`` returns ``None`` for any input.
    """
    # Validate arguments
    fast = int(fast) if fast and fast > 0 else 34
    slow = int(slow) if slow and slow > 0 else 55
    signal = int(signal) if signal and signal > 0 else 13
    if mamode and isinstance(mamode, str):
        mamode = mamode.lower()
    else:
        mamode = "ema"
    longest = max(fast, slow, signal)
    high = verify_series(high, longest)
    low = verify_series(low, longest)
    close = verify_series(close, longest)
    volume = verify_series(volume, longest)
    drift = get_drift(drift)
    offset = get_offset(offset)

    if high is None or low is None or close is None or volume is None:
        return

    # Calculate Result
    signed_volume = volume * signed_series(hlc3(high, low, close), 1)
    valid_volume = signed_volume.loc[signed_volume.first_valid_index():, ]
    fast_avg = ma(mamode, valid_volume, length=fast)
    slow_avg = ma(mamode, valid_volume, length=slow)
    oscillator = fast_avg - slow_avg
    valid_oscillator = oscillator.loc[oscillator.first_valid_index():, ]
    signal_line = ma(mamode, valid_oscillator, length=signal)

    # Offset
    if offset != 0:
        oscillator = oscillator.shift(offset)
        signal_line = signal_line.shift(offset)

    outputs = (oscillator, signal_line)

    # Handle fills
    if "fillna" in kwargs:
        for series in outputs:
            series.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for series in outputs:
            series.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    _props = f"_{fast}_{slow}_{signal}"
    oscillator.name = f"KVO{_props}"
    signal_line.name = f"KVOs{_props}"
    for series in outputs:
        series.category = "volume"

    # Prepare DataFrame to return
    df = DataFrame({series.name: series for series in outputs})
    df.name = f"KVO{_props}"
    df.category = oscillator.category

    return df
Beispiel #20
0
def aberration(high,
               low,
               close,
               length=None,
               atr_length=None,
               offset=None,
               **kwargs):
    """Indicator: Aberration (ABER)

    ZG is ``sma(hlc3(high, low, close), length)``; SG and XG are ZG plus
    and minus ``atr(high, low, close, atr_length)``. Defaults are length=5
    and atr_length=15.

    Returns:
        DataFrame with the columns ``ABER_ZG_*``, ``ABER_SG_*``,
        ``ABER_XG_*`` and ``ABER_ATR_*``, each suffixed with
        ``{length}_{atr_length}``.
    """
    # Validate arguments
    high = verify_series(high)
    low = verify_series(low)
    close = verify_series(close)
    length = int(length) if length and length > 0 else 5
    atr_length = int(atr_length) if atr_length and atr_length > 0 else 15
    offset = get_offset(offset)

    # Calculate Result
    atr_ = atr(high=high, low=low, close=close, length=atr_length)
    typical = hlc3(high=high, low=low, close=close)

    zg = sma(typical, length)
    sg, xg = zg + atr_, zg - atr_

    # Offset
    if offset != 0:
        zg = zg.shift(offset)
        sg = sg.shift(offset)
        xg = xg.shift(offset)
        atr_ = atr_.shift(offset)

    outputs = (zg, sg, xg, atr_)

    # Handle fills
    if "fillna" in kwargs:
        for series in outputs:
            series.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for series in outputs:
            series.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    _props = f"_{length}_{atr_length}"
    zg.name = f"ABER_ZG{_props}"
    sg.name = f"ABER_SG{_props}"
    xg.name = f"ABER_XG{_props}"
    atr_.name = f"ABER_ATR{_props}"
    for series in outputs:
        series.category = "volatility"

    # Prepare DataFrame to return
    aberdf = DataFrame({series.name: series for series in outputs})
    aberdf.name = f"ABER{_props}"
    aberdf.category = zg.category

    return aberdf
Beispiel #21
0
def fisher(high, low, length=None, signal=None, offset=None, **kwargs):
    """Indicator: Fisher Transform (FISHT)

    Normalizes ``hl2(high, low)`` to its position inside the rolling
    ``length``-row high/low range, smooths that position recursively and
    accumulates ``0.5 * (log((1 + v) / (1 - v)) + previous result)``. The
    signal line is the result shifted by ``signal`` rows.

    Args:
        high, low: Price series, passed through ``verify_series`` together
            with ``max(length, signal)``.
        length: Rolling window size; 9 when missing or not positive.
        signal: Shift used for the signal line; 1 when missing or not
            positive.
        offset: Resolved by ``get_offset``; when non-zero both outputs are
            shifted by it.
        **kwargs: ``fillna`` and ``fill_method`` are forwarded to
            ``Series.fillna``.

    Returns:
        DataFrame with the columns ``FISHERT_{length}_{signal}`` and
        ``FISHERTs_{length}_{signal}``, or ``None`` when ``verify_series``
        returns ``None`` for either input.
    """
    # Validate Arguments
    length = int(length) if length and length > 0 else 9
    signal = int(signal) if signal and signal > 0 else 1
    _length = max(length, signal)
    high = verify_series(high, _length)
    low = verify_series(low, _length)
    offset = get_offset(offset)

    if high is None or low is None: return

    # Calculate Result
    hl2_ = hl2(high, low)
    highest_hl2 = hl2_.rolling(length).max()
    lowest_hl2 = hl2_.rolling(length).min()

    hlr = high_low_range(highest_hl2, lowest_hl2)
    # Floor the range at 0.001 so the division below never uses a smaller
    # (or zero) denominator.
    hlr[hlr < 0.001] = 0.001

    position = ((hl2_ - lowest_hl2) / hlr) - 0.5

    v = 0
    m = high.size
    # Seed: NaN for the first `length - 1` rows, then 0, so `result` holds
    # exactly `length` entries when the loop starts at i == length.
    result = [npNaN for _ in range(0, length - 1)] + [0]
    for i in range(length, m):
        # Recursive smoothing of the normalized position.
        v = 0.66 * position.iloc[i] + 0.67 * v
        # Clamp v to +/-0.999 once it leaves [-0.99, 0.99]; this keeps
        # (1 + v) / (1 - v) positive and finite for the log below.
        if v < -0.99: v = -0.999
        if v > 0.99: v = 0.999
        # Each value builds on the previous entry of `result`.
        result.append(0.5 * (nplog((1 + v) / (1 - v)) + result[i - 1]))
    fisher = Series(result, index=high.index)
    signalma = fisher.shift(signal)

    # Offset
    if offset != 0:
        fisher = fisher.shift(offset)
        signalma = signalma.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        fisher.fillna(kwargs["fillna"], inplace=True)
        signalma.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        fisher.fillna(method=kwargs["fill_method"], inplace=True)
        signalma.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    _props = f"_{length}_{signal}"
    fisher.name = f"FISHERT{_props}"
    signalma.name = f"FISHERTs{_props}"
    fisher.category = signalma.category = "momentum"

    # Prepare DataFrame to return
    data = {fisher.name: fisher, signalma.name: signalma}
    df = DataFrame(data)
    df.name = f"FISHERT{_props}"
    df.category = fisher.category

    return df
def pvo(volume, fast=None, slow=None, signal=None, scalar=None, offset=None,
        **kwargs):
    """Indicator: Percentage Volume Oscillator (PVO)

    The oscillator is the scaled difference between a fast and a slow EMA of
    volume, divided by the slow EMA. A signal line (EMA of the oscillator)
    and a histogram (oscillator minus signal) are returned alongside it.

    Args:
        volume: Series of volumes.
        fast: Fast EMA length. Default: 12
        slow: Slow EMA length. Default: 26
        signal: Signal EMA length. Default: 9
        scalar: Multiplier applied to the EMA difference. Default: 100
        offset: Bars to shift the results by (normalised by ``get_offset``).

    Kwargs:
        fillna: Value passed to ``fillna`` on every output.
        fill_method: Fill method passed to ``fillna`` on every output.

    Returns:
        DataFrame with the PVO, PVOh and PVOs columns, or None when
        ``verify_series`` returns None for ``volume``.
    """
    # Validate Arguments
    fast = 12 if not (fast and fast > 0) else int(fast)
    slow = 26 if not (slow and slow > 0) else int(slow)
    signal = 9 if not (signal and signal > 0) else int(signal)
    scalar = 100 if not scalar else float(scalar)
    if slow < fast:
        # Make sure "fast" is always the shorter of the two lengths.
        fast, slow = slow, fast
    volume = verify_series(volume, max(fast, slow, signal))
    offset = get_offset(offset)

    if volume is None:
        return None

    # Calculate Result
    fast_ema = ema(volume, length=fast)
    slow_ema = ema(volume, length=slow)
    oscillator = scalar * (fast_ema - slow_ema) / slow_ema

    signal_line = ema(oscillator, length=signal)
    hist = oscillator - signal_line

    # Offset
    if offset != 0:
        oscillator = oscillator.shift(offset)
        hist = hist.shift(offset)
        signal_line = signal_line.shift(offset)

    lines = (oscillator, hist, signal_line)

    # Handle fills
    if "fillna" in kwargs:
        for line in lines:
            line.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for line in lines:
            line.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    suffix = f"_{fast}_{slow}_{signal}"
    oscillator.name = f"PVO{suffix}"
    hist.name = f"PVOh{suffix}"
    signal_line.name = f"PVOs{suffix}"
    for line in lines:
        line.category = "momentum"

    # Prepare DataFrame to return
    df = DataFrame({line.name: line for line in lines})
    df.name = oscillator.name
    df.category = oscillator.category

    return df
Beispiel #23
0
    def _gather_frames_column(self, key):
        results = {}
        for name, df in self.frames.items():
            results[name] = df[key]

        df = DataFrame(results)
        df.name = key
        return df
Beispiel #24
0
    def _gather_frames_column(self, key):
        results = {}
        for name, df in self.frames.items():
            results[name] = df[key]

        df = DataFrame(results)
        df.name = key
        return df
Beispiel #25
0
    def get_pandas_df(self):
        """Return the mass spectrum data as a DataFrame.

        Columns are ``self.columns_label`` followed by the atoms reported by
        ``get_all_used_atoms_in_order``; rows come from
        ``get_list_dict_data``. The frame's ``name`` attribute is set to
        ``self.output_file``.
        """
        base_labels = self.columns_label
        atom_labels = self.get_all_used_atoms_in_order(self.mass_spectrum)
        rows = self.get_list_dict_data(self.mass_spectrum)

        frame = DataFrame(rows, columns=base_labels + atom_labels)
        frame.name = self.output_file
        return frame
Beispiel #26
0
def adx(high, low, close, length=None, drift=None, offset=None, **kwargs):
    """Indicator: ADX

    Derives the positive and negative directional movement lines (DMP, DMN)
    from the high/low changes over ``drift`` bars, scales their ``rma`` by
    ``100 / atr`` and smooths the resulting directional index into ADX.

    Args:
        high: Series of highs.
        low: Series of lows.
        close: Series of closes.
        length: Period used for ``atr`` and every ``rma``. Default: 14
        drift: Difference period (normalised by ``get_drift``).
        offset: Bars to shift the results by (normalised by ``get_offset``).

    Kwargs:
        fillna: Value passed to ``fillna`` on every output.
        fill_method: Fill method passed to ``fillna`` on every output.

    Returns:
        DataFrame with the ADX, DMP and DMN columns.
    """
    # Validate Arguments
    high, low, close = (verify_series(high), verify_series(low),
                        verify_series(close))
    if not (length and length > 0):
        length = 14
    drift = get_drift(drift)
    offset = get_offset(offset)

    # Calculate Result
    true_range_avg = atr(high=high, low=low, close=close, length=length)

    up = high - high.shift(drift)
    dn = low.shift(drift) - low

    # Keep a move only when it is positive and larger than the opposite move.
    pos = (((up > dn) & (up > 0)) * up).apply(zero)
    neg = (((dn > up) & (dn > 0)) * dn).apply(zero)

    dmp = (100 / true_range_avg) * rma(close=pos, length=length)
    dmn = (100 / true_range_avg) * rma(close=neg, length=length)

    dx = 100 * (dmp - dmn).abs() / (dmp + dmn)
    adx_line = rma(close=dx, length=length)

    # Offset
    if offset != 0:
        dmp = dmp.shift(offset)
        dmn = dmn.shift(offset)
        adx_line = adx_line.shift(offset)

    lines = (adx_line, dmp, dmn)

    # Handle fills
    if 'fillna' in kwargs:
        for line in lines:
            line.fillna(kwargs['fillna'], inplace=True)
    if 'fill_method' in kwargs:
        for line in lines:
            line.fillna(method=kwargs['fill_method'], inplace=True)

    # Name and Categorize it
    adx_line.name = f"ADX_{length}"
    dmp.name = f"DMP_{length}"
    dmn.name = f"DMN_{length}"
    for line in lines:
        line.category = 'trend'

    # Prepare DataFrame to return
    adxdf = DataFrame({line.name: line for line in lines})
    adxdf.name = f"ADX_{length}"
    adxdf.category = 'trend'

    return adxdf
Beispiel #27
0
def donchian(high,
             low,
             lower_length=None,
             upper_length=None,
             offset=None,
             **kwargs):
    """Indicator: Donchian Channels (DC)

    The lower band is the rolling minimum of ``low``, the upper band the
    rolling maximum of ``high`` and the mid band their average.

    Args:
        high: Series of highs.
        low: Series of lows.
        lower_length: Window of the rolling minimum. Default: 20
        upper_length: Window of the rolling maximum. Default: 20
        offset: Bars to shift the results by (normalised by ``get_offset``).

    Kwargs:
        lower_min_periods: ``min_periods`` of the lower window.
            Default: ``lower_length``
        upper_min_periods: ``min_periods`` of the upper window.
            Default: ``upper_length``
        fillna: Value passed to ``fillna`` on every band.
        fill_method: Fill method passed to ``fillna`` on every band.

    Returns:
        DataFrame with the DCL, DCM and DCU columns.
    """
    # Validate arguments
    high = verify_series(high)
    low = verify_series(low)
    lower_length = int(
        lower_length) if lower_length and lower_length > 0 else 20
    upper_length = int(
        upper_length) if upper_length and upper_length > 0 else 20

    lower_min_periods = kwargs.get('lower_min_periods')
    lower_min_periods = (lower_length if lower_min_periods is None
                         else int(lower_min_periods))
    upper_min_periods = kwargs.get('upper_min_periods')
    upper_min_periods = (upper_length if upper_min_periods is None
                         else int(upper_min_periods))
    offset = get_offset(offset)

    # Calculate Result
    lower = low.rolling(lower_length, min_periods=lower_min_periods).min()
    upper = high.rolling(upper_length, min_periods=upper_min_periods).max()
    mid = 0.5 * (lower + upper)

    # Offset
    # Shift BEFORE filling so that the NaNs introduced by the shift are
    # covered by the requested fill as well.
    if offset != 0:
        lower = lower.shift(offset)
        mid = mid.shift(offset)
        upper = upper.shift(offset)

    # Handle fills
    if 'fillna' in kwargs:
        lower.fillna(kwargs['fillna'], inplace=True)
        mid.fillna(kwargs['fillna'], inplace=True)
        upper.fillna(kwargs['fillna'], inplace=True)
    if 'fill_method' in kwargs:
        lower.fillna(method=kwargs['fill_method'], inplace=True)
        mid.fillna(method=kwargs['fill_method'], inplace=True)
        upper.fillna(method=kwargs['fill_method'], inplace=True)

    # Name and Categorize it
    lower.name = f"DCL_{lower_length}_{upper_length}"
    mid.name = f"DCM_{lower_length}_{upper_length}"
    upper.name = f"DCU_{lower_length}_{upper_length}"
    mid.category = upper.category = lower.category = 'volatility'

    # Prepare DataFrame to return
    data = {lower.name: lower, mid.name: mid, upper.name: upper}
    dcdf = DataFrame(data)
    dcdf.name = f"DC_{lower_length}_{upper_length}"
    dcdf.category = 'volatility'

    return dcdf
Beispiel #28
0
def amat(close=None, fast=None, slow=None, mamode=None, lookback=None, offset=None, **kwargs):
    """Indicator: Archer Moving Averages Trends (AMAT)

    Computes a fast and a slow moving average of ``close`` and feeds them to
    ``long_run`` and ``short_run`` over ``lookback`` bars.

    Args:
        close: Series of closes.
        fast: Fast moving average length. Default: 8
        slow: Slow moving average length. Default: 21
        mamode: One of "hma", "linreg", "rma", "sma", "wma" or "ema"
            (case-insensitive). Any other value uses "ema". Default: "ema"
        lookback: Length handed to ``long_run``/``short_run``. Default: 2
        offset: Bars to shift the results by (normalised by ``get_offset``).

    Kwargs:
        fillna: Value passed to ``fillna`` on both outputs.
        fill_method: Fill method passed to ``fillna`` on both outputs.
        All keyword arguments are also forwarded to the moving average.

    Returns:
        DataFrame with the long-run and short-run columns.
    """
    # Validate Arguments
    close = verify_series(close)
    fast = 8 if not (fast and fast > 0) else int(fast)
    slow = 21 if not (slow and slow > 0) else int(slow)
    lookback = 2 if not (lookback and lookback > 0) else int(lookback)
    mamode = "ema" if not mamode else mamode.lower()
    offset = get_offset(offset)

    # Calculate Result
    averages = {
        "hma": hma,
        "linreg": linreg,
        "rma": rma,
        "sma": sma,
        "wma": wma,
    }
    average = averages.get(mamode, ema)  # unknown modes fall back to "ema"
    fast_ma = average(close=close, length=fast, **kwargs)
    slow_ma = average(close=close, length=slow, **kwargs)

    mas_long = long_run(fast_ma, slow_ma, length=lookback)
    mas_short = short_run(fast_ma, slow_ma, length=lookback)

    # Offset
    if offset != 0:
        mas_long = mas_long.shift(offset)
        mas_short = mas_short.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        for trend in (mas_long, mas_short):
            trend.fillna(kwargs["fillna"], inplace=True)

    if "fill_method" in kwargs:
        for trend in (mas_long, mas_short):
            trend.fillna(method=kwargs["fill_method"], inplace=True)

    # Prepare DataFrame to return
    columns = {}
    columns[f"AMAT_{mas_long.name}"] = mas_long
    columns[f"AMAT_{mas_short.name}"] = mas_short
    amatdf = DataFrame(columns)

    # Name and Categorize it
    amatdf.name = f"AMAT_{mamode.upper()}_{fast}_{slow}_{lookback}"
    amatdf.category = "trend"

    return amatdf
Beispiel #29
0
def accbands(high, low, close, length=None, c=None, drift=None, mamode=None, offset=None, **kwargs):
    """Indicator: Acceleration Bands (ACCBANDS)

    The raw bands are ``low * (1 - r)`` and ``high * (1 + r)`` with
    ``r = c * (high - low) / (high + low)``. The raw bands and ``close`` are
    then averaged over ``length`` bars to give the lower, upper and mid lines.

    Args:
        high: Series of highs.
        low: Series of lows.
        close: Series of closes.
        length: Averaging window. Default: 20
        c: Multiplier applied to the high/low ratio. Default: 4
        drift: Normalised by ``get_drift`` but not used in the calculation.
        mamode: 'ema' for an exponentially weighted mean; 'sma' (the
            default) or any unrecognised value for a simple rolling mean.
        offset: Bars to shift the results by (normalised by ``get_offset``).

    Kwargs:
        min_periods: ``min_periods`` of the averaging window.
            Default: ``length``
        fillna: Value passed to ``fillna`` on every band.
        fill_method: Fill method passed to ``fillna`` on every band.

    Returns:
        DataFrame with the ACCBL, ACCBM and ACCBU columns.
    """
    # Validate arguments
    high = verify_series(high)
    low = verify_series(low)
    close = verify_series(close)
    high_low_range = non_zero_range(high, low)
    length = int(length) if length and length > 0 else 20
    c = float(c) if c and c > 0 else 4
    min_periods = kwargs.get('min_periods')
    min_periods = length if min_periods is None else int(min_periods)
    mamode = mamode.lower() if mamode else 'sma'
    drift = get_drift(drift)
    offset = get_offset(offset)

    # Calculate Result
    hl_ratio = high_low_range / (high + low)
    hl_ratio *= c
    _lower = low * (1 - hl_ratio)
    _upper = high * (1 + hl_ratio)

    if mamode == 'ema':
        lower = _lower.ewm(span=length, min_periods=min_periods).mean()
        mid = close.ewm(span=length, min_periods=min_periods).mean()
        upper = _upper.ewm(span=length, min_periods=min_periods).mean()
    else:
        # 'sma', and also the fallback for unrecognised modes: previously an
        # unknown mamode left the bands undefined and raised further down.
        lower = _lower.rolling(length, min_periods=min_periods).mean()
        mid = close.rolling(length, min_periods=min_periods).mean()
        upper = _upper.rolling(length, min_periods=min_periods).mean()

    # Offset
    if offset != 0:
        lower = lower.shift(offset)
        mid = mid.shift(offset)
        upper = upper.shift(offset)

    # Handle fills
    if 'fillna' in kwargs:
        lower.fillna(kwargs['fillna'], inplace=True)
        mid.fillna(kwargs['fillna'], inplace=True)
        upper.fillna(kwargs['fillna'], inplace=True)
    if 'fill_method' in kwargs:
        lower.fillna(method=kwargs['fill_method'], inplace=True)
        mid.fillna(method=kwargs['fill_method'], inplace=True)
        upper.fillna(method=kwargs['fill_method'], inplace=True)

    # Name and Categorize it
    lower.name = f"ACCBL_{length}"
    mid.name = f"ACCBM_{length}"
    upper.name = f"ACCBU_{length}"
    mid.category = upper.category = lower.category = 'volatility'

    # Prepare DataFrame to return
    data = {lower.name: lower, mid.name: mid, upper.name: upper}
    accbandsdf = DataFrame(data)
    accbandsdf.name = f"ACCBANDS_{length}"
    accbandsdf.category = 'volatility'

    return accbandsdf
Beispiel #30
0
def cdl_z(open_, high, low, close, length=None, full=None, ddof=None,
          offset=None, **kwargs):
    """Candle Type: Z Score

    Applies ``zscore`` to each of the open, high, low and close series.

    Args:
        open_: Series of opens.
        high: Series of highs.
        low: Series of lows.
        close: Series of closes.
        length: Z-score window. Default: 30
        full: When truthy, the window becomes the size of ``close`` and the
            result is back-filled. Default: False
        ddof: Delta degrees of freedom; must satisfy 0 <= ddof < length.
            Default: 1
        offset: Bars to shift the results by (normalised by ``get_offset``).

    Kwargs:
        fillna: Value passed to ``fillna`` on the result.
        fill_method: Fill method passed to ``fillna`` on the result.

    Returns:
        DataFrame with one z-score column per input, or None when
        ``verify_series`` returns None for any input.
    """
    # Validate Arguments
    length = 30 if not (length and length > 0) else int(length)
    ddof = int(ddof) if (ddof and 0 <= ddof < length) else 1
    open_ = verify_series(open_, length)
    high = verify_series(high, length)
    low = verify_series(low, length)
    close = verify_series(close, length)
    offset = get_offset(offset)
    full = bool(full)

    candles = (("open", open_), ("high", high), ("low", low), ("close", close))
    if any(series is None for _, series in candles):
        return None

    # Calculate Result
    if full:
        length = close.size
        suffix = "a"
    else:
        suffix = f"_{length}_{ddof}"

    df = DataFrame({
        f"{label}_Z{suffix}": zscore(series, length=length, ddof=ddof)
        for label, series in candles
    })

    if full:
        df.fillna(method="backfill", axis=0, inplace=True)

    # Offset
    if offset != 0:
        df = df.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        df.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        df.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    df.name = f"CDL_Z{suffix}"
    df.category = "candles"

    return df
def stoch(high, low, close, k=None, d=None, smooth_k=None, offset=None,
          **kwargs):
    """Indicator: Stochastic Oscillator (STOCH)

    The raw oscillator is ``100 * (close - lowest low) / (highest high -
    lowest low)`` over ``k`` bars. %K is its SMA over ``smooth_k`` bars and
    %D is the SMA of %K over ``d`` bars.

    Args:
        high: Series of highs.
        low: Series of lows.
        close: Series of closes.
        k: Look-back window of the rolling extremes. Default: 14
        d: SMA length of the %D line. Default: 3
        smooth_k: SMA length of the %K line. Default: 3
        offset: Bars to shift the results by (normalised by ``get_offset``).

    Kwargs:
        fillna: Value passed to ``fillna`` on both outputs.
        fill_method: Fill method passed to ``fillna`` on both outputs.

    Returns:
        DataFrame with the STOCHk and STOCHd columns, or None when
        ``verify_series`` returns None for any input.
    """
    # Validate arguments
    if not (k and k > 0):
        k = 14
    if not (d and d > 0):
        d = 3
    if not (smooth_k and smooth_k > 0):
        smooth_k = 3
    min_bars = max(k, d, smooth_k)
    high = verify_series(high, min_bars)
    low = verify_series(low, min_bars)
    close = verify_series(close, min_bars)
    offset = get_offset(offset)

    if high is None or low is None or close is None:
        return None

    # Calculate Result
    lowest_low = low.rolling(k).min()
    highest_high = high.rolling(k).max()

    raw = 100 * (close - lowest_low) / non_zero_range(highest_high, lowest_low)

    k_line = sma(raw, length=smooth_k)
    d_line = sma(k_line, length=d)

    # Offset
    if offset != 0:
        k_line = k_line.shift(offset)
        d_line = d_line.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        for line in (k_line, d_line):
            line.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        for line in (k_line, d_line):
            line.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    base = "STOCH"
    suffix = f"_{k}_{d}_{smooth_k}"
    k_line.name = f"{base}k{suffix}"
    d_line.name = f"{base}d{suffix}"
    for line in (k_line, d_line):
        line.category = "momentum"

    # Prepare DataFrame to return
    df = DataFrame({k_line.name: k_line, d_line.name: d_line})
    df.name = f"{base}{suffix}"
    df.category = k_line.category

    return df
Beispiel #32
0
def kvo(high, low, close, volume, fast=None, slow=None, length_sig=None, mamode=None, drift=None, offset=None, **kwargs):
    """Indicator: Klinger Volume Oscillator (KVO)

    Builds a volume force from the volume, the sign of the ``hlc3`` change
    over ``drift`` bars and the cumulative high-low range within the current
    trend. KVO is the difference between a fast and a slow moving average of
    that force; the signal line is a moving average of KVO.

    Args:
        high: Series of highs.
        low: Series of lows.
        close: Series of closes.
        volume: Series of volumes.
        fast: Fast moving average length. Default: 34
        slow: Slow moving average length. Default: 55
        length_sig: Signal moving average length. Default: 13
        mamode: Moving average name handed to ``ma``. Default: "ema"
        drift: Difference period (normalised by ``get_drift``).
        offset: Bars to shift the results by (normalised by ``get_offset``).

    Kwargs:
        fillna: Value passed to ``fillna`` on both outputs.
        fill_method: Fill method passed to ``fillna`` on both outputs.

    Returns:
        DataFrame with the KVO and KVOSig columns, or None when
        ``verify_series`` returns None for any input.
    """
    # Validate arguments
    fast = int(fast) if fast and fast > 0 else 34
    slow = int(slow) if slow and slow > 0 else 55
    length_sig = int(length_sig) if length_sig and length_sig > 0 else 13
    mamode = mamode.lower() if mamode and isinstance(mamode, str) else "ema"
    _length = max(fast, slow, length_sig)
    high = verify_series(high, _length)
    low = verify_series(low, _length)
    close = verify_series(close, _length)
    volume = verify_series(volume, _length)
    drift = get_drift(drift)
    offset = get_offset(offset)

    if high is None or low is None or close is None or volume is None: return

    # Calculate Result
    mom = hlc3(high, low, close).diff(drift)
    trend = npWhere(mom > 0, 1, 0) + npWhere(mom < 0, -1, 0)
    dm = non_zero_range(high, low)

    # Walk the rows by POSITION. Indexing the Series with ``dm[i]`` is a
    # label lookup, which picks the wrong rows (or raises KeyError) when the
    # index holds integers other than 0..m-1.
    dm_values = dm.values
    m = high.size
    cm = [0] * m
    for i in range(1, m):
        if trend[i] == trend[i - 1]:
            # Same trend: keep accumulating the range.
            cm[i] = cm[i - 1] + dm_values[i]
        else:
            # Trend flipped: restart from the previous and current range.
            cm[i] = dm_values[i - 1] + dm_values[i]

    vf = 100 * volume * trend * abs(2 * dm / cm - 1)

    kvo = ma(mamode, vf, length=fast) - ma(mamode, vf, length=slow)
    kvo_signal = ma(mamode, kvo, length=length_sig)

    # Offset
    if offset != 0:
        kvo = kvo.shift(offset)
        kvo_signal = kvo_signal.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        kvo.fillna(kwargs["fillna"], inplace=True)
        kvo_signal.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        kvo.fillna(method=kwargs["fill_method"], inplace=True)
        kvo_signal.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    kvo.name = f"KVO_{fast}_{slow}"
    kvo_signal.name = f"KVOSig_{length_sig}"
    kvo.category = kvo_signal.category = "volume"

    # Prepare DataFrame to return
    data = {kvo.name: kvo, kvo_signal.name: kvo_signal}
    kvoandsig = DataFrame(data)
    kvoandsig.name = f"KVO_{fast}_{slow}_{length_sig}"
    kvoandsig.category = kvo.category

    return kvoandsig
Beispiel #33
0
	def test_get_columns(self):
		"""Column labels and Series names come back from _get_column_names as strings."""
		frame = DataFrame([[1, 0], [2, 0], [3, 0]], columns = [1, 2])
		int_labels = _get_column_names(frame).tolist()
		self.assertEqual(["1", "2"], int_labels)

		# Same frame, float column labels.
		frame.columns = numpy.asarray([1.0, 2.0])
		float_labels = _get_column_names(frame).tolist()
		self.assertEqual(["1.0", "2.0"], float_labels)

		# A Series is identified by its name rather than by column labels.
		column = Series([1, 2, 3], name = 1)
		int_name = _get_column_names(column).tolist()
		self.assertEqual("1", int_name)

		column.name = 1.0
		float_name = _get_column_names(column).tolist()
		self.assertEqual("1.0", float_name)
Beispiel #34
0
    def _init_dataframe(self, df, name=None):
        name = name or df.name
        self.columns = [name]
        if name is None:
            raise Exception("need a name for df")

        for col, series in df.iteritems():
            frame = DataFrame({name: series})
            frame.name = col
            self.frames[col] = frame
Beispiel #35
0
    def _gather_column(self, key):
        if key in self._cache:
            return self._cache[key]

        results = {}
        for name, df in self.frames.iteritems():
            results[name] = df[key]

        df = DataFrame(results)
        df.name = key
        self._cache[key] = df
        return df
Beispiel #36
0
    def test_common_start_returns(self, before, after, mean_by_date, demeaned,
                                  expected_vals):
        """Compare the mean/std of ``common_start_returns`` with ``expected_vals``.

        Prices cover 17 dates for four tickers; row k (1-based) holds each
        ticker's fixed rate raised to the k-th power. The factor covers 9 of
        those dates with the same values on every date.
        """
        tickers = ['A', 'B', 'C', 'D']
        rates = (1.20, 1.40, 0.90, 0.80)

        price_dates = date_range(start='2015-1-17', end='2015-2-2')
        price_dates.name = 'date'
        price_rows = [[rate**power for rate in rates]
                      for power in range(1, 18)]
        prices = DataFrame(index=price_dates, columns=tickers,
                           data=price_rows)

        factor_dates = date_range(start='2015-1-21', end='2015-1-29')
        factor_rows = [[3, 4, 2, 1] for _ in factor_dates]
        factor = DataFrame(index=factor_dates, columns=tickers,
                           data=factor_rows).stack()
        factor.index = factor.index.set_names(['date', 'asset'])
        factor.name = 'factor'

        demean_by = factor if demeaned else None
        returns = common_start_returns(
            factor,
            prices,
            before,
            after,
            False,
            mean_by_date,
            demean_by)
        summary = DataFrame({'mean': returns.mean(axis=1),
                             'std': returns.std(axis=1)})

        expected = DataFrame(index=range(-before, after + 1),
                             columns=['mean', 'std'], data=expected_vals)
        assert_frame_equal(summary, expected)
Beispiel #37
0
def compute_one(t, df, **kwargs):
    """Evaluate the grouped expression ``t`` against the DataFrame ``df``.

    ``t.grouper`` selects the grouping key(s) and ``t.apply`` describes what
    to compute per group (a ``Summary`` of several reductions or a single
    ``Reduction``).

    Returns a DataFrame with the index reset and columns set to
    ``t.columns``.

    NOTE(review): ``grouper`` is only bound when one of the two grouper
    branches matches, and ``result`` only when ``t.apply`` is a ``Summary``
    or a ``Reduction``; other inputs appear to fail with a NameError further
    down -- confirm whether callers can reach that case.
    """
    # Resolve the grouping key: a computed column, or a list of column names.
    if t.grouper.iscolumn:
        grouper = compute(t.grouper, {t.child: df}) # a Series
    elif isinstance(t.grouper, Projection) and t.grouper.child is t.child:
        grouper = t.grouper.columns  # list of column names

    if isinstance(t.apply, Summary):
        names = t.apply.names
        # Compute the input of every reduction up front, one column per name.
        preapply = DataFrame(dict(zip(
            names,
            [compute(v.child, {t.child: df}) for v in t.apply.values])))

        df2 = concat_nodup(df, preapply)

        groups = df2.groupby(grouper)

        # Map each output name to the Series method named by the reduction's
        # symbol, in the list-per-column form passed to ``agg`` below.
        d = defaultdict(list)
        for name, v in zip(names, t.apply.values):
            d[name].append(getattr(Series, v.symbol))

        result = groups.agg(dict(d))

        # Rearrange columns to match names order
        # (the lambda's ``t`` is a column key and shadows the outer ``t``
        # only inside the lambda)
        result = result[sorted(list(result.columns),
                               key=lambda t: names.index(t[0]))]
        result.columns = t.apply.names  # flatten down multiindex

    if isinstance(t.apply, Reduction):
        names = t.apply.dshape[0].names
        preapply = compute(t.apply.child, {t.child: df})
        # Pandas and Blaze column naming schemes differ
        # Coerce DataFrame column names to match Blaze's names
        preapply = preapply.copy()
        if isinstance(preapply, Series):
            preapply.name = names[0]
        else:
            preapply.columns = names

        df2 = concat_nodup(df, preapply)

        # Select a single column or a list of columns from the groups,
        # depending on whether the reduction's child is a column.
        if t.apply.child.iscolumn:
            groups = df2.groupby(grouper)[names[0]]
        else:
            groups = df2.groupby(grouper)[names]

        result = compute_one(t.apply, groups) # do reduction

    # Turn the group keys back into ordinary columns.
    result = DataFrame(result).reset_index()
    result.columns = t.columns
    return result
Beispiel #38
0
def collector2df(collector, station, sos_name):
    """Request CSV response from SOS and convert to Pandas DataFrames.

    Points ``collector`` at ``station`` and ``sos_name``, requests the raw
    "text/csv" response and parses it with "date_time" as the index. When the
    request raises ``ExceptionReport`` the error is printed and an empty
    DataFrame is used instead. The frame's ``name`` attribute is set to the
    station's long name.
    """
    collector.features, collector.variables = [station], [sos_name]

    station_label = get_station_longName(station)
    try:
        csv_text = collector.raw(responseFormat="text/csv")
        csv_buffer = BytesIO(csv_text.encode("utf-8"))
        frame = read_csv(csv_buffer, parse_dates=True, index_col="date_time")
    except ExceptionReport as err:
        print(str(err))
        frame = DataFrame()  # Empty placeholder for a failed request.

    frame.name = station_label
    return frame
Beispiel #39
0
def ForITOL(H):
    """Build the iTOL annotation tables from the results stored in ``H``.

    The ``TurnOver`` values of ``H["MIByBranch"]["I(Ti,G)"]`` are cut into
    ``NBINS`` intervals and each interval gets a colour: taken from
    matplotlib's 'YlOrRd' colormap, or from a hard-coded 10-colour palette
    when matplotlib cannot be imported.

    Side effects on ``H``: a ``("I(Ti,G)", "Color")`` column is added to
    ``H["MIByBranch"]``, which is then replaced by a copy whose last column
    is moved to position 4.

    Returns:
        Tuple ``(XITOL, HIST, H, categories)``: the "range" and "clade" rows
        with columns mode/color/label; the leaf frequencies of
        "By Group Relative Frequency" multiplied by the first "Total Counts"
        value and cast to int; the updated ``H``; and the list of bin
        categories.

    Raises:
        ImportError: if matplotlib is unavailable and ``NBINS`` is not 10.
    """
    NBINS=10
    values, bins=cut(H["MIByBranch"]["I(Ti,G)"].TurnOver,bins=NBINS, retbins=True)
    try:
        from matplotlib import pyplot as plt
        import matplotlib
    except ImportError:
        if NBINS==10:
            zz=Series(["#FFF1A9", "#FEE187", "#FECA66", "#FEAB49", "#FD8C3C","#FC5B2E","#ED2E21", "#D41020", "#B00026", "#800026"], index=values.cat.categories)
        else: raise ImportError
    else:
        cm = plt.get_cmap('YlOrRd')
        z=arange(1,(NBINS+1),1)/float(NBINS)
        zz=Series([matplotlib.colors.rgb2hex(x).upper() for x in cm(z)], index=values.cat.categories)
    XITOL=DataFrame({"branch name":list(values.index.get_level_values("Name")), "mode":"range","label":list(values.values), "color":zz[values]})
    H["MIByBranch"].loc[:,("I(Ti,G)","Color")]=zz[values].values
    L=len(H["MIByBranch"].columns)
    # Move the freshly added last column to position 4. The column order is
    # built from explicit lists: summing ``range`` objects into a list only
    # works on Python 2 and raises TypeError on Python 3.
    column_order=list(range(4))+[L-1]+list(range(4,L-1))
    H["MIByBranch"]=H["MIByBranch"].iloc[:,column_order]
    XITOL.set_index("branch name",inplace=True)
    # "(a, b]" -> "a_to_b": join the two bounds, then drop the brackets.
    XITOL["label"]=["_to_".join(x.split(", "))[1:-1] for x in XITOL["label"]]
    # MultTest*1 turns the boolean flags into 0/1 positions into the arrays.
    label=numpy.array(["NotSignificant","Significant"])[(H["MIByBranch"]["I(Ti,G)"].MultTest*1).values]
    color=numpy.array(["#000000","#00FFFF"])[(H["MIByBranch"]["I(Ti,G)"].MultTest*1).values]
    XITOLbis=DataFrame({"mode":"clade","label":label, "color":color}, index=list(values.index.get_level_values("Name")) )
    XITOLbis.name="branch name"
    # NOTE(review): DataFrame.append was removed in pandas 2.0 -- confirm the
    # pandas version this runs against.
    XITOL=XITOL.append(XITOLbis)
    XITOL=XITOL[[  "mode", "color","label"]]
    Pie=H["MIByBranch"]["By Group Relative Frequency"].query("Is_Leaf==True")
    color=spacedColors(Pie.shape[1])
    Pie.index=Pie.index.get_level_values("Name")
    # NOTE(review): the ``labels=`` keyword of MultiIndex belongs to older
    # pandas releases -- confirm the pandas version this runs against.
    Pie.columns=MultiIndex(levels=[[Pie.columns],[color]],labels=[range(Pie.shape[1])]*2, names=["LABELS","COLORS"])
    Pie.index.name=""
    #Transform in integer to do not upset ITOL
    HIST=(Pie*H["counts"].index.get_level_values("Total Counts")[0]).astype(int)
    return XITOL, HIST,H,values.cat.categories.tolist()
Beispiel #40
0
def collector2df(collector, station, sos_name, provider='COOPS'):
    """Request CSV response from SOS and convert to Pandas DataFrames.

    Points ``collector`` at ``station`` and ``sos_name``, requests the raw
    "text/csv" response, parses it with 'date_time' as the index and copies
    the 'sea_water_temperature (C)' column into 'Observed Data'. When an
    ``ExceptionReport`` is raised the error is printed and an empty
    DataFrame is used instead. The frame's ``name`` attribute is set to the
    long name looked up for ``station`` and ``provider``.
    """
    collector.features, collector.variables = [station], [sos_name]

    station_label = get_station_longName(station, provider)
    try:
        csv_text = collector.raw(responseFormat="text/csv")
        csv_buffer = BytesIO(csv_text.encode('utf-8'))
        frame = read_csv(csv_buffer, parse_dates=True, index_col='date_time')
        frame['Observed Data'] = frame['sea_water_temperature (C)']
    except ExceptionReport as err:
        print(str(err))
        frame = DataFrame()  # Empty placeholder for a failed request.

    frame.name = station_label
    return frame
Beispiel #41
0
def coops2df(collector, coops_id, sos_name):
    """Request CSV response from SOS and convert to Pandas DataFrames.

    Points ``collector`` at ``coops_id`` and ``sos_name``, requests the raw
    "text/csv" response, parses it with 'date_time' as the index and copies
    the 'wind_speed (m/s)' column into 'Observed Data'. When an
    ``ExceptionReport`` is raised a warning is issued and an empty DataFrame
    is used instead. The frame's ``name`` attribute is set to the station's
    long name.
    """
    collector.features, collector.variables = [coops_id], [sos_name]
    station_label = get_Coops_longName(coops_id)

    try:
        csv_text = collector.raw(responseFormat="text/csv")
        csv_buffer = BytesIO(csv_text.encode('utf-8'))
        frame = read_csv(csv_buffer, parse_dates=True, index_col='date_time')
        frame['Observed Data'] = frame['wind_speed (m/s)']
    except ExceptionReport as err:
        warn("Station %s is not NAVD datum. %s" % (station_label, err))
        frame = DataFrame()  # Empty placeholder for a failed request.

    frame.name = station_label
    return frame
Beispiel #42
0
def editor(interrogation, 
           operation=None,
           denominator=False,
           sort_by=False,
           keep_stats=False,
           keep_top=False,
           just_totals=False,
           threshold='medium',
           just_entries=False,
           skip_entries=False,
           merge_entries=False,
           just_subcorpora=False,
           skip_subcorpora=False,
           span_subcorpora=False,
           merge_subcorpora=False,
           replace_names=False,
           replace_subcorpus_names=False,
           projection=False,
           remove_above_p=False,
           p=0.05, 
           print_info=False,
           spelling=False,
           selfdrop=True,
           calc_all=True,
           keyword_measure='ll',
           **kwargs
          ):
    """
    See corpkit.interrogation.Interrogation.edit() for docstring
    """

    # grab arguments, in case we get dict input and have to iterate
    locs = locals()

    import corpkit

    import re
    import collections
    import pandas as pd
    import numpy as np

    from pandas import DataFrame, Series
    from time import localtime, strftime
    
    try:
        get_ipython().getoutput()
    except TypeError:
        have_ipython = True
    except NameError:
        have_ipython = False
    try:
        from IPython.display import display, clear_output
    except ImportError:
        pass

    # to use if we also need to worry about concordance lines
    return_conc = False

    from corpkit.interrogation import Interrodict, Interrogation, Concordance
    if interrogation.__class__ == Interrodict:
        locs.pop('interrogation', None)
        from collections import OrderedDict
        outdict = OrderedDict()
        for i, (k, v) in enumerate(interrogation.items()):
            # only print the first time around
            if i != 0:
                locs['print_info'] = False

            if isinstance(denominator, STRINGTYPE) and denominator.lower() == 'self':
                denominator = interrogation

            # if df2 is also a dict, get the relevant entry

            if isinstance(denominator, (dict, Interrodict)):
                #if sorted(set([i.lower() for i in list(dataframe1.keys())])) == \
                #   sorted(set([i.lower() for i in list(denominator.keys())])):
                #   locs['denominator'] = denominator[k]

                # fix: this repeats itself for every key, when it doesn't need to
                # denominator_sum: 
                if kwargs.get('denominator_sum'):
                    locs['denominator'] = denominator.collapse(axis='key')

                if kwargs.get('denominator_totals'):
                    locs['denominator'] = denominator[k].totals
                else:
                    locs['denominator'] = denominator[k].results


            outdict[k] = v.results.edit(**locs)
        if print_info:
            
            thetime = strftime("%H:%M:%S", localtime())
            print("\n%s: Finished! Output is a dictionary with keys:\n\n         '%s'\n" % (thetime, "'\n         '".join(sorted(outdict.keys()))))
        return Interrodict(outdict)

    elif isinstance(interrogation, (DataFrame, Series)):
        dataframe1 = interrogation
    elif isinstance(interrogation, Interrogation):
        #if interrogation.__dict__.get('concordance', None) is not None:
        #    concordances = interrogation.concordance
        branch = kwargs.pop('branch', 'results')
        if branch.lower().startswith('r') :
            dataframe1 = interrogation.results
        elif branch.lower().startswith('t'):
            dataframe1 = interrogation.totals
        elif branch.lower().startswith('c'):
            dataframe1 = interrogation.concordance
            return_conc = True
        else:
            dataframe1 = interrogation.results
    
    elif isinstance(interrogation, Concordance) or \
                        all(x in list(dataframe1.columns) for x in [ 'l', 'm', 'r']):
        return_conc = True
        print('heree')
        dataframe1 = interrogation
    # hope for the best
    else:
        dataframe1 = interrogation

    the_time_started = strftime("%Y-%m-%d %H:%M:%S")

    pd.options.mode.chained_assignment = None

    try:
        from process import checkstack
    except ImportError:
        from corpkit.process import checkstack
        
    if checkstack('pythontex'):
        print_info=False

    def combiney(df, df2, operation='%', threshold='medium', prinf=True):
        """Combine ``df`` with the denominator ``df2`` according to ``operation``.

        Reads ``just_totals``, ``using_totals`` and ``single_totals`` from the
        enclosing ``editor`` call.

        :param df: results being edited
        :param df2: denominator data
        :param operation: ``'%'``, ``'+'``, ``'-'``, ``'*'``, ``'/'``, ``'a'``
                          (any string starting with ``'a'`` for a multi-column
                          denominator) or a string starting with ``'c'``
        :param threshold: minimum ``'Combined total'`` an entry of ``df2``
                          needs to be kept; only consulted when ``just_totals``
                          and ``using_totals`` are set and the denominator has
                          several columns
        :param prinf: print the entries removed by the threshold
        :returns: ``(combined, totals)``; ``totals`` stays ``False`` for
                  operations that do not compute it
        """

        def _report_shape_mismatch():
            # shared message for every arithmetic branch below
            thetime = strftime("%H:%M:%S", localtime())
            print('%s: cannot combine DataFrame 1 and 2: different shapes' % thetime)

        totals = False
        # delete under threshold
        # NOTE(review): denom stays unassigned when just_totals is set but
        # using_totals is not; the single_totals branch would then fail.
        # Confirm callers never reach that combination.
        if just_totals:
            if using_totals:
                if not single_totals:
                    to_drop = list(df2[df2['Combined total'] < threshold].index)
                    present = list(df.index)
                    df = df.drop([e for e in to_drop if e in present])
                    if prinf:
                        to_show = list(to_drop[:5])
                        if len(to_drop) > 10:
                            to_show.append('...')
                            to_show.extend(to_drop[-5:])
                        if len(to_drop) > 0:
                            print('Removing %d entries below threshold:\n    %s' % (len(to_drop), '\n    '.join(to_show)))
                        if len(to_drop) > 10:
                            print('... and %d more ... \n' % (len(to_drop) - len(to_show) + 1))
                        else:
                            print('')
                else:
                    denom = df2
        else:
            denom = list(df2)

        if single_totals:
            if operation == '%':
                totals = df.sum() * 100.0 / float(df.sum().sum())
                df = df * 100.0
                try:
                    df = df.div(denom, axis=0)
                except ValueError:
                    _report_shape_mismatch()
            elif operation == '+':
                try:
                    df = df.add(denom, axis=0)
                except ValueError:
                    _report_shape_mismatch()
            elif operation == '-':
                try:
                    df = df.sub(denom, axis=0)
                except ValueError:
                    _report_shape_mismatch()
            elif operation == '*':
                totals = df.sum() * float(df.sum().sum())
                try:
                    df = df.mul(denom, axis=0)
                except ValueError:
                    _report_shape_mismatch()
            elif operation == '/':
                try:
                    totals = df.sum() / float(df.sum().sum())
                    df = df.div(denom, axis=0)
                except ValueError:
                    _report_shape_mismatch()
            elif operation == 'a':
                # columns are numeric distances; weight each by 1/distance
                for c in [c for c in list(df.columns) if int(c) > 1]:
                    df[c] = df[c] * (1.0 / int(c))
                df = df.sum(axis=1) / df2
            elif operation.startswith('c'):
                import warnings
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    # the module is imported as ``pd`` in this function;
                    # the bare name ``pandas`` is not bound here
                    df = pd.concat([df, df2], axis=1)
            return df, totals

        # multi-column denominator from here on
        if not operation.startswith('a'):
            # generate totals
            if operation == '%':
                totals = df.sum() * 100.0 / float(df2.sum().sum())
            if operation == '*':
                totals = df.sum() * float(df2.sum().sum())
            if operation == '/':
                totals = df.sum() / float(df2.sum().sum())
            if operation.startswith('c'):
                # add here the info that merging will not work
                # with identical colnames
                import warnings
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    d = pd.concat([df.T, df2.T])
                    # make index nums
                    d = d.reset_index()
                    # sum and remove duplicates
                    d = d.groupby('index').sum()
                    dx = d.reset_index('index')
                    dx.index = list(dx['index'])
                    df = dx.drop('index', axis=1).T

            def editf(datum):
                meth = {'%': datum.div,
                        '*': datum.mul,
                        '/': datum.div,
                        '+': datum.add,
                        '-': datum.sub}

                if datum.name in list(df2.columns):
                    method = meth[operation]
                    mathed = method(df2[datum.name], fill_value=0.0)
                    if operation == '%':
                        return mathed * 100.0
                    return mathed
                return datum * 0.0

            # Concatenation is already complete at this point; running editf
            # for a 'c' operation would only raise KeyError from ``meth``.
            if not operation.startswith('c'):
                df = df.apply(editf)

        else:
            for c in [c for c in list(df.columns) if int(c) > 1]:
                df[c] = df[c] * (1.0 / int(c))
            df = df.sum(axis=1) / df2.T.sum()

        return df, totals

    def parse_input(df, the_input):
        """Turn ``the_input`` into a list of names usable as pandas labels.

        :param df: data whose entry names are being selected
        :param the_input: ``'all'``, a regular expression string, an int, a
                          ``Wordlist``, a list of ints (positions in
                          ``list(df)``) or a list of strings (names)
        :returns: list of matching names; ``[]`` for an empty list; ``False``
                  when the input type is not recognised
        """
        import re
        if the_input == 'all':
            the_input = r'.*'
        if isinstance(the_input, int):
            # a single int is treated as a one-item list holding its string form
            the_input = [str(the_input)]
        elif isinstance(the_input, STRINGTYPE):
            regex = re.compile(the_input)
            return [w for w in list(df) if regex.search(w)]
        from corpkit.dictionaries.process_types import Wordlist
        if isinstance(the_input, Wordlist):
            the_input = list(the_input)

        parsed_input = False
        if isinstance(the_input, list):
            if not the_input:
                # nothing requested; avoid IndexError on the_input[0]
                return []
            if isinstance(the_input[0], int):
                wanted = set(the_input)
                parsed_input = [word for index, word in enumerate(list(df)) if index in wanted]
            elif isinstance(the_input[0], STRINGTYPE):
                try:
                    parsed_input = [word for word in the_input if word in df.columns]
                except AttributeError: # if series
                    parsed_input = [word for word in the_input if word in df.index]
        return parsed_input

    def synonymise(df, pos='n'):
        """Rename the columns of ``df`` to their most common WordNet synonym.

        For every column name, the lemma names of all its synsets with part
        of speech ``pos`` are counted and the most frequent one becomes the
        new name.  Names without synsets, or for which the lookup fails, are
        kept unchanged.

        :param df: DataFrame whose columns are words; modified in place
        :param pos: WordNet part-of-speech tag passed to ``wn.synsets``
        :returns: the same ``df`` with renamed columns
        """
        from nltk.corpus import wordnet as wn
        from collections import Counter
        fixed = []
        for word in list(df.columns):
            try:
                # The previous loop iterated a Synset object and appended to
                # an undefined list, so every word fell through to the
                # fallback; collect the lemma names explicitly instead.
                lemmas = [lemma
                          for synset in wn.synsets(word, pos=pos)
                          for lemma in synset.lemma_names()]
            except Exception:
                # best effort: missing corpus data or odd column names must
                # not abort the edit
                lemmas = []
            if lemmas:
                fixed.append(Counter(lemmas).most_common(1)[0][0])
            else:
                fixed.append(word)
        df.columns = fixed
        return df

    def convert_spell(df, convert_to='US', print_info=print_info):
        """Convert the column names of ``df`` to US or UK spelling.

        :param df: DataFrame whose columns are words; modified in place
        :param convert_to: ``'UK'`` inverts the ``usa_convert`` mapping; any
                           other value applies it as is
        :param print_info: print a progress message
        :returns: the same ``df`` with converted column names
        """
        # same two-step import as used for ``checkstack`` in this function:
        # flat layout first, installed package second
        try:
            from dictionaries.word_transforms import usa_convert
        except ImportError:
            from corpkit.dictionaries.word_transforms import usa_convert
        if print_info:
            print('Converting spelling ... \n')
        mapping = usa_convert
        if convert_to == 'UK':
            mapping = {v: k for k, v in list(usa_convert.items())}
        # words without an entry keep their spelling
        df.columns = [mapping.get(val, val) for val in list(df.columns)]
        return df

    def merge_duplicates(df, print_info=print_info):
        """Sum columns of ``df`` that share a name into a single column.

        Each merged column is dropped and re-added, so it ends up at the end
        of the column order.

        :param df: DataFrame that may contain duplicated column names
        :param print_info: print a progress message
        :returns: DataFrame with unique column names
        """
        if print_info:
            print('Merging duplicate entries ... \n')
        # ``Index.get_duplicates()`` no longer exists in current pandas; this
        # is the replacement named in its deprecation notice.
        duplicated_names = df.columns[df.columns.duplicated()].unique()
        for dup in duplicated_names:
            temp = df[dup].sum(axis=1)
            df = df.drop(dup, axis=1)
            df[dup] = temp
        return df

    def name_replacer(df, replace_names, print_info=print_info):
        """Apply regex substitutions to the column names, then merge duplicates.

        ``replace_names`` may be a single pattern string (matches are
        deleted), one ``(pattern, replacement)`` pair, a list of such pairs,
        or a dict whose values are the patterns and whose keys are the
        replacements.
        """
        import re
        # a bare string means "delete whatever matches"
        if isinstance(replace_names, STRINGTYPE):
            replace_names = [(replace_names, '')]

        if isinstance(replace_names, dict):
            pairs = [(v, k) for k, v in replace_names.items()]
        elif isinstance(replace_names[0], STRINGTYPE):
            # a single (pattern, replacement) pair that was not wrapped in a list
            pairs = [replace_names]
        else:
            pairs = replace_names

        for pattern, new_text in pairs:
            if print_info:
                if new_text:
                    print('Replacing "%s" with "%s" ...\n' % (pattern, new_text))
                else:
                    print('Deleting "%s" from entry names ...\n' % pattern)
            compiled = re.compile(pattern)
            new_text = new_text or ''
            df.columns = [compiled.sub(new_text, col) for col in list(df.columns)]

        return merge_duplicates(df, print_info=False)

    def just_these_entries(df, parsed_input, prinf=True):
        """Return ``df`` with every column not named in ``parsed_input`` dropped.

        When ``prinf`` is true, the number of kept entries and up to ten of
        their names are printed.
        """
        unwanted = [name for name in list(df) if name not in parsed_input]
        if prinf:
            shown = '\n    '.join(parsed_input[:10])
            print('Keeping %d entries:\n    %s' % (len(parsed_input), shown))
            hidden = len(parsed_input) - 10
            print('... and %d more ... \n' % hidden if hidden > 0 else '')
        return df.drop(unwanted, axis=1)

    def skip_these_entries(df, parsed_input, prinf=True):
        """Return ``df`` with the columns named in ``parsed_input`` dropped.

        When ``prinf`` is true, the number of skipped entries and up to ten
        of their names are printed.
        """
        if prinf:
            shown = '\n    '.join(parsed_input[:10])
            print('Skipping %d entries:\n    %s' % (len(parsed_input), shown))
            hidden = len(parsed_input) - 10
            print('... and %d more ... \n' % hidden if hidden > 0 else '')
        return df.drop(parsed_input, axis=1)

    def newname_getter(df, parsed_input, newname='combine', prinf=True, merging_subcorpora=False):
        """Work out the name to give a set of merged entries.

        :param df: data whose entries are being merged
        :param parsed_input: names of the entries being merged
        :param newname: ``'combine'`` joins up to three of the merged names
                        with ``/``; any other string is used as is; an int
                        picks the column at that position; a false value
                        picks the merged entry with the largest sum
        :param prinf: unused here
        :param merging_subcorpora: when set, ``newname=False`` is treated as
                                   ``'combine'``
        :returns: the new name as a string
        """
        if merging_subcorpora and newname is False:
            newname = 'combine'
        if isinstance(newname, int):
            the_newname = list(df.columns)[newname]
        elif isinstance(newname, STRINGTYPE):
            if newname == 'combine':
                if len(parsed_input) <= 3:
                    the_newname = '/'.join(parsed_input)
                else:
                    the_newname = '/'.join(parsed_input[:3]) + '...'
            else:
                the_newname = newname
        if not newname:
            # name the result after the most frequent of the merged entries
            sums = {item: sum(list(df[item])) for item in parsed_input}
            the_newname = max(sums, key=sums.get)
        if not isinstance(the_newname, STRINGTYPE):
            # ``str(obj, errors=...)`` only accepts bytes-like objects on
            # Python 3, so decode bytes and stringify everything else
            if isinstance(the_newname, bytes):
                the_newname = the_newname.decode(errors='ignore')
            else:
                the_newname = str(the_newname)
        return the_newname

    def merge_these_entries(df, parsed_input, the_newname, prinf=True, merging='entries'):
        """Sum the entries in ``parsed_input`` into one entry named ``the_newname``.

        :param df: DataFrame (entries are columns) or Series (entries are
                   index labels)
        :param parsed_input: names of the entries to merge
        :param the_newname: name of the merged entry; if an entry with this
                            name survives the merge, the sum is added to it
        :param prinf: print what is being merged
        :param merging: word used in the messages (``'entries'`` or
                        ``'subcorpora'``)
        :returns: the merged data; ``df`` itself, unchanged, when
                  ``parsed_input`` is empty
        """
        if len(parsed_input) == 0:
            import warnings
            warnings.warn('No %s could be automatically merged.\n' % merging)
            # nothing to merge: do not add an all-zero entry
            return df

        if prinf:
            print('Merging %d %s as "%s":\n    %s' % \
                (len(parsed_input), merging, the_newname, '\n    '.join(parsed_input[:10])))
            if len(parsed_input) > 10:
                print('... and %d more ... \n' % (len(parsed_input) - 10))
            else:
                print('')

        # make new entry with sum of parsed input
        temp = sum([df[i] for i in parsed_input])

        # remove old entries
        if isinstance(df, Series):
            df = df.drop(parsed_input, errors='ignore')
            nms = list(df.index)
        else:
            df = df.drop(parsed_input, axis=1, errors='ignore')
            nms = list(df.columns)
        if the_newname in nms:
            df[the_newname] = df[the_newname] + temp
        else:
            df[the_newname] = temp
        return df

    def just_these_subcorpora(df, lst_of_subcorpora, prinf=True):
        """Return ``df`` with only the rows named in ``lst_of_subcorpora``.

        :param df: data with subcorpora as index
        :param lst_of_subcorpora: names to keep; ints (a single int or a list
                                  whose first item is an int) are converted
                                  to strings
        :param prinf: print the subcorpora being kept
        :returns: the reduced data
        """
        # accept a bare int, as skip_these_subcorpora does
        if isinstance(lst_of_subcorpora, int):
            lst_of_subcorpora = [lst_of_subcorpora]
        # the length guard avoids IndexError on empty input
        if len(lst_of_subcorpora) > 0 and isinstance(lst_of_subcorpora[0], int):
            lst_of_subcorpora = [str(l) for l in lst_of_subcorpora]
        good_years = [subcorpus for subcorpus in list(df.index) if subcorpus in lst_of_subcorpora]
        if prinf:
            # str() keeps the message working for non-string index labels
            print('Keeping %d subcorpora:\n    %s' % (len(good_years), '\n    '.join([str(i) for i in good_years[:10]])))
            if len(good_years) > 10:
                print('... and %d more ... \n' % (len(good_years) - 10))
            else:
                print('')
        df = df.drop([subcorpus for subcorpus in list(df.index) if subcorpus not in good_years], axis=0)
        return df

    def skip_these_subcorpora(df, lst_of_subcorpora, prinf=True):
        """Return ``df`` without the rows named in ``lst_of_subcorpora``.

        :param df: data with subcorpora as index
        :param lst_of_subcorpora: names to drop; ints (a single int or a list
                                  whose first item is an int) are converted
                                  to strings
        :param prinf: print the subcorpora being skipped
        :returns: the reduced data; a warning is issued when nothing matched
        """
        if isinstance(lst_of_subcorpora, int):
            lst_of_subcorpora = [lst_of_subcorpora]
        # the length guard avoids IndexError on empty input
        if len(lst_of_subcorpora) > 0 and isinstance(lst_of_subcorpora[0], int):
            lst_of_subcorpora = [str(l) for l in lst_of_subcorpora]
        bad_years = [subcorpus for subcorpus in list(df.index) if subcorpus in lst_of_subcorpora]
        if len(bad_years) == 0:
            import warnings
            warnings.warn('No subcorpora skipped.\n')
        elif prinf:
            print('Skipping %d subcorpora:\n    %s' % (len(bad_years), '\n    '.join([str(i) for i in bad_years[:10]])))
            if len(bad_years) > 10:
                print('... and %d more ... \n' % (len(bad_years) - 10))
            else:
                print('')
        # bad_years already holds exactly the index labels to remove
        return df.drop(bad_years, axis=0)

    def span_these_subcorpora(df, lst_of_subcorpora, prinf=True):
        """Select only a span of subcorpora (first, last), both inclusive.

        :param df: data with subcorpora as index
        :param lst_of_subcorpora: ``(first, last)`` index labels; when empty,
                                  a warning is issued and ``df`` is returned
                                  unchanged
        :param prinf: print the span being kept
        :returns: the rows from ``first`` to ``last``
        :raises ValueError: if a label is not in the index, or if the input
                            is neither empty nor a pair
        """
        # check for emptiness before unpacking, otherwise the warning below
        # can never be reached
        if len(lst_of_subcorpora) == 0:
            import warnings
            warnings.warn('Span not identified.\n')
            return df

        fir, sec = lst_of_subcorpora
        if prinf:
            try:
                span = '%d--%d' % (int(fir), int(sec))
            except (TypeError, ValueError):
                # subcorpus names are not always numeric
                span = '%s--%s' % (fir, sec)
            print('Keeping subcorpora:\n    %s\n' % span)
        sbs = list(df.index)
        # sbs.index() yields positions, so slice positionally
        return df.iloc[sbs.index(fir):sbs.index(sec) + 1]

    def projector(df, list_of_tuples, prinf=True):
        """Project absolute values: multiply given subcorpora by a factor.

        :param df: data with subcorpora as index; modified in place
        :param list_of_tuples: list of ``(subcorpus, factor)`` pairs or a dict
                               mapping subcorpus to factor; int subcorpus
                               names are converted to strings
        :param prinf: print each projection
        :returns: the same ``df`` with the projected rows
        """
        if isinstance(list_of_tuples, list):
            list_of_tuples = dict(list_of_tuples)
        for subcorpus, projection_value in list(list_of_tuples.items()):
            if isinstance(subcorpus, int):
                subcorpus = str(subcorpus)
            # label-based access; ``.ix`` no longer exists in current pandas
            df.loc[subcorpus] = df.loc[subcorpus] * projection_value
            if prinf:
                if isinstance(projection_value, float):
                    print('Projection: %s * %s' % (subcorpus, projection_value))
                if isinstance(projection_value, int):
                    print('Projection: %s * %d' % (subcorpus, projection_value))
        if prinf:
            print('')
        return df

    def do_stats(df):
        """Run a linear regression per entry and append the results to ``df``.

        The x values are the index labels as ints relative to the first one
        or, when the labels are not numeric, the positions 0..n-1.  Five rows
        (``slope``, ``intercept``, ``r``, ``p``, ``stderr``) are appended;
        infinite and missing values are then replaced with 0.0.

        :param df: DataFrame (one regression per column) or Series
        :returns: the data with the stats rows appended, or ``False`` when
                  scipy is not installed
        """
        try:
            from scipy.stats import linregress
        except ImportError:
            thetime = strftime("%H:%M:%S", localtime())
            print('%s: sort type not available in this verion of corpkit.' % thetime)
            return False

        indices = list(df.index)
        first_year = indices[0]
        try:
            x = [int(y) - int(first_year) for y in indices]
        except ValueError:
            x = list(range(len(indices)))

        statfields = ['slope', 'intercept', 'r', 'p', 'stderr']

        if isinstance(df, Series):
            sl = Series(list(linregress(x, list(df.values))), index=statfields)
        else:
            stats = [list(linregress(x, list(df[entry]))) for entry in list(df.columns)]
            # materialise the zip: a bare iterator is not accepted by every
            # pandas version
            sl = DataFrame(list(zip(*stats)), index=statfields, columns=list(df.columns))
        # ``df.append`` no longer exists in current pandas
        df = pd.concat([df, sl])

        # drop infinites and nans
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.fillna(0.0)
        return df

    def resort(df, sort_by = False, keep_stats = False):
        """Sort results, potentially using scipy's linregress.

        Reads ``just_totals``, ``df1_istotals``, ``remove_above_p``, ``p`` and
        ``kwargs`` from the enclosing ``editor`` call.

        :param df: DataFrame (entries as columns) or Series to sort
        :param sort_by: ``'total'``/``'most'``/``True``, ``'infreq'``/``'least'``,
                        ``'name'``, one of the stat fields (``'slope'``,
                        ``'intercept'``, ``'r'``, ``'p'``, ``'stderr'``),
                        ``'increase'``, ``'decrease'``, ``'static'``,
                        ``'turbulent'``, or any other value, which is used
                        as a row label to sort by
        :param keep_stats: keep the five stats rows in the output
        :returns: the sorted data, or ``False`` when stats were needed but
                  ``do_stats`` could not compute them
        """
        # translate options and make sure they are parseable
        stat_field = ['slope', 'intercept', 'r', 'p', 'stderr']
        easy_sorts = ['total', 'infreq', 'name', 'most', 'least']
        stat_sorts = ['increase', 'decrease', 'static', 'turbulent']
        options = stat_field + easy_sorts + stat_sorts
        sort_by_convert = {'most': 'total', True: 'total', 'least': 'infreq'}
        sort_by = sort_by_convert.get(sort_by, sort_by)

        # probably broken :(
        if just_totals:
            if sort_by == 'name':
                return df.sort_index()
            return df.sort_values(by='Combined total', ascending=sort_by != 'total', axis=1)

        stats_done = False
        if keep_stats or sort_by in stat_field + stat_sorts:
            df = do_stats(df)
            stats_done = True
            if df is False:
                return False

        if isinstance(df, Series):
            if stats_done:
                # the stats are the last five items; take them off while
                # sorting (positional, as ``.ix`` no longer exists)
                stats = df.iloc[-5:]
                df = df.drop(list(stats.index))
            if sort_by == 'name':
                df = df.sort_index()
            else:
                df = df.sort_values(ascending=sort_by != 'total')
            if stats_done:
                # ``Series.append`` no longer exists in current pandas
                df = pd.concat([df, stats])
            return df

        if sort_by == 'name':
            # currently case sensitive
            # (``reindex_axis`` no longer exists in current pandas)
            df = df.reindex(columns=sorted(df.columns))
        elif sort_by in ['total', 'infreq']:
            if df1_istotals:
                df = df.T
            df = df[list(df.sum().sort_values(ascending=sort_by != 'total').index)]

        # sort by slope etc., or search by subcorpus name
        if sort_by in stat_field or sort_by not in options:
            asc = kwargs.get('reverse', False)
            df = df.T.sort_values(by=sort_by, ascending=asc).T

        if sort_by in stat_sorts:
            slopes = df.loc['slope']
            # argsort yields column positions, so reorder positionally;
            # indexing with an integer Series is a label lookup in current
            # pandas and fails for string column names
            if sort_by == 'increase':
                order = slopes.argsort().values[::-1]
            elif sort_by == 'decrease':
                order = slopes.argsort().values
            elif sort_by == 'static':
                order = slopes.abs().argsort().values
            else:  # 'turbulent'
                order = slopes.abs().argsort().values[::-1]
            df = df.iloc[:, order]
            if remove_above_p:
                df = df.T
                df = df[df['p'] <= p]
                df = df.T

        # remove stats field by default
        if not keep_stats:
            df = df.drop(stat_field, axis=0, errors='ignore')
        return df

    def set_threshold(big_list, threshold, prinf=True):
        """Turn a named threshold into a number relative to the data size.

        :param big_list: DataFrame or Series whose grand total is used
        :param threshold: a string starting with ``l`` (low), ``m`` (medium)
                          or ``h`` (high), which divides the grand total by
                          10000, 5000 or 2500 respectively; any non-string
                          value is returned as is
        :param prinf: print the resulting threshold
        :returns: the numeric threshold
        :raises ValueError: for a string that is not low/medium/high
        :raises TypeError: for a named threshold on data that is neither a
                           DataFrame nor a Series
        """
        if isinstance(threshold, STRINGTYPE):
            divisors = {'l': 10000, 'm': 5000, 'h': 2500}
            try:
                divisor = divisors[threshold[:1].lower()]
            except KeyError:
                # previously surfaced as an UnboundLocalError further down
                raise ValueError("threshold must be 'low', 'medium', 'high' "
                                 "or a number, not %r" % threshold)
            if isinstance(big_list, DataFrame):
                tot = big_list.sum().sum()
            elif isinstance(big_list, Series):
                tot = big_list.sum()
            else:
                raise TypeError('cannot compute a threshold for %s'
                                % type(big_list).__name__)
            tshld = float(tot) / float(divisor)
        else:
            tshld = threshold
        if prinf:
            print('Threshold: %d\n' % tshld)
        return tshld

    # copy dataframe to be very safe
    df = dataframe1.copy()
    # make cols into strings
    try:
        df.columns = [str(c) for c in list(df.columns)]
    except:
        pass

    if operation is None:
        operation = 'None'

    if isinstance(interrogation, Concordance):
        return_conc = True
    # do concordance work
    if return_conc:
        if just_entries:
            if isinstance(just_entries, int):
                just_entries = [just_entries]
            if isinstance(just_entries, STRINGTYPE):
                df = df[df['m'].str.contains(just_entries)]
            if isinstance(just_entries, list):
                if all(isinstance(e, STRINGTYPE) for e in just_entries):
                    mp = df['m'].map(lambda x: x in just_entries)
                    df = df[mp]
                else:
                    df = df.ix[just_entries]

        if skip_entries:
            if isinstance(skip_entries, int):
                skip_entries = [skip_entries]
            if isinstance(skip_entries, STRINGTYPE):
                df = df[~df['m'].str.contains(skip_entries)]
            if isinstance(skip_entries, list):
                if all(isinstance(e, STRINGTYPE) for e in skip_entries):
                    mp = df['m'].map(lambda x: x not in skip_entries)
                    df = df[mp]
                else:
                    df = df.drop(skip_entries, axis=0)

        if just_subcorpora:
            if isinstance(just_subcorpora, int):
                just_subcorpora = [just_subcorpora]
            if isinstance(just_subcorpora, STRINGTYPE):
                df = df[df['c'].str.contains(just_subcorpora)]
            if isinstance(just_subcorpora, list):
                if all(isinstance(e, STRINGTYPE) for e in just_subcorpora):
                    mp = df['c'].map(lambda x: x in just_subcorpora)
                    df = df[mp]
                else:
                    df = df.ix[just_subcorpora]

        if skip_subcorpora:
            if isinstance(skip_subcorpora, int):
                skip_subcorpora = [skip_subcorpora]
            if isinstance(skip_subcorpora, STRINGTYPE):
                df = df[~df['c'].str.contains(skip_subcorpora)]
            if isinstance(skip_subcorpora, list):
                if all(isinstance(e, STRINGTYPE) for e in skip_subcorpora):
                    mp = df['c'].map(lambda x: x not in skip_subcorpora)
                    df = df[mp]
                else:
                    df = df.drop(skip_subcorpora, axis=0)

        return Concordance(df)

    if print_info:
        print('\n***Processing results***\n========================\n')

    df1_istotals = False
    if isinstance(df, Series):
        df1_istotals = True
        df = DataFrame(df)
        # if just a single result
    else:
        df = DataFrame(df)
    if operation.startswith('k'):
        if sort_by is False:
            if not df1_istotals:
                sort_by = 'turbulent'
        if df1_istotals:
            df = df.T
    
    # figure out if there's a second list
    # copy and remove totals if there is
    single_totals = True
    using_totals = False
    outputmode = False

    if denominator.__class__ == Interrogation:
        try:
            denominator = denominator.results
        except AttributeError:
            denominator = denominator.totals

    if denominator is not False and not isinstance(denominator, STRINGTYPE):
        df2 = denominator.copy()
        using_totals = True
        if isinstance(df2, DataFrame):
            if len(df2.columns) > 1:
                single_totals = False
            else:
                df2 = Series(df2)
        elif isinstance(df2, Series):
            single_totals = True
            #if operation == 'k':
                #raise ValueError('Keywording requires a DataFrame for denominator. Use "self"?')
    else:
        if operation in ['k', 'a', '%', '/', '*', '-', '+']:
            denominator = 'self'         
        if denominator == 'self':
            outputmode = True

    if operation.startswith('a') or operation.startswith('A'):
        if list(df.columns)[0] != '0' and list(df.columns)[0] != 0:
            df = df.T
        if using_totals:
            if not single_totals:
                df2 = df2.T

    if projection:
        # projection shouldn't do anything when working with '%', remember.
        df = projector(df, projection)
        if using_totals:
            df2 = projector(df2, projection)

    if spelling:
        df = convert_spell(df, convert_to=spelling)
        df = merge_duplicates(df, print_info=False)

        if not single_totals:
            df2 = convert_spell(df2, convert_to=spelling, print_info=False)
            df2 = merge_duplicates(df2, print_info=False)
        if not df1_istotals:
            sort_by = 'total'

    if replace_names:
        df = name_replacer(df, replace_names)
        df = merge_duplicates(df)
        if not single_totals:
            df2 = name_replacer(df2, replace_names, print_info=False)
            df2 = merge_duplicates(df2, print_info=False)
        if not sort_by:
            sort_by = 'total'

    if replace_subcorpus_names:
        df = name_replacer(df.T, replace_subcorpus_names)
        df = merge_duplicates(df).T
        df = df.sort_index()
        if not single_totals:
            if isinstance(df2, DataFrame):
                df2 = df2.T
            df2 = name_replacer(df2, replace_subcorpus_names, print_info=False)
            df2 = merge_duplicates(df2, print_info=False)
            if isinstance(df2, DataFrame):
                df2 = df2.T
            df2 = df2.sort_index()
        if not sort_by:
            sort_by = 'total'

    # remove old stats if they're there:
    statfields = ['slope', 'intercept', 'r', 'p', 'stderr']
    try:
        df = df.drop(statfields, axis=0)
    except:
        pass
    if using_totals:
        try:
            df2 = df2.drop(statfields, axis=0)
        except:
            pass

    # remove totals and tkinter order
    for name, ax in zip(['Total'] * 2 + ['tkintertable-order'] * 2, [0, 1, 0, 1]):
        if name == 'Total' and df1_istotals:
            continue
        try:
            df = df.drop(name, axis=ax, errors='ignore')
        except:
            pass
    for name, ax in zip(['Total'] * 2 + ['tkintertable-order'] * 2, [0, 1, 0, 1]):
        if name == 'Total' and single_totals:
            continue

        try:

            df2 = df2.drop(name, axis=ax, errors='ignore')
        except:
            pass

    # merging: make dicts if they aren't already, so we can iterate
    if merge_entries:
        if not isinstance(merge_entries, list):
            if isinstance(merge_entries, STRINGTYPE):
                merge_entries = {'combine': merge_entries}
            # for newname, criteria    
            for name, the_input in sorted(merge_entries.items()):
                pin = parse_input(df, the_input)
                the_newname = newname_getter(df, pin, newname=name, prinf=print_info)
                df = merge_these_entries(df, pin, the_newname, prinf=print_info)
                if not single_totals:
                    pin2 = parse_input(df2, the_input)
                    df2 = merge_these_entries(df2, pin2, the_newname, prinf=False)
        else:
            for i in merge_entries:
                pin = parse_input(df, merge_entries)
                the_newname = newname_getter(df, pin, prinf=print_info)
                df = merge_these_entries(df, pin, the_newname, prinf=print_info)
                if not single_totals:
                    pin2 = parse_input(df2, merge_entries)
                    df2 = merge_these_entries(df2, pin2, the_newname, prinf=False)
    
    if merge_subcorpora:
        if not isinstance(merge_subcorpora, dict):
            if isinstance(merge_subcorpora, list):
                if isinstance(merge_subcorpora[0], tuple):
                    merge_subcorpora = {x: y for x, y in merge_subcorpora}
                elif isinstance(merge_subcorpora[0], STRINGTYPE):
                    merge_subcorpora = {'combine': [x for x in merge_subcorpora]}
                elif isinstance(merge_subcorpora[0], int):
                    merge_subcorpora = {'combine': [str(x) for x in merge_subcorpora]}
            else:
                merge_subcorpora = {'combine': merge_subcorpora}
        for name, the_input in sorted(merge_subcorpora.items()):
            pin = parse_input(df.T, the_input)
            the_newname = newname_getter(df.T, pin, newname=name, \
                merging_subcorpora=True, prinf=print_info)
            df = merge_these_entries(df.T, pin, the_newname, merging='subcorpora', 
                                     prinf=print_info).T
            if using_totals:
                pin2 = parse_input(df2.T, the_input)
                df2 = merge_these_entries(df2.T, pin2, the_newname, merging='subcorpora', 
                                          prinf=False).T

    if just_subcorpora:
        df = just_these_subcorpora(df, just_subcorpora, prinf=print_info)
        if using_totals:
            df2 = just_these_subcorpora(df2, just_subcorpora, prinf=False)
    
    if skip_subcorpora:
        df = skip_these_subcorpora(df, skip_subcorpora, prinf=print_info)
        if using_totals:
            df2 = skip_these_subcorpora(df2, skip_subcorpora, prinf=False)
    
    if span_subcorpora:
        df = span_these_subcorpora(df, span_subcorpora, prinf=print_info)
        if using_totals:
            df2 = span_these_subcorpora(df2, span_subcorpora, prinf=False)

    if just_entries:
        df = just_these_entries(df, parse_input(df, just_entries), prinf=print_info)
        if not single_totals:
            df2 = just_these_entries(df2, parse_input(df2, just_entries), prinf=False)
    
    if skip_entries:
        df = skip_these_entries(df, parse_input(df, skip_entries), prinf=print_info)
        if not single_totals:
            df2 = skip_these_entries(df2, parse_input(df2, skip_entries), prinf=False)

    # drop infinites and nans
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.fillna(0.0)

    if just_totals:
        df = DataFrame(df.sum(), columns=['Combined total'])
        if using_totals:
            if not single_totals:
                df2 = DataFrame(df2.sum(), columns=['Combined total'])
            else:
                df2 = df2.sum()

    tots = df.sum(axis=1)

    if using_totals or outputmode:
        if not operation.startswith('k'):
            tshld = 0
            # set a threshold if just_totals
            if outputmode is True:
                df2 = df.T.sum()
                if not just_totals:
                    df2.name = 'Total'
                else:
                    df2.name = 'Combined total'
                using_totals = True
                single_totals = True
            if just_totals:
                if not single_totals:
                    tshld = set_threshold(df2, threshold, prinf=print_info)
            df, tots = combiney(df, df2, operation=operation, threshold=tshld, prinf=print_info)
    
    # if doing keywording...
    if operation.startswith('k'):

        if isinstance(denominator, STRINGTYPE):
            if denominator == 'self':
                df2 = df.copy()
            else:
                df2 = denominator

        from corpkit.keys import keywords
        df = keywords(df, df2, 
                      selfdrop=selfdrop, 
                      threshold=threshold, 
                      print_info=print_info,
                      editing=True,
                      calc_all=calc_all,
                      sort_by=sort_by,
                      measure=keyword_measure,
                      **kwargs)
    
    # drop infinites and nans
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.fillna(0.0)

    # resort data
    if sort_by or keep_stats:
        df = resort(df, keep_stats=keep_stats, sort_by=sort_by)
        if isinstance(df, bool):
            if df is False:
                return 'linregress'

    if keep_top:
        if not just_totals:
            df = df[list(df.columns)[:keep_top]]
        else:
            df = df.head(keep_top)

    if just_totals:
        # turn just_totals into series:
        df = Series(df['Combined total'], name='Combined total')

    if df1_istotals:
        if operation.startswith('k'):
            try:
                df = Series(df.ix[dataframe1.name])
                df.name = '%s: keyness' % df.name
            except:
                df = df.iloc[0, :]
                df.name = 'keyness' % df.name

    # generate totals branch if not percentage results:
    # fix me
    if df1_istotals or operation.startswith('k'):
        if not just_totals:
            try:
                total = Series(df['Total'], name='Total')
            except:
                total = 'none'
                pass

            #total = df.copy()
        else:
            total = 'none'
    else:
        # might be wrong if using division or something...
        try:
            total = df.T.sum(axis=1)
        except:
            total = 'none'
    
    if not isinstance(tots, DataFrame) and not isinstance(tots, Series):
        total = df.sum(axis=1)
    else:
        total = tots

    if isinstance(df, DataFrame):
        datatype = df.iloc[0].dtype
    else:
        datatype = df.dtype
    locs['datatype'] = datatype

    # TURN INT COL NAMES INTO STR
    try:
        df.results.columns = [str(d) for d in list(df.results.columns)]
    except:
        pass

    def add_tkt_index(df):
        """Add a 'tkintertable-order' row to a DataFrame for the GUI.

        The frame is transposed, any existing 'tkintertable-order' label is
        removed from both axes, a column numbering the transposed rows
        0..n-1 is added, and the frame is transposed back, so the numbering
        ends up as a row labelled 'tkintertable-order'.

        Anything that is not a DataFrame (notably a Series, which has no
        second axis to drop from) is returned unchanged. The previous guard
        was inverted: it ran the two-axis logic only for Series, where
        ``drop(..., axis=1)`` raises, and did nothing for DataFrames.

        :param df: results to annotate
        :returns: the annotated DataFrame, or *df* itself if not a DataFrame
        """
        if not isinstance(df, DataFrame):
            return df
        df = df.T
        # strip a stale order left over from a previous edit, on either axis
        df = df.drop('tkintertable-order', errors='ignore', axis=0)
        df = df.drop('tkintertable-order', errors='ignore', axis=1)
        order = list(range(len(df.index)))
        df['tkintertable-order'] = Series(order, index=list(df.index))
        return df.T

    # while tkintertable can't sort rows
    if checkstack('tkinter'):
        df = add_tkt_index(df)

    if kwargs.get('df1_always_df'):
        if isinstance(df, Series):
            df = DataFrame(df)

    # delete non-appearing conc lines
    if not hasattr(interrogation, 'concordance'):
        lns = None
    elif hasattr(interrogation, 'concordance') and interrogation.concordance is None:
        lns = None
    else:
        col_crit = interrogation.concordance['m'].map(lambda x: x in list(df.columns))
        ind_crit = interrogation.concordance['c'].map(lambda x: x in list(df.index))
        lns = interrogation.concordance[col_crit]
        lns = lns.loc[ind_crit]
        lns = Concordance(lns)
    
    output = Interrogation(results=df, totals=total, query=locs, concordance=lns)

    if print_info:
        print('***Done!***\n========================\n')

    return output
        else:
            prob = prob * (1 - row.iloc[ix])
    return prob
    

# For every ELO name, collect one prediction column per chunk bound, then
# reduce each row of those columns (plus 'movergain') to a single value
# with gain_likelihood and store it in fit_df under the ELO name.
for elo_name in list(elo_names):
    allchunks = []
    for cb in chunk_bounds:
        # models are keyed by the (elo_name, chunk bound) pair
        model = chunkmodels[elo_name, cb]
        # Python 2 str.translate(None, chars) deletes the listed characters,
        # so the column name has no spaces, brackets, parentheses or commas
        newcol_name = 'cb_' + str(elo_name.translate(None, ' ()[],')) + '_' + str(cb)
        like_colnames.append(newcol_name)
        msg('Predicting %s' % newcol_name)
        preds = model.predict_proba(X)
        # keep only the second column of the predictions
        # (presumably the positive-class probability -- TODO confirm)
        preds_series = DataFrame(preds).iloc[:,1]
        # realign with X so the later concat matches rows by index
        preds_series.index = X.index
        preds_series.name = cb
        allchunks.append(preds_series)
        
    # 'movergain' rides along as the last column of each row handed to
    # gain_likelihood
    allchunks.append(fit_df['movergain'])
    allchunks_df = concat(allchunks, axis=1)
    fit_df[elo_name] = allchunks_df.apply(gain_likelihood, axis=1)

# Optional debugging dump: the per-ELO columns next to a few identifying
# columns, transposed so each original row prints as a column.
if diagnose:
    cols_to_show = list(elo_names)
    cols_to_show.extend(['gamenum','side','halfply','elo','movergain'])
    print fit_df[cols_to_show].transpose()

# group by player-game, and combine all the likelihoods into a single
# likelihood for that ELO

def exp_sum_log(foo):
Beispiel #44
0
def df_loads(stream):
    """Rebuild a dataframe, custom attributes included, from a pickled string.

    The unpickled wrapper object is expected to expose the frame as
    ``.dataframe`` and the saved attributes as the mapping ``._metadict``;
    every entry of that mapping is re-attached to the frame with setattr.

    NOTE: unpickling can execute arbitrary code -- only pass trusted data.
    """
    wrapper = cPickle.loads(stream)  # in-memory variant: loads, not load
    frame = wrapper.dataframe
    saved_attrs = wrapper._metadict
    for attr_name in saved_attrs:
        setattr(frame, attr_name, saved_attrs[attr_name])
    return frame


if __name__ == "__main__":
    # Demo: build a small random frame and hang custom attributes on it
    df = DataFrame(randn(3, 3), columns=list("abc"))
    print_customattr(df)
    print("adding some attributes")
    df.name = "Billy"
    df.junk = "in the trunk"
    print_customattr(df)

    # Round-trip through an in-memory string ...
    stream = df_dumps(df)
    print("wrote dataframe to memory")
    # ... and show what came back
    dfnew = df_loads(stream)
    print("restored from memory")
    print_customattr(dfnew)

    # Same again, but to a file on disk
    outfile = "dftest.df"  # What file extension is commonly used for this?
    df_dump(df, outfile)
    print("wrote dataframe to file %s" % outfile)
# Create a list of obs dataframes, one for each station:

# <codecell>

# Build one observation DataFrame per station, aligned to the index of `ts`.
#   obs_df     -- list of single-column frames, one per station, in order
#   sta_names  -- name of every station visited
#   sta_failed -- names of stations whose request came back empty
obs_df = []
sta_names = []
sta_failed = []
col = 'Observed Data'
for sta in stations:
    b = coops2df(collector, sta, sos_name)
    name = b.name
    sta_names.append(name)
    print(name)
    if b.empty:
        # No data returned: substitute an all-NaN column on the ts index so
        # the station still gets an entry.  np.nan is used because the
        # np.NaN alias was removed in NumPy 2.0.
        sta_failed.append(name)
        b = DataFrame(np.arange(len(ts)) * np.nan, index=ts.index, columns=[col])
        b.name = name
    # Limit interpolation to 10 points (10 @ 6min = 1 hour).
    concatenated = concat([b, ts], axis=1).interpolate(limit=10)[col]
    obs_df.append(DataFrame(concatenated))
    # the freshly built frame does not carry the name attribute; copy it over
    obs_df[-1].name = b.name

# <codecell>

# Set up a map figure: coordinates passed to the axes are interpreted in a
# WGS84 geodetic CRS, while the axes themselves use the tile source's CRS.
geodetic = ccrs.Geodetic(globe=ccrs.Globe(datum='WGS84'))
tiler = MapQuestOpenAerial()
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection=tiler.crs))
# Open Source Imagery from MapQuest (max zoom = 16?)
zoom = 8
# Reorder box elements 0, 2, 1, 3 for set_extent
# (presumably box is [lon_min, lat_min, lon_max, lat_max] -- TODO confirm)
extent = [box[0], box[2], box[1], box[3]]
ax.set_extent(extent, geodetic)
Beispiel #46
0
    def test_timegrouper_with_reg_groups(self):
        """Group by a time-based Grouper combined with a regular column key.

        Each scenario is run on the original frame and on a copy sorted by
        'Quantity', with the same expected result for both.  Covers several
        frequencies, selecting the datetime axis by key or by level, the
        errors raised for unknown or conflicting key/level, single groupers,
        and (GH 6764) grouping with and without a pre-sorted index.
        """

        # GH 3794
        # allow combination of timegrouper/reg groups

        # First data set: dates spread over 2013, used as the index
        df_original = DataFrame({
            'Branch': 'A A A A A A A B'.split(),
            'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
            'Quantity': [1, 3, 5, 1, 8, 1, 9, 3],
            'Date': [
                datetime(2013, 1, 1, 13, 0),
                datetime(2013, 1, 1, 13, 5),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 12, 2, 12, 0),
                datetime(2013, 12, 2, 14, 0),
            ]
        }).set_index('Date')

        # Same rows in a different order
        df_sorted = df_original.sort_values(by='Quantity', ascending=False)

        for df in [df_original, df_sorted]:
            # annual frequency: every row falls in the bin labelled 2013-12-31
            expected = DataFrame({
                'Buyer': 'Carl Joe Mark'.split(),
                'Quantity': [10, 18, 3],
                'Date': [
                    datetime(2013, 12, 31, 0, 0),
                    datetime(2013, 12, 31, 0, 0),
                    datetime(2013, 12, 31, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])

            result = df.groupby([pd.Grouper(freq='A'), 'Buyer']).sum()
            assert_frame_equal(result, expected)

            # six-month-start frequency: bins labelled 2013-01-01 and 2013-07-01
            expected = DataFrame({
                'Buyer': 'Carl Mark Carl Joe'.split(),
                'Quantity': [1, 3, 9, 18],
                'Date': [
                    datetime(2013, 1, 1, 0, 0),
                    datetime(2013, 1, 1, 0, 0),
                    datetime(2013, 7, 1, 0, 0),
                    datetime(2013, 7, 1, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])
            result = df.groupby([pd.Grouper(freq='6MS'), 'Buyer']).sum()
            assert_frame_equal(result, expected)

        # Second data set: same buyers and quantities, all dates on
        # 2013-10-01 or 2013-10-02
        df_original = DataFrame({
            'Branch': 'A A A A A A A B'.split(),
            'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(),
            'Quantity': [1, 3, 5, 1, 8, 1, 9, 3],
            'Date': [
                datetime(2013, 10, 1, 13, 0),
                datetime(2013, 10, 1, 13, 5),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 10, 1, 20, 0),
                datetime(2013, 10, 2, 10, 0),
                datetime(2013, 10, 2, 12, 0),
                datetime(2013, 10, 2, 14, 0),
            ]
        }).set_index('Date')

        df_sorted = df_original.sort_values(by='Quantity', ascending=False)
        for df in [df_original, df_sorted]:

            # daily bins
            expected = DataFrame({
                'Buyer': 'Carl Joe Mark Carl Joe'.split(),
                'Quantity': [6, 8, 3, 4, 10],
                'Date': [
                    datetime(2013, 10, 1, 0, 0),
                    datetime(2013, 10, 1, 0, 0),
                    datetime(2013, 10, 1, 0, 0),
                    datetime(2013, 10, 2, 0, 0),
                    datetime(2013, 10, 2, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])

            result = df.groupby([pd.Grouper(freq='1D'), 'Buyer']).sum()
            assert_frame_equal(result, expected)

            # monthly bins; this `expected` is reused by the key/level
            # variants below
            result = df.groupby([pd.Grouper(freq='1M'), 'Buyer']).sum()
            expected = DataFrame({
                'Buyer': 'Carl Joe Mark'.split(),
                'Quantity': [10, 18, 3],
                'Date': [
                    datetime(2013, 10, 31, 0, 0),
                    datetime(2013, 10, 31, 0, 0),
                    datetime(2013, 10, 31, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])
            assert_frame_equal(result, expected)

            # passing the name
            # ('Date' becomes a regular column; `df` is rebound from here on)
            df = df.reset_index()
            result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer'
                                 ]).sum()
            assert_frame_equal(result, expected)

            # an unknown key must raise
            with self.assertRaises(KeyError):
                df.groupby([pd.Grouper(freq='1M', key='foo'), 'Buyer']).sum()

            # passing the level
            df = df.set_index('Date')
            result = df.groupby([pd.Grouper(freq='1M', level='Date'), 'Buyer'
                                 ]).sum()
            assert_frame_equal(result, expected)
            result = df.groupby([pd.Grouper(freq='1M', level=0), 'Buyer']).sum(
            )
            assert_frame_equal(result, expected)

            # an unknown level name must raise
            with self.assertRaises(ValueError):
                df.groupby([pd.Grouper(freq='1M', level='foo'),
                            'Buyer']).sum()

            # multi names
            # ('Date' now exists both as the index name and as a column whose
            # values are the index shifted by two month ends)
            df = df.copy()
            df['Date'] = df.index + pd.offsets.MonthEnd(2)
            result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer'
                                 ]).sum()
            expected = DataFrame({
                'Buyer': 'Carl Joe Mark'.split(),
                'Quantity': [10, 18, 3],
                'Date': [
                    datetime(2013, 11, 30, 0, 0),
                    datetime(2013, 11, 30, 0, 0),
                    datetime(2013, 11, 30, 0, 0),
                ]
            }).set_index(['Date', 'Buyer'])
            assert_frame_equal(result, expected)

            # error as we have both a level and a name!
            with self.assertRaises(ValueError):
                df.groupby([pd.Grouper(freq='1M', key='Date',
                                       level='Date'), 'Buyer']).sum()

            # single groupers
            # (no key/level given: grouping uses the index)
            expected = DataFrame({'Quantity': [31],
                                  'Date': [datetime(2013, 10, 31, 0, 0)
                                           ]}).set_index('Date')
            result = df.groupby(pd.Grouper(freq='1M')).sum()
            assert_frame_equal(result, expected)

            result = df.groupby([pd.Grouper(freq='1M')]).sum()
            assert_frame_equal(result, expected)

            # key='Date' selects the shifted column instead of the index
            expected = DataFrame({'Quantity': [31],
                                  'Date': [datetime(2013, 11, 30, 0, 0)
                                           ]}).set_index('Date')
            result = df.groupby(pd.Grouper(freq='1M', key='Date')).sum()
            assert_frame_equal(result, expected)

            result = df.groupby([pd.Grouper(freq='1M', key='Date')]).sum()
            assert_frame_equal(result, expected)

        # GH 6764 multiple grouping with/without sort
        df = DataFrame({
            'date': pd.to_datetime([
                '20121002', '20121007', '20130130', '20130202', '20130305',
                '20121002', '20121207', '20130130', '20130202', '20130305',
                '20130202', '20130305'
            ]),
            'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5],
            'whole_cost': [1790, 364, 280, 259, 201, 623, 90, 312, 359, 301,
                           359, 801],
            'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12]
        }).set_index('date')

        # NOTE(review): pd.TimeGrouper is believed to be deprecated/removed in
        # later pandas releases in favour of pd.Grouper(freq=...), which the
        # rest of this test already uses -- confirm against the target version.
        for freq in ['D', 'M', 'A', 'Q-APR']:
            # reference result: resample within each user, then reorder the
            # levels to (date, user_id) to match the grouped output
            expected = df.groupby('user_id')[
                'whole_cost'].resample(
                    freq).sum().dropna().reorder_levels(
                        ['date', 'user_id']).sort_index().astype('int64')
            expected.name = 'whole_cost'

            # with the index sorted first
            result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq),
                                               'user_id'])['whole_cost'].sum()
            assert_series_equal(result1, expected)

            # with the index left unsorted
            result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[
                'whole_cost'].sum()
            assert_series_equal(result2, expected)