Example #1
def get_dstat_snpwindow(af, quadruples, jackknife_window=2000,
    # NOTE(review): this copy of the function is incomplete. The parameter
    # list is never closed, `snp_window` (used below) is not declared, and the
    # parentheses of the first rolling_sum call do not balance. Restore the
    # missing lines from the upstream source before using this code.
    # min fraction of snps to report value
    #(only makes a difference for right-most interval)
    min_observation_fraction = 0.75
    # Three result lists are returned; only `dstats` is filled in the visible code.
    dstats = []
    jackknife_window_dstats = []
    snp_window_dstats = []
    # Each quadruple names four columns of `af`: h1, h2, h3 and o.
    for j, (h1, h2, h3, o) in enumerate(quadruples):
        dstat = pd.DataFrame(columns=['num', 'denom'])
        # Per-row numerator and denominator terms; rows with a NaN are dropped.
        dstat['num'] = ((af[h1] - af[h2]) * (af[h3] - af[o])).dropna()
        dstat['denom'] = ((af[h1] + af[h2] - 2 * af[h1] * af[h2])
                          * (af[h3] + af[o] - 2 * af[h3] * af[o])).dropna()
        # only use informative SNPs
        dstat = dstat[dstat['denom'] != 0]
        # Totals over all remaining rows: [sum of numerators, sum of denominators].
        dstats.append([dstat['num'].sum(), dstat['denom'].sum()])
        # Centered rolling sum sampled every `jackknife_window` rows, starting at
        # the window midpoint, i.e. one value per non-overlapping window.
        # NOTE(review): a keyword such as `min_periods=int(` seems to be missing
        # before the next line -- confirm against the original.
        jackknife_window_sum = pd.rolling_sum(dstat, jackknife_window,
                                                  min_observation_fraction * jackknife_window),
                                              center=True).iloc[jackknife_window / 2::jackknife_window].dropna()
        # NOTE(review): jackknife_window_sum is never stored in
        # jackknife_window_dstats in the visible code.
        #del jackknife_window_sum
        #del dstat
        # gc.collect
        if snp_window is not None:
            # Same windowing as above with `snp_window` rows per window.
            snp_window_sum = pd.rolling_sum(dstat, snp_window,
                                            min_periods=0,  # what should this be?
                                            center=True).iloc[snp_window / 2::snp_window].dropna()
            # NOTE(review): snp_window_sum is never stored in snp_window_dstats
            # in the visible code.
            # gc.collect()
    return dstats, jackknife_window_dstats, snp_window_dstats
Example #2
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    """Join the KST oscillator to ``df`` as a new column.

    For each pair ``(r, n)`` the rate of change of ``df["Close"]`` over
    ``r - 1`` rows is summed over a rolling window of ``n`` rows; the four
    sums are combined with weights 1, 2, 3 and 4.

    :param df: DataFrame with a ``"Close"`` column.
    :param r1, r2, r3, r4: rate-of-change periods (the lag used is ``r - 1``).
    :param n1, n2, n3, n4: rolling-sum window lengths, in rows.
    :return: ``df`` joined with a column named
        ``KST_<r1>_<r2>_<r3>_<r4>_<n1>_<n2>_<n3>_<n4>``. Rows without a full
        window are NaN.
    """
    close = df["Close"]

    def summed_roc(period, window):
        lag = period - 1
        roc = close.diff(lag) / close.shift(lag)
        # Series.rolling replaces the top-level rolling_sum helper, which was
        # removed from pandas.
        return roc.rolling(window).sum()

    name = "KST_" + "_".join(
        str(value) for value in (r1, r2, r3, r4, n1, n2, n3, n4))
    kst = (
        summed_roc(r1, n1)
        + summed_roc(r2, n2) * 2
        + summed_roc(r3, n3) * 3
        + summed_roc(r4, n4) * 4
    ).rename(name)
    return df.join(kst)
Example #3
def mfi(prices, params={"window": 14}):
    """Compute the Money Flow Index.

    Formula:
    1. Typical Price = (High + Low + Close)/3
    2. Raw Money Flow = Typical Price x Volume
    3. Money Flow Ratio = (14-period Positive Money Flow)/(14-period Negative Money Flow)
    4. Money Flow Index = 100 - 100/(1 + Money Flow Ratio)

    Parameters
    ----------
    prices: DataFrame
        Includes the open, close, high, low and volume.
    params: dict
        ``params["window"]`` is the rolling window length in rows. The
        default dict is only read, never modified.

    Returns
    -------
    mfi_val: DataFrame
        One ``"MFI"`` column indexed like ``prices``.
    """
    window = params["window"]
    raw_flow = __tp(prices) * prices['Volume']
    close = prices["Close"]
    change = close - close.shift(1)

    # Split the raw flow by the sign of the close-to-close change. Rows with
    # no change (and the first row, whose change is NaN) stay in both series.
    positive_flow = raw_flow.copy()
    negative_flow = raw_flow.copy()
    positive_flow[change < 0] = 0
    negative_flow[change > 0] = 0

    # Series.rolling replaces the removed top-level pd.rolling_sum.
    ratio = (positive_flow.rolling(window).sum()
             / negative_flow.rolling(window).sum())
    mfi_val = 100 - 100. / (1 + ratio)

    return pd.DataFrame(mfi_val.values, index=prices.index, columns=["MFI"])
Example #4
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    """KST Oscillator.

    For each pair ``(r, n)`` the rate of change of ``df['Close']`` over
    ``r - 1`` rows is summed over a rolling window of ``n`` rows; the four
    sums are combined with weights 1, 2, 3 and 4.

    :param df: DataFrame with a ``'Close'`` column.
    :param r1, r2, r3, r4: rate-of-change periods (the lag used is ``r - 1``).
    :param n1, n2, n3, n4: rolling-sum window lengths, in rows.
    :return: whatever ``out(SETTINGS, df, result)`` returns, where ``result``
        is the series named ``KST_<r1>_<r2>_<r3>_<r4>_<n1>_<n2>_<n3>_<n4>``.
    """
    close = df['Close']

    def summed_roc(period, window):
        lag = period - 1
        roc = close.diff(lag) / close.shift(lag)
        # Series.rolling replaces the removed top-level pd.rolling_sum.
        return roc.rolling(window).sum()

    name = 'KST_' + '_'.join(
        str(value) for value in (r1, r2, r3, r4, n1, n2, n3, n4))
    result = pd.Series(
        summed_roc(r1, n1) +
        summed_roc(r2, n2) * 2 +
        summed_roc(r3, n3) * 3 +
        summed_roc(r4, n4) * 4,
        name=name)
    return out(SETTINGS, df, result)
def getDF():
    """Print one site's rainfall with 24 h and 72 h rolling totals as JSON.

    Command-line arguments: ``sys.argv[1]`` is the table (site) name,
    ``sys.argv[2]`` and ``sys.argv[3]`` are the start and end timestamps.
    The function prints the JSON records and returns nothing.
    """
    rsite = sys.argv[1]
    fdate = sys.argv[2]
    tdate = sys.argv[3]
    # NOTE(review): the credential/host part of this URL looks mangled
    # ("[email protected]"); confirm the real connection string.
    engine = create_engine(
        'mysql+pymysql://updews:[email protected]/senslopedb')
    # NOTE(security): the query is built by interpolating command-line
    # arguments; validate them or bind the timestamps as parameters if this
    # can ever receive untrusted input.
    query = "select * from senslopedb.%s where timestamp between '%s' and  '%s'" % (
        rsite, fdate, tdate)
    df = pd.io.sql.read_sql(query, engine)
    df.columns = ['ts', 'cumm', 'rval']
    df = df.set_index(['ts'])
    df = df["rval"].astype(float)
    df = df[df >= 0]
    # resample(how="sum") was removed from pandas. min_count=1 keeps empty
    # 15-minute bins as NaN, as the old API did, instead of 0.
    df = df.resample('15Min').sum(min_count=1)
    # 96 and 288 bins of 15 minutes are 24 hours and 72 hours.
    dfs = df.rolling(96, min_periods=1).sum()
    dfs1 = df.rolling(288, min_periods=1).sum()
    dfs = dfs[dfs >= 0]
    dfs1 = dfs1[dfs1 >= 0]
    dfa = pd.DataFrame({"rval": df, "cumm": dfs, "hrs72": dfs1})
    dfajson = dfa.reset_index().to_json(orient="records", date_format='iso')
    dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "")
    print(dfajson)
Example #6
def ULTOSC(df,ksgn='close'):
    '''
    UOS, the Ultimate Oscillator.

    The indicator was created by Larry Williams, who held that the
    oscillators in common use are quite sensitive to the choice of period
    parameter.

    Input:
        df, data source as a pandas DataFrame with 'high' and 'low' columns
        ksgn, name of the price column (default 'close')
    Output:
        df, the same DataFrame with the oscillator added as a column
    '''
    # NOTE(review): the assignment of `xnam` is missing from this copy of the
    # function; 'uos' is assumed here -- confirm the expected column name.
    xnam = 'uos'
    high = df['high'].tolist()
    low = df['low'].tolist()
    price = df[ksgn].tolist()
    rows = len(df)
    # The first row has no previous price, so its TR and BP are 0.
    TR_l = [0] if rows else []
    BP_l = [0] if rows else []
    for i in range(rows - 1):
        floor = min(low[i + 1], price[i])
        TR_l.append(max(high[i + 1], price[i]) - floor)
        BP_l.append(price[i + 1] - floor)
    # Index both series like df so the result lines up for any index type.
    bp = pd.Series(BP_l, index=df.index)
    tr = pd.Series(TR_l, index=df.index)

    def ratio(window):
        # Series.rolling replaces the removed top-level pd.rolling_sum.
        return bp.rolling(window).sum() / tr.rolling(window).sum()

    df[xnam] = 4 * ratio(7) + 2 * ratio(14) + ratio(28)
    return df
Example #7
 def extract_windows(days):
     """Add trailing 7-row and 30-row crime totals, then drop the first 30 rows.

     ``days`` must contain the columns 'Violent Crimes', 'Severe Crimes',
     'Minor Crimes' and 'Petty Crimes'. The new '... in Last Week' and
     '... in Last Month' columns are written into ``days`` itself.

     :param days: DataFrame with one row per day.
     :return: ``days[30:]``, i.e. the frame without its first 30 rows.
     """
     # Add categories to count types of crimes committed in time windows leading to date we're trying to predict
     for label in ['Violent', 'Severe', 'Minor', 'Petty']:
         daily_counts = days[label + ' Crimes']
         # Series.rolling replaces the removed top-level pd.rolling_sum.
         days[label + ' Crimes in Last Week'] = daily_counts.rolling(7).sum()
         days[label + ' Crimes in Last Month'] = daily_counts.rolling(30).sum()
     # The earliest days in the time series have no complete 30-day window. Remove those days.
     return days[30:]
Example #8
def CumSum(r,offsetstart,end,tsn, data):

    ##prints timestamp and instantaneous rainfall
    ##plots instantaneous rainfall data, 24-hr cumulative and 72-hr rainfall, and half of 2-yr max and 2-yr max rainfall for 10 days

    ##r; string; site code
    ##offsetstart; datetime; starting point of interval with offset to account for moving window operations
    ##end; datetime; end of interval
    ##tsn; string; datetime format allowed in savefig

##    if r!='lipw': continue

    rainfall = data

    # NOTE(review): the statements that assign `rainfall2` and `rainfall3` are
    # missing from this copy; only their comments remain. As written, the
    # return below raises NameError. Restore the rolling-sum lines (window
    # sizes unknown from here) before use.
    #getting the rolling sum for the last24 hours
    #getting the rolling sum for the last 3 days
    return rainfall2, rainfall3
Example #9
def find_capm_gap(df_prices, i_lookback, switch):
    """Compute a rolling return gap of every symbol against the index column.

    The last entry of the module-level ``ls_symbols`` is treated as the index
    series; every other symbol gets one column in the result. Several debug
    dumps are printed along the way.

    :param df_prices: DataFrame with one column per entry of ``ls_symbols``.
    :param i_lookback: rolling window length, in rows.
    :param switch: blend factor. 1 gives the plain difference of rolling
        sums, 0 gives the alpha/beta-adjusted difference.
    :return: DataFrame shaped like ``df_prices``. The index symbol's column
        is left as NaN.
    """
    frames = [df_prices, df_prices]
    df_spread = pd.concat(frames, keys=ls_symbols)
    print("in")
    # Single-argument print keeps the output identical on Python 2 and 3.
    print("df_spread::: %s" % (df_spread,))
    df_capm_gap = np.nan * copy.deepcopy(df_prices)
    ts_index = df_prices[ls_symbols[-1]]
    # Series.rolling replaces the removed pd.rolling_sum/var/cov/mean helpers.
    index_roll = ts_index.rolling(i_lookback)
    ts_x_ret = index_roll.sum()
    for s_symbol in ls_symbols[:-1]:
        ts_price = df_prices[s_symbol]
        price_roll = ts_price.rolling(i_lookback)
        ts_y_ret = price_roll.sum()
        beta = (1 / index_roll.var()) * index_roll.cov(ts_price)
        alpha = price_roll.mean() - beta * index_roll.mean()
        df_capm_gap[s_symbol] = (switch * (ts_y_ret - ts_x_ret)
                                 + (1 - switch) * (ts_y_ret - alpha - beta * ts_x_ret))
    ldt_timestamps = df_capm_gap.index
    print(df_capm_gap)
    # NOTE(review): debug dump with a hard-coded 'AMZN' key; it raises
    # KeyError when that key is not among the concat keys.
    for i in range(1, len(ldt_timestamps)):
        print(df_spread.loc[[('AMZN', df_prices.index[i])]])
    return df_capm_gap
Example #10
def Vortex(df, n):
    """Vortex Indicator.

    For each row after the first, a true-range term and a vortex-movement
    term are derived from the row's 'High'/'Low' and the previous row's
    'High'/'Low'/'Close'. The result is the ratio of their ``n``-row rolling
    sums.

    :param df: DataFrame with 'High', 'Low' and 'Close' columns.
    :param n: rolling window length, in rows.
    :return: whatever ``out(SETTINGS, df, result)`` returns, where ``result``
        is the series named ``'Vortex_' + str(n)``.
    """
    high = df['High'].tolist()
    low = df['Low'].tolist()
    close = df['Close'].tolist()
    rows = len(df)
    # The first row has no predecessor, so both terms start at 0.
    TR = [0] if rows else []
    VM = [0] if rows else []
    for i in range(rows - 1):
        TR.append(max(high[i + 1], close[i]) - min(low[i + 1], close[i]))
        VM.append(abs(high[i + 1] - low[i]) - abs(low[i + 1] - high[i]))
    # Series.rolling replaces the removed top-level pd.rolling_sum. Both
    # series are indexed like df so the result lines up with it.
    vm_sum = pd.Series(VM, index=df.index).rolling(n).sum()
    tr_sum = pd.Series(TR, index=df.index).rolling(n).sum()
    result = pd.Series(vm_sum / tr_sum, name='Vortex_' + str(n))
    return out(SETTINGS, df, result)
Example #11
def rolling_functions_tests(p, d):
    """Check that each legacy ``dd.rolling_*`` function matches ``pd.rolling_*``.

    :param p: the pandas object passed to the ``pd`` functions.
    :param d: the counterpart object passed to the ``dd`` functions.
    """
    # Old-fashioned rolling API
    exact = ('rolling_count', 'rolling_sum', 'rolling_mean', 'rolling_median',
             'rolling_min', 'rolling_max', 'rolling_std', 'rolling_var')
    for func_name in exact:
        expected = getattr(pd, func_name)(p, 3)
        actual = getattr(dd, func_name)(d, 3)
        assert_eq(expected, actual)
    # see note around test_rolling_dataframe for logic concerning precision
    for func_name in ('rolling_skew', 'rolling_kurt'):
        expected = getattr(pd, func_name)(p, 3)
        actual = getattr(dd, func_name)(d, 3)
        assert_eq(expected, actual, check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        expected = pd.rolling_window(p, 3, 'boxcar')
        actual = dd.rolling_window(d, 3, 'boxcar')
        assert_eq(expected, actual)
    # Test with edge-case window sizes
    for width in (0, 1):
        assert_eq(pd.rolling_sum(p, width), dd.rolling_sum(d, width))
    # Test with kwargs
    expected = pd.rolling_sum(p, 3, min_periods=3)
    actual = dd.rolling_sum(d, 3, min_periods=3)
    assert_eq(expected, actual)
Example #12
    def calculate_signals(self, event):
        """
        create signal events
        in asof, ticker, exchange, signal format

        On a 'MARKET' event the latest 201 bars of every symbol are loaded.
        A LONG signal is built when the last adjusted close is above its
        200-bar mean, the last three closes all fell, and the last fall is
        larger than 1.5%. An EXIT signal is built when a held symbol closes
        above its 5-bar mean. ``self.bought`` tracks which symbols are held.
        """
        if event.type != 'MARKET':
            return
        for s in self.symbol_list:
            bars = self.bars.get_latest_bars(s, N = 201)
            df = pd.DataFrame(bars, columns = self.bars.bar_columns)
            if df.empty:
                # No bars for this symbol, so there is nothing to evaluate.
                continue
            adjclose = df['adjclose']
            df['closepctchg'] = adjclose.pct_change(periods=1)
            df['lowpctchg'] = df['low'].pct_change(periods=1)
            df['highpctchg'] = df['high'].pct_change(periods=1)
            df['logreturn'] = np.log(adjclose.pct_change(periods=1)+1)
            # Series.rolling replaces the removed pd.rolling_mean/rolling_sum.
            df['ma200'] = adjclose.rolling(200).mean()
            df['ma5'] = adjclose.rolling(5).mean()
            df['lowpctsign'] = np.sign(df['lowpctchg'])
            df['highpctsign'] = np.sign(df['highpctchg'])
            df['closepctsign'] = np.sign(df['closepctchg'])
            df['sumlowsign'] = df['lowpctsign'].rolling(3).sum()
            df['sumhighsign'] = df['highpctsign'].rolling(3).sum()
            df['sumclosesign'] = df['closepctsign'].rolling(3).sum()
            # Take the last row as scalars. Comparing one-row Series inside
            # `and`/`if` raises "truth value of a Series is ambiguous".
            cv = df.iloc[-1]
            last = bars[-1]
            shown = (last[0], last[1], last[2], last[7])

            entry = (len(bars) == 201
                     and cv['ma200'] < cv['adjclose']
                     and cv['sumclosesign'] == -3
                     and cv['closepctchg'] < -0.015)
            if entry and self.bought[s] == False:
                # A single joined string prints the same on Python 2 and 3.
                print(' '.join(str(x) for x in ('SIGNAL: ',) + shown + ('LONG',)))
                # NOTE(review): the signal object is created but never
                # dispatched in the visible code; a line that hands it to an
                # event queue may be missing -- confirm.
                signal = SignalEvent(last[1], last[2], last[0], last[7], 'LONG')
                self.bought[s] = True

            if self.bought[s] == True and cv['adjclose'] > cv['ma5']:
                print(' '.join(str(x) for x in ('SIGNAL: ',) + shown + ('EXIT',)))
                signal = SignalEvent(last[1], last[2], last[0], last[7], 'EXIT')
                self.bought[s] = False
Example #13
def get_estimator(ticker, start, end, window=30, clean=True):
    """Estimate annualised rolling volatility from open/high/low/close prices.

    Prices come from ``data.get_data(ticker, start, end)``. Three rolling
    variance terms (overnight, close-to-close and an intraday
    high/low/close term) are combined with fixed weights, square-rooted and
    scaled by ``sqrt(252)``.

    :param ticker: passed through to ``data.get_data``.
    :param start: passed through to ``data.get_data``.
    :param end: passed through to ``data.get_data``.
    :param window: rolling window length, in rows.
    :param clean: when true, rows without a value are dropped from the result.
    :return: Series of volatility estimates. The first ``window - 1`` entries
        are NaN unless ``clean`` drops them.
    """
    prices = data.get_data(ticker, start, end)
    log_ho = np.log(prices['Adj High'] / prices['Adj Open'])
    log_lo = np.log(prices['Adj Low'] / prices['Adj Open'])
    log_co = np.log(prices['Adj Close'] / prices['Adj Open'])
    log_oc = np.log(prices['Adj Open'] / prices['Adj Close'].shift(1))
    log_oc_sq = log_oc**2
    # NOTE(review): this term uses the unadjusted 'Close' while every other
    # term uses adjusted prices -- confirm that is intended.
    log_cc = np.log(prices['Close'] / prices['Close'].shift(1))
    log_cc_sq = log_cc**2
    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)

    # Series.rolling replaces the removed top-level pandas.rolling_sum.
    scale = 1.0 / (window - 1.0)
    close_vol = log_cc_sq.rolling(window=window).sum() * scale
    open_vol = log_oc_sq.rolling(window=window).sum() * scale
    window_rs = rs.rolling(window=window).sum() * scale
    # NOTE(review): the weights are constants and do not change with
    # `window` -- verify them when calling with a window other than 30.
    result = (open_vol + 0.164333 * close_vol + 0.835667 * window_rs).apply(np.sqrt) * math.sqrt(252)
    result[:window-1] = np.nan

    if clean:
        return result.dropna()
    # Previously unreachable, so clean=False returned None.
    return result
Example #14
def rolling_proportion(df, date_col, value_col, value, window=30, dropna=True, label=False, fig=None, ax=None, color=None):
    """
    Interpolated proportion of binary risk factor over time.

    df = pandas df
    date_col = name of column containing dates
    value_col = name of column to be tallied
    value = value to tally (e.g. 'Male')
    window = number of days to include. Default is 30.
             (The window is counted in distinct dates present in the data.)
    dropna = exclude rows where val is NaN. Default is true. False will include those rows.
    label = legend label
    fig, ax = matplotlib objects
    Returns Series of proportions with date index, fig, and ax.

    Note: If you are having trouble, make ensure that your date_col is a datetime.
    """
    # Work on a copy so the caller's frame is never modified.
    df = df[df[date_col].notnull()].copy()
    df.index = df[date_col]

    if not dropna:
        # Keep rows with a missing value in the denominator. They can never
        # match `value`. (The column is filled instead of replacing `df`
        # with a Series, which broke the column lookups below.)
        df[value_col] = df[value_col].fillna(False)
    else:
        df = df[df[value_col].notnull()].copy()

    df['matches'] = (df[value_col] == value).astype(int)
    df['ones'] = 1

    prop = pd.DataFrame({
        'numerator': df['matches'].groupby(df.index).sum(),
        'denom': df['ones'].groupby(df.index).sum(),
    })
    # At least 5 dated rows are required before a proportion is reported.
    # Series.rolling replaces the removed top-level pd.rolling_sum.
    prop['proportion'] = (prop['numerator'].rolling(window, min_periods=5).sum()
                          / prop['denom'].rolling(window, min_periods=5).sum())
    prop = prop.dropna(how='any')

    # Expand to one row per calendar day and carry the last value forward.
    ts = pd.date_range(min(prop.index), max(prop.index))
    new_prop = prop['proportion'].reindex(ts).ffill()

    if fig is None and ax is None:
        fig, ax = plt.subplots()

    if color is None:
        color = 'b'
    new_prop.plot(ax=ax, label=label, color=color)
    ax.set_ylim(-0.05, 1.05)
    if label != False:
        # NOTE(review): the body of this branch is missing from this copy.
        # Drawing the legend is assumed -- confirm against the original.
        ax.legend()

    return new_prop, fig, ax
Example #15
def _calculate_cmo_values(returns, periods):
    """Return (gain count - loss count) / (gain count + loss count) per window.

    A return counts as a gain when it is not below zero and as a loss when
    it is below zero.

    :param returns: Series of returns.
    :param periods: rolling window length, in rows.
    :return: Series indexed like ``returns``. Rows without a full window
        are NaN.
    """
    # NaN compares false in both tests, so a NaN return is flagged as both a
    # gain and a loss.
    gain_flags = returns.map(lambda x: 0 if x < 0 else 1)
    loss_flags = returns.map(lambda x: 0 if x >= 0 else 1)
    # Series.rolling replaces the removed top-level pd.rolling_sum.
    sums_of_gains = gain_flags.rolling(window=periods).sum()
    sums_of_losses = loss_flags.rolling(window=periods).sum()
    result = (sums_of_gains - sums_of_losses) / (sums_of_gains + sums_of_losses)
    # The loop-built result had no name; keep it unnamed.
    result.name = None
    return result
Example #16
def rsi(data, periods=14):
    """Compute a relative-strength value in the range 0..1.

    Gains and losses are the positive and negative parts of the row-to-row
    change of ``data['Adjusted Close']``. Each average blends the previous
    row's ``periods``-row rolling sum with the current gain or loss.

    :param data: DataFrame with an ``'Adjusted Close'`` column.
    :param periods: rolling window length, in rows.
    :return: Series ``1 - 1 / (1 + avg_gain / avg_loss)``. The first
        ``periods + 1`` rows are NaN.
    """
    close = data['Adjusted Close']
    change = close - close.shift(1)
    # clip keeps NaN as NaN, like the per-element lambdas it replaces.
    gains = change.clip(lower=0)
    losses = (-change).clip(lower=0)
    # Series.rolling replaces the removed top-level pd.rolling_sum.
    first_avg_gain = gains.rolling(periods).sum()
    first_avg_loss = losses.rolling(periods).sum()
    ag = (first_avg_gain.shift(1) * (periods-1) + gains)/periods
    al = (first_avg_loss.shift(1) * (periods-1) + losses)/periods
    rs = ag / al

    return 1 - 1/(1+rs)
Example #17
def SensorPlot(r,offsetstart,end,tsn, data, halfmax, twoyrmax):
    ##r; str; site
    ##offsetstart; datetime; starting point of interval with offset to account for moving window operations
    ##end; datetime; end of rainfall data
    ##tsn; str; time format acceptable as file name
    ##data; dataframe; rainfall data
    ##halfmax; float; half of 2yr max rainfall, one-day cumulative rainfall threshold
    ##twoyrmax; float; 2yr max rainfall, three-day cumulative rainfall threshold
    ##rainfall2, rainfall3; dataframe containing one-day and three-day cumulative rainfall
    # NOTE(review): `PrintPlot` and `RainfallPlotsPath` are not defined in this
    # function; presumably module-level settings -- confirm.
    if PrintPlot:
        plt.xticks(rotation=70, size=5)       
    # NOTE(review): the statements that assign `rainfall2`, `rainfall3` and `sub`
    # are missing from this copy; only their comments remain. As written, the
    # code below raises NameError.
    #getting the rolling sum for the last24 hours
    #getting the rolling sum for the last 3 days
    if PrintPlot:
        #assigning the thresholds to their own columns for plotting 

        sub['maxhalf'] = halfmax  
        sub['max'] = twoyrmax
        #assigning df to plot variables (to avoid caveats ? expressed from Spyder)
        plot1=data.dropna()     # instantaneous rainfall data
        plot2=rainfall2             # 24-hr cumulative rainfall
        plot3=rainfall3             # 72-hr cumulative rainfall
        plot4=sub['maxhalf']        # half of 2-yr max rainfall
        plot5=sub['max']            # 2-yr max rainfall

        #plots instantaneous rainfall data, 24-hr cumulative rainfall, 72-hr cumulative rainfall,
        #half of 2-yr max rainfall, 2-yr max rainfall
        plt.plot(plot1.index,plot1,color='#db4429', label = 'instantaneous rainfall') # instantaneous rainfall data
        plt.plot(plot2.index,plot2,color='#5ac126', label = '24hr cumulative rainfall') # 24-hr cumulative rainfall
        plt.plot(plot3.index,plot3,color='#0d90d0', label = '72hr cumulative rainfall') # 72-hr cumulative rainfall
        plt.plot(plot4.index,plot4,color="#fbb714", label = 'half of 2yr max rainfall') # half of 2-yr max rainfall
        plt.plot(plot5.index,plot5,color="#963bd6", label = '2yr max rainfall')  # 2-yr max rainfall
        plt.legend(loc='upper left', fontsize = 8)        
        # The figure is written to RainfallPlotsPath + tsn + "_" + r.
        plt.savefig(RainfallPlotsPath+tsn+"_"+r, dpi=160, 
            facecolor='w', edgecolor='w',orientation='landscape',mode='w')
    return rainfall2, rainfall3
Example #18
def getrain(rainsite,start,end):
    """Load raw rain data for a site and add rolling 1-day and 3-day totals.

    :param rainsite: passed to ``q.GetRawRainData``.
    :param start: passed to ``q.GetRawRainData`` as ``fromTime``.
    :param end: passed to ``q.GetRawRainData`` as ``toTime``.
    :return: DataFrame indexed by ``ts`` in 30-minute bins with the extra
        columns ``one_d``, ``thr_d`` and ``gts`` (``datenum`` applied to each
        bin timestamp).
    """
    raindf = q.GetRawRainData(rainsite, fromTime=start, toTime=end)
    raindf = raindf.set_index('ts')
    # resample(how='sum') was removed from pandas. min_count=1 keeps empty
    # bins as NaN, as the old API did, instead of 0.
    raindf = raindf.resample('30min').sum(min_count=1)
    # 48 and 144 half-hour bins are one day and three days.
    raindf['one_d'] = raindf.rain.rolling(48, min_periods=1).sum()
    raindf['thr_d'] = raindf.rain.rolling(144, min_periods=1).sum()
    # `ts` is the index at this point, not a column, so read the timestamps
    # from the index.
    raindf['gts'] = [datenum(stamp) for stamp in raindf.index]
    return raindf
def ULTOSC(df):
    """Join the Ultimate Oscillator to ``df`` as column 'Ultimate_Osc'.

    For each row after the first, TR is ``max(High, previous Close) -
    min(Low, previous Close)`` and BP is ``Close - min(Low, previous
    Close)``. The result is ``4*BP7/TR7 + 2*BP14/TR14 + BP28/TR28``, where
    the suffix is the rolling-sum window in rows.

    :param df: DataFrame with 'High', 'Low' and 'Close' columns.
    :return: ``df`` joined with the 'Ultimate_Osc' column. The first 27 rows
        are NaN.
    """
    high = df['High'].tolist()
    low = df['Low'].tolist()
    close = df['Close'].tolist()
    rows = len(df)
    # The first row has no previous close, so its TR and BP are 0.
    TR_l = [0] if rows else []
    BP_l = [0] if rows else []
    for i in range(rows - 1):
        floor = min(low[i + 1], close[i])
        TR_l.append(max(high[i + 1], close[i]) - floor)
        BP_l.append(close[i + 1] - floor)
    # Index both series like df so the join lines up for any index type.
    bp = pd.Series(BP_l, index=df.index)
    tr = pd.Series(TR_l, index=df.index)

    def ratio(window):
        # Series.rolling replaces the removed top-level pd.rolling_sum.
        return bp.rolling(window).sum() / tr.rolling(window).sum()

    UltO = pd.Series(4 * ratio(7) + 2 * ratio(14) + ratio(28), name='Ultimate_Osc')
    df = df.join(UltO)
    return df
Example #20
def prediction_augmented(df_train, col_names, df_day_avg_values, adjacency_list, df_model, prediction_model, window_size=10, do_rounding = False):
    """Fill the values marked -1 in each sensor column with model predictions.

    For every column in ``col_names`` a feature matrix is built from the
    neighbouring sensors listed in ``adjacency_list``: their values in
    ``df_model``, their rolling sums over ``window_size`` rows on either
    side, and three staircase flags. ``prediction_model`` is fitted on the
    rows where the column is known and predicts the rows where
    ``df_train`` holds -1.

    :param df_train: DataFrame with the sensor columns (-1 marks a missing
        value) and a ``'day_time'`` column.
    :param col_names: sensor column names of the form ``'S<number>'``.
    :param df_day_avg_values: DataFrame indexed by ``day_time`` values with
        one column per sensor. It is only used in an inner merge.
    :param adjacency_list: indexable by sensor number, giving neighbour numbers.
    :param df_model: DataFrame with missing values already filled by a
        baseline model. It is not modified.
    :param prediction_model: object with ``fit(X, y)`` returning the fitted
        model, and ``predict(X)``.
    :param window_size: rows on each side used for the rolling sums. The
        first and last ``window_size`` rows are never predicted.
    :param do_rounding: round the stored column when true.
    :return: copy of ``df_model`` with predicted values written in.
    """
    staircaseA_nodes = ['S42', 'S46']
    staircaseB_nodes = ['S34', 'S35']
    staircaseC_nodes = ['S52', 'S53']
    # Dataframe to store the model prediction
    df_model_lr = df_model.copy()
    # Building the moving sum for the features before/after for each neighbor.
    # sort_index and DataFrame.rolling replace the removed DataFrame.sort and
    # pd.rolling_sum. Subtracting df_model leaves out the current row.
    span = window_size + 1
    model_curr_before = df_model.sort_index(ascending=False).rolling(span).sum() - df_model
    model_curr_after = df_model.rolling(span).sum() - df_model
    model_curr_before = model_curr_before.rename(columns={col: col + 'before' for col in col_names})
    model_curr_after = model_curr_after.rename(columns={col: col + 'after' for col in col_names})
    window_features = model_curr_after.join(model_curr_before[[col_ + 'before' for col_ in col_names]])
    for col in col_names:
        # X will store the features and the outcome Y
        X = df_train.copy()
        X = X.rename(columns={col: 'Y'})
        X = pd.merge(X, df_day_avg_values[[col]], left_on='day_time', right_index=True)
        X = X.rename(columns={col: col + 'avg'})

        # Building the neighbors (from adjacency list) with missing values filled as in model
        neighbors_col = ['S' + str(n) for n in adjacency_list[int(col[1:])]]
        X = X[['Y']].join(df_model[neighbors_col])
        X = X.join(window_features[[col_ + 'before' for col_ in neighbors_col]
                                   + [col_ + 'after' for col_ in neighbors_col]])
        # Removing the first and last element impossible to compute given the window_size
        X = X.sort_index()[window_size: - window_size]
        # augment with staircase info
        X['sA'] = (col in staircaseA_nodes) * 1.
        X['sB'] = (col in staircaseB_nodes) * 1.
        X['sC'] = (col in staircaseC_nodes) * 1.

        X_train = X[X['Y'] != -1]
        X_test = X[X['Y'] == -1]
        test_indices = X_test.index
        # Explicit copy: the column is edited and then written back below.
        col_values = df_model_lr[col].copy()

        if len(X_test):
            # Models
            prediction_model = prediction_model.fit(X_train.drop('Y', axis=1), X_train.Y)
            col_values.loc[test_indices] = prediction_model.predict(X_test.drop('Y', axis=1))

            # Filling the result with the current sensor prediction
            if do_rounding:
                df_model_lr[col] = np.round(col_values)
            else:
                # Without this branch the unrounded values always overwrote
                # the rounded ones.
                df_model_lr[col] = col_values
    return df_model_lr
Example #21
def VWAP_dataPre(qx,xnam0,ksgn0):
    # NOTE(review): this copy of the function is incomplete. The docstring
    # delimiters are missing from the description lines below, the loop has no
    # executable statements, and the final `if` has no body. Restore the
    # missing lines from the original source before use.
    vwap 数据预处理函数,vwap策略,成交量加权平均价
        qx (zwQuantX): zwQuantX数据包 
        xnam0 (str):函数标签
        ksgn0 (str): 价格列名称,一般是'adj close'

    # Stores the price column name locally and on qx.priceCalc.
    ksgn,qx.priceCalc=ksgn0,ksgn0;  #'adj close';'close';
    # Iterates over the codes in zw.stkLibCode.
    for xcod in zw.stkLibCode:
        #d20['dprice']=d20['open']*d20['adj close']/d20['close']
        #d20=zwta.MA(d20,qx.staMA_short,'adj close');
        #d20=zwta.MA(d20,qx.staMA_long,'adj close');
        #d20['dprice']=d20['open']*d20['adj close']/d20['close']
        #d20['dprice']=d20['adj close']
        #vwap = (prices * volume).sum(n) / volume.sum(n)  #the sum function automatically ignores NaN values
        if qx.debugMod>0:
Example #22
def tdi(price, window=20, multiple=2):
    ''' Trend Detection Index

    :param price: price data, normalised through ``utils.safe_series``.
    :param window: momentum lag and base rolling window, in rows.
    :param multiple: the long window is ``window * multiple`` rows.
    :return: DataFrame with columns ``tdi`` and ``di`` indexed like the
        normalised price series.
    '''
    price = utils.safe_series(price)
    # Momentum over `window` rows; undefined leading values count as 0.
    mom = (price - price.shift(window)).fillna(0)
    abs_mom = mom.abs()
    # Series.rolling replaces the removed top-level pd.rolling_sum.
    di = mom.rolling(window).sum()
    mom_2n_abs = abs_mom.rolling(window * multiple).sum()
    mom_1n_abs = abs_mom.rolling(window).sum()
    tdi_ = di.abs() - (mom_2n_abs - mom_1n_abs)
    return pd.DataFrame(dict(tdi=tdi_, di=di), index=price.index)
Example #23
def ULTOSC(df):
    """Ultimate Oscillator.

    For each row after the first, TR is ``max(High, previous Close) -
    min(Low, previous Close)`` and BP is ``Close - min(Low, previous
    Close)``. The result is ``4*BP7/TR7 + 2*BP14/TR14 + BP28/TR28``, where
    the suffix is the rolling-sum window in rows.

    :param df: DataFrame with 'High', 'Low' and 'Close' columns.
    :return: whatever ``out(SETTINGS, df, result)`` returns, where ``result``
        is the series named 'Ultimate_Osc'.
    """
    high = df['High'].tolist()
    low = df['Low'].tolist()
    close = df['Close'].tolist()
    rows = len(df)
    # The first row has no previous close, so its TR and BP are 0.
    TR_l = [0] if rows else []
    BP_l = [0] if rows else []
    for i in range(rows - 1):
        floor = min(low[i + 1], close[i])
        TR_l.append(max(high[i + 1], close[i]) - floor)
        BP_l.append(close[i + 1] - floor)
    # Index both series like df so the result lines up with it.
    bp = pd.Series(BP_l, index=df.index)
    tr = pd.Series(TR_l, index=df.index)

    def ratio(window):
        # Series.rolling replaces the removed top-level pd.rolling_sum.
        return bp.rolling(window).sum() / tr.rolling(window).sum()

    result = pd.Series(4 * ratio(7) + 2 * ratio(14) + ratio(28), name='Ultimate_Osc')
    return out(SETTINGS, df, result)
Example #24
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    """Compute the KST oscillator of ``df['close']``.

    For each pair ``(r, n)`` the rate of change over ``r - 1`` rows is
    summed over a rolling window of ``n`` rows; the four sums are combined
    with weights 1, 2, 3 and 4.

    :param df: DataFrame with a ``'close'`` column.
    :param r1, r2, r3, r4: rate-of-change periods (the lag used is ``r - 1``).
    :param n1, n2, n3, n4: rolling-sum window lengths, in rows.
    :return: Series named ``KST_<r1>_<r2>_<r3>_<r4>_<n1>_<n2>_<n3>_<n4>``.
        Rows without a full window are NaN.
    """
    close = df['close']

    def summed_roc(period, window):
        lag = period - 1
        roc = close.diff(lag) / close.shift(lag)
        # Series.rolling replaces the removed top-level pd.rolling_sum.
        return roc.rolling(window).sum()

    name = 'KST_' + '_'.join(
        str(value) for value in (r1, r2, r3, r4, n1, n2, n3, n4))
    KST = pd.Series(
        summed_roc(r1, n1)
        + summed_roc(r2, n2) * 2
        + summed_roc(r3, n3) * 3
        + summed_roc(r4, n4) * 4,
        name=name)
    return KST
Example #25
def mfi(hlc, volume, window=14):
    """Compute a money-flow index from high/low/close prices and volume.

    Money flow is the mean of high, low and close times ``volume / 1000``.
    A row counts as positive flow when its money flow is above the previous
    row's and as negative flow when it is below.

    :param hlc: price data, split into high/low/close by ``utils.safe_hlc``.
    :param volume: volume data, normalised through ``utils.safe_series``.
    :param window: rolling window length, in rows.
    :return: ``100 - 100 / (1 + positive sum / negative sum)``, indexed
        like ``hlc``.
    """
    high, low, close = utils.safe_hlc(hlc)
    volume = utils.safe_series(volume) / 1000
    price = (high+low+close) * 1.0 / 3
    mf = price * volume
    previous_mf = mf.shift(1)
    pmf = (mf > previous_mf).astype(int) * mf
    nmf = (mf < previous_mf).astype(int) * mf
    # Series.rolling replaces the removed top-level pd.rolling_sum.
    mr = pmf.rolling(window).sum() / nmf.rolling(window).sum()
    rval = 100 - (100/(1 + mr))
    utils.safe_name(rval, name='MFI')
    rval.index = hlc.index
    return rval
def collection_freq(breath_df, win):
    """Add centred rolling event frequencies and ventilator-setting means.

    For each of the 'analysis.ds', 'analysis.pl', 'analysis.pvt' and
    'analysis.ie' columns three columns are written into ``breath_df``:
    ``<type>_rolling`` (rolling sum), ``<type>_tot_rolling`` (rolling count
    of non-missing rows) and ``<type>_freq`` (their ratio). Rolling means of
    the PEEP, p_mean and FiO2 settings are added when those columns exist.

    :param breath_df: DataFrame holding the 'analysis.*' columns. It is
        modified in place.
    :param win: window size multiplier. The window is ``60 * win`` rows.
    :return: ``breath_df``.
    """
    window = 60 * win
    for ds_type in ['ds', 'pl', 'pvt', 'ie']:
        events = breath_df['analysis.' + ds_type]
        # Series.rolling replaces the removed pd.rolling_sum/rolling_count.
        # min_periods=0 makes count() report partial windows, as the old
        # rolling_count did.
        breath_df['{0}_rolling'.format(ds_type)] = events.rolling(
            window, center=True, min_periods=1).sum()
        breath_df[ds_type + '_tot_rolling'] = events.rolling(
            window, center=True, min_periods=0).count()
        breath_df[ds_type + '_freq'] = breath_df[ds_type + '_rolling'] / breath_df[ds_type + '_tot_rolling']

    # add rolling average for Fio2, PEEP, p_mean
    optional_settings = (
        ('vent_settings.PEEP', 'peep_rolling'),
        ('vent_settings.p_mean', 'p_mean_rolling'),
        ('vent_settings.FiO2', 'fio2_rolling'),
    )
    for source, target in optional_settings:
        try:
            setting = breath_df[source]
        except KeyError:
            # These columns are optional; skip the ones that are absent.
            continue
        breath_df[target] = setting.rolling(window, center=True, min_periods=1).mean()

    return breath_df
Example #27
def calculateRewards(data, n):
    """Label each row 'bull' or 'bear' from the close change over the next ``n`` rows.

    :param data: DataFrame with a ``'close'`` column.
    :param n: look-ahead horizon, in rows.
    :return: object Series indexed like ``data``: 'bull' where the summed
        change over the next ``n`` rows is >= 0, 'bear' where it is < 0.
        Rows without a full look-ahead window stay NaN.
    """
    logging.info('Rewards: calculating {0}...'.format(n))

    # get tick changes
    diffs = data['close'].diff(1)

    # get rolling sum (Series.rolling replaces the removed pd.rolling_sum)
    sums = diffs.rolling(n).sum()

    # shift so every row sees the sum of its next n changes
    future_change = sums.shift(-n)

    # label data: both masks come from the numeric series, so writing
    # strings into the result cannot affect the second comparison
    rewards = future_change.astype(object)
    rewards[future_change >= 0] = 'bull'
    rewards[future_change < 0] = 'bear'

    logging.info('Rewards: calculated')
    return rewards
def Vortex(df, n):
    """Join the Vortex indicator to ``df`` as column ``'Vortex_' + str(n)``.

    For each row after the first, a true-range term and a vortex-movement
    term are derived from the row's 'High'/'Low' and the previous row's
    'High'/'Low'/'Close'. The indicator is the ratio of their ``n``-row
    rolling sums.

    :param df: DataFrame with 'High', 'Low' and 'Close' columns.
    :param n: rolling window length, in rows.
    :return: ``df`` joined with the new column. The first ``n - 1`` rows
        are NaN.
    """
    high = df['High'].tolist()
    low = df['Low'].tolist()
    close = df['Close'].tolist()
    rows = len(df)
    # The first row has no predecessor, so both terms start at 0.
    TR = [0] if rows else []
    VM = [0] if rows else []
    for i in range(rows - 1):
        # These values were previously computed but never stored.
        TR.append(max(high[i + 1], close[i]) - min(low[i + 1], close[i]))
        VM.append(abs(high[i + 1] - low[i]) - abs(low[i + 1] - high[i]))
    # Series.rolling replaces the removed top-level pd.rolling_sum. Both
    # series are indexed like df so the join lines up for any index type.
    vm_sum = pd.Series(VM, index=df.index).rolling(n).sum()
    tr_sum = pd.Series(TR, index=df.index).rolling(n).sum()
    VI = pd.Series(vm_sum / tr_sum, name='Vortex_' + str(n))
    df = df.join(VI)
    return df
    def aggregate_per_week(self, daily_values, last_week_ending, weeks):
        """
        Aggregates daily values into weekly values.

        Args:
            daily_values: Pandas Series of daily values, indexed by date.
                All dates are assumed to be contiguous, though their values may be NaN.
                Dates do not have to cover the periods being sampled.
            last_week_ending: last day of last week.
            weeks: number of weeks to sample (including the last day)

        Returns:
            Pandas Series with weekly values, indexed by date of last day of week.
            Any day with NaN will result in the corresponding week also being NaN.
            As a consequence, any week requested that is not completely covered
            by the input daily_values will be NaN.
        """
        # For each date in daily input, find sum of day's value with the previous
        # six days. (Series.rolling replaces the removed pandas.rolling_sum.)
        week_window = daily_values.rolling(window=7).sum()

        # Pull out the requested end-of-week days, oldest first.
        days = [last_week_ending - timedelta(i * 7) for i in range(weeks - 1, -1, -1)]
        # reindex yields NaN for requested dates outside the daily input,
        # where .loc with missing labels raises KeyError.
        return week_window.reindex(days)
Example #30
def downloadRainfallNOAH(rsite, fdate, tdate):   
    """Download rainfall for one device and add a rolling 96-bin total.

    :param rsite: device id inserted into the request URL.
    :param fdate: start date inserted into the request URL.
    :param tdate: end date inserted into the request URL.
    :return: DataFrame indexed by ``timestamp`` (15-minute bins) with columns
        ``rval`` and ``cumm``, without its first 96 rows. An empty DataFrame
        is returned when the device is unknown or the payload cannot be
        processed.
    """
    url = "http://weather.asti.dost.gov.ph/home/index.php/api/data/%s/from/%s/to/%s" % (rsite,fdate,tdate)
    r = requests.get(url)

    try:
        df = pd.DataFrame(r.json()["data"])
    except TypeError:
        print("    No device with id of %s" % rsite)
        return pd.DataFrame()

    # NOTE(review): the `try:` and `except` lines around this section are
    # missing from this copy. A catch-all handler is assumed because the
    # fallback is an empty frame -- confirm the original exception type.
    try:
        df = df.set_index(['dateTimeRead'])
        df.index = pd.to_datetime(df.index)
        df = df["rain_value"].astype(float)
        # The old resample() default aggregated by mean; empty bins become 0.
        df = df.resample('15Min').mean().fillna(0.00)
        # Series.rolling replaces the removed top-level pd.rolling_sum.
        dfs = df.rolling(96).sum()
        dfa = pd.DataFrame({"rval":df,"cumm":dfs})
        dfa = dfa.fillna(0)
        # Drop the leading rows that precede the first complete window.
        dfa = dfa[96:]
        #rename the "index" into "timestamp"
        dfa.index.names = ["timestamp"]
        return dfa
    except Exception:
        return pd.DataFrame()
Example #31
def vortex(df, n):
    """Join the Vortex indicator to ``df`` as column ``'Vortex_' + str(n)``.

    For each row after the first, a true-range term and a vortex-movement
    term are derived from the row's 'High'/'Low' and the previous row's
    'High'/'Low'/'Close'. The indicator is the ratio of their ``n``-row
    rolling sums.

    :param df: DataFrame with 'High', 'Low' and 'Close' columns.
    :param n: rolling window length, in rows.
    :return: ``df`` joined with the new column. The first ``n - 1`` rows
        are NaN.
    """
    high = df['High'].tolist()
    low = df['Low'].tolist()
    close = df['Close'].tolist()
    rows = len(df)
    # The first row has no predecessor, so both terms start at 0.
    tr = [0] if rows else []
    vm = [0] if rows else []
    for i in range(rows - 1):
        # These values were previously computed but never stored.
        tr.append(max(high[i + 1], close[i]) - min(low[i + 1], close[i]))
        vm.append(abs(high[i + 1] - low[i]) - abs(low[i + 1] - high[i]))
    # Series.rolling replaces the removed top-level pd.rolling_sum. Both
    # series are indexed like df so the join lines up for any index type.
    vm_sum = pd.Series(vm, index=df.index).rolling(n).sum()
    tr_sum = pd.Series(tr, index=df.index).rolling(n).sum()
    vi = pd.Series(vm_sum / tr_sum, name='Vortex_' + str(n))
    df = df.join(vi)
    return df
Example #32
 def efficiency_ratio(self, periods=[30], sample_size=500):
     """Add efficiency-ratio columns to every frame in ``self.series_set``.

     For each ``period`` three columns are written into each frame:
     ``er<period>`` (absolute change of column ``c`` over ``period`` rows
     divided by the sum of absolute one-row changes over the same rows),
     ``mer<period>`` (its rolling mean over ``sample_size`` rows) and
     ``erz<period>`` (its deviation from that mean, in rolling standard
     deviations).

     :param periods: iterable of period lengths, in rows. The default list
         is only read, never modified.
     :param sample_size: rolling window for the mean and standard deviation.
     :return: ``self``.
     """
     for period in periods:
         label = 'er' + str(period)
         for frame in self.series_set.values():
             close = frame.c
             abs_daily_change = abs(close - close.shift(1))
             abs_period_change = abs(close - close.shift(period))
             # Series.rolling replaces the removed pd.rolling_sum/mean/std.
             sum_change = abs_daily_change.rolling(period).sum()
             frame[label] = abs_period_change / sum_change
             sample = frame[label].rolling(sample_size)
             frame['m' + label] = sample.mean()
             frame['erz' + str(period)] = (frame[label] - frame['m' + label]) / sample.std()
     return self
def Vortex(df, n):
    """Join the Vortex indicator to ``df`` as column ``'Vortex_' + str(n)``.

    For each row after the first, a true-range term and a vortex-movement
    term are derived from the row's 'High'/'Low' and the previous row's
    'High'/'Low'/'Close'. The indicator is the ratio of their ``n``-row
    rolling sums.

    :param df: DataFrame with 'High', 'Low' and 'Close' columns.
    :param n: rolling window length, in rows.
    :return: ``df`` joined with the new column. The first ``n - 1`` rows
        are NaN.
    """
    high = df['High'].tolist()
    low = df['Low'].tolist()
    close = df['Close'].tolist()
    rows = len(df)
    # The first row has no predecessor, so both terms start at 0.
    TR = [0] if rows else []
    VM = [0] if rows else []
    for i in range(rows - 1):
        # These values were previously computed but never stored.
        TR.append(max(high[i + 1], close[i]) - min(low[i + 1], close[i]))
        VM.append(abs(high[i + 1] - low[i]) - abs(low[i + 1] - high[i]))
    # Series.rolling replaces the removed top-level pd.rolling_sum. Both
    # series are indexed like df so the join lines up for any index type.
    vm_sum = pd.Series(VM, index=df.index).rolling(n).sum()
    tr_sum = pd.Series(TR, index=df.index).rolling(n).sum()
    VI = pd.Series(vm_sum / tr_sum, name='Vortex_' + str(n))
    df = df.join(VI)
    return df
Example #34
def mass_index(df):
    """Calculate the Mass Index for given data.

    The high-low range is smoothed with a span-9 exponentially weighted mean,
    that result is smoothed again the same way, and the ratio of the two is
    summed over a 25-row rolling window.

    :param df: pandas.DataFrame with ``High`` and ``Low`` columns.
    :return: pandas.DataFrame -- ``df`` joined with a ``Mass Index`` column.
    """
    high_low = df['High'] - df['Low']
    single = high_low.ewm(span=9, min_periods=8).mean()
    double = single.ewm(span=9, min_periods=8).mean()
    ratio = single / double
    mass = ratio.rolling(25).sum().rename('Mass Index')
    return df.join(mass)
Example #35
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    """KST Oscillator.

    Four rate-of-change series are built from ``df['close']`` -- each is
    ``close.diff(r - 1) / close.shift(r - 1)`` -- summed over rolling windows
    of ``n1`` .. ``n4`` rows and combined with weights 1, 2, 3 and 4.

    :param df: pandas.DataFrame with a ``close`` column.
    :param r1, r2, r3, r4: look-backs of the four rate-of-change series.
    :param n1, n2, n3, n4: rolling-sum window of the matching series.
    :return: whatever ``out(SETTINGS, df, result)`` returns; ``result`` is
        the weighted sum as a named pandas.Series.
    """
    close = df['close']

    def summed_roc(lookback, window):
        # Rate of change over ``lookback - 1`` rows, summed over ``window``.
        roc = close.diff(lookback - 1) / close.shift(lookback - 1)
        return roc.rolling(window).sum()

    weighted = (summed_roc(r1, n1) + summed_roc(r2, n2) * 2 +
                summed_roc(r3, n3) * 3 + summed_roc(r4, n4) * 4)
    result = pd.Series(
        weighted,
        name='KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) +
        '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4))
    return out(SETTINGS, df, result)
def getDF():
    """Print rainfall readings with rolling 24 h and 72 h totals as JSON.

    Command-line arguments (read from ``sys.argv``):

    1. name of the table inside ``senslopedb`` to read,
    2. start of the timestamp range,
    3. end of the timestamp range.

    The ``rain`` column is summed into 15-minute bins.  ``hrs24`` and
    ``hrs72`` are rolling sums over 96 and 288 of those bins.  The result is
    printed to stdout as a JSON array of records; nothing is returned.
    """
    rsite = sys.argv[1]
    fdate = sys.argv[2]
    tdate = sys.argv[3]
    # NOTE(review): credentials are hard-coded and the host part of this URL
    # looks mangled -- confirm against the deployed configuration.
    engine = create_engine(
        'mysql+pymysql://updews:[email protected]/senslopedb')
    # NOTE(review): the statement is assembled from raw command-line text
    # (table name and both dates).  If these values can come from an
    # untrusted caller they must be validated / bound as parameters.
    query = "select timestamp, rain from senslopedb.%s where timestamp between '%s' and  '%s'" % (
        rsite, fdate, tdate)
    df = pd.io.sql.read_sql(query, engine)
    df.columns = ['ts', 'rain']
    df = df.set_index(['ts'])
    df = df["rain"].astype(float)
    # min_count=1 leaves bins without any reading as NaN instead of 0, which
    # is how the legacy ``how="sum"`` form is understood to have behaved.
    df = df.resample('15Min').sum(min_count=1)
    dfs = df.rolling(96, min_periods=1).sum()
    dfs1 = df.rolling(288, min_periods=1).sum()
    # Keep only non-negative totals (this also drops NaN entries).
    dfs = dfs[dfs >= 0]
    dfs1 = dfs1[dfs1 >= 0]
    dfa = pd.DataFrame({"rval": df, "hrs24": dfs, "hrs72": dfs1})
    dfajson = dfa.reset_index().to_json(orient="records", date_format='iso')
    # Turn ISO timestamps into "YYYY-MM-DD HH:MM:SS" by plain text
    # replacement.
    dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "")
    print(dfajson)
Example #37
def getDF():
    """Print 30-minute rainfall with rolling 24 h and 72 h totals as JSON.

    Command-line arguments (read from ``sys.argv``):

    1. suffix of the ``rain_noah_<suffix>`` table to read,
    2. start of the requested range (``%20`` is decoded to a space),
    3. end of the requested range (``%20`` is decoded to a space).

    Data is fetched from three ``td`` units before the start so the rolling
    windows are already filled at the first requested row, then trimmed back
    to the requested range.  ``hrs24`` and ``hrs72`` are rolling sums over 48
    and 144 half-hour bins.  The result is printed to stdout as a JSON array
    of records; nothing is returned.
    """
    rsite = sys.argv[1]
    fdate = sys.argv[2].replace("%20", " ")
    tdate = sys.argv[3].replace("%20", " ")
    # NOTE(review): credentials are hard-coded and the host part of this URL
    # looks mangled -- confirm against the deployed configuration.
    engine = create_engine(
        'mysql+pymysql://updews:[email protected]/senslopedb')
    # NOTE(review): the statement is assembled from raw command-line text.
    # If these values can come from an untrusted caller they must be
    # validated / bound as parameters.
    query = "select timestamp, rval from senslopedb.rain_noah_%s " % rsite
    query += "where timestamp between '%s' and '%s'" % (pd.to_datetime(fdate) -
                                                        td(3), tdate)
    df = pd.io.sql.read_sql(query, engine)
    df.columns = ['ts', 'rain']
    df = df[df.rain >= 0]
    df = df.set_index(['ts'])
    df = df.resample('30Min').sum()

    # Per-bin values, kept separately so they can be reported as ``rval``.
    df_inst = df.resample('30Min').sum()

    if max(df_inst.index) < pd.to_datetime(tdate):
        # Pad with a zero reading at the end of the range so the resampled
        # index reaches ``tdate``.
        new_data = pd.DataFrame({'ts': [pd.to_datetime(tdate)], 'rain': [0]})
        new_data = new_data.set_index(['ts'])
        df = pd.concat([df, new_data])
        df = df.resample('30Min').sum()

    df1 = df.rolling(48, min_periods=1).sum()
    df3 = df.rolling(144, min_periods=1).sum()

    df['rval'] = df_inst
    df['hrs24'] = df1
    df['hrs72'] = df3

    df = df[(df.index >= fdate) & (df.index <= tdate)]

    dfajson = df.reset_index().to_json(orient="records", date_format='iso')
    # Turn ISO timestamps into "YYYY-MM-DD HH:MM:SS" by plain text
    # replacement.
    dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "")
    print(dfajson)
Example #38
    def get_df(self, query_obj=None):
        """Post-process the parent class's DataFrame for a time-series chart.

        Steps, each driven by ``self.form_data``: fill NaN with 0, pivot,
        optionally resample, optionally turn values into per-row
        contributions, optionally compare against ``num_period_compare`` rows
        earlier, and optionally apply a rolling mean / std / sum or a
        cumulative sum.

        :param query_obj: passed through to the parent ``get_df``.
        :return: the transformed DataFrame.
        :raises Exception: when the "granularity" form field is "all".
        """
        form_data = self.form_data
        df = super(NVD3TimeSeriesViz, self).get_df(query_obj)

        df = df.fillna(0)
        if form_data.get("granularity") == "all":
            raise Exception("Pick a time granularity for your time series")

        # NOTE(review): the pivot_table call below is cut off in this copy --
        # its arguments and closing parenthesis are missing.
        df = df.pivot_table(

        fm = form_data.get("resample_fillmethod")
        if not fm:
            # Normalise any falsy value (e.g. an empty string) to None.
            fm = None
        how = form_data.get("resample_how")
        rule = form_data.get("resample_rule")
        if how and rule:
            df = df.resample(rule, how=how, fill_method=fm)
            if not fm:
                # No fill method requested: gaps from resampling become 0.
                df = df.fillna(0)

        if self.sort_series:
            # NOTE(review): as written ``dfs`` is never reordered, so this
            # selects the columns in their existing order -- a sort step
            # may be missing; confirm.
            dfs = df.sum()
            df = df[dfs.index]

        if form_data.get("contribution"):
            # Divide each row by its total (done on the transpose).
            dft = df.T
            df = (dft / dft.sum()).T

        num_period_compare = form_data.get("num_period_compare")
        if num_period_compare:
            num_period_compare = int(num_period_compare)
            # Relative change versus the value that many rows earlier; the
            # leading rows have nothing to compare against and are dropped.
            df = (df / df.shift(num_period_compare)) - 1
            df = df[num_period_compare:]

        rolling_periods = form_data.get("rolling_periods")
        rolling_type = form_data.get("rolling_type")

        if rolling_type in ('mean', 'std', 'sum') and rolling_periods:
            if rolling_type == 'mean':
                df = pd.rolling_mean(df, int(rolling_periods), min_periods=0)
            elif rolling_type == 'std':
                df = pd.rolling_std(df, int(rolling_periods), min_periods=0)
            elif rolling_type == 'sum':
                df = pd.rolling_sum(df, int(rolling_periods), min_periods=0)
        elif rolling_type == 'cumsum':
            df = df.cumsum()
        return df
Example #39
def VORTEX(df, n):
    """Vortex Indicator.

    See http://www.vortexindicator.com/VFX_VORTEX.PDF

    For every row after the first, two per-row quantities are computed from
    the ``high``, ``low`` and ``close`` columns:

    * range:    ``max(high[i+1], close[i]) - min(low[i+1], close[i])``
    * movement: ``|high[i+1] - low[i]| - |low[i+1] - high[i]|``

    The indicator is the ``n``-row rolling sum of the movement divided by the
    ``n``-row rolling sum of the range.

    :param df: pandas.DataFrame data source with ``high``, ``low`` and
        ``close`` columns.
    :param n: rolling window length, in rows.
    :return: a new DataFrame -- ``df`` joined with the indicator column.
    """
    # NOTE(review): the original output column name was not visible in this
    # copy; ``Vortex_<n>`` follows the sibling Vortex helpers -- confirm.
    xnam = 'Vortex_' + str(n)

    highs = df['high'].tolist()
    lows = df['low'].tolist()
    closes = df['close'].tolist()

    # Row 0 has no predecessor, so both series start with a 0 placeholder.
    TR = [0]
    VM = [0]
    for cur in range(1, len(highs)):
        prev = cur - 1
        TR.append(max(highs[cur], closes[prev]) - min(lows[cur], closes[prev]))
        VM.append(abs(highs[cur] - lows[prev]) - abs(lows[cur] - highs[prev]))

    rows = len(df)  # slicing keeps an empty frame from getting a stray row
    movement = pd.Series(VM[:rows], index=df.index, dtype=float)
    spread = pd.Series(TR[:rows], index=df.index, dtype=float)
    ds = (movement.rolling(n).sum() / spread.rolling(n).sum()).rename(xnam)
    return df.join(ds)
Example #40
def count_by_loc_time(data):
    """Rolling ``vcrime`` totals per (zipcode, time) over 1, 6 and 24 rows.

    ``data`` is first summed per (``zipcode``, ``time``, ``year_month``).
    Within each (``zipcode``, ``time``) group, rows are then summed over
    trailing windows of 1, 6 and 24 rows (one row per ``year_month``).

    NOTE(review): the three rolling statements were truncated in the copy
    this was restored from.  Any further chained step (for example a shift
    to exclude the current row) was not visible and is not applied here.

    :param data: pandas.DataFrame with ``zipcode``, ``time``, ``year_month``
        and ``vcrime`` columns.
    :return: pandas.DataFrame with columns ``zipcode``, ``time``,
        ``year_month``, ``count_1m_loc_time``, ``count_6m_loc_time`` and
        ``count_2y_loc_time``.
    """
    temp_time = (data.groupby(['zipcode', 'time', 'year_month'])
                 .agg({'vcrime': 'sum'})
                 .reset_index())

    windows = (('count_1m_loc_time', 1),
               ('count_6m_loc_time', 6),
               ('count_2y_loc_time', 24))
    per_slot = temp_time.groupby(['zipcode', 'time'])['vcrime']
    # ``transform`` keeps the original row index, so each result lines up
    # with ``temp_time`` when assigned as a column.
    rolled = {
        column: per_slot.transform(
            lambda s, w=window: s.rolling(window=w, min_periods=0).sum())
        for column, window in windows
    }
    for column, values in rolled.items():
        temp_time[column] = values

    return temp_time.drop(['vcrime'], axis=1)
def getRSI(close, n=3):
    """Calculate RSI value.

    The value is the rolling sum of positive close-to-close changes divided
    by the rolling sum of absolute changes, over ``n`` rows.  Positions where
    the ratio is undefined (NaN or infinite) are reported as 0.  The result
    is indexed 0..len-1 regardless of the index of ``close``.

    :param DataFrame close: close price
    :param int n: rolling window length in rows (default 3)
    :return: DataFrame RSI: RSI value
    """
    # Increment of the close price between two succeeding rows.
    close_increment = close.diff().reset_index(drop=True)
    gains = close_increment.clip(lower=0)
    moves = close_increment.abs()
    ratio = gains.rolling(n).sum() / moves.rolling(n).sum()
    return ratio.replace([np.inf, -np.inf], np.nan).fillna(0)
Example #42
def calc_log_vols(ser):
    """Return annualised daily and weekly volatility of ``ser``.

    Uses the module-level ``period`` as the number of trailing observations.
    Intermediate arrays are printed to stdout as a debugging aid.

    * daily:  sample std of the last ``period`` values, times ``sqrt(252)``.
    * weekly: 5-row rolling sums of the last ``period + 5`` values, reduced
      to the last ``period`` of them, every fifth one kept starting at
      offset 4; sample std of those, times ``sqrt(52)``.

    :param ser: pandas.Series of per-day values (presumably log returns,
        given the function name -- confirm against the caller).
    :return: pandas.Series indexed ``['daily', 'weekly']``.
    """
    stk = ser.values

    daily = stk[-period:]
    print(daily)
    daily = np.std(daily, ddof=1) * np.sqrt(252.0)

    # Rolling sum over every 5 consecutive rows.
    weekly = pd.Series(stk[-(period + 5):]).rolling(5).sum().values
    print(weekly)
    weekly = weekly[-period:]  # keep the last ``period`` sums
    weekly = weekly[4::5]  # non-overlapping 5-row sums only
    print(weekly)
    weekly = np.std(weekly, ddof=1) * np.sqrt(52)
    return pd.Series([daily, weekly], index=['daily', 'weekly'])
Example #43
def KAMA(df, n):
    # NOTE(review): this definition is incomplete in this copy.  The line
    # below is docstring text whose quotes are missing, the final statement
    # has unbalanced parentheses, and nothing is returned.
    Kaufman Moving Average
    def minusone(x):
        # Difference between the entries labelled -1 and -2 of ``x`` once
        # wrapped in a DataFrame.
        # NOTE(review): ``df[-1]`` is a column lookup by label, not the last
        # row -- confirm the intent.
        df = pd.DataFrame(x)
        t = df[-1] - df[-2]
        return t

    # Mid-point of each row's high and low.
    PP = (df['High'] + df['Low']) / 2
    # Absolute change of the mid-point between the entries labelled -1 and
    # -1 - n.
    Directions = pd.Series.abs(PP[-1] - PP[-1 - n])
    # NOTE(review): truncated expression -- an enclosing call that took ``n``
    # as its second argument appears to be missing.
    Volatility = pd.Series(
            pd.Series.abs(pd.rolling_apply(PP, 1, lambda x: minusone(x))), n))
Example #44
def rolling_functions_tests(p, d):
    # Checks, via ``eq``, that each legacy module-level ``dd.rolling_*``
    # function applied to ``d`` gives the same result as the ``pd.rolling_*``
    # function of the same name applied to ``p``.
    # NOTE(review): ``mad`` is not defined in this function; it is presumably
    # a module-level helper -- confirm.
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    # ImportError is ignored for the windowed variant.
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'),
           dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    # NOTE(review): the call below is cut off in this copy (the remaining
    # arguments and closing parentheses are missing).
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3,
Example #45
def rolling_tests(p, d):
    # Checks, via ``eq``, that each legacy module-level ``dd.rolling_*``
    # function applied to ``d`` gives the same result as the ``pd.rolling_*``
    # function of the same name applied to ``p``.
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    # Mean absolute deviation from the window mean, used as the custom
    # function for rolling_apply.
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    # ImportError is ignored for the windowed variant.
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'),
           dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    # NOTE(review): the call below is cut off in this copy (the remaining
    # arguments and closing parentheses are missing).
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3,
Example #46
def estimateVolatility(ohlc, N=10, algo='YangZhang'):
    """Volatility estimation.

    Possible algorithms: ['YangZhang', 'CC']

    * ``'YangZhang'`` combines rolling sums of the squared open-to-previous-
      close log return, the squared close-to-close log return and a
      high / low / close term, each scaled by ``1 / (N - 1)``.
    * ``'CC'`` is the standard close-close estimator: the root of the rolling
      mean of squared close-to-close log returns.

    Both are scaled by ``sqrt(252)`` and returned in percent.

    :param ohlc: pandas.DataFrame with ``open``, ``high``, ``low`` and
        ``close`` columns (only ``close`` is read for ``'CC'``).
    :param N: rolling window length, in rows.
    :param algo: ``'YangZhang'`` or ``'CC'``.
    :return: pandas.Series of volatility estimates in percent.
    :raises ValueError: if ``algo`` is not one of the supported names.
    """
    cc = np.log(ohlc.close / ohlc.close.shift(1))

    if algo == 'YangZhang':  # Yang-zhang volatility
        ho = np.log(ohlc.high / ohlc.open)
        lo = np.log(ohlc.low / ohlc.open)
        co = np.log(ohlc.close / ohlc.open)
        oc = np.log(ohlc.open / ohlc.close.shift(1))

        rs = ho * (ho - co) + lo * (lo - co)

        scale = 1.0 / (N - 1.0)
        close_vol = (cc ** 2).rolling(window=N).sum() * scale
        open_vol = (oc ** 2).rolling(window=N).sum() * scale
        window_rs = rs.rolling(window=N).sum() * scale

        result = np.sqrt(open_vol + 0.164333 * close_vol +
                         0.835667 * window_rs) * np.sqrt(252)

        # The first N - 1 rows never see a full window.
        result.iloc[:N - 1] = np.nan

    elif algo == 'CC':  # standard close-close estimator
        result = np.sqrt(252) * np.sqrt((cc ** 2).rolling(N).sum() / N)

    else:
        raise ValueError('Unknown algo type.')

    return result * 100
Example #47
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4, ksgn='close'):
    """KST Oscillator.

    Four rate-of-change series are built from ``df[ksgn]`` -- each is
    ``diff(r - 1) / shift(r - 1)`` -- summed over rolling windows of ``n1``
    .. ``n4`` rows and combined with weights 1, 2, 3 and 4.

    Example parameters:
    ``(r1, r2, r3, r4, n1, n2, n3, n4) = (1, 2, 3, 4, 6, 7, 9, 9)``

    :param df: pandas.DataFrame data source.
    :param r1, r2, r3, r4: look-backs of the four rate-of-change series.
    :param n1, n2, n3, n4: rolling-sum window of the matching series.
    :param ksgn: name of the price column to use (default ``'close'``).
    :return: a new DataFrame -- ``df`` joined with the oscillator column.
    """
    # NOTE(review): the definition of the output name was not visible in
    # this copy; it is rebuilt from the commented-out expression that sat
    # next to the Series construction -- confirm.
    xnam = ('KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) +
            '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4))

    price = df[ksgn]

    def summed_roc(lookback, window):
        # Rate of change over ``lookback - 1`` rows, summed over ``window``.
        roc = price.diff(lookback - 1) / price.shift(lookback - 1)
        return roc.rolling(window).sum()

    kst = pd.Series(summed_roc(r1, n1) + summed_roc(r2, n2) * 2 +
                    summed_roc(r3, n3) * 3 + summed_roc(r4, n4) * 4,
                    name=xnam)
    return df.join(kst)
def CCI(data, n=20, m=7):
    """Build two CCI-based trading signals.

    A ``cci`` column, computed by ``ta.CCI`` from the ``high``, ``low`` and
    ``close`` columns with period ``n``, is written onto ``data`` in place.

    :param data: pandas.DataFrame with ``high``, ``low`` and ``close``.
    :param n: period passed to ``ta.CCI``.
    :param m: look-back, in rows, used by strategy 2.
    :return: pandas.DataFrame indexed like ``data`` with columns ``'1'`` and
        ``'2'``; entries are 1, -1, or 0 where there is no signal.
    """
    data['cci'] = ta.CCI(np.array(data.high), np.array(data.low),
                         np.array(data.close), n)
    signal = pd.DataFrame(index=data.index)
    cci = data['cci']
    prev_cci = cci.shift(1)

    # Shared entry events: +1 when CCI crosses above 100, -1 when it crosses
    # below -100.
    entries = (((cci > 100) & (prev_cci < 100)) * 1 +
               ((cci < -100) & (prev_cci > -100)) * (-1))

    # Strategy 1
    # When CCI crosses above 100, buy: signal is 1.
    # When CCI crosses below -100, sell short: signal is -1.
    # NOTE(review): the list passed to ``isin`` was cut off after ``1,`` in
    # the copy this was restored from; ``-1`` is the only other signal value
    # produced here -- confirm.
    signal['1'] = entries.where(entries.isin([1, -1]))

    # Strategy 2
    # CCI crosses above 100: buy, signal is 1.
    # When CCI falls back through 100 within m rows of that upward cross,
    # sell: signal is -1.  Otherwise the signal is left unchanged until CCI
    # crosses below -100.
    # The -100 side is handled symmetrically.
    recent_entries = entries.rolling(m).sum()
    fell_back = (cci < 100) & (prev_cci > 100)
    rose_back = (cci > -100) & (prev_cci < -100)
    combined = (entries +
                (fell_back & (recent_entries > 0)) * (-1) +
                (rose_back & (recent_entries < 0)) * 1)
    signal['2'] = combined.where(combined.isin([1, -1]))

    # Rows filtered out above are NaN at this point; report them as 0.
    signal = signal.fillna(0)
    return signal
Example #49
    def stop(trip_data):
        """Stop-detection algorithm.

        A row is flagged when it and the nine rows before it all have
        ``v < 0.5``.

        NOTE(review): in the copy this was restored from, the rolling
        expression ended in a dangling line continuation, so a further
        chained step may have been lost -- confirm.

        :param trip_data: pandas.DataFrame with a ``v`` column; it is not
            modified.
        :return: numpy array of 0 / 1 flags, one per row.
        """
        n = 10
        is_slow = (trip_data.v < 0.5).astype(int)
        # A full window of slow rows sums to exactly n; incomplete windows
        # give NaN, which compares unequal and therefore yields 0.
        full_window = is_slow.rolling(n).sum() == n
        return np.where(full_window, 1, 0)
Example #50
def money_flow_index(df, col_volume='Volume_BTC', n=14):
    """Money Flow Index.

    :param df: pandas.DataFrame with ``High``, ``Low``, ``Close``, the volume
        column named by ``col_volume`` and an ``Up_or_Down`` column in which
        1 marks rows counted as positive flow and 2 rows counted as negative
        flow.  ``df`` is not modified.
    :param col_volume: name of the volume column.
    :param n: rolling window length, in rows.
    :return: pandas.Series ``100 - 100 / (1 + positive / negative)`` where
        the two flows are summed over ``n`` rows.
    """
    # 1 typical price
    tp = (df['High'] + df['Low'] + df['Close']) / 3.0
    # 2 money flow
    mf = tp * df[col_volume]
    # 3 positive and negative money flow with n periods; rows belonging to
    #   the other direction contribute 0
    positive_mf = mf.where(df['Up_or_Down'] == 1, 0.0)
    negative_mf = mf.where(df['Up_or_Down'] == 2, 0.0)
    n_positive_mf = positive_mf.rolling(n).sum()
    n_negative_mf = negative_mf.rolling(n).sum()
    # 4 money flow index
    mr = n_positive_mf / n_negative_mf

    return (100 - (100 / (1 + mr)))
Example #51
    def boll(self, start, stop):
        # Computes a 20-row moving average with bands two rolling standard
        # deviations above and below it, and stores them as ``boll`` / ``up``
        # / ``dn`` on ``self.stocks[start:stop]``.
        close = []
        # NOTE(review): the body of this loop is missing in this copy, so
        # ``close`` is never filled; presumably it collected one price per
        # stock in the range -- confirm.
        for i in range(start, stop):

        closepd = pandas.Series(close)
        # Rolling sum divided by the window length, i.e. a 20-row mean.
        ma = pandas.rolling_sum(closepd, 20) / 20
        md = pandas.rolling_std(closepd, 20)
        up = ma + 2 * md
        dn = ma - 2 * md

        # Series position ``i - start`` corresponds to ``self.stocks[i]``.
        for i in range(start, stop):
            self.stocks[i].boll = ma[i - start]
            self.stocks[i].up = up[i - start]
            self.stocks[i].dn = dn[i - start]
Example #52
def vortex_indicator(df, n):
    """Calculate the Vortex Indicator for given data.

    For every row after the first, two per-row quantities are computed from
    the ``High``, ``Low`` and ``Close`` columns:

    * range:    ``max(High[i+1], Close[i]) - min(Low[i+1], Close[i])``
    * movement: ``|High[i+1] - Low[i]| - |Low[i+1] - High[i]|``

    The indicator is the ``n``-row rolling sum of the movement divided by the
    ``n``-row rolling sum of the range.

    :param df: pandas.DataFrame
    :param n: rolling window length, in rows
    :return: pandas.DataFrame -- ``df`` joined with a ``Vortex_<n>`` column
    """
    highs = df['High'].tolist()
    lows = df['Low'].tolist()
    closes = df['Close'].tolist()

    # Row 0 has no predecessor, so both series start with a 0 placeholder.
    TR = [0]
    VM = [0]
    for cur in range(1, len(highs)):
        prev = cur - 1
        TR.append(max(highs[cur], closes[prev]) - min(lows[cur], closes[prev]))
        VM.append(abs(highs[cur] - lows[prev]) - abs(lows[cur] - highs[prev]))

    rows = len(df)  # slicing keeps an empty frame from getting a stray row
    movement = pd.Series(VM[:rows], index=df.index, dtype=float)
    spread = pd.Series(TR[:rows], index=df.index, dtype=float)
    VI = (movement.rolling(n).sum() / spread.rolling(n).sum()).rename(
        'Vortex_' + str(n))
    return df.join(VI)
Example #53
def rolling_functions_tests(p, d):
    # Checks, via ``eq``, that each legacy module-level ``dd.rolling_*``
    # function applied to ``d`` gives the same result as the ``pd.rolling_*``
    # function of the same name applied to ``p``.
    # NOTE(review): ``mad`` is not defined in this function; it is presumably
    # a module-level helper -- confirm.
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3), check_less_precise=True)
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3), check_less_precise=True)
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    # ImportError is ignored for the windowed variant.
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'),
           dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    # NOTE(review): the call below is cut off in this copy (the remaining
    # arguments and closing parentheses are missing).
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3,
Example #54
    def rolling_sum(self, data_frame, periods):
        """Calculates the rolling sum

        Parameters
        ----------
        data_frame : DataFrame
            contains time series
        periods : int
            period for rolling sum

        Returns
        -------
        DataFrame
            sum over the trailing ``periods`` rows; rows without a full
            window are NaN
        """
        return data_frame.rolling(periods).sum()
Example #55
def Vortex(df, n):
    """Vortex Indicator.

    For every row after the first, two per-row quantities are computed from
    the ``high``, ``low`` and ``close`` columns:

    * range:    ``max(high[i+1], close[i]) - min(low[i+1], close[i])``
    * movement: ``|high[i+1] - low[i]| - |low[i+1] - high[i]|``

    The indicator is the ``n``-row rolling sum of the movement divided by the
    ``n``-row rolling sum of the range.

    :param df: pandas.DataFrame with ``high``, ``low`` and ``close`` columns.
    :param n: rolling window length, in rows.
    :return: whatever ``out(SETTINGS, df, result)`` returns; ``result`` is
        the indicator as a pandas.Series named ``Vortex_<n>`` with a default
        0..len-1 index.
    """
    highs = df['high'].tolist()
    lows = df['low'].tolist()
    closes = df['close'].tolist()

    # Row 0 has no predecessor, so both series start with a 0 placeholder.
    TR = [0]
    VM = [0]
    for cur in range(1, len(highs)):
        prev = cur - 1
        TR.append(max(highs[cur], closes[prev]) - min(lows[cur], closes[prev]))
        VM.append(abs(highs[cur] - lows[prev]) - abs(lows[cur] - highs[prev]))

    result = pd.Series(pd.Series(VM).rolling(n).sum() /
                       pd.Series(TR).rolling(n).sum(),
                       name='Vortex_' + str(n))
    return out(SETTINGS, df, result)
Example #56
def ULTOSC(df):
    # Weighted combination (weights 4, 2, 1) of rolling-sum ratios
    # BP_l / TR_l over windows of 7, 14 and 28 rows.
    # NOTE(review): this definition is truncated in this copy -- see the
    # notes below.
    TR_l = TR(df)
    # NOTE(review): the concat call is cut off; presumably it reduced
    # ``low`` and the previous ``close`` to one value per row -- confirm.
    BP_l = df['close'] - pd.concat([df['low'], df['close'].shift(1)],
    # NOTE(review): the Series constructor below is never closed; any
    # remaining arguments are missing.
    UltO = pd.Series(
        (4 * pd.rolling_sum(BP_l, 7) / pd.rolling_sum(TR_l, 7)) +
        (2 * pd.rolling_sum(BP_l, 14) / pd.rolling_sum(TR_l, 14)) +
        (pd.rolling_sum(BP_l, 28) / pd.rolling_sum(TR_l, 28)),
    return UltO
Example #57
def net_matrix_build(data, station_data):
    '''This function adds the information about the total bikes 
    at station to our net matrix, which currently only contains small 
    integers that represent the net change in bikes over each hour.'''
    for column in data.columns:
        # Add the station's 'Available Docks Guess' value to every 24th row
        # of its column (rows 0, 24, 48, ... for 793 iterations), then let
        # the rolling sum below accumulate the hourly net changes on top.
        # NOTE(review): the inner loop is indented one level deeper than
        # anything that opens a block here -- a guarding statement appears
        # to be missing from this copy.
            for i in range(793):
                data.ix[24*i, column] += station_data.ix[int(column), 'Available Docks Guess']
    # Finally, compute a 24-row rolling sum down each column; missing values
    # are filled with False first and partial windows are allowed
    # (min_periods = 1).
    data = pd.rolling_sum(data.fillna(False), window = 24, min_periods = 1) 
    return data
Example #58
    def load_we_dist(self):
        """Load histograms from ``../we/pdist.h5`` and smooth them.

        The ``histograms`` dataset is summed along its first axis over a
        trailing window of up to 40 rows, and every row of the result is
        then divided by its own total.

        :return: tuple ``(pcum, b)`` -- the row-normalised rolling sums and
            the ``midpoints_0`` dataset.
        """
        with h5py.File('../we/pdist.h5', 'r') as f:
            p = f['histograms'][:]
            b = f['midpoints_0'][:]

        # Rolling sum down the rows; windows at the top are simply shorter.
        pcum = pd.DataFrame(p).rolling(window=40, min_periods=0).sum().values

        # Normalise each row to sum to 1 (a new array is built so the
        # rolling result is never written to in place).
        pcum = pcum / pcum.sum(axis=1)[:, np.newaxis]

        return pcum, b
Example #59
 def _roll_std(self, sample):
     # NOTE(review): this definition is mangled in this copy.  ``ts`` is
     # built by calling ``.resample`` on a lambda, and the statement that
     # assigns ``result.total`` is cut off mid-call.
     # Per row: (sum of squares - sum * sum / count) divided by
     # (count - 1), where the divisor stays at ``count`` when count <= 1.
     calculator = lambda x: (x['rate_square'] - x['rate'] * x['rate'] / x['trade_days']) \
                            / (x['trade_days'] - (x['trade_days'] > 1))
     ts = (lambda x: pd.DataFrame(
              rate_square=(x['rate'] * x['rate']),
              trade_days=x['trade_days'])).resample('MS', how='sum')
     result = DataFrameExtended([], index=ts.index.rename('time'))
     for key, value in self._column_names['M'].items():
         # XXX Taking the square root halves the precision, so the
         # floating-point error left by the earlier arithmetic has to be
         # dealt with before this point.
         result[value[0]] = _deal_float_error(
             pd.rolling_sum(ts, key).apply(calculator, axis=1))**0.5
     # NOTE(review): truncated -- the argument to ``calculator`` and the
     # closing parentheses are missing.
     result.total = (lambda x: int(abs(x) > FLOAT_ERR) * x)(calculator(
     return _deal_float_error(result)
Example #60
def get_last_changepoint(resid, max_days_over=5, plot=False, window=30):
    """Poor man's changepoint detection.

    Moving ranges (absolute successive differences of ``resid``) are
    compared with a control limit of ``3.267`` times their NaN-ignoring
    mean.  For every position the number of exceedances within the trailing
    ``window`` moving ranges is counted.

    :param resid: 1-D array-like of residuals.
    :param max_days_over: a position qualifies when its trailing count is
        strictly greater than this.
    :param plot: accepted for compatibility.  NOTE(review): the plotting
        code was not present in the copy this was restored from, so the flag
        currently has no effect.
    :param window: number of trailing moving ranges counted (default 30).
    :return: one past the largest qualifying position in the moving-range
        array, or 0 when no position qualifies.
    """
    m_rngs = np.abs(np.diff(resid))
    ucl = 3.267 * np.nanmean(m_rngs)

    if plot:
        pass  # see the NOTE(review) on ``plot`` above

    over_limit = pd.Series(m_rngs > ucl).astype(float)
    # Positions without a full window come back as NaN and count as 0.
    days_over = np.nan_to_num(over_limit.rolling(window).sum().values)

    args_where_over = np.argwhere(days_over > max_days_over)
    if len(args_where_over) == 0:
        return 0
    return np.max(args_where_over) + 1