Example #1
def get_dstat_snpwindow(af, quadruples, jackknife_window=2000,
                        snp_window=None):
    # min fraction of snps to report value
    #(only makes a difference for right-most interval)
    min_observation_fraction = 0.75
    dstats = []
    jackknife_window_dstats = []
    snp_window_dstats = []
    for j, (h1, h2, h3, o) in enumerate(quadruples):
        dstat = pd.DataFrame(columns=['num', 'denom'])
        dstat['num'] = ((af[h1] - af[h2]) * (af[h3] - af[o])).dropna()
        dstat['denom'] = ((af[h1] + af[h2] - 2 * af[h1] * af[h2])
                          * (af[h3] + af[o] - 2 * af[h3] * af[o])).dropna()
        # only use informative SNPs
        dstat = dstat[dstat['denom'] != 0]
        dstats.append([dstat['num'].sum(), dstat['denom'].sum()])
        jackknife_window_sum = pd.rolling_sum(dstat, jackknife_window,
                                              min_periods=int(
                                                  min_observation_fraction * jackknife_window),
                                              center=True).iloc[jackknife_window / 2::jackknife_window].dropna()
        jackknife_window_dstats.append(
            jackknife_window_sum.reset_index(level=1).values.tolist())
        #del jackknife_window_sum
        #del dstat
        # gc.collect
        if snp_window is not None:
            snp_window_sum = pd.rolling_sum(dstat, snp_window,
                                            min_periods=0,  # what should this be?
                                            center=True).iloc[snp_window / 2::snp_window].dropna()
            # gc.collect()
            snp_window_dstats.append(
                snp_window_sum.reset_index(level=1).values.tolist())
    return dstats, jackknife_window_dstats, snp_window_dstats
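Note: pd.rolling_sum was deprecated in pandas 0.18 and removed in 0.23. A minimal sketch of the centered block sums above with the modern .rolling() API (the function name and standalone form are illustrative, not part of the original):

import pandas as pd

def block_sums(dstat, window, min_obs_fraction=0.75):
    # centered rolling sum, then one value per non-overlapping block,
    # mirroring pd.rolling_sum(..., center=True).iloc[window // 2::window]
    rolled = dstat.rolling(window, center=True,
                           min_periods=int(min_obs_fraction * window)).sum()
    return rolled.iloc[window // 2::window].dropna()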
Example #2
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    M = df["Close"].diff(r1 - 1)
    N = df["Close"].shift(r1 - 1)
    ROC1 = M / N
    M = df["Close"].diff(r2 - 1)
    N = df["Close"].shift(r2 - 1)
    ROC2 = M / N
    M = df["Close"].diff(r3 - 1)
    N = df["Close"].shift(r3 - 1)
    ROC3 = M / N
    M = df["Close"].diff(r4 - 1)
    N = df["Close"].shift(r4 - 1)
    ROC4 = M / N
    KST = Series(
        rolling_sum(ROC1, n1) + rolling_sum(ROC2, n2) * 2 + rolling_sum(ROC3, n3) * 3 + rolling_sum(ROC4, n4) * 4,
        name="KST_"
        + str(r1)
        + "_"
        + str(r2)
        + "_"
        + str(r3)
        + "_"
        + str(r4)
        + "_"
        + str(n1)
        + "_"
        + str(n2)
        + "_"
        + str(n3)
        + "_"
        + str(n4),
    )
    df = df.join(KST)
    return df
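Each ROC term above is just a lagged percent change: close.diff(r - 1) / close.shift(r - 1) equals close.pct_change(r - 1). A compact sketch of the same weighted sum with the modern rolling API (function name and default parameters are illustrative):

def kst_sketch(close, rs=(10, 15, 20, 30), ns=(10, 10, 10, 15)):
    # weights 1..4 match ROC1*1 + ROC2*2 + ROC3*3 + ROC4*4 above
    return sum(close.pct_change(r - 1).rolling(n).sum() * w
               for w, (r, n) in enumerate(zip(rs, ns), start=1))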
Example #3
def mfi(prices, params={"window": 14}):
    """
    1. Typical Price = (High + Low + Close)/3
    2. Raw Money Flow = Typical Price x Volume
    3. Money Flow Ratio = (14-period Positive Money Flow)/(14-period Negative Money Flow)
    4. Money Flow Index = 100 - 100/(1 + Money Flow Ratio)

    Parameters
    ----------
    prices: DataFrame
        Includes the open, close, high, low and volume.
    params: dict

    Returns
    ----------
    mfi_val: DataFrame
    """
    window = params["window"]
    tp = __tp(prices)
    rmf = tp * prices['Volume']
    close = prices["Close"]
    ret = close - close.shift(1)
    prmf = rmf.copy()
    nrmf = rmf.copy()
    prmf[ret < 0] = 0
    nrmf[ret > 0] = 0

    mfr = pd.rolling_sum(prmf, window)/pd.rolling_sum(nrmf, window)
    mfi_val = 100 - 100. / (1 + mfr)

    return pd.DataFrame(mfi_val.values, index=prices.index, columns=["MFI"])
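A minimal sketch of the four documented steps with the modern rolling API (assumes 'High', 'Low', 'Close' and 'Volume' columns; handling of flat bars differs slightly from the code above, which zeroes flows on down/up days only):

def mfi_sketch(prices, window=14):
    tp = (prices['High'] + prices['Low'] + prices['Close']) / 3    # step 1
    rmf = tp * prices['Volume']                                    # step 2
    ret = prices['Close'].diff()
    prmf = rmf.where(ret > 0, 0.0)    # positive money flow
    nrmf = rmf.where(ret < 0, 0.0)    # negative money flow
    mfr = prmf.rolling(window).sum() / nrmf.rolling(window).sum()  # step 3
    return 100 - 100.0 / (1 + mfr)                                 # step 4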
Example #4
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    """
    KST Oscillator
    """
    M = df['Close'].diff(r1 - 1)
    N = df['Close'].shift(r1 - 1)
    ROC1 = M / N
    M = df['Close'].diff(r2 - 1)
    N = df['Close'].shift(r2 - 1)
    ROC2 = M / N
    M = df['Close'].diff(r3 - 1)
    N = df['Close'].shift(r3 - 1)
    ROC3 = M / N
    M = df['Close'].diff(r4 - 1)
    N = df['Close'].shift(r4 - 1)
    ROC4 = M / N
    result = pd.Series(
        pd.rolling_sum(ROC1, n1) +
        pd.rolling_sum(ROC2, n2) * 2 +
        pd.rolling_sum(ROC3, n3) * 3 +
        pd.rolling_sum(ROC4, n4) * 4,
        name='KST_' +
        str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) + '_' +
        str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4)
    )
    return out(SETTINGS, df, result)
Example #5
def getDF():

    rsite = sys.argv[1]
    fdate = sys.argv[2]
    tdate = sys.argv[3]
    #    rsite = '1236'
    # set arbitrarily for now
    #    fdate = "2015-04-25"
    #    tdate = "2016-04-25"
    engine = create_engine(
        'mysql+pymysql://updews:[email protected]/senslopedb')
    query = "select * from senslopedb.%s where timestamp between '%s' and  '%s'" % (
        rsite, fdate, tdate)
    df = pd.io.sql.read_sql(query, engine)
    df.columns = ['ts', 'cumm', 'rval']
    df = df.set_index(['ts'])
    df = df["rval"].astype(float)
    df = df[df >= 0]
    df = df.resample('15Min', how="sum")
    dfs = pd.rolling_sum(df, 96, min_periods=1)
    dfs1 = pd.rolling_sum(df, 288, min_periods=1)
    dfs = dfs[dfs >= 0]
    dfs1 = dfs1[dfs1 >= 0]
    dfa = pd.DataFrame({"rval": df, "cumm": dfs, "hrs72": dfs1})
    dfajson = dfa.reset_index().to_json(orient="records", date_format='iso')
    dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "")
    print dfajson
Example #6
def ULTOSC(df,ksgn='close'):
    '''
    def ULTOSC(df,ksgn='close'):
    UOS, Ultimate Oscillator
    The Ultimate Oscillator was created by Larry Williams. He argued that the
    oscillators in common use are quite sensitive to the choice of period
    parameter: under different market conditions, the same oscillator with
    different parameters produces completely different results, so picking the
    best parameter combination becomes the most important step before using one.
    To tune the periods, Williams tested until he found three oscillators with
    different cycle lengths, converted those periods into constant factors in
    inverse proportion, and combined the three oscillators, each weighted by
    its constant, into the single UOS indicator.
    After this parameter-smoothing process, UOS adapts to different market
    conditions better than any single-parameter oscillator.
    [Input]
        df, data source in pd.DataFrame format
        ksgn, column name, usually 'close' (closing price)
    [Output]
        df, data source in pd.DataFrame format,
        with one added column: uos, the output data
    '''
    i = 0  
    TR_l = [0]  
    BP_l = [0]  
    xnam='uos'
    while i <  len(df) - 1:   #df.index[-1]:  
        #TR = max(df.get_value(i + 1, 'high'), df.get_value(i, 'close')) - min(df.get_value(i + 1, 'low'), df.get_value(i, 'close'))  
        TR = max(df['high'].iloc[i+1],df[ksgn].iloc[i])-min(df['low'].iloc[i+1],df[ksgn].iloc[i])  
        TR_l.append(TR)  
        #BP = df.get_value(i + 1, 'close') - min(df.get_value(i + 1, 'low'), df.get_value(i, 'close'))  
        BP =df[ksgn].iloc[i+1]-min(df['low'].iloc[i+1], df[ksgn].iloc[i])  
        BP_l.append(BP)  
        i = i + 1  
    UltO = pd.Series((4 * pd.rolling_sum(pd.Series(BP_l), 7) / pd.rolling_sum(pd.Series(TR_l), 7)) + (2 * pd.rolling_sum(pd.Series(BP_l), 14) / pd.rolling_sum(pd.Series(TR_l), 14)) + (pd.rolling_sum(pd.Series(BP_l), 28) / pd.rolling_sum(pd.Series(TR_l), 28)), name =xnam)  # 'Ultimate_Osc'
    #df = df.join(UltO)      
    UltO.index=df.index;
    df[xnam]=UltO
    return df
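The per-row while loop above can be vectorized with shift(); Example #56 below applies the same idea. A sketch under this example's lower-case column names:

prev_close = df['close'].shift(1)
true_low = pd.concat([df['low'], prev_close], axis=1).min(axis=1)
true_high = pd.concat([df['high'], prev_close], axis=1).max(axis=1)
TR = true_high - true_low      # true range
BP = df['close'] - true_low    # buying pressure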
Example #7
def extract_windows(days):
    # Add categories to count types of crimes committed in time windows leading to the date we're trying to predict
    for label in ['Violent', 'Severe', 'Minor', 'Petty']:
        days[label + ' Crimes in Last Week'] = pd.rolling_sum(days[label + ' Crimes'], 7)
        days[label + ' Crimes in Last Month'] = pd.rolling_sum(days[label + ' Crimes'], 30)
    # The earliest 30 days in the time series have incomplete windows, hence missing values. Remove those days.
    return days[30:]
Example #8
def CumSum(r,offsetstart,end,tsn, data):

    ##DESCRIPTION:
    ##prints timestamp and instantaneous rainfall
    ##plots instantaneous rainfall data, 24-hr cumulative and 72-hr rainfall, and half of 2-yr max and 2-yr max rainfall for 10 days

    ##INPUT:
    ##r; string; site code
    ##offsetstart; datetime; starting point of interval with offset to account for moving window operations
    ##end; datetime; end of interval
    ##tsn; string; datetime format allowed in savefig

##    if r!='lipw': continue

    rainfall = data
    rainfall=rainfall[(rainfall.index>=offsetstart)]
    rainfall=rainfall[(rainfall.index<=end)]
    rainfall=rainfall.resample('15min',how='sum')

    
    #getting the rolling sum for the last 24 hours
    rainfall2=pd.rolling_sum(rainfall,96,min_periods=1)
    rainfall2=np.round(rainfall2,4)
    
    #getting the rolling sum for the last 3 days
    rainfall3=pd.rolling_sum(rainfall,288,min_periods=1)
    rainfall3=np.round(rainfall3,4)
    
    return rainfall2, rainfall3
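The window lengths in these rainfall examples encode durations: with 15-minute bars, 96 periods span 24 hours and 288 span 72 hours (with 30-minute bars it is 48 and 144, as in other examples here). A tiny illustrative helper, not part of the original code:

def periods_for(hours, bar_minutes=15):
    # periods_for(24) == 96, periods_for(72) == 288
    return int(hours * 60 / bar_minutes)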
Example #9
def find_capm_gap(df_prices, i_lookback, switch):
#   df_spread = pd.merge(df_prices, df_prices, left_index=True, right_index=True, how='outer')
    frames = [df_prices, df_prices]
    df_spread = pd.concat(frames, keys=ls_symbols)
    print "in"
    print "df_spread:::", df_spread
    df_capm_gap = np.NAN * copy.deepcopy(df_prices)
    ts_index = df_prices[ls_symbols[-1]]
    tsu.returnize0(ts_index)
    for s_symbol in ls_symbols[:len(ls_symbols) - 1]:
        ts_price = df_prices[s_symbol]
        tsu.returnize0(ts_price)
#       print "returns", ts_price
#       print "index", ts_index
        ts_x_ret = pd.rolling_sum(ts_index, i_lookback)
        ts_y_ret = pd.rolling_sum(ts_price, i_lookback)

        beta = (1 / pd.rolling_var(ts_index, i_lookback)) * pd.rolling_cov(ts_index, ts_price, i_lookback)
        alpha = pd.rolling_mean(ts_price, i_lookback) - beta * pd.rolling_mean(ts_index, i_lookback)
        df_capm_gap[s_symbol] = switch * (ts_y_ret - ts_x_ret) + (1 - switch) * (ts_y_ret - alpha - beta * ts_x_ret)
#       print "ind", ts_x_ret, "y", ts_y_ret, "a", alpha, "b", beta, df_capm_gap[s_symbol]
    ldt_timestamps = df_capm_gap.index
    print df_capm_gap
    for i in range(1, len(ldt_timestamps)):
        df_capm_gap.ix[ldt_timestamps[i]] = scipy.stats.stats.rankdata(df_capm_gap.ix[ldt_timestamps[i]])
        print df_spread.ix[[('AMZN', df_prices.index[i])]]
    return df_capm_gap
Example #10
def Vortex(df, n):
    """
    Vortex Indicator
    """
    i = 0
    TR = [0]
    while i < len(df) - 1:  # df.index[-1]:
        Range = max(
            df.get_value(i + 1, 'High'),
            df.get_value(i, 'Close')) - min(
                df.get_value(i + 1, 'Low'),
                df.get_value(i, 'Close')
            )
        TR.append(Range)
        i = i + 1
    i = 0
    VM = [0]
    while i < len(df) - 1:  # df.index[-1]:
        Range = abs(
            df.get_value(i + 1, 'High') - df.get_value(i, 'Low')
        ) - abs(
            df.get_value(i + 1, 'Low') - df.get_value(i, 'High')
        )
        VM.append(Range)
        i = i + 1
    result = pd.Series(
        pd.rolling_sum(
            pd.Series(VM), n
        ) / pd.rolling_sum(pd.Series(TR), n),
        name='Vortex_' + str(n)
    )
    return out(SETTINGS, df, result)
Example #11
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3),
              dd.rolling_skew(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3),
              dd.rolling_kurt(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
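Each legacy call in this test maps one-for-one onto the .rolling() method chain that replaced it; a sketch of the modern spellings (my summary, not part of the test file):

import numpy as np
import pandas as pd

s = pd.Series(np.arange(10.0))
mad = lambda x: np.fabs(x - x.mean()).mean()

s.rolling(3).sum()                  # was pd.rolling_sum(s, 3)
s.rolling(3).count()                # was pd.rolling_count(s, 3)
s.rolling(3).mean()                 # was pd.rolling_mean(s, 3)
s.rolling(3).std()                  # was pd.rolling_std(s, 3)
s.rolling(3).skew()                 # was pd.rolling_skew(s, 3)
s.rolling(3).quantile(0.5)          # was pd.rolling_quantile(s, 3, 0.5)
s.rolling(3).apply(mad)             # was pd.rolling_apply(s, 3, mad)
s.rolling(3, min_periods=3).sum()   # keyword arguments carry over unchanged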
Example #12
    def calculate_signals(self, event):
        """
        create signal events
        in asof, ticker, exchange, signal format
        """
        if event.type == 'MARKET':
            for s in self.symbol_list:
                bars = self.bars.get_latest_bars(s, N = 201)
                df = pd.DataFrame(bars, columns = self.bars.bar_columns)
                df['closepctchg'] = df['adjclose'].pct_change(periods=1)
                df['lowpctchg'] = df['low'].pct_change(periods=1)
                df['highpctchg'] = df['high'].pct_change(periods=1)
                df['logreturn'] = np.log(df['adjclose'].pct_change(periods=1)+1)
                df['ma200'] = pd.rolling_mean(df['adjclose'], 200)
                df['ma5'] = pd.rolling_mean(df['adjclose'], 5)
                df['lowpctsign'] = np.sign(df['lowpctchg'])
                df['highpctsign'] = np.sign(df['highpctchg'])
                df['closepctsign'] = np.sign(df['closepctchg'])
                df['sumlowsign'] = pd.rolling_sum(df['lowpctsign'], 3)
                df['sumhighsign'] = pd.rolling_sum(df['highpctsign'], 3)
                df['sumclosesign'] = pd.rolling_sum(df['closepctsign'], 3)
                cv = df.tail(1)
                #print bars[0][0], bars[0][1], bars[0][2], cv['ma200'], cv['adjclose'], cv['lowpctsign'], cv['highpctsign'], cv['closepctsign']
                if bars is not None and len(bars) == 201 and cv['ma200'] < cv['adjclose'] and cv['sumclosesign'] == -3 and cv['closepctchg'] < -0.015:
                    if self.bought[s] == False:
                        print 'SIGNAL: ', bars[-1][0], bars[-1][1], bars[-1][2], bars[-1][7], 'LONG'
                        signal = SignalEvent(bars[-1][1], bars[-1][2], bars[-1][0], bars[-1][7], 'LONG')
                        self.events.put(signal)
                        self.bought[s] = True

                if self.bought[s] == True and cv['adjclose'] > cv['ma5']:
                        print 'SIGNAL: ', bars[-1][0], bars[-1][1], bars[-1][2], bars[-1][7], 'EXIT'
                        signal = SignalEvent(bars[-1][1], bars[-1][2], bars[-1][0], bars[-1][7], 'EXIT')
                        self.events.put(signal)
                        self.bought[s] = False
Example #13
def get_estimator(ticker, start, end, window=30, clean=True):
    
    prices = data.get_data(ticker, start, end)
    
    log_ho = (prices['Adj High'] / prices['Adj Open']).apply(np.log)
    log_lo = (prices['Adj Low'] / prices['Adj Open']).apply(np.log)
    log_co = (prices['Adj Close'] / prices['Adj Open']).apply(np.log)
    
    log_oc = (prices['Adj Open'] / prices['Adj Close'].shift(1)).apply(np.log)
    log_oc_sq = log_oc**2
    
    log_cc = (prices['Close'] / prices['Close'].shift(1)).apply(np.log)
    log_cc_sq = log_cc**2
    
    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)
    
    close_vol = pandas.rolling_sum(log_cc_sq, window=window) * (1.0 / (window - 1.0))
    open_vol = pandas.rolling_sum(log_oc_sq, window=window) * (1.0 / (window - 1.0))
    window_rs = pandas.rolling_sum(rs, window=window) * (1.0 / (window - 1.0))
    
    result = (open_vol + 0.164333 * close_vol + 0.835667 * window_rs).apply(np.sqrt) * math.sqrt(252)
    
    result[:window-1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
Example #14
def rolling_proportion(df, date_col, value_col, value, window=30, dropna=True, label=False, fig=None, ax=None, color=None):
    """
    Interpolated proportion of binary risk factor over time.

    df = pandas df
    date_col = name of column containing dates
    value_col = name of column to be tallied
    value = value to tally (e.g. 'Male')
    window = number of days to include. Default is 30.
    dropna = exclude rows where val is NaN. Default is true. False will include those rows.
    label = legend label
    fig, ax = matplotlib objects
    -----
    Returns Series of proportions with date index, fig, and ax.
    -----
    Example:
    datetime_df.index = df.dates
    rolling_proportion(datetime_df.sex, 'Male')

    Note: If you are having trouble, make sure that your date_col is a datetime.
    """

    df = df[df[date_col].isnull() == False]
    df.index = df[date_col]

    if dropna == False:
        df = df[value_col].fillna(False)
    else:
        df = df[df[value_col].isnull() == False]

    df['matches'] = df[value_col] == value
    df['matches'] = df['matches'].astype(np.int)
    df['ones'] = 1

    prop = pd.DataFrame()
    prop['numerator'] = df.matches.groupby(by=df.index).sum()
    prop['denom'] = df.ones.groupby(by=df.index).sum()
    prop['proportion'] = pd.rolling_sum(prop.numerator, window, 5)/pd.rolling_sum(prop.denom, window, 5)
    prop = prop.dropna(how='any')

    ts = pd.date_range(min(prop.index), max(prop.index))
    new_prop = prop['proportion']
    new_prop = new_prop.reindex(ts)
    new_prop = new_prop.fillna(method='pad')

    if fig is None and ax is None:
        fig, ax = plt.subplots()

    if color is None:
        color = 'b'
        
    ax.xaxis_date()
    new_prop.plot(ax=ax, label=label, color=color)
    fig.autofmt_xdate()
    ax.set_ylim(-0.05, 1.05)
    ax.set_xlabel('')
    if label != False:
        ax.legend()

    return new_prop, fig, ax
Example #15
def _calculate_cmo_values(returns, periods):
    only_gains = returns.map(lambda x: 0 if x < 0 else 1)
    only_losses = returns.map(lambda x: 0 if x >= 0 else 1)
    sums_of_gains = pd.rolling_sum(only_gains, window=periods)
    sums_of_losses = pd.rolling_sum(only_losses, window=periods)
    result = pd.Series(index=sums_of_gains.index)
    for date, sog in sums_of_gains.items():
        result[date] = ((sog - sums_of_losses[date]) /
                        (sog + sums_of_losses[date]))
    return result
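The per-date loop is equivalent to one vectorized expression, since the value being built is (gains - losses) / (gains + losses) over the window. A sketch under that reading (NaN returns count toward both sums, exactly as in the map lambdas above):

def cmo_sketch(returns, periods):
    gains = (~(returns < 0)).astype(float).rolling(periods).sum()
    losses = (~(returns >= 0)).astype(float).rolling(periods).sum()
    return (gains - losses) / (gains + losses)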
Example #16
def rsi(data, periods=14):
    # pdb.set_trace()
    change = data['Adjusted Close'] - data['Adjusted Close'].shift(1)
    gains = change.apply(lambda x: 0 if x < 0 else x)
    losses = change.apply(lambda x: 0 if x >= 0 else -x)
    fg = pd.rolling_sum(gains, periods)
    fal = pd.rolling_sum(losses, periods)
    ag = (fg.shift(1) * (periods-1) + gains)/periods
    al = (fal.shift(1) * (periods-1) + losses)/periods
    rs = ag / al

    return 1 - 1/(1+rs)
Example #17
def SensorPlot(r,offsetstart,end,tsn, data, halfmax, twoyrmax):
    
    ##INPUT:
    ##r; str; site
    ##offsetstart; datetime; starting point of interval with offset to account for moving window operations
    ##end; datetime; end of rainfall data
    ##tsn; str; time format acceptable as file name
    ##data; dataframe; rainfall data
    ##halfmax; float; half of 2yr max rainfall, one-day cumulative rainfall threshold
    ##twoyrmax; float; 2yr max rainfall, three-day cumulative rainfall threshold
    
    ##OUTPUT:
    ##rainfall2, rainfall3; dataframe containing one-day and three-day cumulative rainfall
        
    if PrintPlot:
        plt.xticks(rotation=70, size=5)       
        
    #getting the rolling sum for the last 24 hours
    rainfall2=pd.rolling_sum(data,48,min_periods=1)
    rainfall2=np.round(rainfall2,4)
    
    #getting the rolling sum for the last 3 days
    rainfall3=pd.rolling_sum(data,144,min_periods=1)
    rainfall3=np.round(rainfall3,4)
    
    if PrintPlot:
        #assigning the thresholds to their own columns for plotting 

        sub=base
        sub['maxhalf'] = halfmax  
        sub['max'] = twoyrmax
        
        #assigning df to plot variables (to avoid caveat warnings raised by Spyder)
        plot1=data.dropna()     # instantaneous rainfall data
        plot2=rainfall2             # 24-hr cumulative rainfall
        plot3=rainfall3             # 72-hr cumulative rainfall
        plot4=sub['maxhalf']        # half of 2-yr max rainfall
        plot5=sub['max']            # 2-yr max rainfall

        #plots instantaneous rainfall data, 24-hr cumulative rainfall, 72-hr cumulative rainfall,
        #half of 2-yr max rainfall, 2-yr max rainfall
        plt.plot(plot1.index,plot1,color='#db4429', label = 'instantaneous rainfall') # instantaneous rainfall data
        plt.plot(plot2.index,plot2,color='#5ac126', label = '24hr cumulative rainfall') # 24-hr cumulative rainfall
        plt.plot(plot3.index,plot3,color='#0d90d0', label = '72hr cumulative rainfall') # 72-hr cumulative rainfall
        plt.plot(plot4.index,plot4,color="#fbb714", label = 'half of 2yr max rainfall') # half of 2-yr max rainfall
        plt.plot(plot5.index,plot5,color="#963bd6", label = '2yr max rainfall')  # 2-yr max rainfall
        plt.legend(loc='upper left', fontsize = 8)        
        plt.title(r)
        plt.savefig(RainfallPlotsPath+tsn+"_"+r, dpi=160, 
            facecolor='w', edgecolor='w',orientation='landscape',mode='w')
        plt.close()
    
    return rainfall2, rainfall3
Example #18
def getrain(rainsite,start,end):
    raindf = q.GetRawRainData(rainsite, fromTime=start, toTime=end)
    
    raindf = raindf.set_index('ts')
    raindf = raindf.resample('30min',how='sum')
    
    raindf['one_d'] = pd.rolling_sum(raindf.rain,48,min_periods=1)
    raindf['thr_d'] = pd.rolling_sum(raindf.rain,144,min_periods=1)
    
    raindf=raindf.reset_index()
    raindf['gts']  = raindf.ts.apply(datenum)     
    
    return raindf
Example #19
def ULTOSC(df):
    i = 0  
    TR_l = [0]  
    BP_l = [0]  
    while i < df.index[-1]:  
        TR = max(df.get_value(i + 1, 'High'), df.get_value(i, 'Close')) - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close'))  
        TR_l.append(TR)  
        BP = df.get_value(i + 1, 'Close') - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close'))  
        BP_l.append(BP)  
        i = i + 1  
    UltO = pd.Series((4 * pd.rolling_sum(pd.Series(BP_l), 7) / pd.rolling_sum(pd.Series(TR_l), 7)) + (2 * pd.rolling_sum(pd.Series(BP_l), 14) / pd.rolling_sum(pd.Series(TR_l), 14)) + (pd.rolling_sum(pd.Series(BP_l), 28) / pd.rolling_sum(pd.Series(TR_l), 28)), name = 'Ultimate_Osc')  
    df = df.join(UltO)  
    return df
Example #20
def prediction_augmented(df_train, col_names, df_day_avg_values, adjacency_list, df_model, prediction_model, window_size=10, do_rounding = False):
    staircaseA_nodes = ['S42', 'S46']
    staircaseB_nodes = ['S34', 'S35']
    staircaseC_nodes = ['S52', 'S53']
    
    # Dataframe to store the model prediction
    df_model_lr = df_model.copy()
    
    # Building the moving sum for the features before/after for each neighbor
    model_curr_before = pd.rolling_sum(df_model.sort(ascending=False), window_size+1) - df_model
    model_curr_after = pd.rolling_sum(df_model, window_size+1) - df_model
    model_curr_before = model_curr_before.rename(columns={col:col+'before' for col in col_names})
    model_curr_after = model_curr_after.rename(columns={col:col+'after' for col in col_names})
    window_features = model_curr_after.join(model_curr_before[[col_+'before' for col_ in col_names]])
    
    for col in col_names:
        # X will store the features and the outcome Y
        X = df_train.copy()
        X = X.rename(columns={col:'Y'})
        X = pd.merge(X, df_day_avg_values[[col]], left_on='day_time', right_index=True)
        X = X.rename(columns={col:col+'avg'})

        # Building the neighbors (from adjacency list) with missing values filled as in model
        neighbors_col = ['S'+str(n) for n in adjacency_list[int(col[1:])]]
        
        X = X[['Y']].join(df_model[neighbors_col])
        X = X.join(window_features[[col_+'before' for col_ in neighbors_col] + [col_+'after' for col_ in neighbors_col]])
        # Removing the first and last element impossible to compute given the window_size
        X = X.sort()[window_size: - window_size]
        
        # augment with staircase info
        X['sA'] = (col in staircaseA_nodes) * 1.
        X['sB'] = (col in staircaseB_nodes) * 1.
        X['sC'] = (col in staircaseC_nodes) * 1.

        X_train = X[X['Y'] != -1]
        X_test = X[X['Y'] == -1]
        test_indices = X[X['Y'] == -1].index
        col_values = df_model_lr[col]

        if len(X_test):
            # Models
            prediction_model = prediction_model.fit(X_train.drop('Y', axis=1), X_train.Y)
            col_values.ix[test_indices] = prediction_model.predict(X_test.drop('Y', axis=1))

            # Filling the result with the current sensor prediction
            if do_rounding:
                df_model_lr[col] = np.round(col_values)
            else:
                df_model_lr[col] = col_values
    return df_model_lr
Example #21
def VWAP_dataPre(qx,xnam0,ksgn0):
    '''
    VWAP data-preprocessing function for the VWAP strategy (volume-weighted average price)
    Args:
        qx (zwQuantX): zwQuantX data bundle
        xnam0 (str): function label
        ksgn0 (str): price column name, usually 'adj close'
        '''

    zwx.sta_dataPre0xtim(qx,xnam0);
    #
    ksgn,qx.priceCalc=ksgn0,ksgn0;  #'adj close';'close';
    for xcod in zw.stkLibCode:
        d20=zw.stkLib[xcod];
        #---------------dprice,kprice
        #d20['dprice']=d20['open']*d20['adj close']/d20['close']
        d20['dprice']=d20[ksgn]
        #d20['kprice']=d20['dprice'].shift(-1)
        #d20['kprice']=d20['dprice'].shift(-1)
        d20['kprice']=d20['open'].shift(-1)
        #
        #d=qx.staVars[0];d20=zwta.MA(d20,d,ksgn);
        #d=qx.staVars[1];d20=zwta.MA(d20,d,ksgn);
        #
        #d20=zwta.MA(d20,qx.staMA_short,'adj close');
        #d20=zwta.MA(d20,qx.staMA_long,'adj close');
        #ksma='ma_'+str(qx.staMA_long);
        #d20['ma1n']=d20[ksma].shift(1)
        #d20['ma1n']=d20[ksma]
        #
        #---------------dprice,kprice
        #d20['dprice']=d20['open']*d20['adj close']/d20['close']
        
        #d20['dprice']=d20['adj close']
        #d20['kprice']=d20['dprice']
        #vwap, volume-weighted average price
        #vwap = (prices * volume).sum(n) / volume.sum(n)  #the sum function skips NaN values automatically
        #vwapWindowSize,threshold
        #qx.staVarLst=[15,0.01]#
        nwin=qx.staVars[0];
        d20['vw_sum']=pd.rolling_sum(d20['dprice']*d20['volume'],nwin);
        d20['vw_vol']=pd.rolling_sum(d20['volume'],nwin);
        d20['vwap']=d20['vw_sum']/d20['vw_vol']
        
        #---
        zw.stkLib[xcod]=d20;
        if qx.debugMod>0:
            print(d20.tail())
            fss='tmp\\'+qx.prjName+'_'+xcod+'.csv'
            d20.to_csv(fss)
Example #22
def tdi(price, window=20, multiple=2):
    ''' Trend Detection Index '''
    price = utils.safe_series(price)
    mom = price - price.shift(window)
    mom[np.isnan(mom)] = 0
    
    di = pd.rolling_sum(mom, window)
    di_abs = di.abs()
    
    mom_2n_abs = pd.rolling_sum(mom.abs(), window*multiple)
    mom_1n_abs = pd.rolling_sum(mom.abs(), window)
    
    tdi_ = di_abs - (mom_2n_abs - mom_1n_abs)
    return pd.DataFrame(dict(tdi=tdi_, di=di), index=price.index)
Example #23
def ULTOSC(df):
    """
    Ultimate Oscillator
    """
    i = 0
    TR_l = [0]
    BP_l = [0]
    while i < len(df) - 1: #df.index[-1]:
        TR = max(df.get_value(i + 1, 'High'), df.get_value(i, 'Close')) - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close'))
        TR_l.append(TR)
        BP = df.get_value(i + 1, 'Close') - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close'))
        BP_l.append(BP)
        i = i + 1
    result = pd.Series((4 * pd.rolling_sum(pd.Series(BP_l), 7) / pd.rolling_sum(pd.Series(TR_l), 7)) + (2 * pd.rolling_sum(pd.Series(BP_l), 14) / pd.rolling_sum(pd.Series(TR_l), 14)) + (pd.rolling_sum(pd.Series(BP_l), 28) / pd.rolling_sum(pd.Series(TR_l), 28)), name = 'Ultimate_Osc')
    return out(SETTINGS, df, result)
Example #24
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    M = df['close'].diff(r1 - 1)
    N = df['close'].shift(r1 - 1)
    ROC1 = M / N
    M = df['close'].diff(r2 - 1)
    N = df['close'].shift(r2 - 1)
    ROC2 = M / N
    M = df['close'].diff(r3 - 1)
    N = df['close'].shift(r3 - 1)
    ROC3 = M / N
    M = df['close'].diff(r4 - 1)
    N = df['close'].shift(r4 - 1)
    ROC4 = M / N
    KST = pd.Series(pd.rolling_sum(ROC1, n1) + pd.rolling_sum(ROC2, n2) * 2 + pd.rolling_sum(ROC3, n3) * 3 + pd.rolling_sum(ROC4, n4) * 4, name = 'KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) + '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4))
    return KST
Example #25
def mfi(hlc, volume, window=14):
    '''MFI'''
    high, low, close = utils.safe_hlc(hlc)
    volume = utils.safe_series(volume) / 1000
    price = (high+low+close) * 1.0 / 3
    mf = price * volume
    pmf = (mf > mf.shift(1)).astype(int) * mf
    nmf = (mf < mf.shift(1)).astype(int) * mf
    mr = pd.rolling_sum(pmf, window) / pd.rolling_sum(nmf, window)
    
    rval = 100 - (100/(1 + mr))
    utils.safe_name(rval, name='MFI')
    rval.index = hlc.index
    
    return rval    
Example #26
def collection_freq(breath_df, win):
    print(breath_df.columns)
    for ds_type in ['ds', 'pl', 'pvt', 'ie']:
        breath_df['{0}_rolling'.format(ds_type)] = pd.rolling_sum(breath_df['analysis.' + ds_type], window = 60 * win,
                                                                  center = True, min_periods = 1)
        breath_df[ds_type + '_tot_rolling'] = pd.rolling_count(breath_df['analysis.' + ds_type], window = 60 * win,
                                                               center = True)
        breath_df[ds_type + '_freq'] = breath_df[ds_type + '_rolling'] / breath_df[ds_type + '_tot_rolling']

    # add rolling average for Fio2, PEEP, p_mean
    try:
        breath_df['peep_rolling'] = pd.rolling_mean(breath_df['vent_settings.PEEP'], window = 60 * win,
                                                    center = True, min_periods = 1)
    except KeyError:
        pass

    try:
        breath_df['p_mean_rolling'] = pd.rolling_mean(breath_df['vent_settings.p_mean'], window = 60 * win,
                                                      center = True, min_periods = 1)
    except KeyError:
        pass

    try:
        breath_df['fio2_rolling'] = pd.rolling_mean(breath_df['vent_settings.FiO2'], window = 60 * win,
                                                    center = True, min_periods = 1)
    except KeyError:
        pass

    return breath_df
Example #27
def calculateRewards(data, n):
    logging.info('Rewards: calculating {0}...'.format(n))

    # get tick changes
    diffs = data['close'].diff(1)
    # print 'DIFFS'
    # print diffs

    # get rolling sum
    sums = pd.rolling_sum(diffs, n)
    # print 'SUMS'
    # print sums

    # shift
    rewards = sums.shift(-n)
    # print 'SHIFTS'
    # print rewards

    # label data
    rewards[rewards >= 0] = 'bull'
    rewards[rewards < 0] = 'bear'
    # print rewards

    logging.info('Rewards: calculated')
    return rewards
Example #28
def Vortex(df, n):
    i = 0  
    TR = [0]  
    while i < df.index[-1]:  
        Range = max(df.get_value(i + 1, 'High'), df.get_value(i, 'Close')) - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close'))  
        TR.append(Range)  
        i = i + 1  
    i = 0  
    VM = [0]  
    while i < df.index[-1]:  
        Range = abs(df.get_value(i + 1, 'High') - df.get_value(i, 'Low')) - abs(df.get_value(i + 1, 'Low') - df.get_value(i, 'High'))  
        VM.append(Range)  
        i = i + 1  
    VI = pd.Series(pd.rolling_sum(pd.Series(VM), n) / pd.rolling_sum(pd.Series(TR), n), name = 'Vortex_' + str(n))  
    df = df.join(VI)  
    return df
Example #29
    def aggregate_per_week(self, daily_values, last_week_ending, weeks):
        """
        Aggregates daily values into weekly values.

        Args:
            daily_values: Pandas Series of daily values, indexed by date.
                All dates are assumed to be contiguous, though their values may be NaN.
                Dates do not have to cover the periods being sampled.
            last_week_ending: last day of last week.
            weeks: number of weeks to sample (including the last day)

        Returns:
            Pandas Series with weekly values, indexed by date of last day of week.
            Any day with NaN will result in the corresponding week also being NaN.
            As a consequence, any week requested that is not completely covered
            by the input daily_values will be NaN.
        """
        # For each date in daily input, find sum of day's value with the previous
        # six days.
        week_window = pandas.rolling_sum(daily_values, window=7)

        # Pull out the requested end-of-week days.  If requested week dates are
        # not in the range of the daily input, NaN values are returned.
        days = [last_week_ending - timedelta(i * 7) for i in reversed(xrange(weeks))]
        return week_window.loc[days]
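A standalone sketch of the same weekly aggregation with the modern API (dates and values here are made up for illustration):

import pandas as pd
from datetime import timedelta

daily_values = pd.Series(1.0, index=pd.date_range('2015-01-01', periods=28))
week_window = daily_values.rolling(window=7).sum()   # day plus previous six days
last_week_ending = daily_values.index[-1]
days = [last_week_ending - timedelta(i * 7) for i in reversed(range(4))]
weekly = week_window.loc[days]                       # one value per week-ending day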
Example #30
def downloadRainfallNOAH(rsite, fdate, tdate):   
    url = "http://weather.asti.dost.gov.ph/home/index.php/api/data/%s/from/%s/to/%s" % (rsite,fdate,tdate)
    r = requests.get(url)

    try:
        df = pd.DataFrame(r.json()["data"])
    except TypeError:
        print "    No device with id of %s" % rsite
        return pd.DataFrame()

    try:
        df = df.set_index(['dateTimeRead'])
        df.index = pd.to_datetime(df.index)
        df = df["rain_value"].astype(float)
        df = df.resample('15Min').fillna(0.00)
        dfs = pd.rolling_sum(df,96)
        dfa = pd.DataFrame({"rval":df,"cumm":dfs})
        dfa = dfa.fillna(0)
        dfa = dfa[96:]
        
        #rename the "index" into "timestamp"
        dfa.index.names = ["timestamp"]
        return dfa
        
    except:
        return pd.DataFrame()
Example #31
def vortex(df, n):
    i = 0
    tr = [0]
    while i < df.index[-1]:
        Range = max(df.get_value(i + 1, 'High'), df.get_value(
            i, 'Close')) - min(df.get_value(i + 1, 'Low'),
                               df.get_value(i, 'Close'))
        tr.append(Range)
        i = i + 1
    i = 0
    vm = [0]
    while i < df.index[-1]:
        Range = abs(df.get_value(i + 1, 'High') -
                    df.get_value(i, 'Low')) - abs(
                        df.get_value(i + 1, 'Low') - df.get_value(i, 'High'))
        vm.append(Range)
        i = i + 1
    vi = pd.Series(pd.rolling_sum(pd.Series(vm), n) /
                   pd.rolling_sum(pd.Series(tr), n),
                   name='Vortex_' + str(n))
    df = df.join(vi)
    return df
Example #32
    def efficiency_ratio(self, periods=[30], sample_size=500):
        for period in periods:
            label = 'er' + str(period)
            for symbol, frame in self.series_set.items():
                abs_daily_change = abs(frame.c - frame.shift(1).c)
                abs_period_change = abs(frame.c - frame.shift(period).c)
                sum_change = pd.rolling_sum(abs_daily_change, period)
                frame[label] = abs_period_change / sum_change
                frame['m' + label] = pd.rolling_mean(frame[label], sample_size)
                std = pd.rolling_std(frame[label], sample_size)
                frame['erz' + str(period)] = (frame[label] - frame['m' + label]) / std
        return self
Example #33
def Vortex(df, n):
    i = 0
    TR = [0]
    while i < df.index[-1]:
        Range = max(df.get_value(i + 1, 'High'), df.get_value(
            i, 'Close')) - min(df.get_value(i + 1, 'Low'),
                               df.get_value(i, 'Close'))
        TR.append(Range)
        i = i + 1
    i = 0
    VM = [0]
    while i < df.index[-1]:
        Range = abs(df.get_value(i + 1, 'High') -
                    df.get_value(i, 'Low')) - abs(
                        df.get_value(i + 1, 'Low') - df.get_value(i, 'High'))
        VM.append(Range)
        i = i + 1
    VI = pd.Series(pd.rolling_sum(pd.Series(VM), n) /
                   pd.rolling_sum(pd.Series(TR), n),
                   name='Vortex_' + str(n))
    df = df.join(VI)
    return df
Example #34
def mass_index(df):
    """Calculate the Mass Index for given data.

    :param df: pandas.DataFrame
    :return: pandas.DataFrame
    """
    Range = df['High'] - df['Low']
    EX1 = pd.ewma(Range, span = 9, min_periods = 8)
    EX2 = pd.ewma(EX1, span = 9, min_periods = 8)
    Mass = EX1 / EX2
    MassI = pd.Series(pd.rolling_sum(Mass, 25), name = 'Mass Index')
    df = df.join(MassI)
    return df
Example #35
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    """
    KST Oscillator
    """
    M = df['close'].diff(r1 - 1)
    N = df['close'].shift(r1 - 1)
    ROC1 = M / N
    M = df['close'].diff(r2 - 1)
    N = df['close'].shift(r2 - 1)
    ROC2 = M / N
    M = df['close'].diff(r3 - 1)
    N = df['close'].shift(r3 - 1)
    ROC3 = M / N
    M = df['close'].diff(r4 - 1)
    N = df['close'].shift(r4 - 1)
    ROC4 = M / N
    result = pd.Series(
        pd.rolling_sum(ROC1, n1) + pd.rolling_sum(ROC2, n2) * 2 +
        pd.rolling_sum(ROC3, n3) * 3 + pd.rolling_sum(ROC4, n4) * 4,
        name='KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) +
        '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4))
    return out(SETTINGS, df, result)
Example #36
def getDF():

    rsite = sys.argv[1]
    fdate = sys.argv[2]
    tdate = sys.argv[3]
    engine = create_engine(
        'mysql+pymysql://updews:[email protected]/senslopedb')
    query = "select timestamp, rain from senslopedb.%s where timestamp between '%s' and  '%s'" % (
        rsite, fdate, tdate)
    df = pd.io.sql.read_sql(query, engine)
    df.columns = ['ts', 'rain']
    df = df.set_index(['ts'])
    df = df["rain"].astype(float)
    df = df.resample('15Min', how="sum")
    dfs = pd.rolling_sum(df, 96, min_periods=1)
    dfs1 = pd.rolling_sum(df, 288, min_periods=1)
    dfs = dfs[dfs >= 0]
    dfs1 = dfs1[dfs1 >= 0]
    dfa = pd.DataFrame({"rval": df, "hrs24": dfs, "hrs72": dfs1})
    dfajson = dfa.reset_index().to_json(orient="records", date_format='iso')
    dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "")
    print dfajson
Example #37
def getDF():

    rsite = sys.argv[1]
    fdate = sys.argv[2].replace("%20", " ")
    tdate = sys.argv[3].replace("%20", " ")
    # rsite = "1069"
    # fdate = "2014-04-25"
    # tdate = "2017-04-25"
    engine = create_engine(
        'mysql+pymysql://updews:[email protected]/senslopedb')
    query = "select timestamp, rval from senslopedb.rain_noah_%s " % rsite
    query += "where timestamp between '%s' and '%s'" % (pd.to_datetime(fdate) -
                                                        td(3), tdate)
    df = pd.io.sql.read_sql(query, engine)
    df.columns = ['ts', 'rain']
    df = df[df.rain >= 0]
    df = df.set_index(['ts'])
    df = df.resample('30Min').sum()

    df_inst = df.resample('30Min').sum()

    if max(df_inst.index) < pd.to_datetime(tdate):
        new_data = pd.DataFrame({'ts': [pd.to_datetime(tdate)], 'rain': [0]})
        new_data = new_data.set_index(['ts'])
        df = df.append(new_data)
        df = df.resample('30Min').sum()

    df1 = pd.rolling_sum(df, 48, min_periods=1)
    df3 = pd.rolling_sum(df, 144, min_periods=1)

    df['rval'] = df_inst
    df['hrs24'] = df1
    df['hrs72'] = df3

    df = df[(df.index >= fdate) & (df.index <= tdate)]

    dfajson = df.reset_index().to_json(orient="records", date_format='iso')
    dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "")
    print dfajson
Example #38
    def get_df(self, query_obj=None):
        form_data = self.form_data
        df = super(NVD3TimeSeriesViz, self).get_df(query_obj)

        df = df.fillna(0)
        if form_data.get("granularity") == "all":
            raise Exception("Pick a time granularity for your time series")

        df = df.pivot_table(
            index="timestamp",
            columns=form_data.get('groupby'),
            values=form_data.get('metrics'))

        fm = form_data.get("resample_fillmethod")
        if not fm:
            fm = None
        how = form_data.get("resample_how")
        rule = form_data.get("resample_rule")
        if how and rule:
            df = df.resample(rule, how=how, fill_method=fm)
            if not fm:
                df = df.fillna(0)

        if self.sort_series:
            dfs = df.sum()
            dfs.sort(ascending=False)
            df = df[dfs.index]

        if form_data.get("contribution"):
            dft = df.T
            df = (dft / dft.sum()).T

        num_period_compare = form_data.get("num_period_compare")
        if num_period_compare:
            num_period_compare = int(num_period_compare)
            df = (df / df.shift(num_period_compare)) - 1
            df = df[num_period_compare:]

        rolling_periods = form_data.get("rolling_periods")
        rolling_type = form_data.get("rolling_type")

        if rolling_type in ('mean', 'std', 'sum') and rolling_periods:
            if rolling_type == 'mean':
                df = pd.rolling_mean(df, int(rolling_periods), min_periods=0)
            elif rolling_type == 'std':
                df = pd.rolling_std(df, int(rolling_periods), min_periods=0)
            elif rolling_type == 'sum':
                df = pd.rolling_sum(df, int(rolling_periods), min_periods=0)
        elif rolling_type == 'cumsum':
            df = df.cumsum()
        return df
Example #39
def VORTEX(df, n):
    '''
    def VORTEX(df, n):
    Vortex Indicator
    See http://www.vortexindicator.com/VFX_VORTEX.PDF

    [Input]
        df, data source in pd.DataFrame format
        n, window length

    [Output]
        df, data source in pd.DataFrame format,
        with one added column: vortex_{n}, the output data
    '''
    xnam='vortex_{n}'.format(n=n)
    i = 0
    TR = [0]
    while i < len(df) - 1:  # df.index[-1]:
        #Range = max(df.get_value(i + 1, 'high'), df.get_value(i, 'close')) - min(df.get_value(i + 1, 'low'), df.get_value(i, 'close'))
        Range=max(df['high'].iloc[i+1],df['close'].iloc[i])-min(df['low'].iloc[i+1],df['close'].iloc[i])
        #TR = max(df['High'].iloc[i + 1], df['Close'].iloc[i] - min(df['Low'].iloc[i + 1], df['Close'].iloc[i]))
        TR.append(Range)
        i = i + 1
    i = 0
    VM = [0]
    while i < len(df) - 1:  # df.index[-1]:
        #Range = abs(df.get_value(i + 1, 'high') - df.get_value(i, 'low')) - abs(df.get_value(i + 1, 'low') - df.get_value(i, 'high'))
        Range=abs(df['high'].iloc[i+1]-df['low'].iloc[i])-abs(df['low'].iloc[i+1]-df['high'].iloc[i])
        VM.append(Range)
        i = i + 1
    ds = pd.Series(pd.rolling_sum(pd.Series(VM), n) / pd.rolling_sum(pd.Series(TR), n), name=xnam)
    #df = df.join(ds)  
    ds.index=df.index;
    df[xnam]=ds
    
    return df
Example #40
def count_by_loc_time(data):
    temp_time = data.groupby(['zipcode', 'time', 'year_month']).agg({'vcrime': 'sum'}).reset_index()
    temp_time['order_within_group'] = temp_time.groupby('zipcode').cumcount()

    temp_time['count_1m_loc_time'] = (temp_time.groupby(['zipcode', 'time'])['vcrime']
                                      .apply(lambda x: pd.rolling_sum(x, window=1, min_periods=0)
                                             .shift()
                                             .fillna(0)))

    # counting the robberies in the previous 6 months for each zipcode and time range
    temp_time['count_6m_loc_time'] = (temp_time.groupby(['zipcode', 'time'])['vcrime']
                                      .apply(lambda x: pd.rolling_sum(x, window=6, min_periods=0)
                                             .shift()
                                             .fillna(0)))

    temp_time['count_2y_loc_time'] = (temp_time.groupby(['zipcode', 'time'])['vcrime']
                                      .apply(lambda x: pd.rolling_sum(x, window=24, min_periods=0)
                                             .shift()
                                             .fillna(0)))
    # droping columns
    temp_time = temp_time.drop(['vcrime', 'order_within_group'], axis=1)

    return temp_time
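With the modern API the same leakage-safe counts can be written with transform, which keeps the group index aligned automatically (a sketch; min_periods=1 plays the role of min_periods=0 here):

temp_time['count_6m_loc_time'] = (
    temp_time.groupby(['zipcode', 'time'])['vcrime']
             .transform(lambda x: x.rolling(6, min_periods=1).sum()
                                   .shift().fillna(0)))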
Example #41
def getRSI(close):
    '''
    calculate RSI value
    :param DataFrame close: close price
    :return: DataFrame RSI: RSI value
    '''
    n = 3
    # calculate increment of close price of two succeeding days
    close_increment = close.diff()
    close_increment.dropna(inplace=True)
    close_increment.index = range(close_increment.shape[0])
    close_pos = close_increment.copy()
    close_pos[close_pos < 0] = 0
    close_abs = np.abs(close_increment)
    sum_pos = pd.rolling_sum(close_pos, n)
    sum_pos.dropna(inplace=True)
    sum_pos.index = range(sum_pos.shape[0])
    sum_abs = pd.rolling_sum(close_abs, n)
    sum_abs.dropna(inplace=True)
    sum_abs.index = range(sum_abs.shape[0])
    RSI = sum_pos / sum_abs
    RSI.replace([np.nan, np.inf, -np.inf], 0, inplace=True)
    return RSI
Example #42
def calc_log_vols(ser):
    stk = ser.values

    daily = stk[-period:]
    print daily 
    daily = np.std(daily, ddof=1) * np.sqrt(252.0)

    weekly = pd.rolling_sum(stk[-(period+5):], 5) #gets the rolling sum for every 5 days 
    print weekly 
    weekly = weekly[-period:] #gets that number of days back
    weekly = weekly[4::5] #starting from the 5th value, keep every 5th (one per week)
    print weekly 
    weekly = np.std(weekly, ddof=1) * np.sqrt(52)
    return pd.Series([daily, weekly], index=['daily', 'weekly'])
Example #43
def KAMA(df, n):
    """
    Kaufman Moving Average
    (the original snippet is truncated and broken; repaired here to return the
    efficiency ratio, the quantity its Directions/Volatility inputs build)
    """
    PP = (df['High'] + df['Low']) / 2
    # net directional movement over the last n bars
    Directions = PP.diff(n).abs()
    # total bar-to-bar path length over the same n bars
    Volatility = pd.rolling_sum(PP.diff().abs(), n)
    return Directions / Volatility
Example #44
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'),
           dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3,
                                                           min_periods=3))
Example #45
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'),
           dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3,
                                                           min_periods=3))
Example #46
def estimateVolatility(ohlc, N=10, algo='YangZhang'):
    """ 
    Volatility estimation 
    Possible algorithms: ['YangZhang', 'CC']
    
    """

    cc = np.log(ohlc.close / ohlc.close.shift(1))

    if algo == 'YangZhang':  # Yang-zhang volatility
        ho = np.log(ohlc.high / ohlc.open)
        lo = np.log(ohlc.low / ohlc.open)
        co = np.log(ohlc.close / ohlc.open)

        oc = np.log(ohlc.open / ohlc.close.shift(1))
        oc_sq = oc**2

        cc_sq = cc**2

        rs = ho * (ho - co) + lo * (lo - co)

        close_vol = pd.rolling_sum(cc_sq, window=N) * (1.0 / (N - 1.0))
        open_vol = pd.rolling_sum(oc_sq, window=N) * (1.0 / (N - 1.0))
        window_rs = pd.rolling_sum(rs, window=N) * (1.0 / (N - 1.0))

        result = (open_vol + 0.164333 * close_vol +
                  0.835667 * window_rs).apply(np.sqrt) * np.sqrt(252)

        result[:N - 1] = np.nan

    elif algo == 'CC':  # standard close-close estimator
        result = np.sqrt(252) * np.sqrt(((pd.rolling_sum(cc**2, N)) / N))

    else:
        raise ValueError('Unknown algo type.')

    return result * 100
Example #47
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4,ksgn='close'): 
    '''
    def KST(df, r1, r2, r3, r4, n1, n2, n3, n4,ksgn='close'): 
    #KST Oscillator
    The Know Sure Thing (KST) indicator draws on long-, medium- and short-term
    rate-of-change (ROC) readings to gauge how different time cycles affect the
    market. It weights several periods' price rate-of-change values and smooths
    them into long and short curves; its strength is reading the trend from this
    combination of adjusted price changes to pin down turning points precisely.

    tst:
       (r1, r2, r3, r4, n1, n2, n3, n4) = (1, 2, 3, 4, 6, 7, 9, 9)

    [Input]
        df, data source in pd.DataFrame format
        r1..r4, n1..n4, window lengths
        ksgn, column name, usually 'close' (closing price)
    [Output]
        df, data source in pd.DataFrame format,
        with one added column: kst, the output data
    '''
    xnam='kst';
    M = df[ksgn].diff(r1 - 1)  
    N = df[ksgn].shift(r1 - 1)  
    ROC1 = M / N  
    M = df[ksgn].diff(r2 - 1)  
    N = df[ksgn].shift(r2 - 1)  
    ROC2 = M / N  
    M = df[ksgn].diff(r3 - 1)  
    N = df[ksgn].shift(r3 - 1)  
    ROC3 = M / N  
    M = df[ksgn].diff(r4 - 1)  
    N = df[ksgn].shift(r4 - 1)  
    ROC4 = M / N  
    #'KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) + '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4)
    KST = pd.Series(pd.rolling_sum(ROC1, n1) + pd.rolling_sum(ROC2, n2) * 2 + pd.rolling_sum(ROC3, n3) * 3 + pd.rolling_sum(ROC4, n4) * 4, name = xnam)  
    df = df.join(KST)  
    return df
Example #48
def CCI(data, n=20, m=7):
    data['cci'] = ta.CCI(np.array(data.high), np.array(data.low),
                         np.array(data.close), n)
    signal = pd.DataFrame(index=data.index)

    #strategy 1
    """
    When CCI crosses above 100, buy (signal = 1).
    When CCI crosses below -100, sell short (signal = -1).
    Parameter: n = 20.
    """
    signal['1'] = ((data['cci'] > 100) & (data['cci'].shift(1) < 100)) * 1 + (
        (data['cci'] < -100) & (data['cci'].shift(1) > -100)) * (-1)
    signal['1'] = signal['1'][signal['1'].isin([1,
                                                -1])].reindex(data.index,
                                                              method='ffill')

    #strategy 2
    """
    When CCI crosses above 100, buy (signal = 1).
    If CCI falls back below 100 within m days of that upward cross, sell (signal = -1);
    otherwise keep the signal unchanged until CCI crosses below -100, which triggers the sell.
    The downward cross of -100 is handled symmetrically.
    The best parameters found are n=20, m=8.
    """
    signal['2'] = ((data['cci'] > 100) & (data['cci'].shift(1) < 100)) * 1 + (
        (data['cci'] < -100) & (data['cci'].shift(1) > -100)) * (-1)
    signal['2'] = signal['2'] + (
        ((data['cci'] < 100) & (data['cci'].shift(1) > 100)) &
        (pd.rolling_sum(signal['2'], m) > 0)) * (-1) + (
            ((data['cci'] > -100) & (data['cci'].shift(1) < -100)) &
            (pd.rolling_sum(signal['2'], m) < 0)) * 1
    signal['2'] = signal['2'][signal['2'].isin([1,
                                                -1])].reindex(data.index,
                                                              method='ffill')
    signal = signal.fillna(0)
    return signal
Example #49
    def stop(trip_data):
        """
        Алгоритм выделения остановок
        """
        df = trip_data.copy()

        n = 10

        df['temp'] = df.apply(lambda x: 1 if x.v < 0.5 else 0, axis=1)

        df['flag'] = pd.rolling_sum(df.temp, n) \
            .apply(lambda x: 1 if x == n else 0) \
            .shift(-n/2+1)

        return np.where(df.flag > 0, 1, 0)
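The same run-of-n detection with the modern API (a sketch; column and variable names follow the example above):

import numpy as np

slow = (df['v'] < 0.5).astype(int)
flag = slow.rolling(n).sum().eq(n).astype(int).shift(-n // 2 + 1)
stops = np.where(flag > 0, 1, 0)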
Example #50
def money_flow_index(df, col_volume='Volume_BTC', n=14):
    """
    Money Flow Index
    http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:money_flow_index_mfi
    https://en.wikipedia.org/wiki/Money_flow_index
    """
    # 1 typical price
    tp = (df['High'] + df['Low'] + df['Close']) / 3.0
    # 2 money flow
    mf = tp * df[col_volume]
    # 3 positive and negative money flow with n periods
    df['1_Period_Positive_Money_Flow'] = 0.0
    df.loc[df['Up_or_Down'] == 1, '1_Period_Positive_Money_Flow'] = mf
    df['1_Period_Negative_Money_Flow'] = 0.0
    df.loc[df['Up_or_Down'] == 2, '1_Period_Negative_Money_Flow'] = mf
    n_positive_mf = pd.rolling_sum(df['1_Period_Positive_Money_Flow'], n)
    n_negative_mf = pd.rolling_sum(df['1_Period_Negative_Money_Flow'], n)
    # 4 money flow index
    mr = n_positive_mf / n_negative_mf
    # delete intermediate columns
    df.drop('1_Period_Positive_Money_Flow', axis=1, inplace=True)
    df.drop('1_Period_Negative_Money_Flow', axis=1, inplace=True)

    return (100 - (100 / (1 + mr)))
Example #51
    def boll(self, start, stop):
        close = []
        for i in range(start, stop):
            close.append(self.stocks[i].close)

        closepd = pandas.Series(close)
        ma = pandas.rolling_sum(closepd, 20) / 20
        md = pandas.rolling_std(closepd, 20)
        up = ma + 2 * md
        dn = ma - 2 * md

        for i in range(start, stop):
            self.stocks[i].boll = ma[i - start]
            self.stocks[i].up = up[i - start]
            self.stocks[i].dn = dn[i - start]
Example #52
def vortex_indicator(df, n):
    """Calculate the Vortex Indicator for given data.

    Vortex Indicator described here:
        http://www.vortexindicator.com/VFX_VORTEX.PDF
    :param df: pandas.DataFrame
    :param n:
    :return: pandas.DataFrame
    """
    i = 0
    TR = [0]
    while i < df.index[-1]:
        Range = max(df.at[i + 1, 'High'], df.at[i, 'Close']) - min(df.at[i + 1, 'Low'], df.at[i, 'Close'])
        TR.append(Range)
        i = i + 1
    i = 0
    VM = [0]
    while i < df.index[-1]:
        Range = abs(df.at[i + 1, 'High'] - df.at[i, 'Low']) - abs(df.at[i + 1, 'Low'] - df.at[i, 'High'])
        VM.append(Range)
        i = i + 1
    VI = pd.Series(pd.rolling_sum(pd.Series(VM), n) / pd.rolling_sum(pd.Series(TR), n), name = 'Vortex_' + str(n))
    df = df.join(VI)
    return df
Example #53
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3), check_less_precise=True)
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3), check_less_precise=True)
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'),
           dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3,
                                                           min_periods=3))
Example #54
    def rolling_sum(self, data_frame, periods):
        """Calculates the rolling sum

        Parameters
        ----------
        data_frame : DataFrame
            contains time series
        periods : int
            period for rolling sum

        Returns
        -------
        DataFrame
        """
        return pandas.rolling_sum(data_frame, periods)
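On modern pandas, where the module-level function no longer exists, the wrapper's body has a drop-in method-form equivalent, sketched here:

    def rolling_sum(self, data_frame, periods):
        # equivalent modern spelling of pandas.rolling_sum(data_frame, periods)
        return data_frame.rolling(periods).sum()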
Example #55
def Vortex(df, n):
    """
    Vortex Indicator
    """
    i = 0
    TR = [0]
    while i < len(df) - 1:  # df.index[-1]:
        Range = max(df.get_value(i + 1, 'high'), df.get_value(
            i, 'close')) - min(df.get_value(i + 1, 'low'),
                               df.get_value(i, 'close'))
        TR.append(Range)
        i = i + 1
    i = 0
    VM = [0]
    while i < len(df) - 1:  # df.index[-1]:
        Range = abs(df.get_value(i + 1, 'high') -
                    df.get_value(i, 'low')) - abs(
                        df.get_value(i + 1, 'low') - df.get_value(i, 'high'))
        VM.append(Range)
        i = i + 1
    result = pd.Series(pd.rolling_sum(pd.Series(VM), n) /
                       pd.rolling_sum(pd.Series(TR), n),
                       name='Vortex_' + str(n))
    return out(SETTINGS, df, result)
Example #56
def ULTOSC(df):
    TR_l = TR(df)
    BP_l = df['close'] - pd.concat([df['low'], df['close'].shift(1)],
                                   axis=1).min(axis=1)
    UltO = pd.Series(
        (4 * pd.rolling_sum(BP_l, 7) / pd.rolling_sum(TR_l, 7)) +
        (2 * pd.rolling_sum(BP_l, 14) / pd.rolling_sum(TR_l, 14)) +
        (pd.rolling_sum(BP_l, 28) / pd.rolling_sum(TR_l, 28)),
        name='Ultimate_Osc')
    return UltO
Example #57
def net_matrix_build(data, station_data):
    '''This function adds the information about the total bikes 
    at station to our net matrix, which currently only contains small 
    integers that represent the net change in bikes over each hour.'''
    for column in data.columns:
        try:
            # we add the values from the Available Docks Guess to
            # the first row of our net change matrix (which represents
            # the first hour of July 1, 2013)
            for i in range(793):
                data.ix[24*i, column] += station_data.ix[int(column), 'Available Docks Guess']
        except:
            continue
    #finally, we compute a rolling sum over the columns of the net change matrix        
    data = pd.rolling_sum(data.fillna(False), window = 24, min_periods = 1) 
    return data
Example #58
    def load_we_dist(self):
        with h5py.File('../we/pdist.h5', 'r') as f:
            p = f['histograms'][:]
            b = f['midpoints_0'][:]

        pcum = np.zeros_like(p)
        pcum = pd.rolling_sum(p, window=40, min_periods=0, axis=0)
        #pcum[:50,:] = pd.rolling_sum(p[:50,:], window=10, min_periods=0, axis=0)
        #pcum[50:150,:] = pd.rolling_sum(p[50:150,:], window=25, min_periods=0, axis=0)
        #pcum[150:,:] = pd.rolling_sum(p[150:,:], window=100, min_periods=0, axis=0)

        #pcum = np.cumsum(p, axis=0)

        pcum /= pcum.sum(axis=1)[:, np.newaxis]

        return pcum, b
Example #59
    def _roll_std(self, sample):
        calculator = lambda x: (x['rate_square'] - x['rate'] * x['rate'] / x['trade_days']) \
                               / (x['trade_days'] - (x['trade_days'] > 1))
        ts = (lambda x: pd.DataFrame(
            dict(rate=x['rate'],
                 rate_square=(x['rate'] * x['rate']),
                 trade_days=x['trade_days'])).resample('MS', how='sum')
              )(sample)
        result = DataFrameExtended([], index=ts.index.rename('time'))
        for key, value in self._column_names['M'].items():
            # XXX Taking the square root halves the precision, so the
            # floating-point error from the earlier arithmetic must be
            # cleaned up before this step
            result[value[0]] = _deal_float_error(
                pd.rolling_sum(ts, key).apply(calculator, axis=1))**0.5
        result.total = (lambda x: int(abs(x) > FLOAT_ERR) * x)(calculator(
            ts.sum()))**0.5
        return _deal_float_error(result)
Example #60
def get_last_changepoint(resid, max_days_over=5, plot=False):
    # Poor man's changepoint detection
    m_rngs = np.abs(np.diff(resid))
    ucl = 3.267 * np.nanmean(m_rngs)

    if plot:
        plt.plot(m_rngs)
        plt.axhline(y=ucl)

    days_over = np.nan_to_num(pd.rolling_sum(m_rngs > ucl, 30))
    max_arg_over = 0
    args_where_over = np.argwhere(days_over > max_days_over)
    if len(args_where_over) > 0:
        max_arg_over = np.max(args_where_over) + 1

    return max_arg_over