def calculator_talib(data):
    """Return ``data`` with TA-Lib indicator columns appended.

    The open/high/low/close/volume inputs are read from ``data`` through the
    module-level ``OHLCV_columns`` sequence (indices 0-4, in that order); each
    column has its missing values dropped and is cast to float before being
    handed to the ``abstract`` indicator functions.

    Indicators computed: STOCH (``fastk_period=9``), MACD, OBV, WILLR and ATR,
    all other parameters left at the library defaults.

    Args:
        data: DataFrame holding the columns named in ``OHLCV_columns`` and a
            '收盤價' column whose index is used for every indicator result.

    Returns:
        A new DataFrame: ``data`` followed column-wise by the indicator
        outputs (KD, MACD, OBV, WILLR, ATR).
    """
    price_fields = ('open', 'high', 'low', 'close', 'volume')
    ETF = {
        field: data[OHLCV_columns[position]].dropna().astype(float)
        for position, field in enumerate(price_fields)
    }

    def talib2df(talib_output):
        # A list result is turned into a DataFrame with one column per list
        # element; anything else becomes a single Series.
        if isinstance(talib_output, list):
            ret = pd.DataFrame(talib_output).transpose()
        else:
            ret = pd.Series(talib_output)
        # Re-index on the '收盤價' column so the result lines up with `data`.
        # NOTE(review): this assumes dropna() above removed no rows; otherwise
        # the lengths differ and the assignment raises - confirm with callers.
        ret.index = data['收盤價'].index
        return ret

    # Stochastic oscillator (KD)
    KD = talib2df(abstract.STOCH(ETF, fastk_period=9))
    # MACD
    MACD = talib2df(abstract.MACD(ETF))
    # On-balance volume
    OBV = talib2df(abstract.OBV(ETF))
    # Williams %R
    WILLR = talib2df(abstract.WILLR(ETF))
    # Average true range
    ATR = talib2df(abstract.ATR(ETF))

    return pd.concat([data, KD, MACD, OBV, WILLR, ATR], axis=1)
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    """Add the HMA, Williams %R and volume-EMA columns to ``dataframe``.

    Column names and periods come from the module-level settings ``shma``,
    ``lhma``, ``pwill`` and ``pvol``. ``metadata`` is accepted but not used.

    Returns:
        The same ``dataframe`` object, with the columns added in place.
    """
    close = dataframe['close']

    # HMA on the close, once per configured window (short first, then long).
    for window in (shma, lhma):
        dataframe[f'hma{window}'] = qtpylib.hma(close, window=window)

    dataframe['willr'] = ta.WILLR(
        dataframe['high'],
        dataframe['low'],
        close,
        timeperiod=pwill,
    )
    # EMA taken over the 'volume' column rather than the default price.
    dataframe['vol_mean'] = ta.EMA(dataframe, timeperiod=pvol, price='volume')
    return dataframe
def technical_index(self):
    """Build a frame of rescaled technical indicators and investor flows.

    Starts from ``self.max_min_price()`` (price frame) and
    ``self.institutional_investors()`` (flow frame), computes each indicator
    with the library default parameters, rescales it, and finally drops the
    raw price columns and every row that still contains a missing value.

    Rescaling rules, as written in the code:
        * RSI: divided by 100.
        * CMO, WILLR: ``(x + 100) / 200``.
        * MACD, PPO, ROC, CCI and both investor columns:
          ``(x + max(x)) / (2 * max(x))``.
        * WMA, EMA, SMA, TEMA: ``x / max(x)``.

    Returns:
        DataFrame with the rescaled columns only.
    """
    df = self.max_min_price()
    df2 = self.institutional_investors()

    def _centered(series):
        # (x + max) / (2 * max), with the maximum evaluated a single time.
        peak = series.max()
        return (series + peak) / (2 * peak)

    def _peak_scaled(series):
        # x / max
        return series / series.max()

    # Each indicator is computed exactly once; the original expression
    # re-ran the same TA-Lib call up to three times per column.
    df['RSI'] = abstract.RSI(df) / 100
    df['CMO'] = (abstract.CMO(df) + 100) / (2 * 100)
    df['MACD'] = _centered(abstract.MACD(df)['macd'])
    df['WILLR'] = (abstract.WILLR(df) + 100) / (2 * 100)
    df['WMA'] = _peak_scaled(abstract.WMA(df))
    df['PPO'] = _centered(abstract.PPO(df))
    df['EMA'] = _peak_scaled(abstract.EMA(df))
    df['ROC'] = _centered(abstract.ROC(df))
    df['SMA'] = _peak_scaled(abstract.SMA(df))
    df['TEMA'] = _peak_scaled(abstract.TEMA(df))
    df['CCI'] = _centered(abstract.CCI(df))

    df['investment_trust'] = _centered(df2['investment_trust'])
    df['foreign_investor'] = _centered(df2['foreign_investor'])

    df = df.drop(columns=['volume', 'open', 'high', 'low', 'close', 'close_max', 'close_min'])
    df = df.dropna()
    return df
def TA_processing(dataframe):
    """Add the technical-analysis feature columns to ``dataframe`` in place.

    Calls the sibling helpers ``bias``, ``moving_average``,
    ``counter_daily_potential``, ``psy_line``, ``volumn_ratio`` and
    ``on_balance_volume`` on the frame, and adds these TA-Lib columns:
    ROC, MACD, MACD_signal, UBBANDS/MBBANDS/LBBANDS, %K, %D, W%R, RSI9, RSI14,
    CCI, MOM and DX. The oscillator columns (%K, %D, W%R, RSI*, CCI, DX) are
    divided by 100.

    Returns:
        None; ``dataframe`` is modified in place.
    """
    bias(dataframe, days=[3, 6, 10, 25])
    moving_average(dataframe, days=[5, 10, 20])
    dataframe['ROC'] = abstract.ROC(dataframe, timeperiod=10)

    # Multi-output indicators are computed once and then split into columns,
    # instead of being recomputed for every output as before.
    macd = abstract.MACD(dataframe, fastperiod=12, slowperiod=26, signalperiod=9)
    dataframe['MACD'] = macd['macd']
    dataframe['MACD_signal'] = macd['macdsignal']

    bands = abstract.BBANDS(dataframe, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    dataframe['UBBANDS'] = bands['upperband']
    dataframe['MBBANDS'] = bands['middleband']
    dataframe['LBBANDS'] = bands['lowerband']

    stoch = abstract.STOCH(dataframe, fastk_period=9)
    dataframe['%K'] = stoch['slowk'] / 100
    dataframe['%D'] = stoch['slowd'] / 100

    dataframe['W%R'] = abstract.WILLR(dataframe, timeperiod=14) / 100
    dataframe['RSI9'] = abstract.RSI(dataframe, timeperiod=9) / 100
    dataframe['RSI14'] = abstract.RSI(dataframe, timeperiod=14) / 100
    dataframe['CCI'] = abstract.CCI(dataframe, timeperiod=14) / 100
    counter_daily_potential(dataframe)
    dataframe['MOM'] = abstract.MOM(dataframe, timeperiod=10)
    dataframe['DX'] = abstract.DX(dataframe, timeperiod=14) / 100
    psy_line(dataframe)
    volumn_ratio(dataframe, d=26)
    on_balance_volume(dataframe)
def TKE(dataframe, *, length=14, emaperiod=5):
    """
    Source: https://www.tradingview.com/script/Pcbvo0zG/
    Author: Dr Yasar ERDINC

    The calculation is simple:
    TKE=(RSI+STOCHASTIC+ULTIMATE OSCILLATOR+MFI+WILLIAMS %R+MOMENTUM+CCI)/7

    Buy signal: when TKE crosses above 20 value
    Oversold region: under 20 value
    Overbought region: over 80 value

    Another usage of TKE is with its EMA; the default is a 5-bar EMA of the
    TKE line.
    Go long: when TKE crosses above EMALine
    Go short: when TKE crosses below EMALine

    Usage:
        `dataframe['TKE'], dataframe['TKEema'] = TKE(dataframe)`

    :param dataframe: frame with the price/volume columns used below
        ('close', 'high', 'low', plus whatever the TA-Lib calls read)
    :param length: period shared by every component except ULTOSC
    :param emaperiod: period of the EMA taken over the TKE line
    :return: tuple ``(TKE, TKEema)`` of Series; the input frame is not modified
    """
    import talib.abstract as ta

    df = dataframe.copy()

    # Rolling extremes for the hand-written stochastic component.
    lowest_low = df['low'].rolling(window=length).min()
    highest_high = df['high'].rolling(window=length).max()

    df["rsi"] = ta.RSI(df, timeperiod=length)
    df['stoch'] = 100 * (df['close'] - lowest_low) / (highest_high - lowest_low)
    df["ultosc"] = ta.ULTOSC(df, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    df["mfi"] = ta.MFI(df, timeperiod=length)
    df["willr"] = ta.WILLR(df, timeperiod=length)
    # The "momentum" term is taken from ROCR100.
    df["mom"] = ta.ROCR100(df, timeperiod=length)
    df["cci"] = ta.CCI(df, timeperiod=length)

    components = ['rsi', 'stoch', 'ultosc', 'mfi', 'willr', 'mom', 'cci']
    df['TKE'] = df[components].mean(axis='columns')
    df["TKEema"] = ta.EMA(df["TKE"], timeperiod=emaperiod)

    return df["TKE"], df["TKEema"]
def _build_indicators(self, df):
    """Return a DataFrame of indicator features for periods 2..39.

    Two code paths, selected by ``self.realtime``:

    * ``realtime`` false: every indicator is computed over the whole of
      ``df`` in one pass through ``ta`` and the result has one row per input
      row.
    * ``realtime`` true: rows are fed in one at a time; after each row the
      ``stream`` functions are evaluated on all rows seen so far and only the
      latest value of each indicator is kept.

    In both paths the ``volume`` column is divided by 10000.

    NOTE(review): the two paths compute MACD differently (``ta.MACD`` versus
    pandas ``ewm``) and insert their columns in a different order - confirm
    that consumers do not rely on positional columns.
    """
    if not self.realtime:
        inputs = df.to_dict(orient="list")
        for col in inputs:
            inputs[col] = np.array(inputs[col])
        c = df["close"]  # not used in this branch (leftover from the ewm-based MACD experiment)
        for n in range(2, 40):
            inputs["bband_u_" + str(n)], inputs["bband_m_" + str(n)], inputs["bband_l_" + str(n)] = ta.BBANDS(
                inputs, n)
            inputs["sma_" + str(n)] = ta.SMA(inputs, timeperiod=n)
            inputs["adx_" + str(n)] = ta.ADX(inputs, timeperiod=n)
            # (A pandas-ewm MACD implementation used to live here, commented out.)
            # MACD periods: fast n, slow 2n, signal int(2n/3). For n == 2 the
            # signal period is written out as 1, which is also what
            # int(n * 2 / 3) evaluates to.
            if n != 2:
                inputs["macd_" + str(n)], inputs["macdsignal_" + str(n)], inputs["macdhist_" + str(n)] = ta.MACD(
                    inputs, n, n * 2, int(n * 2 / 3))
            else:
                inputs["macd_" + str(n)], inputs["macdsignal_" + str(n)], inputs["macdhist_" + str(n)] = ta.MACD(
                    inputs, n, n * 2, 1)
            inputs["mfi_" + str(n)] = ta.MFI(inputs, n)
            inputs["ult_" + str(n)] = ta.ULTOSC(inputs, n, n * 2, n * 4)
            inputs["willr_" + str(n)] = ta.WILLR(inputs, n)
            # STOCH does not depend on n; it is simply overwritten on each pass.
            inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
            inputs["mom_" + str(n)] = ta.MOM(inputs, n)
        # Scale volume down only after all indicators have consumed it.
        inputs["volume"] = list(map(lambda x: x / 10000, inputs["volume"]))
        df = pd.DataFrame().from_dict(inputs)
        return df
    else:
        # Build data one-by-one, as if it's coming in one at a time
        output = pd.DataFrame()
        sliding_window = pd.DataFrame()
        for idx, day in df.iterrows():
            print("\rNow building day", str(idx), end="", flush=True)
            day = copy.deepcopy(day)  # work on a copy of the row rather than the object iterrows() handed out
            # NOTE(review): DataFrame.append is not available in pandas >= 2.0,
            # and growing the frame row by row is quadratic in the row count.
            sliding_window = sliding_window.append(day, ignore_index=True)
            day_out = {}
            o = sliding_window["open"].values  # not used below
            h = sliding_window["high"].values
            l = sliding_window["low"].values
            c_series = sliding_window["close"]
            c = sliding_window["close"].values
            v = sliding_window["volume"].values
            # Latest raw prices are copied straight through.
            for t in ["open", "high", "low", "close"]:
                day_out[t] = sliding_window[t].values[-1]
            for n in range(2, 40):
                day_out["bband_u_" + str(n)], day_out["bband_m_" + str(n)], day_out[
                    "bband_l_" + str(n)] = stream.BBANDS(c, n)
                day_out["sma_" + str(n)] = stream.SMA(c, timeperiod=n)
                day_out["adx_" + str(n)] = stream.ADX(h, l, c, timeperiod=n)
                # MACD built from pandas exponentially weighted means over the
                # window so far; only the latest value of each line is kept.
                fast_ema = c_series.ewm(span=n, adjust=False).mean()
                slow_ema = c_series.ewm(span=n * 2, adjust=False).mean()
                macd1 = fast_ema - slow_ema
                macd2 = macd1.ewm(span=int(n * 2 / 3), adjust=False).mean()
                macd3 = macd1 - macd2
                day_out["macd_" + str(n)] = macd1.values[-1]
                day_out["macdsignal_" + str(n)] = macd2.values[-1]
                day_out["macdhist_" + str(n)] = macd3.values[-1]
                # (A stream.MACD / ta.MACD variant used to live here, commented out.)
                day_out["mfi_" + str(n)] = stream.MFI(h, l, c, v, n)
                day_out["ult_" + str(n)] = stream.ULTOSC(
                    h, l, c, n, n * 2, n * 4)
                day_out["willr_" + str(n)] = stream.WILLR(h, l, c, n)
                # Independent of n; overwritten on each pass.
                day_out["slowk"], day_out["slowd"] = stream.STOCH(h, l, c)
                day_out["mom_" + str(n)] = stream.MOM(c,
                                                      n)
            day_out["volume"] = v[-1] / 10000
            output = output.append(day_out, ignore_index=True)
        return output
def populate_indicators(self, dataframe: DataFrame) -> DataFrame:
    """Add moving averages, oscillators, bands and resampled RSI columns.

    Columns added, in order: ``sma`` (40), ``ema3..ema200``, two slow
    stochastics (``slowd15``/``slowk15`` and ``slowd``/``slowk``), the fast
    stochastic, ``minus_di``/``plus_di`` (24), ``blower``, qtpylib Bollinger
    bands on the typical price, ``sma3..sma200``, ``willr`` (28), ``cci``,
    ``mfi``, ``CDLHAMMER``, the merged resampled frames, ``rsi``,
    ``fisher_rsi``, ``fisher_rsi_norma``, ``resample_rsi_2``,
    ``resample_rsi_8`` and ``average``.

    The frame is resampled to 3x and 7x ``self.get_ticker_indicator()``, RSI
    is computed on each resampled frame, and both are merged back before the
    whole frame is forward-filled.

    Returns:
        The enriched DataFrame (a new object after the resampled merges).
    """
    from technical.util import resample_to_interval
    from technical.util import resampled_merge

    dataframe['sma'] = ta.SMA(dataframe, timeperiod=40)

    # EMA - Exponential Moving Average
    for period in (3, 5, 10, 20, 50, 100, 200):
        dataframe['ema{}'.format(period)] = ta.EMA(dataframe, timeperiod=period)

    # Stoch
    stoch = ta.STOCH(dataframe, fastk_period=5, slowk_period=2,
                     slowk_matype=0, slowd_period=2, slowd_matype=0)
    dataframe['slowd15'] = stoch['slowd']
    dataframe['slowk15'] = stoch['slowk']
    stoch = ta.STOCH(dataframe, fastk_period=10, slowk_period=3,
                     slowk_matype=0, slowd_period=3, slowd_matype=0)
    dataframe['slowd'] = stoch['slowd']
    dataframe['slowk'] = stoch['slowk']

    # Stoch fast
    stoch_fast = ta.STOCHF(dataframe)
    dataframe['fastd'] = stoch_fast['fastd']
    dataframe['fastk'] = stoch_fast['fastk']

    dataframe['minus_di'] = ta.MINUS_DI(dataframe, timeperiod=24)
    dataframe['plus_di'] = ta.PLUS_DI(dataframe, timeperiod=24)
    dataframe['blower'] = ta.BBANDS(dataframe, nbdevup=2, nbdevdn=2)['lowerband']

    # Bollinger bands
    bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(dataframe), window=20, stds=2)
    dataframe['bb_lowerband'] = bollinger['lower']
    dataframe['bb_middleband'] = bollinger['mid']
    dataframe['bb_upperband'] = bollinger['upper']

    # SMA columns; 220 deliberately comes before 200 to keep the original
    # column order.
    for period in (3, 5, 10, 20, 50, 100, 220, 200):
        dataframe['sma{}'.format(period)] = ta.SMA(dataframe, timeperiod=period)

    dataframe['willr'] = ta.WILLR(dataframe, timeperiod=28)

    # resample our dataframes
    short_interval = self.get_ticker_indicator() * 3
    long_interval = self.get_ticker_indicator() * 7
    dataframe_short = resample_to_interval(dataframe, short_interval)
    dataframe_long = resample_to_interval(dataframe, long_interval)

    # compute our RSI's
    dataframe_short['rsi'] = ta.RSI(dataframe_short, timeperiod=14)
    dataframe_long['rsi'] = ta.RSI(dataframe_long, timeperiod=14)

    dataframe['cci'] = ta.CCI(dataframe, timeperiod=20)
    dataframe['mfi'] = ta.MFI(dataframe)
    dataframe['CDLHAMMER'] = ta.CDLHAMMER(dataframe)

    # merge dataframe back together
    dataframe = resampled_merge(dataframe, dataframe_short)
    dataframe = resampled_merge(dataframe, dataframe_long)

    dataframe['rsi'] = ta.RSI(dataframe, timeperiod=14)
    # Forward-fill; ffill() replaces the deprecated fillna(method='ffill').
    dataframe.ffill(inplace=True)

    # Inverse Fisher transform on RSI, values [-1.0, 1.0] (https://goo.gl/2JGGoy)
    dataframe['fisher_rsi'] = fishers_inverse(dataframe['rsi'])
    # Inverse Fisher transform on RSI normalized, value [0.0, 100.0] (https://goo.gl/2JGGoy)
    dataframe['fisher_rsi_norma'] = 50 * (dataframe['fisher_rsi'] + 1)

    # NOTE(review): 'resample_rsi_8' is filled from the 7x interval, not 8x;
    # the column name is kept because callers may depend on it.
    dataframe['resample_rsi_2'] = dataframe['resample_{}_rsi'.format(short_interval)]
    dataframe['resample_rsi_8'] = dataframe['resample_{}_rsi'.format(long_interval)]

    dataframe['average'] = (dataframe['close'] + dataframe['open']
                            + dataframe['high'] + dataframe['low']) / 4

    return dataframe
def _build_indicators(num_secs):  # accepts a list of one-day Series
    """Generator that turns daily data into the latest indicator vector.

    Protocol, as visible in the code: after priming, the first ``send()``
    supplies the initial data, from which one window per item is created via
    ``_rename_columns``. Each later ``send()`` supplies the next day's data
    and receives ``data_with_ind``, a list of ``{"data": X, "price": p}``
    dicts where ``X`` is the last row of the indicator frame and ``p`` the
    last close rounded to 2 decimals.

    From day 170 onward a result is only accepted when ``X`` contains no NaN;
    otherwise the indicators are recomputed ("Reevaluating").

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost. In particular ``data_with_ind = []`` sits inside
    the per-security loop, so only the last security's entry survives -
    confirm against the original file.
    """
    sec_idx_range = range(num_secs)
    sliding_window = []  # list of pd.DataFrames
    data = yield
    for datum in data:
        sliding_window += [_rename_columns(datum)]
    current_day = 0
    while True:
        passes_validity_check, num_validation_iterations = False, 0
        # time.sleep(1)
        while not passes_validity_check:
            for i in sec_idx_range:  # for each security
                # print("Current day:", current_day)
                if current_day != 0:
                    # Windows are only extended / trimmed on the first
                    # attempt of a day, not on re-evaluation passes.
                    if current_day > 170 and num_validation_iterations == 0:
                        sliding_window[i] = sliding_window[i].iloc[
                            1:]  # pop the first
                    # NOTE(review): every datum is appended to window i, not
                    # just the i-th one - verify this is intended.
                    for datum in data:
                        if num_validation_iterations == 0:
                            sliding_window[i] = sliding_window[i].append(
                                _rename_columns(datum))
                data_with_ind = []
                series = sliding_window[i]
                series = series.reset_index(drop=True)
                inputs = series.to_dict(orient="list")
                for col in inputs:
                    inputs[col] = np.array(inputs[col])
                c = series.close
                for n in range(2, 40):
                    inputs["bband_u_" + str(n)], inputs["bband_m_" + str(n)], inputs["bband_l_" + str(n)] = ta.BBANDS(
                        inputs, n)
                    inputs["sma_" + str(n)] = ta.SMA(inputs, timeperiod=n)
                    inputs["adx_" + str(n)] = ta.ADX(inputs, timeperiod=n)
                    # print("\nINPUTS:", inputs)
                    # if current_day > n*2:
                    # MACD from pandas exponentially weighted means; only the
                    # latest scalar of each line is stored.
                    fast_ema = c.ewm(span=n).mean()
                    slow_ema = c.ewm(span=n * 2).mean()
                    # print(fast_ema, slow_ema)
                    macd1 = fast_ema - slow_ema
                    macd2 = macd1.ewm(span=n * 2 / 3).mean()
                    macd3 = macd1 - macd2
                    inputs["macd_" + str(n)], inputs["macdsignal_" + str(n)], inputs[
                        "macdhist_" + str(n)] = macd1.iloc[
                        -1], macd2.iloc[-1], macd3.iloc[-1]
                    # NOTE(review): debugging hook - terminates the process
                    # with exit status 69 on day 160.
                    if current_day == 160:
                        print(n)
                        print(macd1, macd2, macd3)
                        sys.exit(69)
                    # else:
                    #     inputs["macd_"+str(n)], inputs["macdsignal_"+str(n)], inputs["macdhist_"+str(n)] = [np.NaN]*3
                    inputs["mfi_" + str(n)] = ta.MFI(inputs, n)
                    inputs["ult_" + str(n)] = ta.ULTOSC(
                        inputs, n, n * 2, n * 4)
                    inputs["willr_" + str(n)] = ta.WILLR(inputs, n)
                    # Independent of n; overwritten on each pass.
                    inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
                    inputs["mom_" + str(n)] = ta.MOM(inputs, n)
                    inputs["mom_" + str(n)] = ta.MOM(inputs, n)  # duplicate of the line above
                inputs["volume"] = list(
                    map(lambda x: x / 10000, inputs["volume"]))
                series = pd.DataFrame().from_dict(inputs)
                price = series["close"].iloc[-1]
                if isinstance(price, np.ndarray):
                    price = price.tolist()
                # (A block that dropped NaN rows and the matching prices used
                # to live here, commented out.)
                X = series.iloc[-1].values
                if current_day < 170:
                    # Before day 170 the vector is accepted even with NaNs.
                    passes_validity_check = True
                elif not np.isnan(X).any():
                    passes_validity_check = True
                else:
                    num_validation_iterations += 1
                    print("Reevaluating, iteration",
                          num_validation_iterations, "day:", current_day)
                data_with_ind += [{"data": X, "price": round(price, 2)}]
        data = yield data_with_ind
        current_day += 1
def build_data_to_dict(secs, raw = False):
    """Build (or load from cache) the training data for several securities.

    For each code in ``secs`` the price history is fetched with
    ``quandl.get``, local minima/maxima of the close are labelled, indicators
    for periods 2..39 are computed and rows with missing values are removed.
    All securities are then concatenated, optionally standardised, split
    90/10 with ``train_test_split(random_state=0)`` and pickled under
    ``./stock_data/``.

    Labels are one-hot ``int32`` triples: ``[1, 0, 0]`` for a local minimum
    followed within ``MIN_MAX_PERIOD`` rows by a rise above ``HI_LO_DIFF``,
    ``[0, 0, 1]`` for the mirrored maximum case and ``[0, 1, 0]`` otherwise.

    Args:
        secs: iterable of security codes; the first 5 characters of each are
            stripped when building the cache file name.
        raw: when true, ``X_norm`` is the unscaled feature matrix and the
            cache file carries a ``_raw`` suffix.

    Returns:
        dict with keys ``X_norm``, ``Y``, ``trX``, ``trY``, ``testX``,
        ``testY`` and ``price``.
    """
    PICKLE_NAME = "_".join(s[5:] for s in secs)
    print("SECURITIES: ", PICKLE_NAME.split("_"))

    # One path for both the existence check and the write, so a raw build is
    # found again on the next call and is never confused with a scaled one.
    pickle_path = "./stock_data/" + (PICKLE_NAME if not raw else PICKLE_NAME + "_raw") + "_data.pickle"

    if os.path.isfile(pickle_path):
        print("Pickle found, loading...")
        # NOTE: pickle.load executes arbitrary code; only load cache files
        # this program wrote itself.
        with open(pickle_path, "rb") as pickle_file:
            _data = pickle.load(pickle_file)
        return {key: _data[key]
                for key in ("X_norm", "Y", "trX", "trY", "testX", "testY", "price")}

    print("No pickle found, getting data...")
    df = pd.DataFrame()
    Y = pd.Series()
    prices = []
    for sec in secs:
        sec_df = quandl.get(sec)
        if "Adj. Close" in sec_df.columns:
            sec_df = sec_df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
            sec_df.rename(columns=lambda x: x[5:].lower(), inplace=True)  # Remove the "Adj. " and make lowercase
        elif "Close" in sec_df.columns:
            sec_df = sec_df[["Open", "High", "Low", "Close", "Volume"]]
            sec_df.rename(columns=lambda x: x.lower(), inplace=True)  # make lowercase

        print("Calculating output for", sec)
        price = sec_df['close'].values
        minIdxs = argrelextrema(price, np.less)
        maxIdxs = argrelextrema(price, np.greater)

        # NOTE(review): Series.set_value and Series.nonzero below were
        # removed in pandas 1.0; left untouched here, confirm the pinned
        # pandas version before upgrading.
        sec_Y = pd.Series(name="signal", dtype=np.ndarray, index=range(0, len(price)))
        n = 0
        for _, idx in np.ndenumerate(minIdxs):
            if idx < MIN_MAX_PERIOD:
                continue
            max_price = max(price[idx: idx + MIN_MAX_PERIOD])
            # label only if the rise after the minimum exceeds HI_LO_DIFF
            if ((max_price - price[idx]) / price[idx]) > HI_LO_DIFF:
                sec_Y.set_value(idx, np.array([1, 0, 0], np.int32))
                n += 1
        print("MINS:", n)
        n = 0
        for _, idx in np.ndenumerate(maxIdxs):
            if idx < MIN_MAX_PERIOD:
                continue
            min_price = min(price[idx: idx + MIN_MAX_PERIOD])
            # label only if the drop after the maximum exceeds HI_LO_DIFF
            if ((price[idx] - min_price) / min_price) > HI_LO_DIFF:
                sec_Y.set_value(idx, np.array([0, 0, 1], np.int32))
                n += 1
        print("MAXS:", n)
        for idx in pd.isnull(sec_Y).nonzero()[0]:
            sec_Y.set_value(idx, np.array([0, 1, 0], np.int32))

        sec_df.reset_index(drop=True, inplace = True)
        if isinstance(price, np.ndarray):
            price = price.tolist()

        # ---- indicators ----
        print("Building indicators...")
        inputs = sec_df.to_dict(orient="list")
        for col in inputs:
            inputs[col] = np.array(inputs[col])
        for n in range(2, 40):
            inputs["bband_u_"+str(n)], inputs["bband_m_"+str(n)], inputs["bband_l_"+str(n)] = ta.BBANDS(inputs, n)
            inputs["sma_"+str(n)] = ta.SMA(inputs, timeperiod = n)
            inputs["adx_"+str(n)] = ta.ADX(inputs, timeperiod = n)
            # The signal period must be an integer; n*2/3 is a float under
            # Python 3 true division.
            inputs["macd_"+str(n)], inputs["macdsignal_"+str(n)], inputs["macdhist_"+str(n)] = ta.MACD(
                inputs, n, n*2, int(n*2/3))
            inputs["mfi_"+str(n)] = ta.MFI(inputs, n)
            inputs["ult_"+str(n)] = ta.ULTOSC(inputs, n, n*2, n*4)
            inputs["willr_"+str(n)] = ta.WILLR(inputs, n)
            # Independent of n; overwritten on each pass.
            inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
            inputs["mom_"+str(n)] = ta.MOM(inputs, n)
        inputs["volume"] = list(map(lambda x: x/10000, inputs["volume"]))
        sec_df = pd.DataFrame().from_dict(inputs)

        # Remove every row that has a missing indicator from the features,
        # the labels and the price list together. Filtering by mask keeps the
        # three aligned; popping prices by index while walking forward
        # shifted the remaining positions and removed the wrong prices.
        keep = ~sec_df.isnull().any(axis=1).values
        sec_df = sec_df[keep]
        sec_Y = sec_Y[keep]
        price = [p for p, kept in zip(price, keep) if kept]

        prices.append(price)
        df = pd.concat([df, sec_df])
        Y = pd.concat([Y, sec_Y])

    # Flatten the per-security price lists so they line up with the rows of X.
    prices = [j for i in prices for j in i]

    # ---- neural net inputs ----
    Y = np.vstack(Y.values)
    X = df.values
    if not raw:
        scaler = prep.StandardScaler().fit(X)
        X_norm = scaler.transform(X)
        from sklearn.externals import joblib
        # NOTE(review): the scaler file is named after the last security only.
        joblib.dump(scaler, "./stock_data/" + sec + ".scaler")
    else:
        X_norm = X
    trX, testX, trY, testY = train_test_split(X_norm, Y, test_size = 0.1, random_state=0)

    # "price" now carries the prices of all securities (it used to hold only
    # the last security's list, which did not match the rows of X_norm).
    output = {"X_norm": X_norm, "Y": Y, "trX": trX, "trY": trY,
              "testX": testX, "testY": testY, "price": prices}
    with open(pickle_path, "wb") as pickle_file:
        pickle.dump(output, pickle_file)
    return output
def _build_indicators(data):
    """Compute the indicator matrix and matching prices for each frame.

    Every frame in ``data`` is deep-copied, reduced to its (adjusted) OHLCV
    columns with lowercase names, and extended with indicators for periods
    2..39. Rows that contain a missing value are removed, together with the
    close price at the same position.

    Args:
        data: iterable of DataFrames with either "Adj. Open/High/Low/Close/
            Volume" or "Open/High/Low/Close/Volume" columns.

    Returns:
        list of ``{"data": X, "price": price}`` dicts, one per input frame,
        where ``X`` is the feature matrix (``df.values``) and ``price`` the
        list of close prices for the rows that were kept.
    """
    data_with_ind = []
    for df in data:
        df = copy.deepcopy(df)
        if "Adj. Close" in df.columns:
            df = df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
            df.rename(columns=lambda x: x[5:].lower(), inplace=True)  # Remove the "Adj. " and make lowercase
        elif "Close" in df.columns:
            df = df[["Open", "High", "Low", "Close", "Volume"]]
            df.rename(columns=lambda x: x.lower(), inplace=True)  # make lowercase
        df.reset_index(drop=True, inplace = True)

        inputs = df.to_dict(orient="list")
        for col in inputs:
            inputs[col] = np.array(inputs[col])
        for n in range(2, 40):
            inputs["bband_u_"+str(n)], inputs["bband_m_"+str(n)], inputs["bband_l_"+str(n)] = ta.BBANDS(inputs, n)
            inputs["sma_"+str(n)] = ta.SMA(inputs, timeperiod = n)
            inputs["adx_"+str(n)] = ta.ADX(inputs, timeperiod = n)
            # The signal period must be an integer; n*2/3 is a float under
            # Python 3 true division.
            inputs["macd_"+str(n)], inputs["macdsignal_"+str(n)], inputs["macdhist_"+str(n)] = ta.MACD(
                inputs, n, n*2, int(n*2/3))
            inputs["mfi_"+str(n)] = ta.MFI(inputs, n)
            inputs["ult_"+str(n)] = ta.ULTOSC(inputs, n, n*2, n*4)
            inputs["willr_"+str(n)] = ta.WILLR(inputs, n)
            # Independent of n; overwritten on each pass.
            inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
            inputs["mom_"+str(n)] = ta.MOM(inputs, n)
        inputs["volume"] = list(map(lambda x: x/10000, inputs["volume"]))
        df = pd.DataFrame().from_dict(inputs)

        price = df["close"].values
        if isinstance(price, np.ndarray):
            price = price.tolist()

        # Keep only complete rows, and the prices at the same positions.
        # The previous mark-then-pop loop computed the pop position as
        # len(price) - 1 - i, which mirrors the index and removed valid
        # prices instead of the marked ones.
        keep = ~df.isnull().any(axis=1).values
        df = df[keep]
        price = [p for p, kept in zip(price, keep) if kept]

        print(df["adx_10"])
        X = df.values
        data_with_ind += [{"data": X, "price": price}]
    return data_with_ind
def build_data(raw = False, random_split = True, start_date = None, end_date = None, test_proportion = 0.1):
    """Generator that yields the training-data dict for each stock code sent in.

    Protocol, as visible in the code: after priming, ``send(stock_code)``
    returns either a dict with keys ``X_norm``, ``Y``, ``trX``, ``trY``,
    ``testX``, ``testY`` and ``price``, or ``None`` when the code is skipped
    (contains ".", is listed in ``stock_data/invalid_stocks.txt``, is not
    found by quandl, or its data could not be aligned). The generator ends
    when the code received at the top of the outer loop is ``None``.

    Results are cached as ``./stock_data/<name>_data.pickle`` where ``<name>``
    encodes the ticker, ``raw``, ``random_split`` and the date range.

    Args:
        raw: skip standardisation; ``X_norm`` is then the plain feature matrix.
        random_split: use ``train_test_split(random_state=0)``; otherwise the
            test set is clipped off the end of the data.
        start_date, end_date: passed to ``quandl.get`` and appended to the
            cache name when set.
        test_proportion: fraction of rows used for the test set.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    # if len(sec) == 1 and os.path.isfile(secs[0]): #it's a file
    #     with open(secs[0]) as f:
    #         secs = ["WIKI/" + line.strip() for line in f]
    # print("SECURITIES: ", s[5:] for s in secs)
    with open("stock_data/invalid_stocks.txt", "r+") as f:
        invalid_stock_codes = [line.strip() for line in f]
    # Re-opened for appending newly discovered invalid codes.
    # NOTE(review): this handle is never closed inside the generator.
    f = open("stock_data/invalid_stocks.txt", "a")
    stock_code = yield
    while True and stock_code is not None:
        valid_stock = False
        # Keep asking for another code until one passes both filters.
        while not valid_stock:
            if "." in stock_code:
                stock_code = yield None
                continue
            if stock_code in invalid_stock_codes:
                # print("Skipping security", sec)
                stock_code = yield None
                continue
            valid_stock = True
        sec = stock_code.split("/")[1]  # Just the ticker, not the database code
        pickle_name = sec
        if raw: pickle_name += "_raw"
        if not random_split: pickle_name += "_notrand"
        if start_date and end_date: pickle_name += start_date + "to" + end_date
        elif start_date: pickle_name += start_date
        elif end_date: pickle_name += "to" + end_date
        if not os.path.isfile("./stock_data/" + pickle_name + "_data.pickle"):
            # print("No pickle found, getting data for", sec)
            try:
                # print("Getting data for", stock_code)
                df = quandl.get(stock_code, start_date = start_date, end_date = end_date)
            except quandl.errors.quandl_error.NotFoundError:
                # Remember the bad code both in memory and on disk.
                invalid_stock_codes += [stock_code]
                f.write(stock_code + "\n")
                stock_code = yield None
                continue
            if "Adj. Close" in df.columns:
                df = df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
                df.rename(columns=lambda x: x[5:].lower(), inplace=True)  # Remove the "Adj. " and make lowercase
            elif "Close" in df.columns:
                df = df[["Open", "High", "Low", "Close", "Volume"]]
                df.rename(columns=lambda x: x.lower(), inplace=True)  # make lowercase
            price = df['close'].values
            minIdxs = argrelextrema(price, np.less)
            maxIdxs = argrelextrema(price, np.greater)
            Y = pd.Series(name="signal", dtype=np.ndarray, index=range(0, len(price)))
            n=0
            for _, idx in np.ndenumerate(minIdxs):
                # if idx < MIN_MAX_PERIOD: continue
                max_price = max(price[idx: idx + MIN_MAX_PERIOD])
                if ((max_price - price[idx]) / price[idx]) > HI_LO_DIFF:  # if the difference between max and min is > X%
                    Y.set_value(idx, np.array([1., 0.], np.float32))
                    n+=1
            # print("MINS:", n)
            n=0
            for _, idx in np.ndenumerate(maxIdxs):
                # if idx < MIN_MAX_PERIOD: continue
                min_price = min(price[idx: idx + MIN_MAX_PERIOD])
                if ((price[idx] - min_price)/ min_price) > HI_LO_DIFF:  # if the difference between max and min is > X%
                    Y.set_value(idx, np.array([0., 1.], np.float32))
                    n+=1
            # print("MAXS:", n)
            # Fill the unlabelled rows with the label of whichever extremum
            # (minimum or maximum) was seen most recently.
            _min_idx, _max_idx = 0, 0
            for i, y in np.ndenumerate(Y.values):
                if np.array_equal(y, [1., 0.]):
                    _min_idx = i[0]
                elif np.array_equal(y, [0., 1.]):
                    _max_idx = i[0]
                else:
                    if _min_idx > _max_idx:
                        s = np.array([1., 0.])
                    elif _max_idx > _min_idx:
                        s = np.array([0., 1.])
                    else:
                        s = np.array([0., 0.])  # no action taken, only occurs at the beginnings of datasets, afaik
                    Y.set_value(i, s, np.float32)
            df.reset_index(drop=True, inplace = True)
            if isinstance(price, np.ndarray):
                price = price.tolist()

            ''' INDICATORS '''
            # print(len(df), len(Y))
            # print("Building indicators...")
            inputs = df.to_dict(orient="list")
            for col in inputs:
                inputs[col] = np.array(inputs[col])
            for n in range(2, 40):
                inputs["bband_u_"+str(n)], inputs["bband_m_"+str(n)], inputs["bband_l_"+str(n)] = ta.BBANDS(inputs, n)
                inputs["sma_"+str(n)] = ta.SMA(inputs, timeperiod =
                                               n)
                inputs["adx_"+str(n)] = ta.ADX(inputs, timeperiod = n)
                # NOTE(review): n*2/3 is a float under Python 3 true division
                # - confirm the MACD call accepts it as the signal period.
                inputs["macd_"+str(n)], inputs["macdsignal_"+str(n)], inputs["macdhist_"+str(n)] = ta.MACD(inputs, n, n*2, n*2/3)
                inputs["mfi_"+str(n)] = ta.MFI(inputs, n)
                inputs["ult_"+str(n)] = ta.ULTOSC(inputs, n, n*2, n*4)
                inputs["willr_"+str(n)] = ta.WILLR(inputs, n)
                # Independent of n; overwritten on each pass.
                inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
                inputs["mom_"+str(n)] = ta.MOM(inputs, n)
                inputs["mom_"+str(n)] = ta.MOM(inputs, n)  # duplicate of the line above
            inputs["volume"] = list(map(lambda x: x/10000, inputs["volume"]))
            df = pd.DataFrame().from_dict(inputs)
            # Drop rows with any missing value from features, labels and
            # prices. Walking the rows in reverse keeps the remaining list
            # positions valid while popping.
            broken = False
            for idx, val in reversed(list(df.isnull().any(axis=1).iteritems())):
                if val == True:
                    # print(actual_idx, val)
                    df.drop(idx, inplace = True)
                    Y.drop(idx, inplace = True)
                    try:
                        # price[actual_idx] = None
                        price.pop(idx)
                    except IndexError:  # drop the security
                        # print("Error, dropping security", sec)
                        broken = True
                        break

            ''' BUILD NEURAL NET INPUTS '''
            if not broken:
                Y = np.vstack(Y.values)
                print(df["adx_10"])
                X = df.values
                # print(X[0:2])
                if not raw:
                    # A scaler saved by an earlier run is reused; otherwise
                    # one is fitted on this data and saved.
                    rand = "_notrand" if not random_split else ""
                    if not os.path.isfile("./stock_data/" + sec + rand + ".scaler"):
                        scaler = prep.StandardScaler().fit(X)
                        X_norm = scaler.transform(X)
                        joblib.dump(scaler, "./stock_data/" + sec + rand + ".scaler")
                    else:
                        scaler = joblib.load("./stock_data/" + sec + rand + ".scaler")
                        X_norm = scaler.transform(X)
                else:
                    X_norm = X
                if random_split:
                    trX, testX, trY, testY = train_test_split(X_norm, Y, test_size = test_proportion, random_state=0)
                else:  # just clips the test data off the end
                    l = len(X_norm)
                    trX, testX = X_norm[:int(-test_proportion*l)], X_norm[int(-test_proportion*l):]
                    trY, testY = Y[:int(-test_proportion*l)], Y[int(-test_proportion*l):]
                # print("Pickling...")
                output = {"X_norm":
                          X_norm, "Y": Y, "trX": trX, "trY": trY, "testX": testX, "testY": testY, "price": price}
                # NOTE(review): the file object passed to pickle.dump is not
                # closed explicitly.
                pickle.dump(output, open("./stock_data/" + pickle_name + "_data.pickle", "wb"))
                stock_code = yield output
            else:
                # Alignment failed: blacklist the code and report nothing.
                invalid_stock_codes += [stock_code]
                f.write(stock_code + "\n")
                stock_code = yield None
        else:
            # print("Pickle found, loading...")
            # NOTE(review): pickle.load executes arbitrary code; only load
            # cache files this program wrote itself.
            _data = pickle.load(open("./stock_data/" + pickle_name + "_data.pickle", "rb"))
            trX, trY, testX, testY, price, X_norm, Y = _data["trX"], _data["trY"], _data["testX"], _data["testY"], _data["price"], _data["X_norm"], _data["Y"]
            stock_code = yield {"X_norm": X_norm, "Y": Y, "trX": trX, "trY": trY, "testX": testX, "testY": testY, "price": price}
def WILLR(self):  # 7
    """Store the 14-period WILLR of ``self.company_stock`` in its 'WILLR' column."""
    self.company_stock['WILLR'] = abstract.WILLR(self.company_stock, timeperiod=14)