def ultosc(self):
    """Ultimate Oscillator signal: 'buy' above 70, 'sell' below 40, else 'neutral'."""
    series = tb.ULTOSC(self.dataframe, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    latest = series[len(series) - 1]
    if latest > 70:
        return "buy"
    if latest < 40:
        return "sell"
    return "neutral"
def ULTOSC(self):
    """Ultimate Oscillator signal around the 50 midline: 'buy' above, 'sell' below."""
    values = tb.ULTOSC(self.dataframe, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    latest = values[len(values) - 1]
    # print("ULTOSC: " + str(latest))
    if latest > 50:
        return "buy"
    if latest < 50:
        return "sell"
    return "neutral"
def evaluate_ultimate_oscilator(self, prefix="uo", impact_buy=1, impact_sell=1):
    """Evaluate the Ultimate Oscillator and mark weighted buy/sell signals.

    A buy signal is set where the oscillator is below 30 (oversold) and a
    sell signal where it is above 70 (overbought).

    :param prefix: column-name prefix for the indicator and signal columns
    :param impact_buy: weight multiplier for buy signals
    :param impact_sell: weight multiplier for sell signals
    :return: None — columns are added to ``self.dataframe`` in place
    """
    self._weights(impact_buy, impact_sell)
    df = self.dataframe
    name = f"{prefix}"
    df[name] = ta.ULTOSC(df)
    oversold = df[name] < 30
    overbought = df[name] > 70
    df.loc[oversold, f"buy_{name}"] = 1 * impact_buy
    df.loc[overbought, f"sell_{name}"] = 1 * impact_sell
def evaluate_ultimate_oscilator(self, prefix="uo", impact_buy=1, impact_sell=1):
    """Evaluate the Ultimate Oscillator (ULTOSC).

    Marks a weighted buy signal where the oscillator drops below 30
    (oversold) and a weighted sell signal where it rises above 70
    (overbought).

    :param prefix: column-name prefix for the generated columns
    :param impact_buy: weight multiplier for buy signals
    :param impact_sell: weight multiplier for sell signals
    :return: None — columns are added to ``self.dataframe`` in place
    """
    self._weights(impact_buy, impact_sell)
    dataframe = self.dataframe
    # f-strings for consistency with the sibling implementation of this
    # evaluator elsewhere in the file (replaces dated str.format calls).
    name = f"{prefix}"
    dataframe[name] = ta.ULTOSC(dataframe)
    dataframe.loc[dataframe[name] < 30, f"buy_{name}"] = 1 * impact_buy
    dataframe.loc[dataframe[name] > 70, f"sell_{name}"] = 1 * impact_sell
def TKE(dataframe, *, length=14, emaperiod=5):
    """TKE oscillator by Dr Yasar ERDINC (https://www.tradingview.com/script/Pcbvo0zG/).

    Averages seven momentum indicators:

        TKE = (RSI + Stochastic + Ultimate Oscillator + MFI + Williams %R
               + Momentum + CCI) / 7

    Interpretation: oversold below 20, overbought above 80; a cross above
    20 is a buy signal.  The companion EMA (default 5 bars of the TKE
    line) gives crossover signals: go long when TKE crosses above it,
    short when TKE crosses below.

    Usage: ``dataframe['TKE'], dataframe['TKEema'] = TKE(dataframe)``
    """
    import talib.abstract as ta
    work = dataframe.copy()
    rolling_low = work["low"].rolling(window=length).min()
    rolling_high = work["high"].rolling(window=length).max()
    work["rsi"] = ta.RSI(work, timeperiod=length)
    work["stoch"] = 100 * (work["close"] - rolling_low) / (rolling_high - rolling_low)
    work["ultosc"] = ta.ULTOSC(work, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    work["mfi"] = ta.MFI(work, timeperiod=length)
    work["willr"] = ta.WILLR(work, timeperiod=length)
    work["mom"] = ta.ROCR100(work, timeperiod=length)
    work["cci"] = ta.CCI(work, timeperiod=length)
    components = ["rsi", "stoch", "ultosc", "mfi", "willr", "mom", "cci"]
    work["TKE"] = work[components].mean(axis="columns")
    work["TKEema"] = ta.EMA(work["TKE"], timeperiod=emaperiod)
    return work["TKE"], work["TKEema"]
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    """Attach all momentum indicators used by this strategy to ``dataframe``."""
    # --- Momentum indicators -------------------------------------------

    # Average Directional Index with the directional movement/indicator pair.
    dataframe['adx'] = ta.ADX(dataframe)
    dataframe['plus_dm'] = ta.PLUS_DM(dataframe)
    dataframe['plus_di'] = ta.PLUS_DI(dataframe)
    dataframe['minus_dm'] = ta.MINUS_DM(dataframe)
    dataframe['minus_di'] = ta.MINUS_DI(dataframe)

    # Aroon and the Aroon oscillator.
    aroon = ta.AROON(dataframe)
    dataframe['aroonup'] = aroon['aroonup']
    dataframe['aroondown'] = aroon['aroondown']
    dataframe['aroonosc'] = ta.AROONOSC(dataframe)

    # Awesome Oscillator.
    dataframe['ao'] = qtpylib.awesome_oscillator(dataframe)

    # Ultimate Oscillator.
    dataframe['uo'] = ta.ULTOSC(dataframe)

    # Commodity Channel Index: values [Oversold:-100, Overbought:100].
    dataframe['cci'] = ta.CCI(dataframe)

    # RSI plus its inverse-Fisher transform (https://goo.gl/2JGGoy):
    # 'fisher_rsi' is in [-1.0, 1.0], 'fisher_rsi_norma' rescales to [0, 100].
    dataframe['rsi'] = ta.RSI(dataframe)
    rsi = 0.1 * (dataframe['rsi'] - 50)
    dataframe['fisher_rsi'] = (np.exp(2 * rsi) - 1) / (np.exp(2 * rsi) + 1)
    dataframe['fisher_rsi_norma'] = 50 * (dataframe['fisher_rsi'] + 1)

    # Stochastic, slow and fast variants.
    stoch = ta.STOCH(dataframe)
    dataframe['slowd'] = stoch['slowd']
    dataframe['slowk'] = stoch['slowk']
    stoch_fast = ta.STOCHF(dataframe)
    dataframe['fastd'] = stoch_fast['fastd']
    dataframe['fastk'] = stoch_fast['fastk']

    # Stochastic RSI.
    stoch_rsi = ta.STOCHRSI(dataframe)
    dataframe['fastd_rsi'] = stoch_rsi['fastd']
    dataframe['fastk_rsi'] = stoch_rsi['fastk']

    # MACD line, signal line and histogram.
    macd = ta.MACD(dataframe)
    dataframe['macd'] = macd['macd']
    dataframe['macdsignal'] = macd['macdsignal']
    dataframe['macdhist'] = macd['macdhist']

    # Money Flow Index and Rate Of Change.
    dataframe['mfi'] = ta.MFI(dataframe)
    dataframe['roc'] = ta.ROC(dataframe)

    # Further indicators (Keltner/Bollinger channels, EMAs/SMAs, Parabolic
    # SAR, TEMA, Hilbert sine, candlestick patterns, Heikin Ashi) are
    # intentionally disabled; see version history for the original code.

    # Orderbook best bid/ask retrieval, also disabled:
    """
    # first check if dataprovider is available
    if self.dp:
        if self.dp.runmode in ('live', 'dry_run'):
            ob = self.dp.orderbook(metadata['pair'], 1)
            dataframe['best_bid'] = ob['bids'][0][0]
            dataframe['best_ask'] = ob['asks'][0][0]
    """
    return dataframe
def _build_indicators(self, df):
    """Build the full technical-indicator feature set for OHLCV frame ``df``.

    Batch mode (``self.realtime`` false): computes every indicator over the
    whole frame at once via the TA-Lib abstract API and returns a new
    DataFrame.  Realtime mode: replays ``df`` one row at a time through
    ``stream`` (TA-Lib streaming API — confirm) so each day's features only
    see data available up to that day.

    :param df: frame with 'open', 'high', 'low', 'close', 'volume' columns
        (grounded by the column accesses below).
    :return: DataFrame of raw columns plus per-period indicator columns
        ('bband_*', 'sma_*', 'adx_*', 'macd_*', 'mfi_*', 'ult_*',
        'willr_*', 'slowk'/'slowd', 'mom_*') for n in 2..39.
    """
    if not self.realtime:
        inputs = df.to_dict(orient="list")
        for col in inputs:
            inputs[col] = np.array(inputs[col])
        c = df["close"]
        for n in range(2, 40):
            inputs["bband_u_" + str(n)], inputs["bband_m_" + str(n)], inputs["bband_l_" + str(n)] = ta.BBANDS(
                inputs, n)
            inputs["sma_" + str(n)] = ta.SMA(inputs, timeperiod=n)
            inputs["adx_" + str(n)] = ta.ADX(inputs, timeperiod=n)
            # MACD periods are fast=n, slow=2n, signal=2n/3.
            # NOTE(review): for n == 2 both branches pass a signal period of 1
            # (int(4 / 3) == 1), so this special case looks redundant — confirm.
            if n != 2:
                inputs["macd_" + str(n)], inputs["macdsignal_" + str(n)], inputs["macdhist_" + str(n)] = ta.MACD(
                    inputs, n, n * 2, int(n * 2 / 3))
            else:
                inputs["macd_" + str(n)], inputs["macdsignal_" + str(n)], inputs["macdhist_" + str(n)] = ta.MACD(
                    inputs, n, n * 2, 1)
            inputs["mfi_" + str(n)] = ta.MFI(inputs, n)
            inputs["ult_" + str(n)] = ta.ULTOSC(inputs, n, n * 2, n * 4)
            inputs["willr_" + str(n)] = ta.WILLR(inputs, n)
            # NOTE(review): 'slowk'/'slowd' do not depend on n; they are
            # recomputed identically on every loop iteration.
            inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
            inputs["mom_" + str(n)] = ta.MOM(inputs, n)
        # Scale volume down — presumably to keep features in a similar
        # numeric range as the prices; confirm against the model's scaler.
        inputs["volume"] = list(map(lambda x: x / 10000, inputs["volume"]))
        df = pd.DataFrame().from_dict(inputs)
        return df
    else:
        # Build data one-by-one, as if it's coming in one at a time.
        output = pd.DataFrame()
        sliding_window = pd.DataFrame()
        for idx, day in df.iterrows():
            print("\rNow building day", str(idx), end="", flush=True)
            day = copy.deepcopy(day)  # avoid mutating the caller's row
            sliding_window = sliding_window.append(day, ignore_index=True)
            day_out = {}
            # NOTE(review): 'o' is assigned but never used below.
            o = sliding_window["open"].values
            h = sliding_window["high"].values
            l = sliding_window["low"].values
            c_series = sliding_window["close"]
            c = sliding_window["close"].values
            v = sliding_window["volume"].values
            # Carry the latest raw OHLC values through to the output row.
            for t in ["open", "high", "low", "close"]:
                day_out[t] = sliding_window[t].values[-1]
            for n in range(2, 40):
                day_out["bband_u_" + str(n)], day_out["bband_m_" + str(n)], day_out[
                    "bband_l_" + str(n)] = stream.BBANDS(c, n)
                day_out["sma_" + str(n)] = stream.SMA(c, timeperiod=n)
                day_out["adx_" + str(n)] = stream.ADX(h, l, c, timeperiod=n)
                # MACD computed manually from EWMs rather than via
                # stream.MACD — presumably to sidestep a limitation seen in
                # the batch branch's n == 2 special case; TODO confirm.
                fast_ema = c_series.ewm(span=n, adjust=False).mean()
                slow_ema = c_series.ewm(span=n * 2, adjust=False).mean()
                macd1 = fast_ema - slow_ema
                macd2 = macd1.ewm(span=int(n * 2 / 3), adjust=False).mean()
                macd3 = macd1 - macd2
                day_out["macd_" + str(n)] = macd1.values[-1]
                day_out["macdsignal_" + str(n)] = macd2.values[-1]
                day_out["macdhist_" + str(n)] = macd3.values[-1]
                day_out["mfi_" + str(n)] = stream.MFI(h, l, c, v, n)
                day_out["ult_" + str(n)] = stream.ULTOSC(
                    h, l, c, n, n * 2, n * 4)
                day_out["willr_" + str(n)] = stream.WILLR(h, l, c, n)
                day_out["slowk"], day_out["slowd"] = stream.STOCH(h, l, c)
                day_out["mom_" + str(n)] = stream.MOM(c, n)
            day_out["volume"] = v[-1] / 10000
            output = output.append(day_out, ignore_index=True)
        return output
def _build_indicators(num_secs):  # accepts a list of one-day Series
    """Coroutine that incrementally builds indicator feature rows per security.

    Prime with ``next()``, then repeatedly ``send()`` a list of one-day
    Series (one per security); each send yields a list of dicts with keys
    "data" (latest feature row) and "price" (latest close, rounded to 2dp).

    Keeps a sliding-window DataFrame per security; from day 170 onward the
    oldest row is dropped before appending the new one, and the feature row
    is re-evaluated until it contains no NaNs.
    """
    sec_idx_range = range(num_secs)
    sliding_window = []  # list of pd.DataFrames, one per security
    data = yield
    for datum in data:
        sliding_window += [_rename_columns(datum)]
    current_day = 0
    while True:
        passes_validity_check, num_validation_iterations = False, 0
        while not passes_validity_check:
            for i in sec_idx_range:  # for each security
                if current_day != 0:
                    if current_day > 170 and num_validation_iterations == 0:
                        sliding_window[i] = sliding_window[i].iloc[
                            1:]  # pop the first (oldest) row
                    for datum in data:
                        if num_validation_iterations == 0:
                            sliding_window[i] = sliding_window[i].append(
                                _rename_columns(datum))
                # NOTE(review): resetting this inside the per-security loop
                # means only the last security's entry survives when
                # num_secs > 1 — looks intended for num_secs == 1; confirm.
                data_with_ind = []
                series = sliding_window[i]
                series = series.reset_index(drop=True)
                inputs = series.to_dict(orient="list")
                for col in inputs:
                    inputs[col] = np.array(inputs[col])
                c = series.close
                for n in range(2, 40):
                    inputs["bband_u_" + str(n)], inputs["bband_m_" + str(n)], inputs["bband_l_" + str(n)] = ta.BBANDS(
                        inputs, n)
                    inputs["sma_" + str(n)] = ta.SMA(inputs, timeperiod=n)
                    inputs["adx_" + str(n)] = ta.ADX(inputs, timeperiod=n)
                    # MACD from EWMs: fast=n, slow=2n, signal=2n/3.
                    fast_ema = c.ewm(span=n).mean()
                    slow_ema = c.ewm(span=n * 2).mean()
                    macd1 = fast_ema - slow_ema
                    macd2 = macd1.ewm(span=n * 2 / 3).mean()
                    macd3 = macd1 - macd2
                    inputs["macd_" + str(n)], inputs["macdsignal_" + str(n)], inputs[
                        "macdhist_" + str(n)] = macd1.iloc[
                        -1], macd2.iloc[-1], macd3.iloc[-1]
                    # NOTE(review): leftover debug kill-switch — terminates
                    # the whole process on day 160; remove before real use.
                    if current_day == 160:
                        print(n)
                        print(macd1, macd2, macd3)
                        sys.exit(69)
                    inputs["mfi_" + str(n)] = ta.MFI(inputs, n)
                    inputs["ult_" + str(n)] = ta.ULTOSC(
                        inputs, n, n * 2, n * 4)
                    inputs["willr_" + str(n)] = ta.WILLR(inputs, n)
                    inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
                    # NOTE(review): duplicated assignment below (harmless,
                    # same value computed twice).
                    inputs["mom_" + str(n)] = ta.MOM(inputs, n)
                    inputs["mom_" + str(n)] = ta.MOM(inputs, n)
                inputs["volume"] = list(
                    map(lambda x: x / 10000, inputs["volume"]))
                series = pd.DataFrame().from_dict(inputs)
                price = series["close"].iloc[-1]
                if isinstance(price, np.ndarray):
                    price = price.tolist()
                X = series.iloc[-1].values
                # Indicator warm-up rows may contain NaNs; before day 170
                # accept them, afterwards re-run until the row is clean.
                if current_day < 170:
                    passes_validity_check = True
                elif not np.isnan(X).any():
                    passes_validity_check = True
                else:
                    num_validation_iterations += 1
                    print("Reevaluating, iteration", num_validation_iterations, "day:", current_day)
                data_with_ind += [{"data": X, "price": round(price, 2)}]
        data = yield data_with_ind
        current_day += 1
def build_data_to_dict(secs, raw = False):
    """Download, label, featurize and pickle training data for ``secs``.

    :param secs: quandl codes (e.g. "WIKI/AAPL"); the database prefix is
        stripped (``s[5:]``) when building the pickle name.
    :param raw: when true, skip StandardScaler normalization of X.
    :return: dict with keys "X_norm", "Y", "trX", "trY", "testX", "testY",
        "price".

    NOTE(review): uses pd.Series.set_value / .iteritems and
    sklearn.externals.joblib, all removed in modern library versions —
    this code targets older pandas/sklearn; confirm pinned versions.
    """
    PICKLE_NAME = "_".join(s[5:] for s in secs)
    print("SECURITIES: ", PICKLE_NAME.split("_"))
    if not os.path.isfile("./stock_data/" + PICKLE_NAME + "_data.pickle"):
        print("No pickle found, getting data...")
        df = pd.DataFrame()
        Y = pd.Series()
        prices = []
        for sec in secs:
            sec_df = quandl.get(sec)
            # Normalize columns to lowercase open/high/low/close/volume,
            # preferring the dividend/split-adjusted series when present.
            if "Adj. Close" in sec_df.columns:
                sec_df = sec_df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
                sec_df.rename(columns=lambda x: x[5:].lower(), inplace=True)  # Remove the "Adj. " and make lowercase
            elif "Close" in sec_df.columns:
                sec_df = sec_df[["Open", "High", "Low", "Close", "Volume"]]
                sec_df.rename(columns=lambda x: x.lower(), inplace=True)  # make lowercase
            print("Calculating output for", sec)
            # Label local minima as buy [1,0,0] and local maxima as sell
            # [0,0,1] when the move within the next MIN_MAX_PERIOD days
            # exceeds HI_LO_DIFF; everything else becomes hold [0,1,0].
            price = sec_df['close'].values
            minIdxs = argrelextrema(price, np.less)
            maxIdxs = argrelextrema(price, np.greater)
            sec_Y = pd.Series(name="signal", dtype=np.ndarray, index=range(0, len(price)))
            n = 0
            for _, idx in np.ndenumerate(minIdxs):
                if idx < MIN_MAX_PERIOD: continue
                max_price = max(price[idx: idx + MIN_MAX_PERIOD])
                if ((max_price - price[idx]) / price[idx]) > HI_LO_DIFF:  # rise from this minimum is large enough
                    sec_Y.set_value(idx, np.array([1, 0, 0], np.int32))
                    n += 1
            print("MINS:", n)
            n = 0
            for _, idx in np.ndenumerate(maxIdxs):
                if idx < MIN_MAX_PERIOD: continue
                min_price = min(price[idx: idx + MIN_MAX_PERIOD])
                if ((price[idx] - min_price) / min_price) > HI_LO_DIFF:  # drop from this maximum is large enough
                    sec_Y.set_value(idx, np.array([0, 0, 1], np.int32))
                    n += 1
            print("MAXS:", n)
            # Remaining (unlabeled) rows default to hold.
            for idx in pd.isnull(sec_Y).nonzero()[0]:
                sec_Y.set_value(idx, np.array([0, 1, 0], np.int32))
            sec_df.reset_index(drop=True, inplace = True)
            if isinstance(price, np.ndarray):
                price = price.tolist()
            ''' INDICATORS '''
            print("Building indicators...")
            inputs = sec_df.to_dict(orient="list")
            for col in inputs:
                inputs[col] = np.array(inputs[col])
            # One family of indicators per look-back period n.
            for n in range(2, 40):
                inputs["bband_u_"+str(n)], inputs["bband_m_"+str(n)], inputs["bband_l_"+str(n)] = ta.BBANDS(inputs, n)
                inputs["sma_"+str(n)] = ta.SMA(inputs, timeperiod = n)
                inputs["adx_"+str(n)] = ta.ADX(inputs, timeperiod = n)
                inputs["macd_"+str(n)], inputs["macdsignal_"+str(n)], inputs["macdhist_"+str(n)] = ta.MACD(inputs, n, n*2, n*2/3)
                inputs["mfi_"+str(n)] = ta.MFI(inputs, n)
                inputs["ult_"+str(n)] = ta.ULTOSC(inputs, n, n*2, n*4)
                inputs["willr_"+str(n)] = ta.WILLR(inputs, n)
                inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
                # NOTE(review): duplicated assignment (harmless).
                inputs["mom_"+str(n)] = ta.MOM(inputs, n)
                inputs["mom_"+str(n)] = ta.MOM(inputs, n)
            inputs["volume"] = list(map(lambda x: x/10000, inputs["volume"]))
            sec_df = pd.DataFrame().from_dict(inputs)
            # Drop rows with any NaN (indicator warm-up) and their prices.
            # NOTE(review): price.pop(idx) during iteration shifts later
            # indices — only safe if the NaN rows are a contiguous prefix;
            # confirm.
            for idx, val in sec_df.isnull().any(axis=1).iteritems():
                if val == True:
                    sec_df.drop(idx, inplace = True)
                    sec_Y.drop(idx, inplace = True)
                    price.pop(idx)
            prices.append(price)
            df = pd.concat([df, sec_df])
            Y = pd.concat([Y, sec_Y])
        prices = [j for i in prices for j in i]  # flatten list-of-lists
        ''' BUILD NEURAL NET INPUTS '''
        Y = np.vstack(Y.values)
        X = df.values
        if not raw:
            scaler = prep.StandardScaler().fit(X)
            X_norm = scaler.transform(X)
            from sklearn.externals import joblib
            # NOTE(review): scaler filename uses only the *last* security's
            # code ('sec' leaks from the loop); confirm intended.
            joblib.dump(scaler, "./stock_data/" + sec + ".scaler")
        else:
            X_norm = X
        trX, testX, trY, testY = train_test_split(X_norm, Y, test_size = 0.1, random_state=0)
        # NOTE(review): "price" holds only the last security's prices while
        # the flattened 'prices' goes unused — confirm which is intended.
        output = {"X_norm": X_norm, "Y": Y, "trX": trX, "trY": trY, "testX": testX, "testY": testY, "price": price}
        pickle.dump(output, open("./stock_data/" + (PICKLE_NAME if not raw else PICKLE_NAME + "_raw") + "_data.pickle", "wb"))
        return output
    else:
        print("Pickle found, loading...")
        _data = pickle.load(open("./stock_data/" + PICKLE_NAME + "_data.pickle", "rb"))
        trX, trY, testX, testY, price, X_norm, Y = _data["trX"], _data["trY"], _data["testX"], _data["testY"], _data["price"], _data["X_norm"], _data["Y"]
        return {"X_norm": X_norm, "Y": Y, "trX": trX, "trY": trY, "testX": testX, "testY": testY, "price": price}
def _build_indicators(data):
    """Compute the technical-indicator feature matrix for each security.

    :param data: iterable of per-security OHLCV DataFrames (quandl-style
        columns, either "Adj. *" or plain "Open"/"High"/...).
    :return: list of dicts with keys "data" (feature matrix, one row per
        surviving day) and "price" (close prices with NaN-feature rows
        removed).
    """
    # Fixes vs. original: removed the dead `while True:` wrapper (the body
    # unconditionally returned on the first pass), removed a duplicated
    # ta.MOM assignment, and replaced `== True` / `== None` comparisons
    # with idiomatic truthiness / `is None`.  Behavior is unchanged.
    data_with_ind = []
    for df in data:
        df = copy.deepcopy(df)  # avoid mutating the caller's frame
        # Normalize columns to lowercase open/high/low/close/volume,
        # preferring the dividend/split-adjusted series when present.
        if "Adj. Close" in df.columns:
            df = df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
            df.rename(columns=lambda x: x[5:].lower(), inplace=True)  # strip "Adj. "
        elif "Close" in df.columns:
            df = df[["Open", "High", "Low", "Close", "Volume"]]
            df.rename(columns=lambda x: x.lower(), inplace=True)
        df.reset_index(drop=True, inplace=True)
        inputs = df.to_dict(orient="list")
        for col in inputs:
            inputs[col] = np.array(inputs[col])
        # One family of indicators per look-back period n.
        for n in range(2, 40):
            inputs["bband_u_" + str(n)], inputs["bband_m_" + str(n)], inputs["bband_l_" + str(n)] = ta.BBANDS(inputs, n)
            inputs["sma_" + str(n)] = ta.SMA(inputs, timeperiod=n)
            inputs["adx_" + str(n)] = ta.ADX(inputs, timeperiod=n)
            inputs["macd_" + str(n)], inputs["macdsignal_" + str(n)], inputs["macdhist_" + str(n)] = ta.MACD(inputs, n, n * 2, n * 2 / 3)
            inputs["mfi_" + str(n)] = ta.MFI(inputs, n)
            inputs["ult_" + str(n)] = ta.ULTOSC(inputs, n, n * 2, n * 4)
            inputs["willr_" + str(n)] = ta.WILLR(inputs, n)
            inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
            inputs["mom_" + str(n)] = ta.MOM(inputs, n)
        inputs["volume"] = list(map(lambda x: x / 10000, inputs["volume"]))
        df = pd.DataFrame().from_dict(inputs)
        price = df["close"].values
        if isinstance(price, np.ndarray):
            price = price.tolist()
        # Drop rows containing any NaN (indicator warm-up) and void the
        # corresponding prices, then strip the voided prices out.
        for idx, val in df.isnull().any(axis=1).iteritems():
            if val:
                df.drop(idx, inplace=True)
                try:
                    price[idx] = None
                except IndexError:  # index out of range: give up on security
                    print("Error, failed to drop price on index", idx)
                    sys.exit(1)
        for i, p in reversed(list(enumerate(price))):
            actual_idx = len(price) - 1 - i
            if p is None:
                price.pop(actual_idx)
        print(df["adx_10"])  # NOTE(review): debug output, kept for parity
        X = df.values
        data_with_ind += [{"data": X, "price": price}]
    return data_with_ind
def build_data(raw = False, random_split = True, start_date = None, end_date = None, test_proportion = 0.1):
    """Coroutine: receive quandl stock codes, yield labeled training data.

    Prime with ``next()``, then ``send()`` a code like "WIKI/AAPL".  Each
    send yields either a dict with keys "X_norm", "Y", "trX", "trY",
    "testX", "testY", "price", or None for invalid/unfetchable codes.
    Results (and fitted scalers) are cached as pickles under ./stock_data.

    :param raw: skip StandardScaler normalization when true.
    :param random_split: shuffle-split train/test; otherwise clip the test
        fraction off the end of the series.
    :param start_date: optional quandl start date (also part of cache key).
    :param end_date: optional quandl end date (also part of cache key).
    :param test_proportion: fraction of samples held out for testing.
    """
    # Known-bad codes are persisted across runs in invalid_stocks.txt.
    with open("stock_data/invalid_stocks.txt", "r+") as f:
        invalid_stock_codes = [line.strip() for line in f]
    f = open("stock_data/invalid_stocks.txt", "a")
    stock_code = yield
    # NOTE(review): "while True and ..." — the "True and" is redundant.
    while True and stock_code is not None:
        # Reject codes containing "." and codes already known to be invalid,
        # yielding None until a usable code arrives.
        valid_stock = False
        while not valid_stock:
            if "." in stock_code:
                stock_code = yield None
                continue
            if stock_code in invalid_stock_codes:
                stock_code = yield None
                continue
            valid_stock = True
        sec = stock_code.split("/")[1]  # just the ticker, not the database code
        # Cache key encodes raw/split mode and the requested date range.
        pickle_name = sec
        if raw:
            pickle_name += "_raw"
        if not random_split:
            pickle_name += "_notrand"
        if start_date and end_date:
            pickle_name += start_date + "to" + end_date
        elif start_date:
            pickle_name += start_date
        elif end_date:
            pickle_name += "to" + end_date
        if not os.path.isfile("./stock_data/" + pickle_name + "_data.pickle"):
            try:
                df = quandl.get(stock_code, start_date = start_date, end_date = end_date)
            except quandl.errors.quandl_error.NotFoundError:
                # Remember the bad code persistently and in-session.
                invalid_stock_codes += [stock_code]
                f.write(stock_code + "\n")
                stock_code = yield None
                continue
            # Normalize columns to lowercase open/high/low/close/volume,
            # preferring the dividend/split-adjusted series when present.
            if "Adj. Close" in df.columns:
                df = df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
                df.rename(columns=lambda x: x[5:].lower(), inplace=True)  # Remove the "Adj. " and make lowercase
            elif "Close" in df.columns:
                df = df[["Open", "High", "Low", "Close", "Volume"]]
                df.rename(columns=lambda x: x.lower(), inplace=True)  # make lowercase
            # Label local minima [1,0] (buy) and maxima [0,1] (sell) where
            # the move within MIN_MAX_PERIOD days exceeds HI_LO_DIFF.
            price = df['close'].values
            minIdxs = argrelextrema(price, np.less)
            maxIdxs = argrelextrema(price, np.greater)
            Y = pd.Series(name="signal", dtype=np.ndarray, index=range(0, len(price)))
            n = 0
            for _, idx in np.ndenumerate(minIdxs):
                # if idx < MIN_MAX_PERIOD: continue
                max_price = max(price[idx: idx + MIN_MAX_PERIOD])
                if ((max_price - price[idx]) / price[idx]) > HI_LO_DIFF:  # rise from this minimum is large enough
                    Y.set_value(idx, np.array([1., 0.], np.float32))
                    n += 1
            # print("MINS:", n)
            n = 0
            for _, idx in np.ndenumerate(maxIdxs):
                # if idx < MIN_MAX_PERIOD: continue
                min_price = min(price[idx: idx + MIN_MAX_PERIOD])
                if ((price[idx] - min_price) / min_price) > HI_LO_DIFF:  # drop from this maximum is large enough
                    Y.set_value(idx, np.array([0., 1.], np.float32))
                    n += 1
            # print("MAXS:", n)
            # Fill the gaps between extrema: hold the most recent signal
            # ([1,0] after a min, [0,1] after a max, [0,0] before either).
            _min_idx, _max_idx = 0, 0
            for i, y in np.ndenumerate(Y.values):
                if np.array_equal(y, [1., 0.]):
                    _min_idx = i[0]
                elif np.array_equal(y, [0., 1.]):
                    _max_idx = i[0]
                else:
                    if _min_idx > _max_idx:
                        s = np.array([1., 0.])
                    elif _max_idx > _min_idx:
                        s = np.array([0., 1.])
                    else:
                        s = np.array([0., 0.])  # no action taken, only occurs at the beginnings of datasets, afaik
                    Y.set_value(i, s, np.float32)
            df.reset_index(drop=True, inplace = True)
            if isinstance(price, np.ndarray):
                price = price.tolist()
            ''' INDICATORS '''
            inputs = df.to_dict(orient="list")
            for col in inputs:
                inputs[col] = np.array(inputs[col])
            # One family of indicators per look-back period n.
            for n in range(2, 40):
                inputs["bband_u_"+str(n)], inputs["bband_m_"+str(n)], inputs["bband_l_"+str(n)] = ta.BBANDS(inputs, n)
                inputs["sma_"+str(n)] = ta.SMA(inputs, timeperiod = n)
                inputs["adx_"+str(n)] = ta.ADX(inputs, timeperiod = n)
                inputs["macd_"+str(n)], inputs["macdsignal_"+str(n)], inputs["macdhist_"+str(n)] = ta.MACD(inputs, n, n*2, n*2/3)
                inputs["mfi_"+str(n)] = ta.MFI(inputs, n)
                inputs["ult_"+str(n)] = ta.ULTOSC(inputs, n, n*2, n*4)
                inputs["willr_"+str(n)] = ta.WILLR(inputs, n)
                inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
                # NOTE(review): duplicated assignment (harmless).
                inputs["mom_"+str(n)] = ta.MOM(inputs, n)
                inputs["mom_"+str(n)] = ta.MOM(inputs, n)
            inputs["volume"] = list(map(lambda x: x/10000, inputs["volume"]))
            df = pd.DataFrame().from_dict(inputs)
            # Drop rows with any NaN (indicator warm-up), walking backwards
            # so pops don't shift indices still to be visited; a failed pop
            # marks the security as broken.
            broken = False
            for idx, val in reversed(list(df.isnull().any(axis=1).iteritems())):
                if val == True:
                    df.drop(idx, inplace = True)
                    Y.drop(idx, inplace = True)
                    try:
                        price.pop(idx)
                    except IndexError:  # drop the security entirely
                        broken = True
                        break
            ''' BUILD NEURAL NET INPUTS '''
            if not broken:
                Y = np.vstack(Y.values)
                print(df["adx_10"])
                X = df.values
                if not raw:
                    # Reuse a previously fitted scaler for this ticker/mode
                    # when present; otherwise fit and persist one.
                    rand = "_notrand" if not random_split else ""
                    if not os.path.isfile("./stock_data/" + sec + rand + ".scaler"):
                        scaler = prep.StandardScaler().fit(X)
                        X_norm = scaler.transform(X)
                        joblib.dump(scaler, "./stock_data/" + sec + rand + ".scaler")
                    else:
                        scaler = joblib.load("./stock_data/" + sec + rand + ".scaler")
                        X_norm = scaler.transform(X)
                else:
                    X_norm = X
                if random_split:
                    trX, testX, trY, testY = train_test_split(X_norm, Y, test_size = test_proportion, random_state=0)
                else:
                    # Just clips the test data off the end.
                    l = len(X_norm)
                    trX, testX = X_norm[:int(-test_proportion*l)], X_norm[int(-test_proportion*l):]
                    trY, testY = Y[:int(-test_proportion*l)], Y[int(-test_proportion*l):]
                output = {"X_norm": X_norm, "Y": Y, "trX": trX, "trY": trY, "testX": testX, "testY": testY, "price": price}
                pickle.dump(output, open("./stock_data/" + pickle_name + "_data.pickle", "wb"))
                stock_code = yield output
            else:
                invalid_stock_codes += [stock_code]
                f.write(stock_code + "\n")
                stock_code = yield None
        else:
            # Cached result: load and yield it directly.
            _data = pickle.load(open("./stock_data/" + pickle_name + "_data.pickle", "rb"))
            trX, trY, testX, testY, price, X_norm, Y = _data["trX"], _data["trY"], _data["testX"], _data["testY"], _data["price"], _data["X_norm"], _data["Y"]
            stock_code = yield {"X_norm": X_norm, "Y": Y, "trX": trX, "trY": trY, "testX": testX, "testY": testY, "price": price}