예제 #1
0
def calculator_talib(data):
    """Append TA-Lib indicator columns to an OHLCV frame.

    The five columns named by the externally defined ``OHLCV_columns``
    sequence are read in the order open, high, low, close, volume, stripped
    of NaNs, cast to float and handed to the ``abstract`` indicator
    functions. STOCH (``fastk_period=9``), MACD, OBV, WILLR and ATR are
    evaluated and joined to ``data`` column-wise.

    Args:
        data: DataFrame containing every column listed in ``OHLCV_columns``
            plus a ``'收盤價'`` column, whose index is stamped onto each
            indicator output.

    Returns:
        ``data`` followed by the KD, MACD, OBV, WILLR and ATR outputs, in
        that order, joined with ``pd.concat(..., axis=1)``.
    """
    field_names = ('open', 'high', 'low', 'close', 'volume')
    ohlcv = {
        field: data[OHLCV_columns[position]].dropna().astype(float)
        for position, field in enumerate(field_names)
    }

    def talib2df(talib_output):
        # A list result is laid out one entry per row; transposing turns each
        # entry into a column.
        if isinstance(talib_output, list):
            ret = pd.DataFrame(talib_output).transpose()
        else:
            ret = pd.Series(talib_output)
        # NOTE(review): the inputs were passed through dropna(); if any row
        # was dropped the lengths presumably differ and this assignment
        # raises -- confirm the input never contains NaN.
        ret.index = data['收盤價'].index
        return ret

    # Stochastic oscillator (KD).
    KD = talib2df(abstract.STOCH(ohlcv, fastk_period=9))
    # MACD.
    MACD = talib2df(abstract.MACD(ohlcv))
    # On-balance volume.
    OBV = talib2df(abstract.OBV(ohlcv))
    # Williams %R.
    WILLR = talib2df(abstract.WILLR(ohlcv))
    # Average true range.
    ATR = talib2df(abstract.ATR(ohlcv))

    return pd.concat([data, KD, MACD, OBV, WILLR, ATR], axis=1)
예제 #2
0
 def populate_indicators(self, dataframe: DataFrame,
                         metadata: dict) -> DataFrame:
     """Write the HMA, Williams %R and volume-EMA columns onto ``dataframe``.

     The window lengths come from the externally defined ``shma``, ``lhma``,
     ``pwill`` and ``pvol`` names. ``metadata`` is accepted but not used.
     The same ``dataframe`` object is returned.
     """
     # Hull moving averages of the close; one column per window, named
     # after the window length.
     for window in (shma, lhma):
         column = f'hma{window}'
         dataframe[column] = qtpylib.hma(dataframe['close'], window=window)

     # Williams %R over ``pwill`` bars.
     highs = dataframe['high']
     lows = dataframe['low']
     closes = dataframe['close']
     dataframe['willr'] = ta.WILLR(highs, lows, closes, timeperiod=pwill)

     # EMA taken over the volume column rather than the default price.
     volume_ema = ta.EMA(dataframe, timeperiod=pvol, price='volume')
     dataframe['vol_mean'] = volume_ema
     return dataframe
예제 #3
0
 def technical_index(self):
     """Return indicator and investor-flow columns rescaled for modelling.

     Starts from the frame returned by ``self.max_min_price()`` and the
     investor frame returned by ``self.institutional_investors()``, adds
     one rescaled column per indicator, then drops the raw
     ``volume/open/high/low/close/close_max/close_min`` columns and every
     row that still contains a NaN.

     Each indicator is evaluated once; the original recomputed it for
     every ``.max()`` lookup.

     Returns:
         DataFrame with the columns RSI, CMO, MACD, WILLR, WMA, PPO, EMA,
         ROC, SMA, TEMA, CCI, investment_trust and foreign_investor (plus
         any other columns the source frame carried).
     """
     def centred(series):
         # (x + max) / (2 * max): 1.0 at the series maximum and, when the
         # maximum is non-zero, 0.5 where x is zero.
         peak = series.max()
         return (series + peak) / (2 * peak)

     def peak_scaled(series):
         # x / max: 1.0 at the series maximum.
         return series / series.max()

     df = self.max_min_price()
     df2 = self.institutional_investors()

     # Fixed-range oscillators are rescaled with constants.
     df['RSI'] = abstract.RSI(df) / 100
     df['CMO'] = (abstract.CMO(df) + 100) / (2 * 100)
     df['MACD'] = centred(abstract.MACD(df)['macd'])
     df['WILLR'] = (abstract.WILLR(df) + 100) / (2 * 100)
     df['WMA'] = peak_scaled(abstract.WMA(df))
     df['PPO'] = centred(abstract.PPO(df))
     df['EMA'] = peak_scaled(abstract.EMA(df))
     df['ROC'] = centred(abstract.ROC(df))
     df['SMA'] = peak_scaled(abstract.SMA(df))
     df['TEMA'] = peak_scaled(abstract.TEMA(df))
     df['CCI'] = centred(abstract.CCI(df))

     # Investor flows get the same max-centred rescaling.
     df['investment_trust'] = centred(df2['investment_trust'])
     df['foreign_investor'] = centred(df2['foreign_investor'])

     df = df.drop(columns=['volume', 'open', 'high', 'low', 'close', 'close_max', 'close_min'])
     df = df.dropna()
     return df
예제 #4
0
def TA_processing(dataframe):
    """Add the technical-analysis feature columns to ``dataframe``.

    Columns are assigned directly on the frame that was passed in and
    nothing is returned. The helper calls (``bias``, ``moving_average``,
    ``counter_daily_potential``, ``psy_line``, ``volumn_ratio``,
    ``on_balance_volume``) have their return values ignored, so they
    presumably add their columns in place as well -- verify against their
    definitions.

    MACD, BBANDS and STOCH are each evaluated once and their outputs
    reused; the original recomputed them for every output column.

    Args:
        dataframe: frame accepted by the ``abstract`` indicator functions.
    """
    bias(dataframe, days=[3, 6, 10, 25])
    moving_average(dataframe, days=[5, 10, 20])
    dataframe['ROC'] = abstract.ROC(dataframe, timeperiod=10)

    macd = abstract.MACD(dataframe, fastperiod=12, slowperiod=26, signalperiod=9)
    dataframe['MACD'] = macd['macd']
    dataframe['MACD_signal'] = macd['macdsignal']

    bands = abstract.BBANDS(dataframe, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    dataframe['UBBANDS'] = bands['upperband']
    dataframe['MBBANDS'] = bands['middleband']
    dataframe['LBBANDS'] = bands['lowerband']

    # The remaining oscillators are divided by 100 before being stored.
    stoch = abstract.STOCH(dataframe, fastk_period=9)
    dataframe['%K'] = stoch['slowk'] / 100
    dataframe['%D'] = stoch['slowd'] / 100
    dataframe['W%R'] = abstract.WILLR(dataframe, timeperiod=14) / 100
    dataframe['RSI9'] = abstract.RSI(dataframe, timeperiod=9) / 100
    dataframe['RSI14'] = abstract.RSI(dataframe, timeperiod=14) / 100
    dataframe['CCI'] = abstract.CCI(dataframe, timeperiod=14) / 100
    counter_daily_potential(dataframe)
    dataframe['MOM'] = abstract.MOM(dataframe, timeperiod=10)
    dataframe['DX'] = abstract.DX(dataframe, timeperiod=14) / 100
    psy_line(dataframe)
    volumn_ratio(dataframe, d=26)
    on_balance_volume(dataframe)
예제 #5
0
def TKE(dataframe, *, length=14, emaperiod=5):
    """Compute the TKE oscillator and its EMA.

    Source: https://www.tradingview.com/script/Pcbvo0zG/
    Author: Dr Yasar ERDINC

    The calculation is simple:
    TKE = (RSI + STOCHASTIC + ULTIMATE OSCILLATOR + MFI + WILLIAMS %R
           + MOMENTUM + CCI) / 7
    Buy signal: when TKE crosses above the 20 value
    Oversold region: under the 20 value
    Overbought region: over the 80 value

    Another usage of TKE is with its EMA; the default is a 5-bar EMA of
    the TKE line.
    Go long: when TKE crosses above the EMA line
    Go short: when TKE crosses below the EMA line

    Args:
        dataframe: OHLCV frame; it is copied, not modified.
        length: look-back used for every component except the ultimate
            oscillator, which uses fixed 7/14/28 periods.
        emaperiod: period of the EMA applied to the TKE line.

    Returns:
        Tuple ``(TKE, TKEema)`` of two Series.

    Usage:
        `dataframe['TKE'], dataframe['TKEema'] = TKE(dataframe)`
    """
    import talib.abstract as ta
    df = dataframe.copy()

    # Rolling extremes feed the stochastic term; compute each once.
    lowest_low = df['low'].rolling(window=length).min()
    highest_high = df['high'].rolling(window=length).max()

    df["rsi"] = ta.RSI(df, timeperiod=length)
    df['stoch'] = 100 * (df['close'] - lowest_low) / (highest_high - lowest_low)
    df["ultosc"] = ta.ULTOSC(df, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    df["mfi"] = ta.MFI(df, timeperiod=length)
    df["willr"] = ta.WILLR(df, timeperiod=length)
    # The momentum term is taken from ROCR100.
    df["mom"] = ta.ROCR100(df, timeperiod=length)
    df["cci"] = ta.CCI(df, timeperiod=length)

    components = ['rsi', 'stoch', 'ultosc', 'mfi', 'willr', 'mom', 'cci']
    # Row-wise mean of the seven components (pandas skips NaN by default).
    df['TKE'] = df[components].mean(axis='columns')
    df["TKEema"] = ta.EMA(df["TKE"], timeperiod=emaperiod)
    return df["TKE"], df["TKEema"]
예제 #6
0
    def _build_indicators(self, df):
        """Return ``df``'s price data extended with indicators for periods 2..39.

        Two code paths, selected by ``self.realtime``:

        * not realtime: the whole frame is converted to a dict of arrays and
          every indicator is computed over the full history in one call to
          the ``ta`` functions (presumably ``talib.abstract`` -- confirm
          against the file's imports).
        * realtime: the rows of ``df`` are replayed one at a time through a
          growing window and only the latest value of each indicator is
          recorded, using the ``stream`` functions (presumably
          ``talib.stream``) and a pandas ``ewm`` MACD.

        In both paths the volume is divided by 10000 after the indicator
        loop. The two paths insert their keys in different orders (the
        realtime path writes ``volume`` last).

        Args:
            df: frame with ``open``, ``high``, ``low``, ``close`` and
                ``volume`` columns.

        Returns:
            A new DataFrame with one column per indicator/period.
        """
        if not self.realtime:
            inputs = df.to_dict(orient="list")
            for col in inputs:
                inputs[col] = np.array(inputs[col])

            # Only referenced by the commented-out ewm MACD variant below.
            c = df["close"]
            # Every indicator is evaluated once per period n in 2..39.
            for n in range(2, 40):
                inputs["bband_u_" +
                       str(n)], inputs["bband_m_" +
                                       str(n)], inputs["bband_l_" +
                                                       str(n)] = ta.BBANDS(
                                                           inputs, n)
                inputs["sma_" + str(n)] = ta.SMA(inputs, timeperiod=n)
                inputs["adx_" + str(n)] = ta.ADX(inputs, timeperiod=n)

                # Disabled pandas-ewm MACD variant (the realtime branch uses
                # this formulation):
                # fast_ema = c.ewm(span = n, adjust = False).mean()
                # slow_ema = c.ewm(span = n*2, adjust = False).mean()
                # macd1 = fast_ema - slow_ema
                # macd2 = macd1.ewm(span = int(n*2/3), adjust = False).mean()
                # macd3 = macd1 - macd2
                # inputs["macd_"+str(n)] = macd1.values
                # inputs["macdsignal_"+str(n)] = macd2.values
                # inputs["macdhist_"+str(n)] = macd3.values

                # MACD with fast period n, slow period 2n and signal period
                # int(2n/3). NOTE(review): int(2 * 2 / 3) is already 1, so
                # for n == 2 both branches pass the same arguments.
                if n != 2:
                    inputs["macd_" +
                           str(n)], inputs["macdsignal_" +
                                           str(n)], inputs["macdhist_" +
                                                           str(n)] = ta.MACD(
                                                               inputs, n,
                                                               n * 2,
                                                               int(n * 2 / 3))
                else:
                    inputs["macd_" +
                           str(n)], inputs["macdsignal_" +
                                           str(n)], inputs["macdhist_" +
                                                           str(n)] = ta.MACD(
                                                               inputs, n,
                                                               n * 2, 1)

                inputs["mfi_" + str(n)] = ta.MFI(inputs, n)
                inputs["ult_" + str(n)] = ta.ULTOSC(inputs, n, n * 2, n * 4)
                inputs["willr_" + str(n)] = ta.WILLR(inputs, n)
                # Not period-dependent: the same call is repeated on every
                # pass. Its first execution fixes where the slowk/slowd keys
                # sit in the dict order.
                inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
                inputs["mom_" + str(n)] = ta.MOM(inputs, n)

            # Rescale the volume only after the indicator loop has run.
            inputs["volume"] = list(map(lambda x: x / 10000, inputs["volume"]))
            df = pd.DataFrame().from_dict(inputs)

            return df

        else:
            # Build the data row by row, as if it were arriving one day at a
            # time.
            # NOTE(review): DataFrame.append (used twice below) was removed
            # in pandas 2.0 -- confirm the pandas version this file targets.
            output = pd.DataFrame()
            sliding_window = pd.DataFrame()

            for idx, day in df.iterrows():
                print("\rNow building day", str(idx), end="", flush=True)
                # Deep-copy the row before appending it to the window.
                day = copy.deepcopy(day)
                sliding_window = sliding_window.append(day, ignore_index=True)
                day_out = {}

                # Array views of the window so far; ``o`` is not used below.
                o = sliding_window["open"].values
                h = sliding_window["high"].values
                l = sliding_window["low"].values
                c_series = sliding_window["close"]
                c = sliding_window["close"].values
                v = sliding_window["volume"].values

                # Copy the latest OHLC values through unchanged.
                for t in ["open", "high", "low", "close"]:
                    day_out[t] = sliding_window[t].values[-1]

                for n in range(2, 40):
                    day_out["bband_u_" +
                            str(n)], day_out["bband_m_" + str(n)], day_out[
                                "bband_l_" + str(n)] = stream.BBANDS(c, n)
                    day_out["sma_" + str(n)] = stream.SMA(c, timeperiod=n)
                    day_out["adx_" + str(n)] = stream.ADX(h,
                                                          l,
                                                          c,
                                                          timeperiod=n)

                    # MACD built from pandas exponentially weighted means
                    # (adjust=False) over the close series; only the latest
                    # value of each line is kept.
                    fast_ema = c_series.ewm(span=n, adjust=False).mean()
                    slow_ema = c_series.ewm(span=n * 2, adjust=False).mean()
                    macd1 = fast_ema - slow_ema
                    macd2 = macd1.ewm(span=int(n * 2 / 3), adjust=False).mean()
                    macd3 = macd1 - macd2
                    day_out["macd_" + str(n)] = macd1.values[-1]
                    day_out["macdsignal_" + str(n)] = macd2.values[-1]
                    day_out["macdhist_" + str(n)] = macd3.values[-1]
                    # Disabled stream.MACD variant:
                    # if n != 2:
                    #     day_out["macd_"+str(n)], day_out["macdsignal_"+str(n)], day_out["macdhist_"+str(n)] = stream.MACD(c, n, n*2, int(n*2/3))
                    # elif idx > 100:
                    #     macd =  ta.MACD({"close":c}, n, n*2, 1)
                    #     day_out["macd_2"], day_out["macdsignal_2"], day_out["macdhist_2"] = (x[-1] for x in macd)
                    # else:
                    #     day_out["macd_2"], day_out["macdsignal_2"], day_out["macdhist_2"] = None, None, None

                    day_out["mfi_" + str(n)] = stream.MFI(h, l, c, v, n)
                    day_out["ult_" + str(n)] = stream.ULTOSC(
                        h, l, c, n, n * 2, n * 4)
                    day_out["willr_" + str(n)] = stream.WILLR(h, l, c, n)
                    # Not period-dependent: repeated with identical arguments
                    # on every pass.
                    day_out["slowk"], day_out["slowd"] = stream.STOCH(h, l, c)
                    day_out["mom_" + str(n)] = stream.MOM(c, n)

                # Volume is rescaled and written last.
                day_out["volume"] = v[-1] / 10000

                output = output.append(day_out, ignore_index=True)

            return output
예제 #7
0
    def populate_indicators(self, dataframe: DataFrame) -> DataFrame:
        """Attach moving averages, oscillators, bands and resampled RSI columns.

        Indicator columns are written onto ``dataframe``. Two resampled
        copies are built with intervals of 3x and 7x the value returned by
        ``self.get_ticker_indicator()``; an RSI is computed on each and they
        are merged back. The merged frame is forward-filled, extended with
        the Fisher-transformed RSI, the resampled-RSI aliases and the OHLC
        average, and returned.
        """
        from technical.util import resample_to_interval, resampled_merge

        dataframe['sma'] = ta.SMA(dataframe, timeperiod=40)

        # Exponential moving averages.
        for span in (3, 5, 10, 20, 50, 100, 200):
            dataframe['ema{}'.format(span)] = ta.EMA(dataframe, timeperiod=span)

        # Slow stochastic with two parameter sets:
        # (fastk period, slowk/slowd period, column suffix).
        for fastk, smoothing, suffix in ((5, 2, '15'), (10, 3, '')):
            stoch = ta.STOCH(dataframe,
                             fastk_period=fastk,
                             slowk_period=smoothing,
                             slowk_matype=0,
                             slowd_period=smoothing,
                             slowd_matype=0)
            dataframe['slowd' + suffix] = stoch['slowd']
            dataframe['slowk' + suffix] = stoch['slowk']

        # Fast stochastic.
        stoch_fast = ta.STOCHF(dataframe)
        for line in ('fastd', 'fastk'):
            dataframe[line] = stoch_fast[line]

        # Directional indicators.
        dataframe['minus_di'] = ta.MINUS_DI(dataframe, timeperiod=24)
        dataframe['plus_di'] = ta.PLUS_DI(dataframe, timeperiod=24)

        # Lower band from the TA-Lib Bollinger implementation.
        talib_bands = ta.BBANDS(dataframe, nbdevup=2, nbdevdn=2)
        dataframe['blower'] = talib_bands['lowerband']

        # Bollinger bands over the typical price.
        typical = qtpylib.typical_price(dataframe)
        bollinger = qtpylib.bollinger_bands(typical, window=20, stds=2)
        for target, source in (('bb_lowerband', 'lower'),
                               ('bb_middleband', 'mid'),
                               ('bb_upperband', 'upper')):
            dataframe[target] = bollinger[source]

        # Simple moving averages (220 is written before 200, as before).
        for span in (3, 5, 10, 20, 50, 100, 220, 200):
            dataframe['sma{}'.format(span)] = ta.SMA(dataframe, timeperiod=span)

        dataframe['willr'] = ta.WILLR(dataframe, timeperiod=28)

        # Resampled copies are taken here, before cci/mfi/CDLHAMMER exist.
        dataframe_short = resample_to_interval(
            dataframe, self.get_ticker_indicator() * 3)
        dataframe_long = resample_to_interval(
            dataframe, self.get_ticker_indicator() * 7)

        # RSI on each resampled frame.
        dataframe_short['rsi'] = ta.RSI(dataframe_short, timeperiod=14)
        dataframe_long['rsi'] = ta.RSI(dataframe_long, timeperiod=14)

        dataframe['cci'] = ta.CCI(dataframe, timeperiod=20)
        dataframe['mfi'] = ta.MFI(dataframe)
        dataframe['CDLHAMMER'] = ta.CDLHAMMER(dataframe)

        # Merge the resampled frames back, short interval first.
        for resampled in (dataframe_short, dataframe_long):
            dataframe = resampled_merge(dataframe, resampled)

        dataframe['rsi'] = ta.RSI(dataframe, timeperiod=14)

        dataframe.fillna(method='ffill', inplace=True)

        # Inverse Fisher transform on RSI, values [-1.0, 1.0]
        # (https://goo.gl/2JGGoy)
        dataframe['fisher_rsi'] = fishers_inverse(dataframe['rsi'])
        # The same transform rescaled to [0.0, 100.0].
        dataframe['fisher_rsi_norma'] = 50 * (dataframe['fisher_rsi'] + 1)

        # Aliases for the merged resampled RSI columns.
        short_rsi_column = 'resample_{}_rsi'.format(
            self.get_ticker_indicator() * 3)
        dataframe['resample_rsi_2'] = dataframe[short_rsi_column]
        long_rsi_column = 'resample_{}_rsi'.format(
            self.get_ticker_indicator() * 7)
        dataframe['resample_rsi_8'] = dataframe[long_rsi_column]

        # Mean of the four OHLC prices.
        ohlc_total = (dataframe['close'] + dataframe['open'] +
                      dataframe['high'] + dataframe['low'])
        dataframe['average'] = ohlc_total / 4

        return dataframe
예제 #8
0
def _build_indicators(num_secs):  # accepts a list of one-day Series
    """Generator that turns streamed daily data into indicator rows.

    Protocol, as visible in this block: the first ``send`` supplies an
    iterable of items that seed one sliding window per security; each
    later ``send`` supplies the next day's items and receives
    ``data_with_ind``, a list of ``{"data": <last indicator row as an
    array>, "price": <last close rounded to 2 places>}`` dicts.

    Indicators are computed for periods 2..39 with the ``ta`` functions
    (presumably ``talib.abstract`` -- confirm against the imports) plus a
    pandas ``ewm`` MACD.

    NOTE(review): this block looks like work-in-progress debugging code;
    see the inline notes on ``sys.exit(69)``, the per-security reset of
    ``data_with_ind`` and the retry loop.

    Args:
        num_secs: number of securities, i.e. how many sliding windows are
            indexed on each pass.
    """

    sec_idx_range = range(num_secs)
    sliding_window = []  # list of pd.DataFrames

    # Primed by the first send(): the initial data for every security.
    data = yield

    for datum in data:
        sliding_window += [_rename_columns(datum)]

    current_day = 0
    while True:
        passes_validity_check, num_validation_iterations = False, 0
        while not passes_validity_check:
            for i in sec_idx_range:  # for each security
                if current_day != 0:
                    # After day 170 the window stops growing: the oldest
                    # row is dropped, but only on the first pass of a day.
                    if current_day > 170 and num_validation_iterations == 0:
                        sliding_window[i] = sliding_window[i].iloc[
                            1:]  # drop the oldest row

                    # NOTE(review): every datum in ``data`` is appended to
                    # window i, not just the one for security i -- confirm
                    # that this is intended when num_secs > 1.
                    for datum in data:
                        if num_validation_iterations == 0:
                            sliding_window[i] = sliding_window[i].append(
                                _rename_columns(datum))

                # NOTE(review): reset inside the per-security loop, so the
                # list yielded below only ever holds the entry of the last
                # security processed -- confirm.
                data_with_ind = []

                series = sliding_window[i]
                series = series.reset_index(drop=True)

                inputs = series.to_dict(orient="list")
                for col in inputs:
                    inputs[col] = np.array(inputs[col])

                c = series.close
                # Every indicator is evaluated once per period n in 2..39.
                for n in range(2, 40):
                    inputs["bband_u_" +
                           str(n)], inputs["bband_m_" +
                                           str(n)], inputs["bband_l_" +
                                                           str(n)] = ta.BBANDS(
                                                               inputs, n)
                    inputs["sma_" + str(n)] = ta.SMA(inputs, timeperiod=n)
                    inputs["adx_" + str(n)] = ta.ADX(inputs, timeperiod=n)
                    # MACD from pandas exponentially weighted means with
                    # spans n, 2n and 2n/3 (the last one is not an integer
                    # in general).
                    fast_ema = c.ewm(span=n).mean()
                    slow_ema = c.ewm(span=n * 2).mean()

                    macd1 = fast_ema - slow_ema
                    macd2 = macd1.ewm(span=n * 2 / 3).mean()
                    macd3 = macd1 - macd2
                    # Only the latest value of each MACD line is stored
                    # (scalars, unlike the array-valued indicators).
                    inputs["macd_" +
                           str(n)], inputs["macdsignal_" + str(n)], inputs[
                               "macdhist_" + str(n)] = macd1.iloc[
                                   -1], macd2.iloc[-1], macd3.iloc[-1]

                    # Debug hook: on day 160 print the MACD series for the
                    # first period reached and terminate the process with
                    # exit status 69.
                    if current_day == 160:
                        print(n)
                        print(macd1, macd2, macd3)
                        sys.exit(69)

                    inputs["mfi_" + str(n)] = ta.MFI(inputs, n)
                    inputs["ult_" + str(n)] = ta.ULTOSC(
                        inputs, n, n * 2, n * 4)
                    inputs["willr_" + str(n)] = ta.WILLR(inputs, n)
                    # Not period-dependent: repeated with identical
                    # arguments on every pass.
                    inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
                    # The MOM assignment appears twice with identical
                    # arguments; the second overwrites the first.
                    inputs["mom_" + str(n)] = ta.MOM(inputs, n)
                    inputs["mom_" + str(n)] = ta.MOM(inputs, n)

                # Rescale the volume only after the indicator loop has run.
                inputs["volume"] = list(
                    map(lambda x: x / 10000, inputs["volume"]))

                series = pd.DataFrame().from_dict(inputs)

                # Latest close; converted to a list if it is an array.
                price = series["close"].iloc[-1]
                if isinstance(price, np.ndarray):
                    price = price.tolist()

                # Feature vector: the last row of the indicator frame.
                X = series.iloc[-1].values

                # Before day 170 any row is accepted, NaNs included.
                if current_day < 170:
                    passes_validity_check = True

                elif not np.isnan(X).any():
                    passes_validity_check = True

                else:
                    # NOTE(review): once num_validation_iterations is
                    # non-zero the windows are neither trimmed nor extended,
                    # so a retry recomputes from unchanged data -- confirm
                    # that this loop can terminate.
                    num_validation_iterations += 1
                    print("Reevaluating, iteration", num_validation_iterations,
                          "day:", current_day)

                data_with_ind += [{"data": X, "price": round(price, 2)}]

        # Hand back this day's result and wait for the next day's data.
        data = yield data_with_ind
        current_day += 1
예제 #9
0
def build_data_to_dict(secs, raw=False):
    """Build, or load from a pickle cache, the training data for ``secs``.

    For each security the price history is fetched with ``quandl.get``,
    every bar is labelled, indicators for periods 2..39 are computed, and
    rows containing NaN are discarded. The results of all securities are
    stacked, optionally standardised, split 90/10 and cached under
    ``./stock_data/``.

    Labels (int32 triples):
        ``[1, 0, 0]`` -- local minimum followed, within ``MIN_MAX_PERIOD``
        samples, by a relative rise greater than ``HI_LO_DIFF``;
        ``[0, 0, 1]`` -- local maximum followed by a relative fall greater
        than ``HI_LO_DIFF``;
        ``[0, 1, 0]`` -- everything else.

    Fixes relative to the previous implementation:
        * the cache lookup uses the same ``_raw``-aware file name as the
          dump (before, a raw build was never found again and a raw request
          could load the normalised cache);
        * prices are filtered with the same mask as the feature rows
          (before, ``list.pop`` was called with indices that shifted after
          every removal);
        * ``"price"`` holds the prices of all securities, in row order with
          ``X_norm`` (before, only the last security's list was returned
          although the flattened list had been computed);
        * the MACD signal period is an integer (``n * 2 // 3``);
        * pickle files are opened with context managers.

    Args:
        secs: sequence of dataset codes; the first five characters of each
            are stripped to form the cache name.
        raw: when true, skip standardisation and use the ``_raw`` cache.

    Returns:
        Dict with keys ``X_norm``, ``Y``, ``trX``, ``trY``, ``testX``,
        ``testY`` and ``price``.
    """
    PICKLE_NAME = "_".join(s[5:] for s in secs)
    print("SECURITIES: ", PICKLE_NAME.split("_"))

    # One path for both the existence check and the dump.
    cache_stem = PICKLE_NAME + "_raw" if raw else PICKLE_NAME
    pickle_path = "./stock_data/" + cache_stem + "_data.pickle"

    if os.path.isfile(pickle_path):
        print("Pickle found, loading...")
        with open(pickle_path, "rb") as fh:
            _data = pickle.load(fh)
        keys = ("X_norm", "Y", "trX", "trY", "testX", "testY", "price")
        return {key: _data[key] for key in keys}

    print("No pickle found, getting data...")
    frames = []
    label_blocks = []
    prices = []
    for sec in secs:
        sec_df = quandl.get(sec)

        if "Adj. Close" in sec_df.columns:
            sec_df = sec_df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
            # Remove the "Adj. " prefix and make lowercase.
            sec_df = sec_df.rename(columns=lambda x: x[5:].lower())
        elif "Close" in sec_df.columns:
            sec_df = sec_df[["Open", "High", "Low", "Close", "Volume"]]
            sec_df = sec_df.rename(columns=lambda x: x.lower())

        print("Calculating output for", sec)
        price = sec_df['close'].values
        minIdxs = argrelextrema(price, np.less)
        maxIdxs = argrelextrema(price, np.greater)

        # Start every bar at the neutral label; extrema overwrite it below.
        labels = np.tile(np.array([0, 1, 0], np.int32), (len(price), 1))

        n = 0
        for idx in minIdxs[0]:
            if idx < MIN_MAX_PERIOD:
                continue
            max_price = max(price[idx: idx + MIN_MAX_PERIOD])
            # Rise from this minimum to the window high exceeds the threshold.
            if ((max_price - price[idx]) / price[idx]) > HI_LO_DIFF:
                labels[idx] = (1, 0, 0)
                n += 1
        print("MINS:", n)

        n = 0
        for idx in maxIdxs[0]:
            if idx < MIN_MAX_PERIOD:
                continue
            min_price = min(price[idx: idx + MIN_MAX_PERIOD])
            # Fall from this maximum to the window low exceeds the threshold.
            if ((price[idx] - min_price) / min_price) > HI_LO_DIFF:
                labels[idx] = (0, 0, 1)
                n += 1
        print("MAXS:", n)

        sec_df = sec_df.reset_index(drop=True)

        print("Building indicators...")
        inputs = {
            col: np.array(values)
            for col, values in sec_df.to_dict(orient="list").items()
        }

        for period in range(2, 40):
            tag = str(period)
            inputs["bband_u_" + tag], inputs["bband_m_" + tag], inputs["bband_l_" + tag] = ta.BBANDS(inputs, period)
            inputs["sma_" + tag] = ta.SMA(inputs, timeperiod=period)
            inputs["adx_" + tag] = ta.ADX(inputs, timeperiod=period)
            # Floor division keeps the signal period an integer.
            inputs["macd_" + tag], inputs["macdsignal_" + tag], inputs["macdhist_" + tag] = ta.MACD(
                inputs, period, period * 2, period * 2 // 3)
            inputs["mfi_" + tag] = ta.MFI(inputs, period)
            inputs["ult_" + tag] = ta.ULTOSC(inputs, period, period * 2, period * 4)
            inputs["willr_" + tag] = ta.WILLR(inputs, period)
            # Kept inside the loop on purpose: its first execution fixes the
            # position of slowk/slowd in the feature column order.
            inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
            inputs["mom_" + tag] = ta.MOM(inputs, period)

        # Rescale the volume only after the indicator loop has run.
        inputs["volume"] = [v / 10000 for v in inputs["volume"]]

        sec_df = pd.DataFrame.from_dict(inputs)

        # Drop rows with NaN from features, labels and prices using one mask
        # so the three stay aligned.
        complete = ~sec_df.isnull().any(axis=1).values
        frames.append(sec_df[complete])
        label_blocks.append(labels[complete])
        prices.extend(price[complete].tolist())

    # Build the neural-net inputs.
    df = pd.concat(frames)
    Y = np.vstack(label_blocks)
    X = df.values

    if not raw:
        scaler = prep.StandardScaler().fit(X)
        X_norm = scaler.transform(X)
        from sklearn.externals import joblib
        # NOTE(review): ``sec`` is the last code in ``secs`` with its prefix
        # intact, so the scaler path contains that prefix -- confirm.
        joblib.dump(scaler, "./stock_data/" + sec + ".scaler")
    else:
        X_norm = X

    trX, testX, trY, testY = train_test_split(X_norm, Y, test_size=0.1, random_state=0)
    output = {"X_norm": X_norm, "Y": Y, "trX": trX, "trY": trY, "testX": testX, "testY": testY, "price": prices}
    with open(pickle_path, "wb") as fh:
        pickle.dump(output, fh)
    return output
예제 #10
0
def _build_indicators(data):
    """Compute the indicator matrix and matching close prices per frame.

    Each frame in ``data`` is deep-copied, reduced to lower-case OHLCV
    columns (the ``Adj. `` variants are preferred when present), extended
    with indicators for periods 2..39, and stripped of every row that
    contains a NaN.

    Fixes relative to the previous implementation:
        * prices are filtered with the same mask as the rows (before, the
          clean-up popped a mirrored index, which removed real prices and
          left the ``None`` markers in place);
        * the MACD signal period is an integer (``n * 2 // 3``);
        * the ``while True`` wrapper around an unconditional ``return`` and
          the duplicated MOM call are gone.

    Args:
        data: iterable of price DataFrames.

    Returns:
        List with one ``{"data": <2-D array of rows>, "price": <list of
        closes for those rows>}`` dict per input frame.
    """
    data_with_ind = []

    for df in data:
        df = copy.deepcopy(df)

        if "Adj. Close" in df.columns:
            df = df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
            # Remove the "Adj. " prefix and make lowercase.
            df = df.rename(columns=lambda x: x[5:].lower())
        elif "Close" in df.columns:
            df = df[["Open", "High", "Low", "Close", "Volume"]]
            df = df.rename(columns=lambda x: x.lower())

        df = df.reset_index(drop=True)

        inputs = {
            col: np.array(values)
            for col, values in df.to_dict(orient="list").items()
        }

        for period in range(2, 40):
            tag = str(period)
            inputs["bband_u_" + tag], inputs["bband_m_" + tag], inputs["bband_l_" + tag] = ta.BBANDS(inputs, period)
            inputs["sma_" + tag] = ta.SMA(inputs, timeperiod=period)
            inputs["adx_" + tag] = ta.ADX(inputs, timeperiod=period)
            # Floor division keeps the signal period an integer.
            inputs["macd_" + tag], inputs["macdsignal_" + tag], inputs["macdhist_" + tag] = ta.MACD(
                inputs, period, period * 2, period * 2 // 3)
            inputs["mfi_" + tag] = ta.MFI(inputs, period)
            inputs["ult_" + tag] = ta.ULTOSC(inputs, period, period * 2, period * 4)
            inputs["willr_" + tag] = ta.WILLR(inputs, period)
            # Kept inside the loop on purpose: its first execution fixes the
            # position of slowk/slowd in the column order.
            inputs["slowk"], inputs["slowd"] = ta.STOCH(inputs)
            inputs["mom_" + tag] = ta.MOM(inputs, period)

        # Rescale the volume only after the indicator loop has run.
        inputs["volume"] = [v / 10000 for v in inputs["volume"]]

        df = pd.DataFrame.from_dict(inputs)

        # Keep only complete rows; prices come from the same filtered frame
        # so they stay aligned with X.
        complete = ~df.isnull().any(axis=1)
        df = df[complete]
        price = df["close"].tolist()

        # Debug output retained from the original implementation.
        print(df["adx_10"])
        X = df.values

        data_with_ind += [{"data": X, "price": price}]

    return data_with_ind
예제 #11
0
def _cache_stem(sec, raw, random_split, start_date, end_date):
    """Return the cache-file stem encoding the ticker and the build options."""
    stem = sec
    if raw:
        stem += "_raw"
    if not random_split:
        stem += "_notrand"

    if start_date and end_date:
        stem += start_date + "to" + end_date
    elif start_date:
        stem += start_date
    elif end_date:
        stem += "to" + end_date
    return stem


def _select_ohlcv(df):
    """Return df reduced to lowercase open/high/low/close/volume columns."""
    if "Adj. Close" in df.columns:
        adjusted = df[["Adj. Open", "Adj. High", "Adj. Low", "Adj. Close", "Adj. Volume"]]
        # Strip the 5-character "Adj. " prefix, then lowercase.
        return adjusted.rename(columns=lambda x: x[5:].lower())
    if "Close" in df.columns:
        plain = df[["Open", "High", "Low", "Close", "Volume"]]
        return plain.rename(columns=lambda x: x.lower())
    return df


def _label_turning_points(price):
    """Return an (n, 2) float array of [1, 0] / [0, 1] / [0, 0] labels for price."""
    n_rows = len(price)
    labels = np.zeros((n_rows, 2))
    is_signal = np.zeros(n_rows, dtype=bool)

    # Local minimum followed, within MIN_MAX_PERIOD rows, by a rise of more
    # than HI_LO_DIFF (as a fraction of the minimum) -> [1, 0] ("buy").
    for idx in argrelextrema(price, np.less)[0]:
        max_price = max(price[idx: idx + MIN_MAX_PERIOD])
        if ((max_price - price[idx]) / price[idx]) > HI_LO_DIFF:
            labels[idx] = (1., 0.)
            is_signal[idx] = True

    # Local maximum followed, within MIN_MAX_PERIOD rows, by a drop of more
    # than HI_LO_DIFF (as a fraction of the low) -> [0, 1] ("sell").
    for idx in argrelextrema(price, np.greater)[0]:
        min_price = min(price[idx: idx + MIN_MAX_PERIOD])
        if ((price[idx] - min_price) / min_price) > HI_LO_DIFF:
            labels[idx] = (0., 1.)
            is_signal[idx] = True

    # Every other row repeats the most recent signal; rows before the first
    # signal stay [0, 0] (no action).
    current = np.zeros(2)
    for i in range(n_rows):
        if is_signal[i]:
            current = labels[i].copy()
        else:
            labels[i] = current
    return labels


def _indicator_frame(df):
    """Return a DataFrame holding df's columns plus the TA indicator columns."""
    inputs = {col: np.array(values) for col, values in df.to_dict(orient="list").items()}

    # STOCH is called without a period, so one evaluation is enough.
    slowk, slowd = ta.STOCH(inputs)

    for n in range(2, 40):
        tag = str(n)
        (inputs["bband_u_" + tag],
         inputs["bband_m_" + tag],
         inputs["bband_l_" + tag]) = ta.BBANDS(inputs, n)
        inputs["sma_" + tag] = ta.SMA(inputs, timeperiod=n)
        inputs["adx_" + tag] = ta.ADX(inputs, timeperiod=n)
        # Floor division keeps the signal period an int on Python 3 as well.
        (inputs["macd_" + tag],
         inputs["macdsignal_" + tag],
         inputs["macdhist_" + tag]) = ta.MACD(inputs, n, n * 2, n * 2 // 3)
        inputs["mfi_" + tag] = ta.MFI(inputs, n)
        inputs["ult_" + tag] = ta.ULTOSC(inputs, n, n * 2, n * 4)
        inputs["willr_" + tag] = ta.WILLR(inputs, n)
        # Assigned here (not after the loop) so the two columns keep the
        # position they have always had in the feature matrix.
        inputs["slowk"], inputs["slowd"] = slowk, slowd
        inputs["mom_" + tag] = ta.MOM(inputs, n)

    inputs["volume"] = inputs["volume"] / 10000

    return pd.DataFrame.from_dict(inputs)


def build_data(raw=False, random_split=True, start_date=None, end_date=None, test_proportion=0.1):
    """Coroutine-style generator that builds (or loads cached) datasets per stock.

    Protocol: prime the generator with ``next()``, then ``send()`` stock codes
    of the form ``"<database>/<ticker>"``. Each ``send`` returns either a dict
    with the keys ``X_norm``, ``Y``, ``trX``, ``trY``, ``testX``, ``testY`` and
    ``price``, or ``None`` when the code was skipped. Sending ``None`` ends the
    generator.

    A code is skipped when it contains ".", when it is listed in
    ``stock_data/invalid_stocks.txt``, or when Quandl reports it as not found
    (in which case it is also appended to that file).

    Results are cached in ``./stock_data/<name>_data.pickle``; fitted scalers
    are stored in ``./stock_data/<ticker>[_notrand].scaler``.
    NOTE: both are loaded with pickle/joblib and must be trusted local files.

    Args:
        raw: If true, features are returned unscaled instead of standardized.
        random_split: If true, split with ``train_test_split``; otherwise the
            test rows are clipped off the end.
        start_date: Optional start date string passed to ``quandl.get``.
        end_date: Optional end date string passed to ``quandl.get``.
        test_proportion: Fraction of rows assigned to the test set.

    Raises:
        FileNotFoundError: If ``stock_data/invalid_stocks.txt`` does not exist.
    """
    invalid_path = "stock_data/invalid_stocks.txt"
    with open(invalid_path, "r") as f:
        invalid_stock_codes = {line.strip() for line in f}

    def record_invalid(code):
        # Open per write so no handle outlives the generator and every
        # entry reaches disk immediately.
        invalid_stock_codes.add(code)
        with open(invalid_path, "a") as out:
            out.write(code + "\n")

    stock_code = yield

    while stock_code is not None:
        if "." in stock_code or stock_code in invalid_stock_codes:
            stock_code = yield None
            continue

        sec = stock_code.split("/")[1]  # Just the ticker, not the database code
        pickle_name = _cache_stem(sec, raw, random_split, start_date, end_date)
        pickle_path = "./stock_data/" + pickle_name + "_data.pickle"

        if os.path.isfile(pickle_path):
            with open(pickle_path, "rb") as cached:
                _data = pickle.load(cached)
            stock_code = yield {
                "X_norm": _data["X_norm"], "Y": _data["Y"],
                "trX": _data["trX"], "trY": _data["trY"],
                "testX": _data["testX"], "testY": _data["testY"],
                "price": _data["price"],
            }
            continue

        try:
            df = quandl.get(stock_code, start_date=start_date, end_date=end_date)
        except quandl.errors.quandl_error.NotFoundError:
            record_invalid(stock_code)
            stock_code = yield None
            continue

        df = _select_ohlcv(df)
        price = df['close'].values
        Y = _label_turning_points(price)
        df = _indicator_frame(df)

        # Keep only rows where every indicator is defined, and keep the
        # labels and prices aligned with the surviving rows.
        complete = df.notnull().all(axis=1).values
        df = df[complete]
        Y = Y[complete]
        price = price[complete].tolist()

        print(df["adx_10"])
        X = df.values

        if raw:
            X_norm = X
        else:
            rand = "_notrand" if not random_split else ""
            scaler_path = "./stock_data/" + sec + rand + ".scaler"
            if os.path.isfile(scaler_path):
                scaler = joblib.load(scaler_path)
            else:
                scaler = prep.StandardScaler().fit(X)
                joblib.dump(scaler, scaler_path)
            X_norm = scaler.transform(X)

        if random_split:
            trX, testX, trY, testY = train_test_split(X_norm, Y, test_size=test_proportion, random_state=0)
        else:  # just clips the test data off the end
            l = len(X_norm)
            cut = int(-test_proportion * l)
            if cut == 0:
                # A zero cut would slice every row into the test set;
                # with no test rows to clip, everything is training data.
                cut = l
            trX, testX = X_norm[:cut], X_norm[cut:]
            trY, testY = Y[:cut], Y[cut:]

        output = {"X_norm": X_norm, "Y": Y, "trX": trX, "trY": trY, "testX": testX, "testY": testY, "price": price}
        with open(pickle_path, "wb") as cache:
            pickle.dump(output, cache)
        stock_code = yield output
예제 #12
0
    def WILLR(self):  #7
        """Compute the WILLR indicator and store it on ``self.company_stock``.

        Calls ``abstract.WILLR`` on ``self.company_stock`` with
        ``timeperiod=14`` and writes the result to the ``'WILLR'`` column/key
        of that same object.
        """
        # NOTE(review): `abstract` is presumably talib.abstract, which would make
        # this the Williams %R oscillator -- confirm against the file's imports.
        willr = abstract.WILLR(self.company_stock, timeperiod=14)

        # Stored in place on the instance's data rather than on a copy.
        self.company_stock['WILLR'] = willr