def add_features(df: pd.DataFrame, add_all=False, drop_times=False, sma="20,50,200", ema="20,50"):
    """Add clock, moving-average and (optionally) all ``ta`` features.

    The ``h``, ``min``, ``year``, ``sma_*`` and ``ema_*`` columns are written
    directly onto the frame passed in, so the caller's object is modified.

    Args:
        df: Frame with a string ``datetime`` column and a ``last`` price
            column. When ``add_all`` is true it must also have ``open``,
            ``hi``, ``lo`` and ``vl`` columns.
        add_all: Cast the frame to ``float32`` and add every ``ta`` feature.
        drop_times: Drop the ``datetime`` column before returning.
        sma: Comma-separated rolling-mean window lengths.
        ema: Comma-separated exponential-moving-average spans.

    Returns:
        The frame with the new columns. It is a new object when
        ``drop_times`` or ``add_all`` is set.
    """
    stamps = df['datetime']
    # Fixed character offsets: hour and minute are read relative to the end
    # of the string, the year from its first four characters.
    df['h'] = pd.to_numeric(stamps.str[-9:-7])
    df['min'] = pd.to_numeric(stamps.str[-6:-4])
    df['year'] = pd.to_numeric(stamps.str[0:4])

    for window in str(sma).split(","):
        df['sma_' + str(window)] = df['last'].rolling(int(window)).mean()

    for span in str(ema).split(","):
        df['ema_' + str(span)] = df['last'].ewm(span=int(span)).mean()

    if drop_times:
        # ``axis`` can no longer be passed positionally to ``drop`` in
        # pandas 2.x, so name the column explicitly.
        df = df.drop(columns="datetime")

    if add_all:
        df = df.astype('float32')
        # Keep the returned frame instead of relying on in-place mutation.
        df = ta.add_all_ta_features(df, open="open", high='hi', low='lo',
                                    close='last', volume='vl', fillna=False)
    return df
def test_general(self):
    """Check that every feature added by ``ta`` is numeric after NaN filling.

    ``add_all_ta_features`` is first run once with and once without
    ``fillna``; only the final ``fillna=True`` run is asserted on.
    """
    ohlcv = dict(open="Open", high="High", low="Low", close="Close",
                 volume="Volume_BTC")

    # Clean nan values
    frame = ta.utils.dropna(self._df)

    # Run once filling NaNs and once leaving them in place.
    ta.add_all_ta_features(df=frame, fillna=True, **ohlcv)
    ta.add_all_ta_features(df=frame, fillna=False, **ohlcv)

    # Columns that are not in the fixture were added by ta.
    original_columns = self._df.columns
    enriched = ta.add_all_ta_features(df=frame, fillna=True, **ohlcv)
    added = [col for col in enriched.columns if col not in original_columns]

    coerced = enriched[added].apply(
        lambda series: pd.to_numeric(series, errors='coerce'))
    assert coerced.notnull().all().all()
def test_runs_with_external_feed_only(portfolio):
    """Run a random-action episode on an environment fed only by CSV data.

    The last 100 rows of the Bitfinex fixture are split into BTC and ETH
    frames, enriched with ``ta`` features and exposed as ``bitfinex`` streams.
    """
    ohlcv_fields = ['open', 'high', 'low', 'close', 'volume']

    prices = pd.read_csv("tests/data/input/bitfinex_(BTC,ETH)USD_d.csv").tail(100)
    prices = prices.rename({"Unnamed: 0": "date"}, axis=1).set_index("date")

    bitfinex_btc = prices.loc[:, [col.startswith("BTC") for col in prices.columns]]
    bitfinex_eth = prices.loc[:, [col.startswith("ETH") for col in prices.columns]]

    ta.add_all_ta_features(
        bitfinex_btc, colprefix="BTC:",
        **{field: "BTC:" + field for field in ohlcv_fields})
    ta.add_all_ta_features(
        bitfinex_eth, colprefix="ETH:",
        **{field: "ETH:" + field for field in ohlcv_fields})

    streams = []
    with NameSpace("bitfinex"):
        # BTC columns first, then ETH, one float stream per column.
        for frame in (bitfinex_btc, bitfinex_eth):
            for col in frame.columns:
                streams.append(
                    Stream.source(list(frame[col]), dtype="float").rename(col))

    env = default.create(portfolio=portfolio,
                         action_scheme=ManagedRiskOrders(),
                         reward_scheme=SimpleProfit(),
                         feed=DataFeed(streams),
                         window_size=50,
                         enable_logger=False)

    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())

    assert obs.shape[0] == 50
def test_runs_with__external_feed_only(portfolio):
    """Run a random-action episode with ``use_internal=False``.

    The observation shape must be the window size by the total number of
    BTC and ETH columns (price columns plus the ``ta`` features added here).
    """
    ohlcv_fields = ['open', 'high', 'low', 'close', 'volume']

    prices = pd.read_csv("tests/data/input/coinbase_(BTC,ETH)USD_d.csv").tail(100)
    prices = prices.rename({"Unnamed: 0": "date"}, axis=1).set_index("date")

    coinbase_btc = prices.loc[:, [col.startswith("BTC") for col in prices.columns]]
    coinbase_eth = prices.loc[:, [col.startswith("ETH") for col in prices.columns]]

    ta.add_all_ta_features(
        coinbase_btc, colprefix="BTC:",
        **{field: "BTC:" + field for field in ohlcv_fields})
    ta.add_all_ta_features(
        coinbase_eth, colprefix="ETH:",
        **{field: "ETH:" + field for field in ohlcv_fields})

    nodes = []
    with Module("coinbase") as coinbase:
        # NOTE(review): ``nodes`` is never read afterwards; the streams are
        # presumably registered on the module when constructed - confirm.
        for frame in (coinbase_btc, coinbase_eth):
            for col in frame.columns:
                nodes.append(Stream(col, list(frame[col])))

    feed = DataFeed()(coinbase)

    env = TradingEnvironment(
        portfolio=portfolio,
        action_scheme=ManagedRiskOrders(),
        reward_scheme=SimpleProfit(),
        feed=feed,
        window_size=50,
        use_internal=False,
        enable_logger=False,
    )

    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())

    n_features = coinbase_btc.shape[1] + coinbase_eth.shape[1]
    assert obs.shape == (50, n_features)
def delete_row(self):
    """Delete one row from ``Book1.xls`` and save it as ``Delete Row.xls``.

    Both paths are built from ``self.dataDir``. The workbook statements are
    followed by a parabolic-SAR signal/profit computation that reads ``df``,
    ``indicators_value`` and ``ticker_name``, none of which are defined in
    this block.

    NOTE(review): the second part looks like an unrelated script fragment
    that was concatenated onto this method. Its nesting was reconstructed
    from a source without indentation; confirm where it belongs and where
    ``df``, ``indicators_value`` and ``ticker_name`` come from.
    """
    # Instantiating a Workbook object by excel file path
    workbook = self.Workbook(self.dataDir + 'Book1.xls')

    # Accessing the first worksheet in the Excel file
    worksheet = workbook.getWorksheets().get(0)

    # Deleting 3rd row from the worksheet
    worksheet.getCells().deleteRows(2, 1, True)

    # Saving the modified Excel file in default (that is Excel 2003) format
    workbook.save(self.dataDir + "Delete Row.xls")

    # Was a Python 2 ``print`` statement, which does not parse on Python 3.
    print("Delete Row Successfully.")

    # Clean nan values
    df = ta.utils.dropna(df)

    # Add all ta features filling nans values
    df = ta.add_all_ta_features(df, "Open", "High", "Low", "Close", "Volume_BTC", fillna=True)

    df['Signal'] = 0
    sell = []
    buy = []
    date_sell = []
    date_buy = []
    indicators = ['trend_psar']

    for indicator in indicators:
        # First bar (from index 10 on) where the indicator crosses from above
        # the close to at-or-below it.
        # NOTE(review): ``first_buy_signal`` stays unbound if no such bar exists.
        for y in range(10, len(df.index)):
            if df[indicator].iloc[y] <= df['Close'].iloc[y] and (df[indicator].iloc[y - 1] > df['Close'].iloc[y - 1]):
                first_buy_signal = y
                print(first_buy_signal)
                break

        # Record every later crossing as a Sell (upward) or Buy (downward).
        # NOTE(review): ``df['Signal'].iloc[x] = ...`` is chained assignment
        # and may not write through to ``df`` on newer pandas - confirm.
        for x in range(first_buy_signal - 1, len(df.index)):
            if df[indicator].iloc[x] >= df['Close'].iloc[x] and (df[indicator].iloc[x - 1] < df['Close'].iloc[x - 1]):
                df['Signal'].iloc[x] = 'Sell'
                sell.append(df['Close'].iloc[x])
                date_sell.append(df['Date'].iloc[x])
            elif df[indicator].iloc[x] <= df['Close'].iloc[x] and (df[indicator].iloc[x - 1] > df['Close'].iloc[x - 1]):
                df['Signal'].iloc[x] = 'Buy'
                buy.append(df['Close'].iloc[x])
                date_buy.append(df['Date'].iloc[x])

        # One placeholder sell is appended, and the last profit row is
        # dropped again below.
        sell.append(0)
        date_sell.append(0)

        profits = pd.DataFrame()
        profits['Buy'] = buy
        profits['Buy Date'] = date_buy
        profits['Sell'] = sell
        profits['Sell Date'] = date_sell
        profits['Profits'] = ((profits['Sell'] - profits['Buy']) / profits['Sell']) * 100
        profits.drop(profits.tail(1).index, inplace=True)  # drop last n rows
        sum(profits['Profits'])
        indicators_value.append(sum(profits['Profits']))

    tik = df.iloc[0]['TICKER']
    ticker_name.append(tik)
def add_indicators(df):
    """Print a progress message and add every ``ta`` feature to *df*.

    The frame is expected to use the numbered column names ``"1. open"``,
    ``"2. high"``, ``"3. low"``, ``"4. close"`` and ``"6. volume"``.
    NaNs in the generated features are filled (``fillna=True``).

    Returns:
        The frame returned by ``ta.add_all_ta_features``.
    """
    print("Adding technical indicators")
    column_map = {
        "open": "1. open",
        "high": "2. high",
        "low": "3. low",
        "close": "4. close",
        "volume": "6. volume",
    }
    return ta.add_all_ta_features(df, fillna=True, **column_map)
def PCA_TA(df):
    """Append ``_PCA_TA``-derived rows to *df* and add every ``ta`` feature.

    ``_PCA_TA`` is called for the ``open``, ``close``, ``ask`` and ``bid``
    columns. The matching ``News_count`` values are 181 random draws from
    the existing column. ``ask`` is passed to ``ta`` as the high, ``bid`` as
    the low and ``News_count`` as the volume.

    NOTE(review): 181 is hard-coded and presumably has to match the length
    of the ``_PCA_TA`` outputs - confirm.
    """
    import random

    open_part = _PCA_TA(df, 'open')
    close_part = _PCA_TA(df, 'close')
    ask_part = _PCA_TA(df, 'ask')
    bid_part = _PCA_TA(df, 'bid')

    news_counts = df['News_count']
    sampled_news = []
    for _ in range(181):
        sampled_news.append(random.choice(news_counts))

    generated = pd.DataFrame({
        'open': open_part,
        'close': close_part,
        'bid': bid_part,
        'ask': ask_part,
        'News_count': sampled_news,
    })

    base = df[["open", "ask", "bid", "close", "News_count"]]
    combined = pd.concat([base, generated], ignore_index=True)
    return ta.add_all_ta_features(combined, open="open", high="ask", low="bid",
                                  close="close", volume="News_count",
                                  fillna=True)
def _download_kline_interval(symbol, start_date, end_date, candlestick_interval, config_path):
    """Download klines for one interval and return them with ``ta`` features.

    Args:
        symbol: Passed to ``Client.get_historical_klines`` as ``symbol``.
        start_date: Passed as ``start_str``.
        end_date: Passed as ``end_str``.
        candlestick_interval: Passed as ``interval``.
        config_path: Passed to ``read_config``; the result must provide
            ``["binance"]["key"]`` and ``["binance"]["secret"]``.

    Returns:
        A frame with ``open_time``, ``open``, ``high``, ``low``, ``close``,
        ``volume``, ``close_time`` and the ``ta`` features, without its
        first 60 rows and with a fresh integer index.

    Raises:
        Exception: If the cleaned frame contains any NaN.
    """
    binance_cfg = read_config(path=config_path)["binance"]
    client = Client(api_key=binance_cfg["key"], api_secret=binance_cfg["secret"])

    print(f"Downloading {candlestick_interval} klines...")
    klines = client.get_historical_klines(symbol=symbol,
                                          interval=candlestick_interval,
                                          start_str=start_date,
                                          end_str=end_date)

    # Everything to float, drop raw columns 7-11, name the remaining seven.
    frame = pd.DataFrame(klines).astype(float)
    frame = frame.drop(columns=frame.columns[[7, 8, 9, 10, 11]])
    frame = frame.rename(columns={
        0: 'open_time',
        1: 'open',
        2: 'high',
        3: 'low',
        4: 'close',
        5: 'volume',
        6: 'close_time',
    })

    # The close time is shifted by one millisecond before conversion.
    frame['close_time'] = pd.to_datetime(frame['close_time'] + 1, unit='ms')
    frame['open_time'] = pd.to_datetime(frame['open_time'], unit='ms')

    if frame.isna().values.any():
        raise Exception(
            "Nan values in data, please discard this object and try again")

    # Warnings raised while computing the indicators are suppressed.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        frame = ta.add_all_ta_features(frame, open='open', high="high",
                                       low="low", close="close",
                                       volume="volume", fillna=True)

    # Drop the first 60 rows and renumber.
    return frame.iloc[60:].reset_index(drop=True)
def calculate_indicators(product: Product, **kwargs):
    """Compute all ``ta`` indicators for *product* over the last 60 days.

    On success ``product.calculated_indicators`` holds the enriched frame
    and ``product.rsi`` the last ``momentum_rsi`` value.

    The function is best-effort: any exception is logged with its traceback
    and not re-raised, in which case the product may be left unchanged.
    ``**kwargs`` is accepted but not used.
    """
    # Imported here so the block does not depend on a file-level import.
    import logging

    try:
        days = 60
        better_start_date = fucking_date.now() - datetime.timedelta(days)
        better_end_date = fucking_date.now()
        shortened_better_data = get_data_for_timespan(better_start_date,
                                                      better_end_date,
                                                      product)
        # IMPORTANT TODO CALCULATE DATA FOR EVERY POSSIBLE RELATION IN A 3DIMENSIONAL ARRAY7
        complete_data = pd.DataFrame(shortened_better_data, columns=[
            "Product", "Timestamp", "Open", "High", "Low", "Close", "Volume"
        ])
        all_indicators = add_all_ta_features(complete_data,
                                             open="Open",
                                             high="High",
                                             low="Low",
                                             close="Close",
                                             volume="Volume",
                                             fillna=True)
        product.calculated_indicators = all_indicators
        product.rsi = all_indicators["momentum_rsi"].values[-1]
    except Exception:
        # Failures were silently discarded before; record them but do not
        # raise, so callers keep the same contract.
        logging.getLogger(__name__).exception(
            "calculate_indicators failed for %r", product)
def engineer_features(df, period='5T'):
    """Resample *df*, fill gaps and add every ``ta`` feature.

    A ``date`` column derived from the unix-second ``closing_time`` is
    written onto the frame passed in before resampling.

    Args:
        df: Frame with ``closing_time``, ``open``, ``high``, ``low``,
            ``close`` and ``base_volume`` columns.
        period: Passed to ``resample_ohlcv``; default ``'5T'``.

    Returns:
        The enriched frame, with a ``nan_ohlcv`` flag (1 where ``close`` was
        null after resampling) and ``closing_time`` set to the resampled
        dates.
    """
    # convert unix closing_time to datetime
    df['date'] = pd.to_datetime(df['closing_time'], unit='s')

    # Resample, then move the date from the index back to a column.
    resampled = resample_ohlcv(df, period).reset_index()

    # Keep the dates aside; the column is re-attached as closing_time below.
    closing_time = resampled.date.values
    resampled = resampled.drop(columns='date')

    # Flag rows that are gaps before they get filled.
    resampled['nan_ohlcv'] = resampled['close'].apply(
        lambda value: 1 if pd.isnull(value) else 0)

    filled = fill_nan(resampled)
    enriched = add_all_ta_features(filled, open='open', high='high', low='low',
                                   close='close', volume='base_volume',
                                   fillna=True)

    enriched['closing_time'] = closing_time
    return enriched
def get_candles(symbol, tf):
    """Fetch historical candles for *symbol* and add every ``ta`` feature.

    Reads the module-level ``binance`` client and ``contracts`` mapping.

    Returns:
        A dict with ``'candles'`` (one list per field, taken from each
        candle's ``data_dict``) and ``'df'`` (the enriched frame as a dict).
    """
    global binance, contracts

    candles_raw = binance.get_historical_candles(contracts[symbol], tf)

    # Same key order as before: close, open, high, low, volume, timestamp.
    field_names = ('close', 'open', 'high', 'low', 'volume', 'timestamp')
    candles = {
        field: [candle.data_dict[field] for candle in candles_raw]
        for field in field_names
    }

    frame = pd.DataFrame(data=candles)
    frame = ta.add_all_ta_features(frame, open="open", high="high", low="low",
                                   close="close", volume="volume", fillna=True)
    return {'candles': candles, 'df': frame.to_dict()}
def feature_engineer(path):
    """Build the single feature row used for a prediction.

    The CSV at *path* is read with its first column as index, reversed and
    cut to its last 60 rows. A ``close_diff`` column and every ``ta``
    feature are added, and only the final row is kept.

    Returns:
        ``[df, prediction_time]``: the one-row frame without ``time`` and
        the listed ``ta`` columns, and that row's ``time`` formatted as
        ``'%Y-%m-%d %H:%M:%S'`` in local time.
    """
    # import csv and drop the Unnamed:0 column
    df = pd.read_csv(path, index_col=0)[::-1][-60:]

    # add close_diff feature
    df['close_diff'] = df['close'] - df['close'].shift(1)

    # engineer all ta features from ta library, keep only the last row
    df = add_all_ta_features(df, "open", "high", "low", "close", "volume",
                             fillna=True)[-1:]

    # Take the timestamp as a scalar: ``fromtimestamp`` expects a number,
    # not the length-1 array that ``df.time.values`` yields.
    prediction_epoch = float(df['time'].iloc[0])
    prediction_time = datetime.datetime.fromtimestamp(
        prediction_epoch).strftime('%Y-%m-%d %H:%M:%S')

    # drop null columns and time
    drop_columns = [
        'volume_obv', 'trend_adx', 'trend_adx_pos', 'trend_adx_neg',
        'trend_trix', 'time'
    ]
    # Not in place, because ``df`` is a slice of the enriched frame.
    df = df.drop(columns=drop_columns)

    return [df, prediction_time]
def _feature_engineering(df, volume=True):
    """Add ``ta`` features to *df* and zero-fill the PSAR up/down columns.

    Args:
        df: Frame with ``Open``, ``High``, ``Low``, ``Last`` and ``Volume``
            columns (``Last`` is used as the close).
        volume: When true, add every feature in one call. Otherwise add the
            momentum, volatility, trend and "others" groups one by one.

    Returns:
        The enriched frame. NaNs are left in place (``fillna=False``) except
        in ``trend_psar_up`` and ``trend_psar_down``.
    """
    hlc = {"high": "High", "low": "Low", "close": "Last"}

    if volume:
        df = ta.add_all_ta_features(df, open="Open", volume="Volume",
                                    fillna=False, **hlc)
    else:
        # NOTE(review): the momentum group is still given the "Volume"
        # column in this branch - confirm the column exists.
        df = ta.add_momentum_ta(df, volume="Volume", fillna=False, **hlc)
        df = ta.add_volatility_ta(df, fillna=False, **hlc)
        df = ta.add_trend_ta(df, fillna=False, **hlc)
        df = ta.add_others_ta(df, close="Last", fillna=False)

    for psar_column in ("trend_psar_up", "trend_psar_down"):
        df[psar_column] = df[psar_column].fillna(0.0)
    return df
def pad_history():
    """Pad the history to at least 31 rows and add every ``ta`` feature.

    Reads the module-level ``resampled_df`` and ``df_mid``. When the two
    together have fewer than 31 rows, copies of the first row are put in
    front. The index is then replaced by a 15-minute grid whose latest
    stamp is ``df_mid.index[-1]``.

    Returns:
        The padded frame with ``ta`` features (NaNs filled), using the
        ``open``, ``high``, ``low``, ``close`` and ``vol`` columns.
    """
    # ``DataFrame.append`` no longer exists in pandas 2.x; use ``concat``.
    full_resampled = pd.concat([resampled_df, df_mid], sort=False)

    missing_rows = 30 + 1 - len(full_resampled)
    if missing_rows > 0:
        padding = pd.DataFrame([full_resampled.iloc[0]] * missing_rows)
        padded = pd.concat([padding, full_resampled], sort=False)
    else:
        # Nothing to pad; skip concatenating an empty frame.
        padded = full_resampled

    # Built backwards from the last known stamp, then sorted ascending.
    padded.index = pd.date_range(start=df_mid.index[-1],
                                 periods=len(padded),
                                 freq='-15Min').sort_values()

    return ta.add_all_ta_features(padded, "open", "high", "low", "close",
                                  "vol", fillna=True)
def get_state(self, device="cpu"):
    """Build the current model input tensor from ``self.data``.

    A copy of ``self.data`` gets every ``ta`` feature (warnings suppressed)
    and is reduced to ``self.HP.features``. It then goes through
    ``TrainDataBase._raw_data_prep``, is cut to the last
    ``self.HP.window_size`` rows and converted to a tensor.

    Args:
        device: Device the tensor is created on.

    Returns:
        A ``torch`` tensor with a leading batch dimension of size 1.
    """
    frame = self.data.copy()

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        frame = ta.add_all_ta_features(frame, open='open', high="high",
                                       low="low", close="close",
                                       volume="volume", fillna=True)

    selected = frame[self.HP.features]

    # The prepared result is indexed with ``.iloc`` below, so it is handled
    # as a DataFrame here; the second return value is not needed.
    prepared, _ = TrainDataBase._raw_data_prep(
        data=selected,
        derive=self.HP.derivation,
        scaling_method=self.HP.scaling,
        preloaded_scaler=self.scaler,
        scaler_type=self.HP.scaler_type)

    window = prepared.iloc[-self.HP.window_size:, :]
    state = torch.tensor(window.to_numpy(), device=device)
    return state.unsqueeze(dim=0)
def transform(self, X, **transform_params):
    """Add every ``ta`` feature to *X* and return the result's values.

    Column names, ``fillna`` and ``colprefix`` come from the instance
    attributes. ``**transform_params`` is accepted but not used.

    Returns:
        ``.values`` of the enriched frame.
    """
    ta_kwargs = {
        "open": self._open_column,
        "high": self._high_column,
        "low": self._low_column,
        "close": self._close_column,
        "volume": self._volume_column,
        "fillna": self._fillna,
        "colprefix": self._colprefix,
    }
    enriched = ta.add_all_ta_features(df=X, **ta_kwargs)
    return enriched.values
def add_ta_features2(df, ta_settings):
    """Add technical analysis features to a typical financial dataset.

    The dataset is expected to have the columns "open", "high", "low",
    "price" and "volume"; "price" is used as the close.
    http://github.com/bukosabino/ta

    Args:
        df(pandas.DataFrame): original DataFrame.
        ta_settings(dict): configuration; features are only added when it
            is truthy.

    Returns:
        pandas.DataFrame: DataFrame with new features included, or ``df``
        itself when ``ta_settings`` is falsy.
    """
    if not ta_settings:
        return df

    # Add ta features filling NaN values
    return add_all_ta_features(df, open="open", high="high", low="low",
                               close="price", volume="volume", fillna=True)
def addAllTechnicalIndicators(df):
    """Return a copy of *df* with all ``ta`` and six ``pandas_ta`` indicators.

    The leading columns must be, in order, ``open``, ``high``, ``low``,
    ``close`` and ``volume``. Columns are compared pairwise, so the check
    stops at whichever sequence is shorter.

    Raises:
        AssertionError: If a compared column name does not match.
    """
    frame = df.copy()

    required = ['open', 'high', 'low', 'close', 'volume']
    columns_match = all(
        actual == expected for actual, expected in zip(frame.columns, required))
    assert columns_match, "Columns must be open, high, low, close, volume"

    frame = ta.add_all_ta_features(frame, open="open", high="high", low="low",
                                   close="close", volume="volume")

    high, low, close = frame['high'], frame['low'], frame['close']
    frame['ao'] = pandas_ta.ao(high, low, fast=5, slow=34)
    frame['apo'] = pandas_ta.apo(close, fast=12, slow=26)
    frame['bop'] = pandas_ta.bop(frame['open'], high, low, close)
    frame['cg'] = pandas_ta.cg(close, length=10)
    frame['fwma'] = pandas_ta.fwma(close, length=10)
    frame['kurtosis'] = pandas_ta.kurtosis(close, length=30)
    return frame
def preprocess_dataset(df, indicators=[
        'trend_sma_fast', 'trend_ema_fast', 'trend_macd', 'momentum_roc',
        'volatility_bbh', 'volatility_bbl', 'volatility_bbp', 'momentum_stoch',
        'momentum_stoch_signal'
]):
    """Label *df*, add every ``ta`` feature and drop the first 30 rows.

    Args:
        df: Frame with ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``
            columns. It is copied, not modified.
        indicators: Not used at present: every ``ta`` feature is kept. The
            default list is never mutated here.

    Returns:
        The copy after ``set_up_down`` and ``ta.add_all_ta_features``
        (NaNs filled), without the rows whose index labels are the first 30
        labels of *df*.
    """
    # The second, never-read copy of ``df`` was removed.
    data = set_up_down(df.copy())
    data = ta.add_all_ta_features(data,
                                  open="Open",
                                  high="High",
                                  low="Low",
                                  close="Close",
                                  volume="Volume",
                                  fillna=True)
    # Rows are dropped by label, taken from the head of the original frame.
    data = data.drop(df.head(30).index)
    return data
def ta_test():
    """Load daily data for a fixed ticker list, add ``ta`` features, train.

    Each ticker's ``data_daily_<ticker>`` table is read between 2008-01-01
    and 2030-01-01, enriched and printed. The combined data goes through
    ``process_data``, both class distributions are printed, and a CNN with
    input shape ``(30, 4)`` is trained via ``train_cnn``.
    """
    tickers = (
        'aapl', 'amzn', 'msft', 'amd', 'nvda', 'goog', 'baba', 'fitb', 'mu',
        'fb', 'sq', 'tsm', 'qcom', 'mo', 'bp', 'unh', 'cvs', 'tpr',
    )
    conn = connect()
    range_start = datetime(2008, 1, 1)
    range_end = datetime(2030, 1, 1)

    data = {}
    for ticker in tickers:
        history = get_data_interval(conn, f'data_daily_{ticker}',
                                    range_start, range_end, pandas=True)
        data[ticker] = ta.add_all_ta_features(history, open='open',
                                              high='high', low='low',
                                              close='close', volume='volume')
        print(data[ticker])

    train, test, label_train, label_test = process_data(data)
    print_distribution(train, label_train)
    print_distribution(test, label_test)

    model = cnn(input_shape=(30, 4), num_classes=num_classes)
    train_cnn(model, train, label_train, test, label_test)
def initHistory(share_name):
    """
    Generate a history, with TA features and predictions, from a share name

    Parameters
    ----------
    share_name: str
        Share name in yahoo format

    Returns
    -------
    A pandas DataFrame with the last ``HISTORY_LENGTH`` share values, TA
    features and one prediction column per entry of ``PREDICTIONS_DAYS``,
    or ``None`` when no prices could be fetched
    """
    history = __getLastPrices(share_name, HISTORY_LENGTH)
    if history is None:
        return history

    history = ta.add_all_ta_features(history, open="open", high="high",
                                     low="low", close="close",
                                     volume="volume", fillna=True)

    # Prediction columns start out as NaN before makePredictions runs.
    for day in PREDICTIONS_DAYS:
        history[PREDICTION_PREFIX + str(day)] = np.nan

    for row_number in range(len(history)):
        makePredictions(history, row_number)
    return history
def ta_xtrain_def(self):
    """Build sliding-window train/test arrays from ``ta``-enriched data.

    ``self.df`` without its ``date`` column gets every ``ta`` feature, with
    NaNs replaced by 0, and is stored as ``self.talibdf``. The first 99 %
    of rows (rounded up) form the training block. The test block starts 60
    rows before the end of the training block.

    For each position ``i`` in a block, the input is the 60 preceding rows
    (all columns) and the target is the next 60 values of column 0. The
    results are stored in ``self.ta_x_train``, ``self.ta_y_train``,
    ``self.ta_x_test`` and ``self.ta_y_test``, and their shapes are printed.
    """
    window = 60

    def _windows(block):
        # Slice one block into (inputs, targets); same bounds for both splits.
        inputs, targets = [], []
        for i in range(window, len(block) - window - 1):
            inputs.append(block[i - window:i, :])
            targets.append(block[i:i + window, 0])
        return np.array(inputs), np.array(targets)

    df = self.df
    df = df.drop(['date'], axis=1)
    self.talibdf = add_all_ta_features(df,
                                       open="open",
                                       high="high",
                                       low="low",
                                       close="close",
                                       volume="volume").fillna(0)

    training_data_len = math.ceil(len(self.talibdf) * .99)
    values = np.array(self.talibdf)

    self.ta_x_train, self.ta_y_train = _windows(values[0:training_data_len, :])
    # The test block overlaps the last ``window`` training rows.
    self.ta_x_test, self.ta_y_test = _windows(
        values[training_data_len - window:, :])

    print(self.name + ' : ta_x_train shape : ', str(self.ta_x_train.shape))
    print(self.name + ' : ta_y_train shape : ', str(self.ta_y_train.shape))
    print(self.name + ' : ta_x_test shape : ', str(self.ta_x_test.shape))
    print(self.name + ' : ta_y_test shape : ', str(self.ta_y_test.shape))
    # NOTE(review): this literal was split across two physical lines, which
    # does not parse; it is joined without a line break - confirm the text.
    print('ta_xtrain_def end............')
def add_all_indicators(self):
    """Add every ``ta`` feature to each stock frame and cache the result.

    For each entry of ``self._stock_prices``, ``frame.data`` is replaced by
    the enriched frame. Its columns from position 6 onwards are stored in
    ``self._current_indicators[frame.symbol]['indicators']``.
    """
    for stock in self._stock_prices:
        stock.data = add_all_ta_features(stock.data, open='open', high='high',
                                         low='low', close='close',
                                         volume='volume')
        # Any earlier entry for this symbol is replaced.
        self._current_indicators[stock.symbol] = {
            'indicators': stock.data.iloc[:, 6:],
        }
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    """Add every ``ta`` feature to *dataframe* and print the pair name.

    Args:
        dataframe: Frame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns.
        metadata: Must contain the key ``'pair'``, which is printed.

    Returns:
        The enriched frame; NaNs are left in place (``fillna=False``).
    """
    ohlcv = {name: name for name in ("open", "high", "low", "close", "volume")}
    enriched = add_all_ta_features(dataframe, fillna=False, **ohlcv)
    print(metadata['pair'])
    return enriched
def __init__(self, data):
    """Store *data* and compute its ``ta`` features.

    Args:
        data: Frame with ``Open``, ``High``, ``Low``, ``Close`` and
            ``Volume`` columns. It is kept as ``self.data`` and handed to
            ``ta.add_all_ta_features``; the result is kept as
            ``self._features``.
    """
    self.data = data
    column_names = dict(open="Open", high="High", low="Low", close="Close",
                        volume="Volume")
    self._features = ta.add_all_ta_features(self.data, **column_names)
def add_technical_indicators(history):
    """Return *history* with every ``ta`` feature added.

    *history* must have ``Open``, ``High``, ``Low``, ``Close`` and
    ``Volume`` columns. ``fillna`` is left at the library default.
    """
    column_names = {
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    return add_all_ta_features(history, **column_names)


# Substantiate data with momentum indicators
def add_technical_indicators(raw_data):
    """Return *raw_data* with every ``ta`` feature added and NaNs filled.

    *raw_data* must have ``Open``, ``High``, ``Low``, ``Close`` and
    ``Volume`` columns.
    """
    enriched = ta.add_all_ta_features(raw_data, open="Open", high="High",
                                      low="Low", close="Close",
                                      volume="Volume", fillna=True)
    return enriched
def tech(df):
    """Return *df* with every ``ta`` feature added.

    ``ta`` is imported inside the function. *df* must have ``open``,
    ``high``, ``low``, ``close`` and ``volume`` columns.
    """
    import ta

    ohlcv = {name: name for name in ("open", "high", "low", "close", "volume")}
    return ta.add_all_ta_features(df, **ohlcv)
def add_all_ta(self, df):
    """Return *df* with every ``ta`` feature added and NaNs filled.

    *df* must have ``open``, ``high``, ``low``, ``close`` and ``volume``
    columns. No instance state is read.
    """
    ohlcv = {name: name for name in ('open', 'high', 'low', 'close', 'volume')}
    return ta.add_all_ta_features(df, fillna=True, **ohlcv)
def add_ta_features(df):
    """Return a copy of *df* enriched with every ``ta`` feature.

    *df* must have ``open``, ``high``, ``low``, ``close`` and ``volume``
    columns. The returned frame is a copy of what
    ``add_all_ta_features`` returns.
    """
    ohlcv = {name: name for name in ("open", "high", "low", "close", "volume")}
    return add_all_ta_features(df, **ohlcv).copy()