class General: def __init__(self, is_it_for_real=False): self.iv_sec = { '1m': 60, '3m': 60 * 3, '5m': 60 * 5, '15m': 60 * 15, '30m': 60 * 30, '1h': 60 * 60, '2h': 60 * 60 * 2, '4h': 60 * 60 * 4, '8h': 60 * 60 * 8, '1d': 60 * 60 * 24, '3d': 60 * 60 * 24 * 3, '1w': 60 * 60 * 24 * 7 } if is_it_for_real: from binance.client import Client self.client = Client("", "") def wait_till(self, interval, advance=0, skip_till=None): lag = self.client.get_server_time()['serverTime'] / 1000 - time.time( ) + 0.5 #taka kalibracja, żeby serwer otrzymał nasz request ułamek sekundy przed rządaną przez nas chwilą print('Lag: ', round(lag, 3)) to_wait = (-time.time() - advance - lag) % self.iv_sec[interval] print(f"Candle ({interval}) closing in {round(to_wait,3)} seconds") if skip_till: if to_wait > skip_till: print("Skipping the wait") return to_wait time.sleep(to_wait) print( f"Server time: {datetime.fromtimestamp(self.client.get_server_time()['serverTime']/1000)}" ) return 0 def get_candles(self, symbol, interval, limit, market='spot'): if market == 'spot': candles = pd.DataFrame( self.client.get_klines(symbol=symbol + 'USDT', interval=interval, limit=limit + 1)) elif market == 'future' or market == 'futures': candles = pd.DataFrame( self.client.futures_klines(symbol=symbol + 'USDT', interval=interval, limit=limit + 1)) candles = candles[[0, 1, 2, 3, 4, 5]].astype(float) return candles
class MomentumStrategy: def __init__(self): self.client = Client(api_key=API_KEY, api_secret=API_SECRET) self.bsm_s = BinanceSocketManager(self.client) self.bsm_f = BinanceSocketManager(self.client) self.symbol = 'BTCUSDT' self.bet_size = 0.001 self.interval = '15m' self.data_limit = 200 self.lim1 = 0.3 self.lim2 = 0.8 self.last_signal = 0 self.column_names = [ 'open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore' ] self.position_open = False self.buy_position = False self.sell_position = False self.client.futures_change_leverage(symbol=self.symbol, leverage=10) self.conn_key_s = self.bsm_s.start_kline_socket( self.symbol, self.web_socket_handler, interval=self.interval) self.bsm_s.start() time.sleep(2) def web_socket_handler(self, data): self.is_kline_closed = data['k']['x'] if self.is_kline_closed: self.calculate_signal() while self.last_signal == self.signal: self.calculate_signal() self.last_signal = self.signal print(self.signal) if not self.position_open: #open order because kline is closed - runs just the first time if self.signal >= self.lim1 and self.signal <= self.lim2: self.position_open = True self.buy_position = True self.buy_bet_size = self.bet_size self.client.futures_create_order(side='BUY', quantity=self.bet_size, symbol=self.symbol, type='MARKET') elif self.signal <= -self.lim1 and self.signal >= -self.lim2: self.position_open = True self.sell_position = True self.sell_bet_size = self.bet_size self.client.futures_create_order(side='SELL', quantity=self.bet_size, symbol=self.symbol, type='MARKET') #close positions if were open because kline is closed else: if self.buy_position: self.position_open = False self.buy_position = False self.client.futures_create_order( side='SELL', quantity=self.buy_bet_size, symbol=self.symbol, type='MARKET') if self.sell_position: self.position_open = False self.sell_position = False self.client.futures_create_order( side='BUY', quantity=self.sell_bet_size, symbol=self.symbol, type='MARKET') #open order because kline is closed if self.signal >= self.lim1 and self.signal <= self.lim2: self.position_open = True self.buy_position = True self.buy_bet_size = self.bet_size self.client.futures_create_order(side='BUY', quantity=self.bet_size, symbol=self.symbol, type='MARKET') elif self.signal <= -self.lim1 and self.signal >= -self.lim2: self.position_open = True self.sell_position = True self.sell_bet_size = self.bet_size self.client.futures_create_order(side='SELL', quantity=self.bet_size, symbol=self.symbol, type='MARKET') def futures_socket(self, data): pass # self.ask = float(data['data']['a']) # self.bid = float(data['data']['b']) # self.price = (self.ask + self.bid)/2 def calculate_signal(self): df = self.client.futures_klines(symbol=self.symbol, interval=self.interval, limit=self.data_limit) df = pd.DataFrame(df, columns=self.column_names) df['close'] = pd.to_numeric(df['close']) nks = [4, 8, 16] nkl = [16, 24, 32] r1 = 12 r2 = 168 l = 198 for i in range(3): df[f'ema_s_{i}'] = EMA(df['close'], window=nks[i]).ema_indicator() df[f'ema_l_{i}'] = EMA(df['close'], window=nkl[i]).ema_indicator() df[f'x{i}'] = df[f'ema_s_{i}'] - df[f'ema_l_{i}'] df[f'y{i}'] = df[f'x{i}'] / np.std(df['close'][l - r1:l]) df[f'z{i}'] = df[f'y{i}'] / np.std(df[f'y{i}'][l - r2:l]) df[f'u{i}'] = (df[f'z{i}'] * np.exp(-(df[f'z{i}']**2) / 4)) / ( np.sqrt(2) * np.exp(-1 / 2)) df['signal'] = (df['u0'] + df['u1'] + df['u2']) / 3 self.signal = df['signal'][l] def strategy(self): pass
client = Client( api_key=config.binance['public_key'], api_secret=config.binance['secret_key'] ) symbol_list = ['BATUSDT', 'XRPUSDT', 'ZILUSDT'] BATUSDT_candlesticks = [] XRPUSDT_candlesticks = [] ZILUSDT_candlesticks = [] for symbol in symbol_list: candlesticks = client.futures_klines( symbol=symbol, interval=Client.KLINE_INTERVAL_1HOUR, start_str='Last 20 Days' ) if symbol=='BATUSDT': for data in candlesticks: candlestick = { 'time': round(data[0]/1000), 'open': data[1], 'high': data[2], 'low': data[3], 'close': data[4], 'volume': data[5] }
from binance.client import Client import pandas as pd from pprint import pprint cred = pd.read_csv("credentials.csv", index_col=0).loc["binance", :] client = Client(cred.api_key, cred.secret_key) SYMBOL = "BTCUSDT" ticker = client.futures_ticker() print(ticker) klines = client.futures_klines(symbol=SYMBOL, interval=Client.KLINE_INTERVAL_1DAY, startTime=1546473600000, endTime=1609632000000) # print(len(klines)) # pprint(klines) # [[1609459200000, '28948.19', '29668.86', '28627.12', '29337.16', '210716.398', 1609545599999, '6157505024.08511', 1511793, '101247.902', '2960175587.62208', '0'], [1609545600000, '29337.15', '33480.00', '28958.24', '32199.91', '545541.080', 1609631999999, '17122938614.70610', 3514545, '273388.463', '8578964529.70894', '0'], [1609632000000, '32198.41', '34832.25', '32000.02', '33054.53', '487486.989', 1609718399999, '16389111411.52760', 3325307, '238761.657', '8029365512.72767', '0']] fund_rate = client.futures_funding_rate(symbol=SYMBOL, startTime=1568102400001, endTime=1612469148000, limit=1000) # print(len(fund_rate)) pprint(fund_rate) # futures_klines랑 그냥 같은 데이터 같음 continous_fut = client.futures_continous_klines( pair=SYMBOL,
def get_futures_historical_klines(symbol, interval, start_str, end_str=None): """Get Historical Klines from Binance See dateparse docs for valid start and end string formats http://dateparser.readthedocs.io/en/latest/ If using offset strings for dates add "UTC" to date string e.g. "now UTC", "11 hours ago UTC" :param symbol: Name of symbol pair e.g BNBBTC :type symbol: str :param interval: Biannce Kline interval :type interval: str :param start_str: Start date string in UTC format :type start_str: str :param end_str: optional - end date string in UTC format :type end_str: str :return: list of OHLCV values """ # create the Binance client, no need for api key client = Client("", "") # init our list output_data = [] # setup the max limit limit = 500 # convert interval to useful value in seconds timeframe = interval_to_milliseconds(interval) # convert our date strings to milliseconds start_ts = date_to_milliseconds(start_str) # if an end time was passed convert it end_ts = None if end_str: end_ts = date_to_milliseconds(end_str) idx = 0 # it can be difficult to know when a symbol was listed on Binance so allow start time to be before list date symbol_existed = False while True: # fetch the klines from start_ts up to max 500 entries or the end_ts if set temp_data = client.futures_klines(symbol=symbol, interval=interval, limit=limit, startTime=start_ts, endTime=end_ts) # handle the case where our start date is before the symbol pair listed on Binance if not symbol_existed and len(temp_data): symbol_existed = True if symbol_existed: # append this loops data to our output data output_data += temp_data # update our start timestamp using the last value in the array and add the interval timeframe start_ts = temp_data[len(temp_data) - 1][0] + timeframe else: # it wasn't listed yet, increment our start date start_ts += timeframe idx += 1 # check if we received less than the required limit and exit the loop if len(temp_data) < limit: # exit the while loop break # sleep after every 3rd call to be kind to the API if idx % 3 == 0: time.sleep(1) return output_data
class Trader(): """ Purpose is to trade live on the Binance Crypto Exchange Attributes ----------- symbol: str client: Client object - From binance.py capital: float - represents how much money is in account leverage: float - how many multiples of your money you want to trade tf: int - timeframe df: pd.DataFrame - data that will be analyzed. Methods ---------- load_account set_leverage set_timeframe get_position get_necessary_data set_asset make_row load_existing_asset start_trading Please look at each method for descriptions """ def __init__(self): self.start = False self.client = None self.capital = None self.leverage = 1 / 1000 self.tf = None self.symbol = None self.loader = _DataLoader() self.df = None def check_status(self) -> bool: """ Reads a txt file to see whether user wants trading to be on or off :return boolean of True or False. True means trading is on, and false means it is off. """ local_path = os.path.dirname( os.path.dirname( os.path.abspath("__file__"))) + r"\local\trade_status.txt" status = pd.read_csv(local_path).set_index('index') self.start = bool(status.loc[0, 'status']) def load_account(self, additional_balance: int = 20000) -> str: """ Sign in to account using API_KEY and using Binance API """ local_path = os.path.dirname( os.path.dirname( os.path.abspath("__file__"))) + r"\local\binance_api.txt" info = pd.read_csv(local_path).set_index('Name') API_KEY = info.loc["API_KEY", "Key"] SECRET = info.loc["SECRET", "Key"] self.client = Client(API_KEY, SECRET) # Initializing how much money I have on the exchange. Sample 20000 has been added. self.capital = int( float(self.client.futures_account_balance()[0] ['balance'])) + additional_balance return "Welcome Haseab" def set_leverage(self, leverage: float) -> float: """Sets the current leverage of the account: should normally be 1. And 0.001 for testing purposes""" self.leverage = leverage return self.leverage def set_timeframe(self, tf: int) -> int: """Sets the timeframe of the trading data""" self.tf = tf return self.tf def get_necessary_data(self, symbol: str, tf: int, max_candles_needed: int) -> pd.DataFrame: """ Gets the minimum necessary data to trade this asset. Only a symbol and timeframe need to be inputted Note: This method is used as a way to tackle the 1000 candle limit that is currently on the Binance API. A discrete set of ~1000 group candles will be determined, and then the data will be extracted from each, using the _get_binance_futures_candles method, and then all of them will be merged together. Parameters: symbol: str - Symbol of price ticker Ex. "BTCUSDT", "ETHUSDT" tf: int - Timeframe wanted Ex. 1, 3, 5, 77, 100 max_candles_needed: int - Maximum candles needed in the desired timeframe Ex. 231, 770, 1440 :return pd.DataFrame of candlestick data """ now = time.time() df = pd.DataFrame() ranges = Helper.determine_candle_positions(max_candles_needed, tf) # Grabbing each set of about 1000 candles and appending them one after the other for i in range(len(ranges)): try: df = df.append( self.loader._get_binance_futures_candles( symbol, int(ranges[i]), int(ranges[i + 1]), now)) except IndexError: pass return df.drop_duplicates() def _update_data( self, diff: int, symbol: str, ) -> None: """ Used to update the trading data with the exchange data so that it is real time Parameters: ----------- diff: a number that explains how many minutes of disparity there is between current data and live data. :return dataframe with the most up-to-date data. """ # Calculating how many minutes to fetch from API minutes = math.floor(diff) + 1 # Getting minute candlestick data. Number of minute candlesticks represented by "minutes" variable last_price = self.loader._get_binance_futures_candles(symbol, minutes) # Adding a legible Datetime column and using the timestamp data to obtain datetime data last_price['Datetime'] = [ datetime.fromtimestamp(i / 1000) for i in last_price.index ] last_price = last_price[[ "Datetime", "Open", "High", "Low", "Close", "Volume" ]] # Abstracting minute data into appropriate tf last_price = self.loader._timeframe_setter(last_price, self.tf, 0) # Updating original data with new data. self.df = self.df.append(last_price).drop_duplicates() def get_position(self, symbol: str) -> float: """ Gets the total amount of current position for a given symbol Parameters: ------------ symbol: str Ex. "BTCUSDT" "ETHUSDT" Returns float Ex. If a 1.000 BTC position is open, it will return 1.0 """ return float([ i["positionAmt"] for i in self.client.futures_position_information() if i['symbol'] == symbol ][0]) def set_asset(self, symbol: str, tf: int) -> str: """ Set Symbol of the ticker and load the necessary data with the given timeframe to trade it Parameters: ------------ symbol: str Ex. "BTCUSDT", "ETHUSDT" :return str response """ self.symbol = symbol max_candles_needed = 231 # For Binance API purposes, 240 min needs to be inputted as "4h" on binance when fetching data map_tf = { 1: "1m", 3: "3m", 5: "5m", 15: "15m", 30: "30m", 60: "1h", 120: "2h", 240: "4h", 360: "6h", 480: "8h" } start_time = int((time.time() - self.tf * 235 * 60) * 1000) if tf in [1, 3, 5, 15, 30, 60, 120, 240, 360, 480]: # Note: 12H, 1D, 3D, 1W, 1M are also recognized # Fetching data from Binance if it matches the eligible timeframe, as it will be faster df = Helper.into_dataframe( self.client.futures_klines(symbol=symbol, interval=map_tf[tf], startTime=start_time)) df.drop(df.tail(1).index, inplace=True) else: # If it doesn't match Binance available timeframes, it must be transformed after fetching 1m data. df = self.get_necessary_data(symbol, tf, max_candles_needed) df = self.loader._timeframe_setter(df, tf) # Adding Datetime column for readability of the timestamp. Also more formatting done df['Datetime'] = [datetime.fromtimestamp(i / 1000) for i in df.index] df = df[["Datetime", "Open", "High", "Low", "Close", "Volume"]] df[["Open", "High", "Low", "Close", "Volume"]] = df[["Open", "High", "Low", "Close", "Volume"]].astype(float) self.df = df return f"Symbol changed to {self.symbol}" def make_row( self, high=[], low=[], volume=[], count: int = 0, open_price: float = None, open_date: str = None ) -> (pd.DataFrame, list, list, list, list, list): """ Helper function used to update the last row of a dataframe, using all of the data in the previous "last row" as input. This is used to update the price of the candlestick live, with the incomplete current candle constantly updating. Parameters: ----------- high: list Ex. [23348, 23350, 23335, 23330, 23339] low: list Ex. [23300, 23345, 23335, 23320, 23300] volume: list Ex. [47, 31, 110, 117, 2, 55] count: int Ex. 1,2,3 open_price: float Ex. 23342 open_date: str Ex. "2020-08-04 17:33:02" Returns tuple -> (pd.DataFrame, list, list, list, list, list) """ # row variable gets a pd.DataFrame of size 1. row = self.loader._get_binance_futures_candles("BTCUSDT", 1).reset_index() timestamp = row.loc[0, "Timestamp"] close = float(row.loc[0, "Close"]) high.append(float(row.loc[0, "High"])) low.append(float(row.loc[0, "Low"])) volume.append(float(row.loc[0, "Volume"])) # Initial values of a candle that only get set on the first iteration. if count == 0: open_price = row.loc[0, "Open"] open_date = timestamp # Adding to previous candlestick data of the last row by updating the row. dfrow = pd.DataFrame([[ open_date, datetime.fromtimestamp(open_date / 1000), open_price, max(high), min(low), close, sum(volume) ]], columns=[ "Timestamp", "Datetime", "Open", "High", "Low", "Close", "Volume" ]) dfrow[["Open", "High", "Low", "Close", "Volume"]] = dfrow[["Open", "High", "Low", "Close", "Volume"]].astype(float) dfrow = dfrow.set_index("Timestamp") return dfrow, high, low, volume, open_price, open_date def load__existing_asset(self, df: pd.DataFrame) -> pd.DataFrame: """Sets the trading data to an already existing dataframe, passed in as an argument""" self.df = df return self.df def start_trading(self, strategy: FabStrategy, executor, tf: int, sensitivity=0, debug=False) -> str: """ Starts the process of trading live. Each minute, the last row of the data is updated and Rule #2 is checked, otherwise, it waits till the end of the timeframe where it gets the latest data before it checks the rest of the rules. If it does see something following the rules, it will buy/short, given the initial parameters it has. (e.g. leverage, quantity) This process continues indefinetly, unless interrupted. Returns None. """ count, open_price = 0, 0 open_date = None high, low, volume = [], [], [] # Loading data into strategy, and creating moving averages. strategy.load_data(self.df) strategy.create_objects() # Checking to see whether trading is on or off. If it is off then it will not enter while loop. self.check_status() while self.start != False: diff = Helper.calculate_minute_disparity(self.df, tf) if round(time.time() % 60, 1) == 0 and diff <= tf: # Getting the most up-to-date row of the <tf>-min candlestick dfrow, high, low, volume, open_price, open_date = self.make_row( high, low, volume, count, open_price, open_date) if debug == True: print(dfrow) print(f"{tf - diff} minutes left") # Updating moving averages strategy.load_data(self.df.append(dfrow)) strategy.create_objects() # Checking only Rule 2, because it needs a minute by minute check. # Second condition is making sure that there is no existing position if strategy.rule_2_buy_enter( -1, sensitivity) and self.get_position( self.symbol) == 0: trade_info = executor.enter_market(self.symbol, "BUY", 2) elif strategy.rule_2_short_enter( -1, sensitivity) and self.get_position( self.symbol) == 0: trade_info = executor.enter_market(self.symbol, "SELL", 2) # Saves CPU usage, waits 5 seconds before the next minute time.sleep(50) count += 1 elif diff > tf: # Updating data using Binance API instead of appending the completed final row from the while loop self._update_data(math.floor(diff), "BTCUSDT") # Updating Moving averages strategy.load_data(self.df) strategy.create_objects() # Checks for the rest of the rules if strategy.rule_2_short_stop(-1) and self.get_position(self.symbol) < 0 and \ executor.live_trade_history.last_trade().rule == 2: trade_info = executor.exit_market( self.symbol, 2, self.get_position(self.symbol)) elif strategy.rule_2_buy_stop(-1) and self.get_position(self.symbol) > 0 and \ executor.live_trade_history.last_trade().rule == 2: trade_info = executor.exit_market( self.symbol, 2, self.get_position(self.symbol)) elif strategy.rule_1_buy_enter(-1) and self.get_position( self.symbol) == 0: trade_info = executor.enter_market(self.symbol, "BUY", 1) elif strategy.rule_1_buy_exit(-1) and self.get_position( self.symbol) > 0: trade_info = executor.exit_market( self.symbol, 1, self.get_position(self.symbol)) elif strategy.rule_1_short_enter(-1) and self.get_position( self.symbol) == 0: trade_info = executor.enter_market(self.symbol, "SELL", 1) elif strategy.rule_1_short_exit(-1) and self.get_position( self.symbol) < 0: trade_info = executor.exit_market( self.symbol, 1, self.get_position(self.symbol)) elif strategy.rule_3_buy_enter(-1) and self.get_position( self.symbol) == 0: trade_info = executor.enter_market(self.symbol, "BUY", 3) elif strategy.rule_3_short_enter(-1) and self.get_position( self.symbol) == 0: trade_info = executor.enter_market(self.symbol, "SELL", 3) # Resetting candlesticks high, low, volume = [], [], [] count = 0 time.sleep(1) if debug == True: print(diff) print("next") self.check_status() return "Trading Stopped"
class BinanceManager: def __init__(self, api_key=None, secret_key=None): # Binance API Client self.client = Client(api_key, secret_key) # Binance API websocket self.socket = BinanceSocketManager(self.client) # Panda dataFrame self.df = None # Trading asset self.symbol = None # Interval self.interval = None # Logger self.logger = Logger() # File to save data self.filename = None def start(self, symbol, interval, startTime=None, filename="data.csv"): self.logger.success("Manager start") self.symbol = symbol self.interval = interval self.filename = filename self.get_historical_klines(self.symbol, interval, startTime) self.logger.success("Socket start") self.socket.start() self.logger.success("Socket kline start") self.socket.start_kline_socket(self.symbol, self.klines_callback, interval=self.interval) return self def get_historical_klines(self, symbol="btcusdt", interval="1h", startTime=None, endTime=None, limit=500): self.logger.info("Get historical klines") # fetch klines data = self.client.futures_klines(symbol=symbol, interval=interval, startTime=startTime, endTime=endTime, limit=limit) # Keep only the first 6 columns data = np.array(data)[:, 0:6] # Create a DataFrame with annotated columns df = pd.DataFrame( data, columns=["Date", "Open", "High", "Low", "Close", "Volume"]) # Convert Date from ms to Datetime df["Date"] = pd.to_datetime(df["Date"], unit="ms") # Convert columns to numeric values df["Open"] = pd.to_numeric(df["Open"], errors="coerce") df["High"] = pd.to_numeric(df["High"], errors="coerce") df["Low"] = pd.to_numeric(df["Low"], errors="coerce") df["Close"] = pd.to_numeric(df["Close"], errors="coerce") df["Volume"] = pd.to_numeric(df["Volume"], errors="coerce") # Set Date column as index df.set_index("Date", inplace=True) self.df = df self.write() return df def klines_callback(self, msg): self.logger.info("Websocket: Updating last kline") if msg['e'] == 'error': print("[!] ERROR: Klines websocket does not work.") return self # [Kline start time, Open price, High price, Low price, Close price, Base asset volume kline = np.array([ msg['k']['t'], msg['k']['o'], msg['k']['h'], msg['k']['l'], msg['k']['c'], msg['k']['v'] ]) # Create a Tableau with annotated columns df = pd.DataFrame( [kline], columns=["Date", "Open", "High", "Low", "Close", "Volume"]) # Convert Date from ms to Datetime df["Date"] = pd.to_datetime(df["Date"], unit="ms") # Convert columns to numeric values df["Open"] = pd.to_numeric(df["Open"], errors="coerce") df["High"] = pd.to_numeric(df["High"], errors="coerce") df["Low"] = pd.to_numeric(df["Low"], errors="coerce") df["Close"] = pd.to_numeric(df["Close"], errors="coerce") df["Volume"] = pd.to_numeric(df["Volume"], errors="coerce") df.set_index("Date", inplace=True) # If is not a new kline if self.df.last_valid_index() == df.last_valid_index(): # Update last kline value self.df.drop(self.df.last_valid_index(), inplace=True) # Append df to main df self.df = self.df.append(df, ignore_index=False) # Update data self.write() return self def write(self): if self.filename is not None: lock = FileLock(self.filename + '.lock') with lock: self.df.to_csv(self.filename) def stop(self): self.logger.success("Manager stop") self.socket.close() return self
class _DataLoader: """ Private class Responsible for all ETL related tasks. Loads data from csv, fetches data from Binance API. Attributes ----------- client: Client object which is the Binance API Python wrapper Methods ------------ _load_csv _get_range _get_binance_futures_candles _timeframe_setter Please look at each method for descriptions """ def __init__(self): self.client = Client() def _load_csv(self, csv_url: str) -> pd.DataFrame: """Function used to load 1-minute historical candlestick data with a given csv url The important columns are the ones that create the candlestick (open, high, low, close) """ # Reading CSV File containing 1 min candlestick data data = pd.read_csv(csv_url, index_col='Timestamp') # Converting Timestamp numbers into a new column of readable dates data['Datetime'] = [datetime.fromtimestamp(i) for i in data.index] data[["Open", "High", "Low", "Close", "Volume"]] = data[["Open", "High", "Low", "Close", "Volume"]].astype(float) data = data[['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume']] return data def _get_binance_futures_candles(self, symbol: str, start_minutes_ago: int, end_minutes_ago: int = 0, now: float = None) -> pd.DataFrame: """ Provides a method for getting a set of candlestick data without inputting start and end date. Ex. _get_binance_futures_candles("BTCUSDT", 5, 3) = get candlestick data from 5 minutes ago to 3 minutes ago. Parameters: ----------- symbol: str Ex. "BTCUSDT", "ETHUSDT" start_minutes_ago: int Ex. 1, 5, 1000 end_minutes_ago: int Ex. 1, 5, 1000 :return pd.DataFrame of candlestick data. """ if now == None: now = time.time() seconds_in_a_minute = 60 # Multiplies second timestamp to turn into millisecond timestamp (which binance uses) timestamp_adjust = 1000 # Defining params to put in exchange API call startTime = (int(now) - seconds_in_a_minute * (start_minutes_ago) - 1) * timestamp_adjust # Ex. 1609549634 -> in seconds endTime = int(now - seconds_in_a_minute * end_minutes_ago) * timestamp_adjust limit = abs(start_minutes_ago - end_minutes_ago) data = self.client.futures_klines(symbol=symbol, interval="1m", startTime=startTime, endTime=endTime, limit=limit) return Helper.into_dataframe(data) def _get_range(self, dataframe: pd.DataFrame, start_date: str = None, end_date: str = None) -> pd.DataFrame: """Returns the range of 1-min data within specified start & end date from the entire dataset Parameters ---------- dataframe: pd.DataFrame object with a Timestamp as its index start_date: date in the format of YYYY-MM-DD format end_date: date in the format of YYYY-MM-DD format :return dataframe """ if start_date == None or end_date == None: raise Exception("No Start date given") start_date = int( time.mktime(datetime.strptime(start_date, "%Y-%m-%d").timetuple())) end_date = int( time.mktime(datetime.strptime(end_date, "%Y-%m-%d").timetuple())) return dataframe.loc[start_date:end_date] def _timeframe_setter(self, dataframe: pd.DataFrame, tf: int, shift: int = None) -> pd.DataFrame: """ Vertical way of abstracting data Converts minute candlestick data into the timeframe(tf) of choice. Parameters ----------- dataframe: the dataframe that is being passed as an argument tf: The combination of 1-min candles into one value. Number of 1-min candles combined is the timeframe value itself. The raw data is in a 1-min timeframe. Dataframe contains the following columns: ['Open', 'High', 'Low, 'Close']. Converting to a X minute timeframe is handled differently for every column of the candlestick: Close - Since all that matters is the close value every 'tf' minutes, you can skip every 'tf' minutes. Ex. df['Close'] = pd.Series([4.50, 4.60, 4.65, 4.44, 4.21, 4.54, 4.10]) _timeframe_setter(df['Close'], 2) -> [4.50, 4.65, 4.21, 4.10] _timeframe_setter(df['Close'], 3) -> [[4.50, 4.44, 4.10] Open - Same rules as Close High - Get the maximum 1-min high value given the range of the timeframe Ex. df['Close'] = pd.Series([4.50, 4.60, 4.65, 4.44, 4.21, 4.54, 4.10]) _timeframe_setter(df['High'], 2) -> [4.60, 4.65, 4.44, 4.54] _timeframe_setter(df['High'], 3) -> [4.65, 4.54] Low - Same rules as 'High', but instead the minimum of that range Volume - Same rules as "High", but instead the sum of that range If the range of tf is not even (such as having a tf=2 but only 5 elements), then the last value will be dropped :return dataframe """ if shift == None: # This is making sure that there it shifts so that the last tf candle includes the last 1-minute candle shift = tf - len(dataframe) % tf - 1 dataframe[["Open", "High", "Low", "Close", "Volume" ]] = dataframe[["Open", "High", "Low", "Close", "Volume"]].astype(float) # Creating a new dataframe so that the size of the rows of the new dataframe will be the same as the new columns df = dataframe.iloc[shift::tf].copy() # Iterating through candle data, and abstracting based on the highest, lowest and sum respectively. df['High'] = [ max(dataframe['High'][i:tf + i]) for i in range(shift, len(dataframe['High']), tf) ] df['Low'] = [ min(dataframe['Low'][i:tf + i]) for i in range(shift, len(dataframe['Low']), tf) ] df['Volume'] = [ sum(dataframe['Volume'][i:tf + i]) for i in range(shift, len(dataframe['Volume']), tf) ] # Selecting every nth value in the list, where n is the timeframe df['Close'] = [ dataframe['Close'].iloc[i:tf + i].iloc[-1] for i in range(shift, len(dataframe['Close']), tf) ] # Dropping the last value, this gets rid of the candle that isn't complete until the end of the tf df.drop(df.tail(1).index, inplace=True) return df def _timeframe_setter_v2(self, df_raw: pd.DataFrame, tf: int, shift: int = None) -> pd.DataFrame: """ WORK IN PROGRESS - Horizontal way of abstracting the data This way of abstracting data actually takes longer and more time, however it allows for complex cases in which not all data needs to have the same timeframe. """ if shift == None: # This is making sure that there it shifts so that the last tf candle includes the last 1-minute candle shift = tf - len(df_raw) % tf - 1 tf = 77 count = 0 low, high = shift, shift + tf df2 = df_raw.copy().head(0) hi_df = df_raw.loc[:, "High"] lo_df = df_raw.loc[:, "Low"] while count < 1000: df2 = df2.append( { "Datetime": df_raw.iloc[0, 0], "Open": df_raw.iloc[0, 1], "High": max(hi_df[low:high]), "Low": min(lo_df[low:high]), "Close": df_raw.iloc[-1, 4] }, ignore_index=True) low += 77 high += 77 count += 1 return df2
class _DataLoader: """ Private class Responsible for all ETL related tasks. Loads data from csv, fetches data from Binance API. Attributes ----------- binance: Client object which is the Binance API Python wrapper Methods ------------ _load_csv _get_range _get_binance_futures_candles _timeframe_setter Please look at each method for descriptions """ SECOND_TO_MILLISECOND = 1000 def __init__(self, db=False, qtrade=False, ib=False): self.sql = SqlMapper() self.ib = IB() self.binance = Client() if qtrade: self.qtrade = Questrade( token_yaml= 'C:/Users/haseab/Desktop/Python/PycharmProjects/FAB/local/Workers/access_token.yml', save_yaml=True) print('Connected to Questrade API') if db: self.conn = self.sql.connect_psql() if ib: print(self.ib.connect('127.0.0.1', 7496, 104)) def load_db_data(self, symbol, conns, chunks=3): results = [] resp_df = self.sql.SELECT( f"explain select * from candlesticks where symbol = '{symbol}'", show_select=False, cursor=conns[0].cursor()) table_row_count = int( resp_df.loc[0, 'QUERY PLAN'].split(' ')[-2].strip("rows=")) limit = table_row_count // chunks with ThreadPoolExecutor(max_workers=3) as executor: for chunk, conn in zip(range(0, table_row_count, limit), conns): print('start') cursor = conn.cursor() results.append( executor.submit( self.sql.SELECT, f"* FROM candlesticks WHERE SYMBOL = '{symbol}' AND TF = '1' LIMIT {limit} OFFSET {chunk}", cursor)) print('done') executor.shutdown(wait=True) return results def _randomly_delete_rows(self, df, percentage_of_data=0.10): index_list = [] for _ in range(len(df) // (1 / percentage_of_data)): index = random.choice(df.index) if index not in index_list: index_list.append(index) return df.drop(index_list) def _clean_1m_data(self, df): start_date = df['timestamp'].iloc[0] end_date = df['timestamp'].iloc[-1] full_timestamps = pd.DataFrame( [time for time in range(start_date, end_date + 60, 60)], columns=['timestamp']) full_df = full_timestamps.merge(df.reset_index(), on='timestamp', how='left') full_df['volume'] = full_df['volume'].fillna(0.001) filled_df = full_df.fillna(method='ffill') filled_df['date'] = [ datetime.fromtimestamp(timestamp) for timestamp in filled_df['timestamp'].values ] return filled_df def _load_csv_v2(self, csv_url): tf = csv_url.split(' ')[2][:-1] symbol = csv_url.split(' ')[1] data = pd.read_csv(csv_url) data['timestamp'] = [ int(timestamp / 1000) for timestamp in data['timestamp'] ] data['date'] = [ datetime.fromtimestamp(timestamp) for timestamp in data['timestamp'].values ] data['tf'] = [tf] * len(data) data['symbol'] = [symbol] * len(data) data[["open", "high", "low", "close", "volume"]] = data[["open", "high", "low", "close", "volume"]].astype(float) data = data[[ 'symbol', 'tf', 'timestamp', 'date', 'open', 'high', 'low', 'close', 'volume' ]] return data.set_index(["symbol", "tf", "timestamp"]) def _load_csv(self, csv_url: str) -> pd.DataFrame: """Function used to load 1-minute historical candlestick data with a given csv url The important columns are the ones that create the candlestick (open, high, low, close) """ # Reading CSV File containing 1 min candlestick data data = pd.read_csv(csv_url, index_col='timestamp') # Converting Timestamp numbers into a new column of readable dates data['date'] = [ datetime.fromtimestamp(timestamp) for timestamp in data.index ] data[["open", "high", "low", "close", "volume"]] = data[["open", "high", "low", "close", "volume"]].astype(float) data = data[['date', 'open', 'high', 'low', 'close', 'volume']] return data def _get_binance_futures_candles(self, symbol: str, tf: int, start_candles_ago: int, end_candles_ago: int = 0, now: float = None) -> pd.DataFrame: """ Provides a method for getting a set of candlestick data without inputting start and end date. Ex. _get_binance_futures_candles("BTCUSDT", 5, 3) = get candlestick data from 5 minutes ago to 3 minutes ago. Parameters: ----------- symbol: str Ex. "BTCUSDT", "ETHUSDT" start_minutes_ago: int Ex. 1, 5, 1000 end_minutes_ago: int Ex. 1, 5, 1000 :return pd.DataFrame of candlestick data. """ if now == None: now = time.time() # Defining params to put in exchange API call map_tf = { 1: "1m", 3: "3m", 5: "5m", 15: "15m", 30: "30m", 60: "1h", 120: "2h", 240: "4h", 360: "6h", 480: "8h" } start_minutes_ago = start_candles_ago * tf end_minutes_ago = end_candles_ago * tf start_time = Helper().minutes_ago_to_timestamp( start_minutes_ago, now, adjust=self.SECOND_TO_MILLISECOND) end_time = Helper().minutes_ago_to_timestamp( end_minutes_ago, now, adjust=self.SECOND_TO_MILLISECOND) num_candles = abs(start_candles_ago - end_candles_ago) data = self.binance.futures_klines(symbol=symbol, interval=map_tf[tf], startTime=start_time, endTime=end_time, limit=num_candles) return Helper.into_dataframe(data, symbol=symbol, tf=tf, index=False) def load_finviz_data(): import pandas as pd from finviz.screener import Screener filters_nyse = ['exch_nyse' ] # Shows companies in NASDAQ which are in the S&P500 stock_list_nyse = Screener( filters=filters_nyse, table='Ownership', order='Market Cap' ) # Get the performance table and sort it by price ascending filters_nasdaq = ['exch_nasd'] stock_list_nasd = Screener( filters=filters_nasdaq, table='Ownership', order='Market Cap' ) # Get the performance table and sort it by price ascending nasdaq_df = pd.DataFrame(stock_list_nasd.data).drop('No.', axis=1) nyse_df = pd.DataFrame(stock_list_nyse.data).drop('No.', axis=1) df = nyse_df.append(nasdaq_df).reset_index(drop=True) df.to_csv('finviz_stocks.csv', index=False) return df def _get_ibkr_stocks_candles(self, symbol: str, tf: int, start_time, end_time): tf_map = { 1: "1 min", 5: "5 mins", 15: "15 mins", 30: "30 mins", 60: "1 hour", 240: "4 hours", 1440: "1 day" } parsed_start, parsed_end = dateparser.parse( start_time), dateparser.parse(end_time) duration = (parsed_end - parsed_start).days + 1 bars = self.ib.reqHistoricalData(Stock(str(symbol), 'SMART', 'USD'), endDateTime=parsed_end, durationStr=f'{duration} D', barSizeSetting=tf_map[tf], whatToShow='TRADES', useRTH=False, formatDate=1) return Helper.into_dataframe(bars, symbol=symbol, tf=tf) def _get_range(self, dataframe: pd.DataFrame, start_date: str = None, end_date: str = None) -> pd.DataFrame: """Returns the range of 1-min data within specified start & end date from the entire dataset Parameters ---------- dataframe: pd.DataFrame object with a Timestamp as its index start_date: date in the format of YYYY-MM-DD format end_date: date in the format of YYYY-MM-DD format :return dataframe """ if start_date == None or end_date == None: raise Exception("No Start date given") start_date = Helper.string_to_timestamp(start_date) end_date = Helper.string_to_timestamp(end_date) # Converting from timestamp index to numbered index, then adding numbered index as column dataframe_temp = dataframe.reset_index().reset_index().set_index( 'timestamp') start_index = dataframe_temp.loc[start_date, 'index'] try: end_index = dataframe_temp.loc[end_date, 'index'] except KeyError: end_index = dataframe_temp['index'].iloc[-1] return dataframe[start_index:end_index + 1] def _timeframe_setter(self, dataframe: pd.DataFrame, skip: int, shift: int = 0, keep_last_row=False) -> pd.DataFrame: """ Vertical way of abstracting data Converts minute candlestick data into the timeframe(tf) of choice. Parameters ----------- dataframe: the dataframe that is being passed as an argument tf: The combination of 1-min candles into one value. Number of 1-min candles combined is the timeframe value itself. The raw data is in a 1-min timeframe. Dataframe contains the following columns: ['open', 'high', 'Low, 'close']. Converting to a X minute timeframe is handled differently for every column of the candlestick: Close - Since all that matters is the close value every 'tf' minutes, you can skip every 'tf' minutes. Ex. df['close'] = pd.Series([4.50, 4.60, 4.65, 4.44, 4.21, 4.54, 4.10]) _timeframe_setter(df['close'], 2) -> [4.50, 4.65, 4.21, 4.10] _timeframe_setter(df['close'], 3) -> [[4.50, 4.44, 4.10] Open - Same rules as Close High - Get the maximum 1-min high value given the range of the timeframe Ex. df['close'] = pd.Series([4.50, 4.60, 4.65, 4.44, 4.21, 4.54, 4.10]) _timeframe_setter(df['high'], 2) -> [4.60, 4.65, 4.44, 4.54] _timeframe_setter(df['high'], 3) -> [4.65, 4.54] Low - Same rules as 'high', but instead the minimum of that range Volume - Same rules as "High", but instead the sum of that range If the range of tf is not even (such as having a tf=2 but only 5 elements), then the last value will be dropped :return dataframe """ if skip == 1: return dataframe base_tf = int(dataframe['tf'].iloc[0]) if shift == None: # This is making sure that there it shifts so that the last tf candle includes the last 1-minute candle shift = skip - len(dataframe) % skip - 1 dataframe[["open", "high", "low", "close", "volume" ]] = dataframe[["open", "high", "low", "close", "volume"]].astype(float) # Creating a new dataframe so that the size of the rows of the new dataframe will be the same as the new columns df = dataframe.iloc[shift::skip].copy() rolled_df = dataframe.rolling(skip) high = rolled_df['high'].max() low = rolled_df['low'].min() volume = rolled_df['volume'].sum() close = dataframe.copy()['close'] # Abstracting based on the highest, lowest and sum respectively. df['high'] = np.append(high.iloc[shift + skip::skip].values, high.iloc[-1]) df['low'] = np.append(low.iloc[shift + skip::skip].values, low.iloc[-1]) df['volume'] = np.append(volume.iloc[shift + skip::skip].values, volume.iloc[-1]) # Selecting every nth value in the list, where n is the timeframe try: df['close'] = close.iloc[shift + skip - 1::skip].values except ValueError as e: df['close'] = np.append(close.iloc[shift + skip - 1::skip].values, close.iloc[-1]) tf = base_tf * skip df['tf'] = [tf] * len(df['volume']) # Dropping the last value, this gets rid of the candle that isn't complete until the end of the tf if not keep_last_row: df.drop(df.tail(1).index, inplace=True) return df.reset_index().set_index(['symbol', 'tf', 'timestamp']) def _get_fast_questrade_data(self, symbol, start_datetime, end_datetime, tf_str, tf): data = self.qtrade.get_historical_data(symbol, start_datetime, end_datetime, tf_str) return Helper.into_dataframe(data, symbol=symbol, tf=tf, qtrade=True) def _get_fast_ibkr_data(self, symbol, duration, end_datetime, tf_str, tf): data = self.ib.reqHistoricalData(Stock(str(symbol), 'SMART', 'USD'), endDateTime=end_datetime, durationStr=f'{duration} D', barSizeSetting=tf_str, whatToShow='TRADES', useRTH=False, formatDate=1) return Helper.into_dataframe(data, symbol=symbol, tf=tf) ################################################# ASYNC FUNCTIONS ############################################################ async def _async_get_fast_questrade_data(self, symbol, start_datetime, end_datetime, tf_str, tf): data = self.qtrade.get_historical_data(symbol, start_datetime, end_datetime, tf_str) return Helper.into_dataframe(data, symbol=symbol, tf=tf, qtrade=True) async def _async_get_fast_ibkr_data(self, symbol, duration, end_datetime, tf_str, tf): data = await self.ib.reqHistoricalDataAsync( Stock(str(symbol), 'SMART', 'USD'), endDateTime=end_datetime, durationStr=f'{duration} D', barSizeSetting=tf_str, whatToShow='TRADES', useRTH=False, formatDate=1) print(symbol, tf) # return data return Helper.into_dataframe(data, symbol=symbol, tf=tf) ################################################################################################################################## def get_ibkr_stock_candles(self, symbol, tf, start_time, end_time): tf_map = { 1: "1 min", 5: "5 mins", 15: "15 mins", 30: "30 mins", 60: "1 hour", 240: "4 hours", 1440: "1 day" } start_datetime, end_datetime = dateparser.parse( start_time), dateparser.parse(end_time) duration = (end_datetime - start_datetime).days + 1 data = self.ib.reqHistoricalData(Stock(str(symbol), 'SMART', 'USD'), endDateTime=end_datetime, durationStr=f'{duration} D', barSizeSetting=tf_map[tf], whatToShow='TRADES', useRTH=False, formatDate=1) return util.df(data) def get_questrade_stock_candles(self, symbol: str, tf: int, start_time, end_time): tf_map = { 1: "OneMinute", 5: "FiveMinutes", 15: "FifteenMinutes", 30: "HalfHour", 60: "OneHour", 240: "FourHours", 1440: "OneDay" } parsed_start, parsed_end = dateparser.parse( start_time), dateparser.parse(end_time) parsed_start, parsed_end = parsed_start.strftime( '%Y-%m-%d %H:%M:%S.%f'), parsed_end.strftime( '%Y-%m-%d %H:%M:%S.%f') print('finished converting the times', parsed_start, parsed_end) data = self.qtrade.get_historical_data(symbol, parsed_start, parsed_end, tf_map[tf]) print('got data', len(data)) return Helper.into_dataframe(data, symbol=symbol, tf=tf, qtrade=True) def get_binance_candles(self, symbol, tf, start_date, end_date=None): map_tf = { 1: "1m", 3: "3m", 5: "5m", 15: "15m", 30: "30m", 60: "1h", 120: "2h", 240: "4h", 360: "6h", 480: "8h" } lst = self.binance.get_historical_klines(symbol=symbol, interval=map_tf[tf], start_str=start_date) return Helper.into_dataframe(lst, symbol=symbol, tf=tf, index=False) def get_all_binance_data(self, symbol, tf, start_date, end_date=None): map_tf = { 1: "1m", 3: "3m", 5: "5m", 15: "15m", 30: "30m", 60: "1h", 120: "2h", 240: "4h", 360: "6h", 480: "8h" } list_symbol = self.binance.get_historical_klines(symbol=symbol, interval=map_tf[tf], start_str=start_date) df_symbol = pd.DataFrame(list_symbol) df_symbol.columns = [ "timestamp", "open", "high", "low", "close", "volume", "timestamp_end", "", "", "", "", "" ] ##Fixing Columns df_symbol['timestamp'] = [ int(timestamp / 1000) for timestamp in df_symbol['timestamp'] ] df_symbol['date'] = [ datetime.fromtimestamp(timestamp) for timestamp in df_symbol['timestamp'].values ] df_symbol['tf'] = [tf[:-1]] * len(df_symbol) df_symbol['symbol'] = [symbol] * len(df_symbol) df_symbol[["open", "high", "low", "close", "volume" ]] = df_symbol[["open", "high", "low", "close", "volume"]].astype(float) df_symbol = df_symbol[[ 'symbol', 'tf', 'timestamp', 'date', 'open', 'high', 'low', 'close', 'volume' ]] df_symbol = df_symbol.set_index(["symbol", "tf", "timestamp"]) start_date = str(df_symbol.iloc[0, 0])[:10] string = f"Binance {symbol} {tf}m data from {start_date} to {str(datetime.now())[:10]}.csv" print(string) # df_symbol.to_csv(string) return df_symbol