# Standard-library and third-party imports used in this section; project-local
# helpers (ts_to_date, ts_to_day, make_get_filepath, remove_duplicates,
# sort_and_drop_duplicates_by_index, write_to_cache, clear_cache, begin_csv,
# get_filenames, load_key_secret, analyze_my_trades) are assumed to be
# imported elsewhere in the module.
import hashlib
import hmac
import json
import os
import sys
from time import time, sleep
from typing import Callable, Iterator
from urllib.parse import urlencode

import pandas as pd
import requests


def print_(args, r=False):
    # Prefix the log line with a second-resolution timestamp.
    line = ts_to_date(time())[:19] + ' '
    str_args = '{} ' * len(args)
    line += str_args.format(*args)
    if r:
        # r=True rewrites the current terminal line in place.
        sys.stdout.write('\r' + line + ' ')
    else:
        print(line)
    sys.stdout.flush()
    return line
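
# Illustrative usage of print_ (not part of the original module): r=True keeps
# a polling loop on a single terminal line instead of scrolling.
def example_print_():
    for n in range(3):
        print_(['polling, attempt', n], r=True)
    print_(['done'])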
def write_cache(self, filepath: str, items: [dict],
                condition: Callable = lambda x: True):
    # Append each item passing `condition` to a monthly sharded JSON-lines
    # file, e.g. {filepath}2020-07.txt, and return the items written.
    written_items = []
    for d in remove_duplicates(items, key='id', sort=True):
        if condition(d):
            month = ts_to_date(d['timestamp'] / 1000)[:7]
            with open(f'{filepath}{month}.txt', 'a') as f:
                f.write(json.dumps(d) + '\n')
            written_items.append(d)
    return written_items
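
# Usage sketch for write_cache (illustrative; `bot` stands for an initialized
# instance of the enclosing class): append only items newer than the newest
# cached id, mirroring the call in init_loan_history below.
#
#     written = bot.write_cache(cache_filepath, fetched_items,
#                               condition=lambda x: x['id'] > most_recent_id)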
def init_my_trades(self, symbol: str) -> None:
    # Load up to 90 days of cached trades, then page forward from the most
    # recent cached id until the exchange returns a short or repeated batch.
    no_dash = symbol.replace('/', '_')
    cache_filepath = make_get_filepath(f'cache/binance/{self.user}/my_trades/{no_dash}/')
    cached_history = []
    start_month = ts_to_date(self.cc.milliseconds() / 1000 - 60 * 60 * 24 * 90)[:7]
    filenames = [fp for fp in os.listdir(cache_filepath)
                 if fp.endswith('txt') and fp >= start_month]
    for filename in sorted(filenames):
        with open(cache_filepath + filename) as f:
            cached_history += [json.loads(line) for line in f.readlines()]
    cached_history = remove_duplicates(cached_history, key='id', sort=True)
    if not cached_history:
        most_recent_id = 0
        limit = 1000
        from_id = 0
    else:
        most_recent_id = cached_history[-1]['id']
        limit = 100
        from_id = cached_history[-1]['id'] + 1
    fetched_history = []
    prev_my_trades = []
    while True:
        my_trades = self.fetch_margin_my_trades(symbol, limit, from_id)
        fetched_history += my_trades
        if len(my_trades) < limit or my_trades == prev_my_trades:
            break
        prev_my_trades = my_trades
        limit = 1000
        from_id = my_trades[-1]['id'] + 1
        sleep(1)
    written_history = self.write_cache(cache_filepath, fetched_history)
    my_trades = remove_duplicates(cached_history + written_history, key='id', sort=True)
    # Drop trades older than the configured memory span.
    age_limit_millis = self.cc.milliseconds() - self.hyperparams['max_memory_span_millis']
    my_trades = [e for e in my_trades if e['timestamp'] > age_limit_millis]
    self.my_trades[symbol], self.my_trades_analyses[symbol] = \
        analyze_my_trades(my_trades)
    self.time_keepers['update_my_trades'][symbol] = time()
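
# Paging sketch (illustrative numbers): with one cached trade of id 41999, the
# first request is self.fetch_margin_my_trades(symbol, 100, 42000); subsequent
# batches use limit=1000 and fromId=<last id>+1, stopping once a batch comes
# back short or identical to the previous one.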
def format_mt(mt):
    formatted_mt = {'timestamp': mt['time'],
                    'datetime': ts_to_date(mt['time'] / 1000),
                    'symbol': self.nodash_to_dash_map[mt['symbol']],
                    'id': int(mt['id']),
                    'order_id': int(mt['orderId']),
                    'type': None,
                    'is_maker': mt['isMaker'],
                    'side': 'buy' if mt['isBuyer'] else 'sell',
                    'price': float(mt['price']),
                    'amount': float(mt['qty'])}
    formatted_mt['cost'] = formatted_mt['amount'] * formatted_mt['price']
    formatted_mt['fee_cost'] = float(mt['commission'])
    formatted_mt['fee_currency'] = mt['commissionAsset']
    return formatted_mt
def format_o(o):
    formatted_o = {'id': int(o['orderId']),
                   'timestamp': o['updateTime'],
                   'datetime': ts_to_date(o['updateTime'] / 1000),
                   'symbol': self.nodash_to_dash_map[o['symbol']],
                   'type': o['type'].lower(),
                   'side': o['side'].lower(),
                   'price': float(o['price']),
                   'amount': float(o['origQty']),
                   'cost': 0.0,
                   'average': None,
                   'filled': float(o['executedQty'])}
    formatted_o['remaining'] = formatted_o['amount'] - formatted_o['filled']
    formatted_o['status'] = 'open' if o['status'] == 'NEW' else o['status']
    return formatted_o
def init_loan_history(self, coin: str, side: str):
    # Load cached borrow/repay history, then page through the exchange's
    # newest-first endpoint until reaching the most recent cached id.
    assert side in ['borrow', 'repay']
    cache_filepath = make_get_filepath(f'cache/binance/{self.user}/{side}_history/{coin}/')
    cached_loan_history = []
    start_month = ts_to_date(self.cc.milliseconds() / 1000 - 60 * 60 * 24 * 90)[:7]
    filenames = [fname for fname in os.listdir(cache_filepath)
                 if fname.endswith('txt') and fname >= start_month]
    for filename in sorted(filenames):
        with open(cache_filepath + filename) as f:
            cached_loan_history += [json.loads(line) for line in f.readlines()]
    cached_loan_history = remove_duplicates(cached_loan_history, key='id', sort=True)
    if cached_loan_history:
        most_recent_id = cached_loan_history[-1]['id']
        size = 10
    else:
        most_recent_id = 0
        size = 100
    fetched_loan_history = []
    current = 1
    while True:
        loans = getattr(self, f'fetch_{side}_history')(coin, current, size)
        fetched_loan_history += loans
        if len(loans) < size or loans[0]['id'] <= most_recent_id:
            break
        current += 1
        size = 100
    fetched_loan_history = sorted(fetched_loan_history, key=lambda x: x['id'])
    # Only write entries newer than what is already cached.
    condition = lambda x: x['id'] > most_recent_id
    written_loan_history = self.write_cache(cache_filepath, fetched_loan_history, condition)
    getattr(self, f'{side}_history')[coin] = remove_duplicates(
        cached_loan_history + written_loan_history, key='id', sort=True)
    self.time_keepers[f'update_{side}_history'][coin] = time()
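
# Usage sketch (illustrative; `bot` stands for an initialized instance):
# refresh both sides of the loan ledger for a coin, then read the merged,
# deduplicated views.
#
#     bot.init_loan_history('BTC', 'borrow')
#     bot.init_loan_history('BTC', 'repay')
#     borrows, repays = bot.borrow_history['BTC'], bot.repay_history['BTC']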
def fetch_my_trades(symbol: str, n_days: float = 30, no_download: bool = False,
                    limit: int = 1000) -> pd.DataFrame:

    def request_my_trades(from_id: int = -1) -> dict:
        # Signed GET /api/v3/myTrades; the HMAC-SHA256 signature covers the
        # full query string, as Binance's authenticated endpoints require.
        timestamp = int(time() * 1000)
        url = 'https://api.binance.com/api/v3/myTrades?'
        params = {'symbol': symbol.replace('/', ''),
                  'limit': limit,
                  'timestamp': timestamp}
        if from_id > 0:
            params['fromId'] = from_id
        query_string = urlencode(params)
        params['signature'] = hmac.new(secret.encode('utf-8'),
                                       query_string.encode('utf-8'),
                                       hashlib.sha256).hexdigest()
        headers = {'X-MBX-APIKEY': key}
        return json.loads(requests.get(url, headers=headers, params=params).text)

    def format_my_trades(my_trades_: [dict]) -> [dict]:
        formatted = []
        for t in my_trades_:
            price = float(t['price'])
            amount = float(t['qty'])
            formatted.append({'symbol': symbol,
                              'id': t['id'],
                              'order_id': t['orderId'],
                              'price': price,
                              'amount': amount,
                              'cost': amount * price,
                              'side': 'buy' if t['isBuyer'] else 'sell',
                              'timestamp': t['time'],
                              'datetime': ts_to_date(t['time'] / 1000),
                              'is_maker': t['isMaker'],
                              'fee_cost': float(t['commission']),
                              'fee_currency': t['commissionAsset']})
        return formatted

    def iterate_my_trades(ids_covered: set) -> Iterator[dict]:
        # Walk backwards in batches of `limit`, skipping ids already cached.
        my_trades_ = format_my_trades(request_my_trades())
        while True:
            yield my_trades_
            from_id_ = my_trades_[0]['id']
            while from_id_ in ids_covered:
                from_id_ -= 1
            from_id_ -= limit
            my_trades_ = format_my_trades(request_my_trades(from_id_))

    def format_csv_loaded_my_trades(csv: pd.DataFrame) -> pd.DataFrame:
        if csv is None:
            return None
        return sort_and_drop_duplicates_by_index(csv[columns].set_index('id'))

    symbol_no_dash = symbol.replace('/', '_')
    key, secret = load_key_secret('binance')
    dirpath = make_get_filepath('historical_data/my_trades_margin/{}/'.format(symbol_no_dash))
    columns = ['symbol', 'id', 'order_id', 'price', 'amount', 'cost', 'side',
               'timestamp', 'datetime', 'is_maker', 'fee_cost', 'fee_currency']
    cache_path = 'historical_data/my_trades_margin_cache/{}.csv'.format(symbol_no_dash)
    since = ts_to_day(time() - 60 * 60 * 24 * n_days - 24) if n_days > 0 else '0'
    my_trades_loaded = [format_csv_loaded_my_trades(pd.read_csv(dirpath + f))
                        for f in get_filenames(dirpath) if f > since]
    if not no_download:
        clear_cache(cache_path, dirpath, columns, True, format_csv_loaded_my_trades)
        begin_csv(cache_path, columns)
        if my_trades_loaded:
            ids_covered = set(pd.concat(my_trades_loaded).index)
        else:
            ids_covered = set()
        until_ts = (time() - 60 * 60 * 24 * n_days) * 1000
        prev_id = 0
        for my_trades in iterate_my_trades(ids_covered):
            # Stop when the batch head repeats or n_days has been covered.
            if my_trades[0]['id'] == prev_id:
                break
            prev_id = my_trades[0]['id']
            write_to_cache(my_trades, columns, cache_path)
            print('fetched my_trades for {} {}'.format(
                symbol, ts_to_date(my_trades[0]['timestamp'] / 1000)))
            if my_trades[0]['timestamp'] <= until_ts:
                break
        clear_cache(cache_path, dirpath, columns, True, format_csv_loaded_my_trades)
        my_trades_loaded = [format_csv_loaded_my_trades(pd.read_csv(dirpath + f))
                            for f in get_filenames(dirpath) if f > since]
    if len(my_trades_loaded) > 0:
        return sort_and_drop_duplicates_by_index(pd.concat(my_trades_loaded))
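
# Example driver (illustrative; assumes valid API keys readable by
# load_key_secret('binance')): pull a week of fills and show the net traded
# amount per side.
def example_fetch_my_trades():
    mt = fetch_my_trades('ETH/BTC', n_days=7)
    if mt is not None:
        print(mt.groupby('side')['amount'].sum())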
def fetch_raw_trades(symbol: str, n_days: float = 7,
                     no_download: bool = False) -> pd.DataFrame:

    def request_historical_trades(from_id: int = 0) -> dict:
        # Public endpoint; aggregated trades need no signature.
        url = 'https://api.binance.com/api/v3/aggTrades?symbol={}&limit=1000'.format(
            symbol.replace('/', ''))
        if from_id > 0:
            url += '&fromId=' + str(from_id)
        return json.loads(requests.get(url).text)

    def format_raw_trades(trades_: [dict]) -> [dict]:
        return [{'agg_trade_id': t['a'],
                 'price': float(t['p']),
                 'amount': float(t['q']),
                 'timestamp': t['T'],
                 'is_buyer_maker': t['m']} for t in trades_]

    def iterate_raw_trades(ids_covered: set):
        # Walk backwards one batch at a time, skipping ids already cached.
        trades = format_raw_trades(request_historical_trades())
        while True:
            yield trades
            from_id_ = trades[0]['agg_trade_id']
            while from_id_ in ids_covered:
                # sys.stdout.write('\rskipping trades {} '.format(from_id_))
                from_id_ -= 1
            from_id_ -= (len(trades) - 1)
            from_id_ = max(0, from_id_)
            trades = format_raw_trades(request_historical_trades(from_id_))

    def format_csv_loaded_raw_trades(csv: pd.DataFrame) -> pd.DataFrame:
        if csv is None:
            return None
        return sort_and_drop_duplicates_by_index(csv[columns].set_index('agg_trade_id'))

    symbol_no_dash = symbol.replace('/', '_')
    dirpath = make_get_filepath('historical_data/raw_trades/{}/'.format(symbol_no_dash))
    columns = sorted(['agg_trade_id', 'price', 'amount', 'timestamp', 'is_buyer_maker'])
    cache_path = 'historical_data/raw_trades_cache/{}.csv'.format(symbol_no_dash)
    since = ts_to_day(time() - 60 * 60 * 24 * n_days - 24) if n_days > 0 else '0'
    if not no_download:
        clear_cache(cache_path, dirpath, columns, False, format_csv_loaded_raw_trades)
        begin_csv(cache_path, columns)
        raw_trades_loaded = [format_csv_loaded_raw_trades(pd.read_csv(dirpath + f))
                             for f in get_filenames(dirpath) if f > since]
        if raw_trades_loaded:
            raw_trades_df = sort_and_drop_duplicates_by_index(pd.concat(raw_trades_loaded))
            ids_covered = set(raw_trades_df.index)
        else:
            ids_covered = set()
        until_ts = (time() - 60 * 60 * 24 * n_days) * 1000
        rt_tss0 = set()
        for raw_trades in iterate_raw_trades(ids_covered):
            write_to_cache(raw_trades, columns, cache_path)
            print('fetched raw trades for {} {}'.format(
                symbol, ts_to_date(raw_trades[0]['timestamp'] / 1000)))
            # Stop once n_days is covered or the batch head repeats.
            if raw_trades[0]['timestamp'] <= until_ts or raw_trades[0]['timestamp'] in rt_tss0:
                break
            rt_tss0.add(raw_trades[0]['timestamp'])
        clear_cache(cache_path, dirpath, columns, False, format_csv_loaded_raw_trades)
    raw_trades_loaded = [format_csv_loaded_raw_trades(pd.read_csv(dirpath + f))
                         for f in get_filenames(dirpath) if f > since]
    if len(raw_trades_loaded) > 0:
        return sort_and_drop_duplicates_by_index(pd.concat(raw_trades_loaded))
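
# Example driver (illustrative): three days of aggregated public trades;
# is_buyer_maker False means the aggressor was a buyer.
def example_fetch_raw_trades():
    rt = fetch_raw_trades('ETH/BTC', n_days=3)
    if rt is not None:
        print('taker buy share: {:.2%}'.format((~rt['is_buyer_maker']).mean()))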
def fetch_ohlcvs(symbol: str, n_days: float = 7, timeframe: str = '1m',
                 no_download: bool = False) -> pd.DataFrame:
    '''
    Fetches OHLCV data from Binance.
    Allowed timeframes are 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 12h, 1d, 1w, 1M.
    '''
    def request_klines(start_time: int = -1) -> dict:
        url = 'https://api.binance.com/api/v3/klines?symbol={}&limit=1000&interval={}'.format(
            symbol.replace('/', ''), timeframe)
        if start_time != -1:
            url += '&startTime={}'.format(start_time)
        return json.loads(requests.get(url).text)

    def format_ohlcvs(ohlcvs_: [dict]) -> [dict]:
        formatted = [{'timestamp': e[0],
                      'open': float(e[1]),
                      'high': float(e[2]),
                      'low': float(e[3]),
                      'close': float(e[4]),
                      'volume': float(e[5]),
                      'volume_base': float(e[7]),
                      'n_trades': e[8],
                      'volume_bought': float(e[9])} for e in ohlcvs_]
        for e in formatted:
            e['volume_sold'] = e['volume'] - e['volume_bought']
        return formatted

    def iterate_ohlcvs(tss_covered: set):
        # Walk backwards one batch at a time, skipping timestamps already
        # cached; retry over gaps up to max_n_tries before yielding None.
        max_n_tries = 10
        ohlcvs = format_ohlcvs(request_klines())
        prev_ohlcvs = [{}]
        while True:
            yield ohlcvs
            from_ts = ohlcvs[0]['timestamp']
            while from_ts in tss_covered:
                from_ts -= timeframe_millis
            from_ts -= (len(ohlcvs) - 1) * timeframe_millis
            ohlcvs = format_ohlcvs(request_klines(from_ts))
            k = 0
            while ohlcvs[0] == prev_ohlcvs[0]:
                print('gaps', ts_to_date(ohlcvs[0]['timestamp'] / 1000),
                      ts_to_date(from_ts / 1000))
                from_ts -= (len(ohlcvs) - 1) * timeframe_millis
                ohlcvs = format_ohlcvs(request_klines(from_ts))
                k += 1
                if k >= max_n_tries:
                    yield None
                    break
            prev_ohlcvs = ohlcvs

    def format_csv_loaded_ohlcvs(csv: pd.DataFrame) -> pd.DataFrame:
        if csv is None:
            return None
        return sort_and_drop_duplicates_by_index(csv[columns].set_index('timestamp'))

    symbol_no_dash = symbol.replace('/', '_')
    timeframe_to_millis_map = {'1m': 60 * 1000,
                               '3m': 3 * 60 * 1000,
                               '5m': 5 * 60 * 1000,
                               '15m': 15 * 60 * 1000,
                               '30m': 30 * 60 * 1000,
                               '1h': 60 * 60 * 1000,
                               '2h': 2 * 60 * 60 * 1000,
                               '4h': 4 * 60 * 60 * 1000,
                               '6h': 6 * 60 * 60 * 1000,
                               '12h': 12 * 60 * 60 * 1000,
                               '1d': 24 * 60 * 60 * 1000,
                               '1w': 7 * 24 * 60 * 60 * 1000,
                               '1M': 30 * 24 * 60 * 60 * 1000}
    timeframe_millis = timeframe_to_millis_map[timeframe]
    dirpath = make_get_filepath('historical_data/ohlcvs_{}/{}/'.format(timeframe, symbol_no_dash))
    columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume',
               'volume_base', 'n_trades', 'volume_bought', 'volume_sold']
    cache_path = 'historical_data/ohlcvs_cache/{}_{}.csv'.format(timeframe, symbol_no_dash)
    since = ts_to_day(time() - 60 * 60 * 24 * n_days - 24) if n_days > 0 else '0'
    if not no_download:
        clear_cache(cache_path, dirpath, columns, True, format_csv_loaded_ohlcvs)
        begin_csv(cache_path, columns)
        ohlcvs_loaded = [format_csv_loaded_ohlcvs(pd.read_csv(dirpath + f))
                         for f in get_filenames(dirpath) if f > since]
        if ohlcvs_loaded:
            ohlcvs_df = sort_and_drop_duplicates_by_index(pd.concat(ohlcvs_loaded))
            tss_covered = set(ohlcvs_df.index)
        else:
            tss_covered = set()
        until_ts = (time() - 60 * 60 * 24 * n_days) * 1000
        for ohlcvs in iterate_ohlcvs(tss_covered):
            if ohlcvs is None:
                print('end of ohlcvs')
                break
            write_to_cache(ohlcvs, columns, cache_path)
            print('fetched {} ohlcvs for {} {}'.format(
                timeframe, symbol, ts_to_date(ohlcvs[0]['timestamp'] / 1000)))
            if ohlcvs[0]['timestamp'] <= until_ts:
                break
        clear_cache(cache_path, dirpath, columns, True, format_csv_loaded_ohlcvs)
    ohlcvs_loaded = [format_csv_loaded_ohlcvs(pd.read_csv(dirpath + f))
                     for f in get_filenames(dirpath) if f > since]
    if len(ohlcvs_loaded) > 0:
        return sort_and_drop_duplicates_by_index(pd.concat(ohlcvs_loaded))
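
# Example driver (illustrative): one month of hourly candles, resampled to a
# daily close via the millisecond timestamps in the index.
def example_fetch_ohlcvs():
    ohlcvs = fetch_ohlcvs('ETH/BTC', n_days=30, timeframe='1h')
    if ohlcvs is not None:
        ohlcvs.index = pd.to_datetime(ohlcvs.index, unit='ms')
        print(ohlcvs['close'].resample('1D').last())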