def make_tick_samples(config: dict, sec_span: int = 1):
    """
    makes tick samples from agg_trades

    Reads every csv chunk in
    historical_data/<exchange>/agg_trades_<spot|futures>/<symbol>/ whose
    filename timestamp range overlaps [start_date, end_date], keeps the ticks
    whose timestamp lies inside that range (inclusive), sorts them by
    timestamp and hands them to calc_samples.

    ticks handed to calc_samples are rows of (timestamp, qty, price)

    chunk filenames must consist of four underscore separated integers
    followed by ".csv"; the last two are the first and last timestamp of
    the chunk.

    config must include parameters
    - exchange: str
    - symbol: str
    - spot: bool
    - start_date: str
    - end_date: str

    sec_span is multiplied by 1000 and forwarded as second argument to
    calc_samples (presumably the sample width in milliseconds; confirm
    against calc_samples).

    returns the result of calc_samples, or None if the data directory does
    not exist.
    """
    for key in ["exchange", "symbol", "spot", "start_date", "end_date"]:
        assert key in config, f"config is missing required key '{key}'"
    start_ts = date_to_ts(config["start_date"])
    end_ts = date_to_ts(config["end_date"])
    ticks_filepath = os.path.join(
        "historical_data",
        config["exchange"],
        f"agg_trades_{'spot' if config['spot'] else 'futures'}",
        config["symbol"],
        "",
    )
    if not os.path.exists(ticks_filepath):
        return None
    ticks_filenames = sorted(
        f for f in os.listdir(ticks_filepath) if f.endswith(".csv"))
    # Collect the per-file arrays and concatenate once after the loop;
    # concatenating inside the loop re-copies all previously loaded ticks for
    # every file. The empty float array seeds the list so the resulting
    # dtype and the "no matching file" result (shape (0, 3)) are unchanged.
    chunks = [np.empty((0, 3))]
    sts = time()
    for f in ticks_filenames:
        _, _, first_ts, last_ts = map(int, f.replace(".csv", "").split("_"))
        if first_ts > end_ts or last_ts < start_ts:
            # chunk lies entirely outside the requested range
            continue
        print(f"\rloading chunk {ts_to_date(first_ts / 1000)}", end=" ")
        tdf = pd.read_csv(os.path.join(ticks_filepath, f))
        tdf = tdf[(tdf.timestamp >= start_ts) & (tdf.timestamp <= end_ts)]
        chunks.append(tdf[["timestamp", "qty", "price"]].values)
        del tdf
    ticks = np.concatenate(chunks)
    del chunks
    # files are visited in filename order, which need not be time order,
    # so sort on the timestamp column before sampling
    samples = calc_samples(ticks[ticks[:, 0].argsort()], sec_span * 1000)
    print(
        f"took {time() - sts:.2f} seconds to load {len(ticks)} ticks, creating {len(samples)} samples"
    )
    return samples
def make_tick_samples(config: dict, sec_span: int = 1):
    '''
    Build tick samples from locally stored agg_trades csv chunks.

    Every csv file in
    historical_data/<exchange>/agg_trades_<spot|futures>/<symbol>/
    whose filename timestamps overlap the requested date range is read,
    trimmed to that range and appended as rows of (timestamp, qty, price).
    The rows are then ordered by timestamp and given to calc_samples
    together with sec_span * 1000.

    config must include parameters
    - exchange: str
    - symbol: str
    - spot: bool
    - start_date: str
    - end_date: str

    Returns what calc_samples returns; returns None when the directory is
    missing.
    '''
    required_keys = ('exchange', 'symbol', 'spot', 'start_date', 'end_date')
    for required in required_keys:
        assert required in config

    range_start = date_to_ts(config['start_date'])
    range_end = date_to_ts(config['end_date'])

    market = 'spot' if config['spot'] else 'futures'
    # the trailing '' makes the joined path end with a separator, which the
    # plain string concatenation below relies on
    chunk_dir = os.path.join(
        'historical_data', config['exchange'], f'agg_trades_{market}',
        config['symbol'], '')
    if not os.path.exists(chunk_dir):
        return

    csv_names = [name for name in os.listdir(chunk_dir) if name.endswith('.csv')]
    csv_names.sort()

    collected = np.empty((0, 3))
    started = time()
    for csv_name in csv_names:
        # filename layout: <int>_<int>_<first timestamp>_<last timestamp>.csv
        stem_parts = csv_name.replace('.csv', '').split('_')
        _, _, chunk_first, chunk_last = [int(part) for part in stem_parts]
        overlaps = chunk_first <= range_end and chunk_last >= range_start
        if not overlaps:
            continue
        print(f'\rloading chunk {ts_to_date(chunk_first / 1000)}', end=' ')
        frame = pd.read_csv(chunk_dir + csv_name)
        in_range = (frame.timestamp >= range_start) & (frame.timestamp <= range_end)
        rows = frame[in_range][['timestamp', 'qty', 'price']].values
        collected = np.concatenate((collected, rows))
        del frame

    by_time = collected[:, 0].argsort()
    samples = calc_samples(collected[by_time], sec_span * 1000)
    elapsed = time() - started
    print(
        f'took {elapsed:.2f} seconds to load {len(collected)} ticks, creating {len(samples)} samples'
    )
    del collected
    return samples
async def init_emas(self) -> None:
    """
    Initialise the EMA state of the instance from fetched ohlcvs.

    Fetches 1m ohlcvs plus one coarser interval, merges them by timestamp,
    turns them into samples with calc_samples and sets:
    - self.emas_long / self.emas_short: calc_emas_last over column 2 of the
      samples (the ohlcv close values fed in as third column)
    - self.alpha_long / self.alpha_short: 2 / (span + 1)
    - self.alpha__long / self.alpha__short: 1 - alpha
    - self.ema_sec: current unix time as int

    Spans are multiplied by 60 before use (presumably minutes converted to
    seconds; confirm against the callers).
    """
    ohlcvs1m = await self.fetch_ohlcvs(interval="1m")
    longest_span = max([*self.ema_spans_long, *self.ema_spans_short])

    # pick the first interval for which the longest span fits into
    # len(ohlcvs1m) candles of that width; fall back to the coarsest one
    candidates = ((5, "5m"), (15, "15m"), (30, "30m"), (60, "1h"), (60 * 4, "4h"))
    interval = candidates[-1][1]
    for minutes, label in candidates:
        if longest_span <= len(ohlcvs1m) * minutes:
            interval = label
            break

    coarse = await self.fetch_ohlcvs(interval=interval)

    # merge on timestamp; 1m candles come last and therefore win on clashes
    by_timestamp = {}
    for candle in coarse + ohlcvs1m:
        by_timestamp[candle["timestamp"]] = candle

    rows = []
    for ts in sorted(by_timestamp):
        candle = by_timestamp[ts]
        rows.append([candle["timestamp"], candle["volume"], candle["close"]])
    samples1s = calc_samples(numpyize(rows))

    long_spans = np.array(self.ema_spans_long) * 60
    short_spans = np.array(self.ema_spans_short) * 60
    closes = samples1s[:, 2]

    self.emas_long = calc_emas_last(closes, long_spans)
    self.emas_short = calc_emas_last(closes, short_spans)

    self.alpha_long = 2 / (long_spans + 1)
    self.alpha__long = 1 - self.alpha_long
    self.alpha_short = 2 / (short_spans + 1)
    self.alpha__short = 1 - self.alpha_short

    self.ema_sec = int(time())
async def prepare_files(self):
    """
    Takes downloaded data and prepares a numpy array for use in backtesting.

    Reads the csv files returned by self.get_filenames() whose filename
    timestamps overlap [self.start_time, self.end_time], samples their ticks
    with calc_samples and writes the samples into one pre-allocated array
    with one row per sample_size_ms (1000) step. Column 0 holds the sample
    timestamp and column 2 the price; column 1 is whatever calc_samples
    produces from the qty column (presumably the sampled quantity; confirm
    against calc_samples). Gaps between consecutive sample blocks, and any
    rows left unfilled at the end, are filled with zero in column 1 and the
    last known price in column 2.

    The ticks of roughly the last two seconds of every file are held back
    and merged into the next file, so samples that straddle a file boundary
    are built from all of their ticks.

    @return: None; the array is stored at self.tick_filepath with np.save
    """
    sample_size_ms = 1000
    csv_columns = ["price", "is_buyer_maker", "timestamp", "qty"]
    sample_columns = ["timestamp", "qty", "price"]

    def read_ticks(filename):
        # Load one csv file with every used column parsed as float64.
        return pd.read_csv(
            os.path.join(self.filepath, filename),
            dtype={column: np.float64 for column in csv_columns},
            usecols=csv_columns,
        )

    def clip_to_range(frame):
        # Keep only rows with start_time <= timestamp <= end_time.
        return frame[(frame["timestamp"] >= self.start_time)
                     & (frame["timestamp"] <= self.end_time)]

    def floor_to_sample(timestamp):
        # Round a timestamp down to a multiple of sample_size_ms.
        return timestamp // sample_size_ms * sample_size_ms

    filenames = [
        f for f in self.get_filenames()
        if int(f.split("_")[3].split(".")[0]) >= self.start_time
        and int(f.split("_")[2]) <= self.end_time
    ]
    left_overs = pd.DataFrame()
    current_index = 0

    # Best effort: if the first/last file cannot be read or holds no tick in
    # range (this includes an empty filenames list), fall back to the
    # requested start/end time.
    try:
        first_frame = clip_to_range(read_ticks(filenames[0]))
        earliest_time = floor_to_sample(first_frame.timestamp.iloc[0])
    except Exception as e:
        print_(["Error in determining earliest time", e])
        earliest_time = self.start_time

    try:
        last_frame = clip_to_range(read_ticks(filenames[-1]))
        latest_time = floor_to_sample(last_frame.timestamp.iloc[-1])
    except Exception as e:
        print_(["Error in determining latest time", e])
        latest_time = self.end_time

    array = np.zeros(
        (int((latest_time - earliest_time) / sample_size_ms + 1), 3),
        dtype=np.float64,
    )

    def append_samples(sampled_ticks, index):
        # Write sampled_ticks into array starting at index and return the
        # index following the last written row. If the first new sample does
        # not directly follow the previously written row, the missing rows
        # are inserted first, carrying the previous price forward.
        if index != 0 and array[index - 1, 0] + sample_size_ms != sampled_ticks[0, 0]:
            size = int(
                (sampled_ticks[0, 0] - array[index - 1, 0]) / sample_size_ms) - 1
            tmp = np.zeros((size, 3), dtype=np.float64)
            tmp[:, 0] = np.arange(
                array[index - 1, 0] + sample_size_ms,
                sampled_ticks[0, 0],
                sample_size_ms,
                dtype=np.float64,
            )
            tmp[:, 2] = array[index - 1, 2]
            array[index:index + len(tmp)] = tmp
            index += len(tmp)
        array[index:index + len(sampled_ticks)] = sampled_ticks
        return index + len(sampled_ticks)

    for f in filenames:
        chunk = pd.concat([left_overs, read_ticks(f)])
        chunk.sort_values("timestamp", inplace=True)
        chunk = clip_to_range(chunk)
        # A file may overlap the range by name yet hold no tick inside it;
        # iloc[-1] would raise on the empty frame, so skip the sampling.
        if not chunk.empty:
            # Hold back the last (partial) second plus one full second.
            cut_off = (floor_to_sample(chunk.timestamp.iloc[-1])
                       - 1 - (1 * sample_size_ms))
            left_overs = chunk[chunk["timestamp"] > cut_off]
            chunk = chunk[chunk["timestamp"] <= cut_off]
            # If every tick was held back there is nothing to sample yet.
            if not chunk.empty:
                sampled_ticks = calc_samples(chunk[sample_columns].values)
                current_index = append_samples(sampled_ticks, current_index)
        print(
            "\rloaded chunk of data",
            f,
            ts_to_date(float(f.split("_")[2]) / 1000),
            end=" ",
        )
    print("\n")

    # Fill in anything left over
    if not left_overs.empty:
        sampled_ticks = calc_samples(left_overs[sample_columns].values)
        current_index = append_samples(sampled_ticks, current_index)

    # Fill the gap at the end with the latest price
    # Should not be necessary anymore
    # The check is current_index < len(array), not current_index + 1, so
    # that a single missing trailing row is filled as well instead of being
    # left as an all-zero row.
    # NOTE(review): if nothing was loaded (current_index == 0) this reads
    # array[-1], which is still zero - confirm whether that case can occur.
    if current_index < len(array):
        size = len(array) - current_index
        tmp = np.zeros((size, 3), dtype=np.float64)
        tmp[:, 0] = np.arange(
            array[current_index - 1, 0] + sample_size_ms,
            array[current_index - 1, 0] + ((size + 1) * sample_size_ms),
            sample_size_ms,
            dtype=np.float64,
        )
        tmp[:, 2] = array[current_index - 1, 2]
        array[current_index:current_index + len(tmp)] = tmp
        current_index += len(tmp)

    print_([
        "Saving single file with",
        len(array),
        " ticks to",
        self.tick_filepath,
        "...",
    ])
    np.save(self.tick_filepath, array)
    print_(["Saved single file!"])