Ejemplo n.º 1
0
def make_tick_samples(config: dict, sec_span: int = 1):
    """
    Build tick samples from downloaded agg_trades csv files.

    Every csv file found under
    historical_data/<exchange>/agg_trades_<spot|futures>/<symbol>/ whose
    filename range overlaps [start_date, end_date] is loaded, the ticks inside
    that range are kept, and all of them are passed to calc_samples sorted by
    timestamp.

    Ticks are handed to calc_samples as rows of (timestamp, qty, price).

    config must include parameters
    - exchange: str
    - symbol: str
    - spot: bool
    - start_date: str
    - end_date: str

    sec_span is forwarded to calc_samples as sec_span * 1000
    (presumably the sample width in milliseconds -- confirm against
    calc_samples).

    Csv filenames must consist of four underscore separated integers, the last
    two being the first and last timestamp contained in the file; any other
    .csv name raises ValueError.

    Returns the result of calc_samples, or None if the data directory does not
    exist.
    """
    for key in ["exchange", "symbol", "spot", "start_date", "end_date"]:
        assert key in config, f"config is missing required key {key!r}"
    start_ts = date_to_ts(config["start_date"])
    end_ts = date_to_ts(config["end_date"])
    ticks_filepath = os.path.join(
        "historical_data",
        config["exchange"],
        f"agg_trades_{'spot' if config['spot'] else 'futures'}",
        config["symbol"],
        "",
    )
    if not os.path.exists(ticks_filepath):
        return
    ticks_filenames = sorted(
        [f for f in os.listdir(ticks_filepath) if f.endswith(".csv")])
    # Collect the per-file arrays and concatenate once at the end; growing one
    # array inside the loop would copy all previously loaded ticks per file.
    # The empty float (0, 3) seed keeps shape and dtype of the result the same
    # as before, also when no file matches.
    chunks = [np.empty((0, 3))]
    sts = time()
    for f in ticks_filenames:
        _, _, first_ts, last_ts = map(int, f.replace(".csv", "").split("_"))
        if first_ts > end_ts or last_ts < start_ts:
            # file lies completely outside of the requested range
            continue
        print(f"\rloading chunk {ts_to_date(first_ts / 1000)}", end="  ")
        tdf = pd.read_csv(os.path.join(ticks_filepath, f))
        tdf = tdf[(tdf.timestamp >= start_ts) & (tdf.timestamp <= end_ts)]
        chunks.append(tdf[["timestamp", "qty", "price"]].values)
    ticks = np.concatenate(chunks)
    samples = calc_samples(ticks[ticks[:, 0].argsort()], sec_span * 1000)
    print(
        f"took {time() - sts:.2f} seconds to load {len(ticks)} ticks, creating {len(samples)} samples"
    )
    return samples
Ejemplo n.º 2
0
def make_tick_samples(config: dict, sec_span: int = 1):
    '''
    Build tick samples from downloaded agg_trades csv files.

    Every csv file found under
    historical_data/<exchange>/agg_trades_<spot|futures>/<symbol>/ whose
    filename range overlaps [start_date, end_date] is loaded, the ticks inside
    that range are kept, and all of them are passed to calc_samples sorted by
    timestamp.

    Ticks are handed to calc_samples as rows of (timestamp, qty, price).

    config must include parameters
    - exchange: str
    - symbol: str
    - spot: bool
    - start_date: str
    - end_date: str

    sec_span is forwarded to calc_samples as sec_span * 1000
    (presumably the sample width in milliseconds -- confirm against
    calc_samples).

    Csv filenames must consist of four underscore separated integers, the last
    two being the first and last timestamp contained in the file; any other
    .csv name raises ValueError.

    Returns the result of calc_samples, or None if the data directory does not
    exist.
    '''
    for key in ['exchange', 'symbol', 'spot', 'start_date', 'end_date']:
        assert key in config, f'config is missing required key {key!r}'
    start_ts = date_to_ts(config['start_date'])
    end_ts = date_to_ts(config['end_date'])
    ticks_filepath = os.path.join(
        'historical_data', config['exchange'],
        f"agg_trades_{'spot' if config['spot'] else 'futures'}",
        config['symbol'], '')
    if not os.path.exists(ticks_filepath):
        return
    ticks_filenames = sorted(
        [f for f in os.listdir(ticks_filepath) if f.endswith('.csv')])
    # Gather one array per file and join them in a single concatenate call;
    # extending a growing array per file re-copies every tick loaded so far.
    # Seeding with an empty float (0, 3) array preserves the result's shape
    # and dtype, including the case where no file overlaps the range.
    loaded = [np.empty((0, 3))]
    sts = time()
    for f in ticks_filenames:
        _, _, first_ts, last_ts = map(int, f.replace('.csv', '').split('_'))
        if first_ts > end_ts or last_ts < start_ts:
            # nothing in this file can fall inside the requested range
            continue
        print(f'\rloading chunk {ts_to_date(first_ts / 1000)}', end='  ')
        tdf = pd.read_csv(os.path.join(ticks_filepath, f))
        tdf = tdf[(tdf.timestamp >= start_ts) & (tdf.timestamp <= end_ts)]
        loaded.append(tdf[['timestamp', 'qty', 'price']].values)
    ticks = np.concatenate(loaded)
    samples = calc_samples(ticks[ticks[:, 0].argsort()], sec_span * 1000)
    print(
        f'took {time() - sts:.2f} seconds to load {len(ticks)} ticks, creating {len(samples)} samples'
    )
    return samples
Ejemplo n.º 3
0
 async def init_emas(self) -> None:
     """
     Initialize the long and short EMAs and their smoothing factors.

     Fetches 1m candles plus one coarser interval, merges them by timestamp
     (a 1m candle replaces a coarser candle with the same timestamp), runs the
     merged (timestamp, volume, close) rows through calc_samples and seeds the
     EMAs from column 2 of the result. Spans are multiplied by 60 before use
     (presumably minutes -> seconds; confirm against the config).
     """
     candles_1m = await self.fetch_ohlcvs(interval="1m")
     longest_span = max([*self.ema_spans_long, *self.ema_spans_short])

     # Pick the first interval for which the 1m candle count times the
     # interval's minutes reaches the longest span; fall back to the coarsest
     # interval when none does.
     candidates = (("5m", 5), ("15m", 15), ("30m", 30), ("1h", 60),
                   ("4h", 60 * 4))
     interval = next(
         (name for name, minutes in candidates
          if longest_span <= len(candles_1m) * minutes),
         candidates[-1][0],
     )

     coarse_candles = await self.fetch_ohlcvs(interval=interval)
     # Later entries win, so 1m candles override coarse ones on equal timestamps.
     merged = {}
     for candle in coarse_candles + candles_1m:
         merged[candle["timestamp"]] = candle
     ordered = sorted(merged.values(), key=lambda c: c["timestamp"])
     rows = [[c["timestamp"], c["volume"], c["close"]] for c in ordered]
     samples1s = calc_samples(numpyize(rows))

     spans1s_long = np.array(self.ema_spans_long) * 60
     spans1s_short = np.array(self.ema_spans_short) * 60
     self.emas_long = calc_emas_last(samples1s[:, 2], spans1s_long)
     self.emas_short = calc_emas_last(samples1s[:, 2], spans1s_short)

     # alpha is the EMA smoothing factor, alpha_ its complement.
     self.alpha_long = 2 / (spans1s_long + 1)
     self.alpha__long = 1 - self.alpha_long
     self.alpha_short = 2 / (spans1s_short + 1)
     self.alpha__short = 1 - self.alpha_short
     self.ema_sec = int(time())
Ejemplo n.º 4
0
    async def prepare_files(self):
        """
        Takes downloaded data and prepares a numpy array for use in backtesting.

        Every csv file whose filename range overlaps
        [self.start_time, self.end_time] is read, its ticks are passed through
        calc_samples and the samples are written into one array with a row per
        sample_size_ms (1000) step between the first and the last tick in
        range. Holes between consecutive batches of samples are filled with
        rows that carry the missing timestamps in column 0 and the previous
        row's column 2 value (the latest price); column 1 stays 0 in those
        rows. The array is saved to self.tick_filepath with np.save.

        Filenames are expected to hold the first timestamp in their third and
        the last timestamp in their fourth underscore separated field.

        @return: None
        """
        sample_size_ms = 1000
        columns = ["price", "is_buyer_maker", "timestamp", "qty"]
        dtypes = {column: np.float64 for column in columns}

        def read_frame(filename):
            # Load one downloaded csv file with the columns used here as floats.
            return pd.read_csv(
                os.path.join(self.filepath, filename),
                dtype=dtypes,
                usecols=columns,
            )

        def clip_to_range(frame):
            # Keep only the ticks inside [self.start_time, self.end_time].
            return frame[(frame["timestamp"] >= self.start_time)
                         & (frame["timestamp"] <= self.end_time)]

        def floor_to_sample(timestamp):
            # Round a timestamp down to a multiple of sample_size_ms.
            return timestamp // sample_size_ms * sample_size_ms

        filenames = [
            f for f in self.get_filenames()
            if int(f.split("_")[3].split(".")[0]) >= self.start_time
            and int(f.split("_")[2]) <= self.end_time
        ]

        # Best effort: fall back to the requested bounds when the first/last
        # file is missing, unreadable or has no tick inside the range.
        try:
            first_frame = clip_to_range(read_frame(filenames[0]))
            earliest_time = floor_to_sample(first_frame.timestamp.iloc[0])
        except Exception as e:
            print_(["Error in determining earliest time", e])
            earliest_time = self.start_time

        try:
            last_frame = clip_to_range(read_frame(filenames[-1]))
            latest_time = floor_to_sample(last_frame.timestamp.iloc[-1])
        except Exception as e:
            print_(["Error in determining latest time", e])
            latest_time = self.end_time

        array = np.zeros(
            (int((latest_time - earliest_time) / sample_size_ms + 1), 3),
            dtype=np.float64,
        )

        def write_samples(samples, index):
            # Write samples into array starting at index and return the next
            # free index. If the samples do not directly follow the row before
            # index, the hole is first filled with the previous price.
            if len(samples) == 0:
                return index
            previous_ts = array[index - 1, 0] if index != 0 else None
            if index != 0 and previous_ts + sample_size_ms != samples[0, 0]:
                size = int(
                    (samples[0, 0] - previous_ts) / sample_size_ms) - 1
                filler = np.zeros((size, 3), dtype=np.float64)
                filler[:, 0] = np.arange(
                    previous_ts + sample_size_ms,
                    samples[0, 0],
                    sample_size_ms,
                    dtype=np.float64,
                )
                filler[:, 2] = array[index - 1, 2]
                array[index:index + len(filler)] = filler
                index += len(filler)
            array[index:index + len(samples)] = samples
            return index + len(samples)

        left_overs = pd.DataFrame()
        current_index = 0

        for f in filenames:
            chunk = read_frame(f)
            if not left_overs.empty:
                chunk = pd.concat([left_overs, chunk])
            chunk = clip_to_range(chunk.sort_values("timestamp"))

            # A file may contain no tick inside the requested range; there is
            # nothing to sample then (and no last timestamp to cut at).
            if not chunk.empty:
                # Hold back the most recent ticks so that a sample interval
                # which continues in the next file is not cut in two.
                cut_off = (floor_to_sample(chunk.timestamp.iloc[-1]) - 1 -
                           (1 * sample_size_ms))
                left_overs = chunk[chunk["timestamp"] > cut_off]
                chunk = chunk[chunk["timestamp"] <= cut_off]
                if not chunk.empty:
                    sampled_ticks = calc_samples(
                        chunk[["timestamp", "qty", "price"]].values)
                    current_index = write_samples(sampled_ticks,
                                                  current_index)

            print(
                "\rloaded chunk of data",
                f,
                ts_to_date(float(f.split("_")[2]) / 1000),
                end="     ",
            )
        print("\n")

        # Fill in anything left over
        if not left_overs.empty:
            sampled_ticks = calc_samples(
                left_overs[["timestamp", "qty", "price"]].values)
            current_index = write_samples(sampled_ticks, current_index)

        # Fill the gap at the end with the latest price
        # Should not be necessary anymore
        # Also covers a single missing row (which would otherwise stay all
        # zero); skipped when nothing was written, as there is no latest price.
        if 0 < current_index < len(array):
            size = len(array) - current_index
            tmp = np.zeros((size, 3), dtype=np.float64)
            tmp[:, 0] = array[current_index - 1, 0] + sample_size_ms * np.arange(
                1, size + 1, dtype=np.float64)
            tmp[:, 2] = array[current_index - 1, 2]
            array[current_index:current_index + len(tmp)] = tmp
            current_index += len(tmp)

        print_([
            "Saving single file with",
            len(array),
            " ticks to",
            self.tick_filepath,
            "...",
        ])
        np.save(self.tick_filepath, array)
        print_(["Saved single file!"])