def update_config(self,
                  config: Dict,
                  split: Dict,
                  balance_and_pos: Optional[Dict] = None):
    if balance_and_pos:
        config.update(balance_and_pos)

    config.update({
        "start_date": ts_to_date((split["start_ms"] + self.ts_start) / 1000),
        "end_date": ts_to_date((split["end_ms"] + self.ts_start) / 1000),
        "n_days": split["diff_days"],
    })
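A minimal usage sketch, assuming a backtester instance (called backtester here) whose ts_start marks the beginning of the downloaded tick data; the split window and config values are made up for illustration:

# illustrative call; backtester, the split window and the config dict are assumptions
split = {'start_ms': 0, 'end_ms': 7 * 24 * 60 * 60 * 1000, 'diff_days': 7.0}
config = {'symbol': 'BTCUSDT'}
backtester.update_config(config, split)
# config now carries 'start_date', 'end_date' and 'n_days' for the split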
async def init_my_trades(self, age_limit_days: float = 7.0) -> [dict]:
    age_limit = self.cc.milliseconds() - 1000 * 60 * 60 * 24 * age_limit_days
    mtl = self.load_cached_my_trades()
    print(f'loaded {len(mtl)} cached my trades')
    if not mtl:
        mtl = await self.fetch_my_trades(start_time_ms=age_limit)
    else:
        mtl += await self.fetch_my_trades(start_time_ms=mtl[-1]['timestamp'])
    # de-duplicate on order_id and sort chronologically
    mtd = {t['order_id']: t for t in mtl}
    mt = sorted(mtd.values(), key=lambda x: x['timestamp'])
    if len(mt) == 0:
        return
    # page forward from the newest known order id until no new fills come back
    while True:
        print('fetching my trades', ts_to_date(mt[-1]['timestamp'] / 1000))
        new_mt = await self.fetch_my_trades(order_id=mt[-1]['order_id'] + 1)
        if len(new_mt) == 0:
            break
        mt += new_mt
    mtd = {t['order_id']: t for t in mt}
    my_trades = sorted(mtd.values(), key=lambda x: x['order_id'])
    print('dumping trades to cache...')
    with open(self.my_trades_cache_filepath, 'w') as f:
        for t in my_trades:
            f.write(json.dumps(t) + '\n')
    self.my_trades = my_trades
Example #3
async def fetch_ticks(self, from_id: int = None, start_time: int = None, end_time: int = None,
                      do_print: bool = True):
    params = {'symbol': self.symbol, 'limit': 1000}
    if from_id is not None:
        params['fromId'] = max(0, from_id)
    if start_time is not None:
        params['startTime'] = start_time
    if end_time is not None:
        params['endTime'] = end_time
    try:
        fetched = await self.private_get(self.endpoints['ticks'], params)
    except Exception as e:
        print('error fetching ticks a', e)
        return []
    try:
        ticks = [{'trade_id': int(t['a']), 'price': float(t['p']), 'qty': float(t['q']),
                  'timestamp': int(t['T']), 'is_buyer_maker': t['m']}
                 for t in fetched]
        if do_print:
            print_(['fetched ticks', self.symbol, ticks[0]['trade_id'],
                    ts_to_date(float(ticks[0]['timestamp']) / 1000)])
    except Exception as e:
        print('error fetching ticks b', e, fetched)
        ticks = []
        if do_print:
            print_(['fetched no new ticks', self.symbol])
    return ticks
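Callers typically page through the tick stream by feeding the last seen trade_id back in as from_id; a rough sketch, assuming the bot instance above is available as bot and that resuming at the next id is sufficient:

async def fetch_ticks_since(bot, start_time_ms: int) -> list:
    # illustrative pagination loop, not part of the original class
    ticks = await bot.fetch_ticks(start_time=start_time_ms)
    while ticks:
        new = await bot.fetch_ticks(from_id=ticks[-1]['trade_id'] + 1)
        if not new or new[-1]['trade_id'] <= ticks[-1]['trade_id']:
            break
        ticks += new
    return ticks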
Example #4
def get_downloaded_trades(filepath: str,
                          age_limit_millis: float) -> (pd.DataFrame, dict):
    if os.path.isdir(filepath):
        # sort chunk files by the numeric trade_id prefix in the filename
        filenames = sorted(
            [f for f in os.listdir(filepath) if f.endswith('.csv')],
            key=lambda x: int(x[:x.find('_')].replace('.cs', '').replace('v', '')))
        chunks = []
        chunk_lengths = {}
        for f in filenames[::-1]:
            chunk = pd.read_csv(filepath + f).set_index('trade_id')
            chunk_lengths[f] = len(chunk)
            print('\rloaded chunk of trades',
                  f,
                  ts_to_date(chunk.timestamp.iloc[0] / 1000),
                  end='     ')
            chunks.append(chunk)
            if chunk.timestamp.iloc[0] < age_limit_millis:
                break
        if chunks:
            df = pd.concat(chunks, axis=0).sort_index()
            return df[~df.index.duplicated()], chunk_lengths
        else:
            return None, {}
    else:
        return None, {}
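For reference, load_trades further below calls it roughly like this (the path and age limit are illustrative):

# illustrative call; directory layout follows the historical_data convention used elsewhere
filepath = 'historical_data/binance/agg_trades_futures/BTCUSDT/'
age_limit_millis = (time() - 60 * 60 * 24 * 30) * 1000  # keep ~30 days of trades
trades_df, chunk_lengths = get_downloaded_trades(filepath, age_limit_millis)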
Example #5
async def main():
    exchange = sys.argv[1]
    user = sys.argv[2]

    settings_filepath = os.path.join('backtesting_settings', exchange, '')
    backtesting_settings = \
        json.load(open(os.path.join(settings_filepath, 'backtesting_settings.json')))
    symbol = backtesting_settings['symbol']
    n_days = backtesting_settings['n_days']
    ranges = json.load(open(os.path.join(settings_filepath, 'ranges.json')))
    print(settings_filepath)
    results_filepath = make_get_filepath(
        os.path.join(
            'backtesting_results', exchange,
            ts_to_date(time())[:19].replace(':', '_') +
            f'_{int(round(n_days))}', ''))

    trade_cache_filepath = make_get_filepath(
        os.path.join(settings_filepath, 'trade_cache', ''))
    trades_filename = f'{symbol}_raw_trades_{exchange}_{n_days}_days_{ts_to_date(time())[:10]}.npy'
    trades_filepath = f"{trade_cache_filepath}{trades_filename}"
    if os.path.exists(trades_filepath):
        print('loading cached trade list', trades_filepath)
        trades_list = np.load(trades_filepath, allow_pickle=True)
    else:
        agg_trades = await load_trades(exchange, user, symbol, n_days)
        print('preparing trades...')
        trades_list = prep_trades_list(agg_trades)
        np.save(trades_filepath, trades_list)
    jackrabbit(trades_list, backtesting_settings, ranges, results_filepath)
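The entry point reads the exchange and account name from the command line; a hedged invocation sketch (the script filename is a placeholder):

if __name__ == '__main__':
    # e.g.  python backtest.py binance my_user
    # expects backtesting_settings/binance/backtesting_settings.json and ranges.json to exist
    asyncio.run(main())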
Example #6
async def fetch_trades(cc, symbol: str, from_id: int = None) -> [dict]:
    params = {'symbol': symbol, 'limit': 1000}
    if from_id:
        params['fromId'] = from_id
    fetched_trades = await cc.fapiPublic_get_aggtrades(params=params)
    trades = [{'trade_id': int(t['a']),
               'price': float(t['p']),
               'qty': float(t['q']),
               'timestamp': t['T'],
               'is_buyer_maker': t['m']} for t in fetched_trades]
    print_(['fetched trades', symbol, trades[0]['trade_id'],
            ts_to_date(trades[0]['timestamp'] / 1000)])
    return trades
Example #7
async def fetch_ticks(cc, symbol: str, from_id: int = None, do_print=True) -> [dict]:
    params = {'symbol': symbol, 'limit': 1000}
    if from_id:
        params['from'] = max(0, from_id)
    try:
        fetched_trades = await cc.v2_public_get_trading_records(params=params)
    except Exception as e:
        print(e)
        return []
    trades = [format_tick(t) for t in fetched_trades['result']]
    if do_print:
        print_(['fetched trades', symbol, trades[0]['trade_id'],
                ts_to_date(trades[0]['timestamp'] / 1000)])
    return trades
def iter_chunks(exchange: str, symbol: str) -> Iterator[pd.DataFrame]:
    chunk_size = 100000
    filepath = f'historical_data/{exchange}/agg_trades_futures/{symbol}/'
    if os.path.isdir(filepath):
        filenames = sorted(
            [f for f in os.listdir(filepath) if f.endswith('.csv')])
        for f in filenames[::-1]:
            chunk = pd.read_csv(filepath + f).set_index('trade_id')
            if chunk is not None:
                print('loaded chunk of trades', f,
                      ts_to_date(chunk.timestamp.iloc[0] / 1000))
                yield chunk
            else:
                yield None
        yield None
    else:
        yield None
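iter_chunks yields the csv chunks newest first and signals exhaustion with a trailing None, so a consumer drains it along these lines (illustrative helper, not part of the original module):

def collect_chunks(exchange: str, symbol: str) -> list:
    # stop at the None sentinel emitted when no more csv chunks are available
    chunks = []
    for chunk in iter_chunks(exchange, symbol):
        if chunk is None:
            break
        chunks.append(chunk)
    return chunks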
Example #9
async def fetch_trades(cc, symbol: str, from_id: int = None) -> [dict]:
    params = {'symbol': symbol, 'limit': 1000}
    if from_id:
        params['from'] = from_id
    fetched_trades = await cc.public_get_trading_records(params=params)
    trades = [{
        'trade_id': int(t['id']),
        'side': t['side'],
        'price': t['price'],
        'qty': t['qty'],
        'timestamp': date_to_ts(t['time'][:-1])
    } for t in fetched_trades['result']]
    print_([
        'fetched trades', symbol, trades[0]['trade_id'],
        ts_to_date(trades[0]['timestamp'] / 1000)
    ])
    return trades
Example #10
async def fetch_ticks(self, from_id: int = None, do_print: bool = True):
    params = {'symbol': self.symbol, 'limit': 1000}
    if from_id is not None:
        params['from'] = max(0, from_id)
    try:
        ticks = await self.public_get(self.endpoints['ticks'], params)
    except Exception as e:
        print('error fetching ticks', e)
        return []
    try:
        trades = list(map(format_tick, ticks['result']))
        if do_print:
            print_(['fetched trades', self.symbol, trades[0]['trade_id'],
                    ts_to_date(float(trades[0]['timestamp']) / 1000)])
    except Exception:
        trades = []
        if do_print:
            print_(['fetched no new trades', self.symbol])
    return trades
Example #11
async def init_my_trades(self, age_limit_days: float = 7.0) -> [dict]:
    age_limit = self.cc.milliseconds() - 1000 * 60 * 60 * 24 * age_limit_days
    mtl = await self.fetch_my_trades()
    print('loading my trades cache...')
    mtl += self.load_cached_my_trades()
    mtd = {t['order_id']: t for t in mtl}
    mt = sorted(mtd.values(), key=lambda x: x['timestamp'])
    # page backwards until the oldest known fill is older than the age limit
    page = 2
    while mt[0]['timestamp'] > age_limit:
        print('fetching my trades', ts_to_date(mt[0]['timestamp'] / 1000))
        new_mt = await self.fetch_my_trades(page)
        if len(new_mt) == 0 or new_mt[0]['order_id'] in mtd:
            break
        page += 1
        mtd = {t['order_id']: t for t in mt + new_mt}
        mt = sorted(mtd.values(), key=lambda x: x['timestamp'])
    my_trades = [t for t in mt if t['timestamp'] > age_limit]
    print('dumping trades to cache...')
    with open(self.my_trades_cache_filepath, 'w') as f:
        for t in my_trades:
            f.write(json.dumps(t) + '\n')
    self.my_trades = my_trades
Example #12
def backtest_tune(ticks: np.ndarray,
                  backtest_config: dict,
                  current_best: Union[dict, list] = None):
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    backtest_config['optimize_dirpath'] = os.path.join(
        backtest_config['optimize_dirpath'],
        ts_to_date(time())[:19].replace(':', ''), '')
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print(
            'Parameter iters should be defined in the configuration. Defaulting to 10.'
        )
        iters = 10
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print(
            'Parameter num_cpus should be defined in the configuration. Defaulting to 2.'
        )
        num_cpus = 2
    n_particles = backtest_config['n_particles'] if 'n_particles' in backtest_config else 10
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']
    current_best_params = []
    if current_best:
        if type(current_best) == list:
            for c in current_best:
                c = clean_start_config(c, config, backtest_config['ranges'])
                if c not in current_best_params:
                    current_best_params.append(c)
        else:
            current_best = clean_start_config(current_best, config,
                                              backtest_config['ranges'])
            current_best_params.append(current_best)

    ray.init(num_cpus=num_cpus,
             logging_level=logging.FATAL,
             log_to_driver=False)
    pso = ng.optimizers.ConfiguredPSO(transform='identity',
                                      popsize=n_particles,
                                      omega=omega,
                                      phip=phi1,
                                      phig=phi2)
    algo = NevergradSearch(optimizer=pso,
                           points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    if 'wfo' in config and config['wfo']:
        print('\n\nwalk forward optimization\n\n')
        wfo = WFO(ticks, backtest_config, P_train=0.5).set_train_N(4)
        backtest_wrap = lambda config: tune_report(wfo.backtest(config))
    else:
        print('\n\nsimple sliding window optimization\n\n')
        backtest_wrap = tune.with_parameters(simple_sliding_window_wrap,
                                             ticks=ticks)
    analysis = tune.run(backtest_wrap,
                        metric='objective',
                        mode='max',
                        name='search',
                        search_alg=algo,
                        scheduler=scheduler,
                        num_samples=iters,
                        config=config,
                        verbose=1,
                        reuse_actors=True,
                        local_dir=backtest_config['optimize_dirpath'],
                        progress_reporter=LogReporter(
                            metric_columns=[
                                'daily_gain', 'closest_liquidation',
                                'max_hrs_no_fills',
                                'max_hrs_no_fills_same_side', 'objective'
                            ],
                            parameter_columns=[
                                k for k in backtest_config['ranges']
                                if type(config[k]) == ray.tune.sample.Float
                                or type(config[k]) == ray.tune.sample.Integer
                            ]),
                        raise_on_failed_trial=False)
    ray.shutdown()
    return analysis
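The PSO coefficients phi1, phi2 and omega default to the canonical constriction values and can be overridden through an 'options' block; a minimal configuration sketch using the keys read above (values illustrative):

backtest_config = {
    'iters': 100,        # samples passed to tune.run
    'num_cpus': 4,       # ray worker processes
    'n_particles': 10,   # PSO population size
    'options': {'c1': 1.4962, 'c2': 1.4962, 'w': 0.7298},  # phi1, phi2, omega
    # plus 'ranges', 'optimize_dirpath' and the remaining backtest settings
}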
Example #13
async def load_trades(exchange: str, user: str, symbol: str,
                      n_days: float) -> pd.DataFrame:
    def skip_ids(id_, ids_):
        if id_ in ids_:
            print('skipping from', id_)
            while id_ in ids_:
                id_ -= 1
            print('           to', id_)
        return id_

    def load_cache():
        cache_filenames = [
            f for f in os.listdir(cache_filepath) if '.csv' in f
        ]
        if cache_filenames:
            print('loading cached trades')
            cache_df = pd.concat(
                [pd.read_csv(cache_filepath + f) for f in cache_filenames],
                axis=0)
            cache_df = cache_df.set_index('trade_id')
            return cache_df
        return None

    if exchange == 'binance':
        fetch_trades_func = binance_fetch_trades
    elif exchange == 'bybit':
        fetch_trades_func = bybit_fetch_trades
    else:
        print(exchange, 'not found')
        return
    cc = init_ccxt(exchange, user)
    filepath = make_get_filepath(
        os.path.join('historical_data', exchange, 'agg_trades_futures', symbol,
                     ''))
    cache_filepath = make_get_filepath(
        filepath.replace(symbol, symbol + '_cache'))
    age_limit = time() - 60 * 60 * 24 * n_days
    age_limit_millis = age_limit * 1000
    print('age_limit', ts_to_date(age_limit))
    cache_df = load_cache()
    trades_df, chunk_lengths = get_downloaded_trades(filepath,
                                                     age_limit_millis)
    ids = set()
    if trades_df is not None:
        ids.update(trades_df.index)
    if cache_df is not None:
        ids.update(cache_df.index)
    gaps = []
    if trades_df is not None and len(trades_df) > 0:
        # detect gaps in the otherwise contiguous trade_id sequence
        sids = sorted(ids)
        for i in range(1, len(sids)):
            if sids[i - 1] + 1 != sids[i]:
                gaps.append((sids[i - 1], sids[i]))
        if gaps:
            print('gaps', gaps)
    prev_fetch_ts = time()
    new_trades = await fetch_trades_func(cc, symbol)
    k = 0
    while True:
        k += 1
        if (break_ := new_trades[0]['timestamp'] <= age_limit_millis) or k % 20 == 0:
            print('caching trades...')
            new_tdf = pd.DataFrame(new_trades).set_index('trade_id')
            cache_filename = f'{cache_filepath}{new_tdf.index[0]}_{new_tdf.index[-1]}.csv'
            new_tdf.to_csv(cache_filename)
            new_trades = [new_trades[0]]
            if break_:
                break
        from_id = skip_ids(new_trades[0]['trade_id'] - 1, ids) - 999
        # wait at least 0.75 sec between each fetch
        sleep_for = max(0.0, 0.75 - (time() - prev_fetch_ts))
        await asyncio.sleep(sleep_for)
        prev_fetch_ts = time()
        new_trades = await fetch_trades_func(cc, symbol,
                                             from_id=from_id) + new_trades
        ids.update([e['trade_id'] for e in new_trades])
async def load_trades(exchange: str, user: str, symbol: str,
                      n_days: float) -> pd.DataFrame:
    def skip_ids(id_, ids_):
        if id_ in ids_:
            print('skipping from', id_)
            while id_ in ids_:
                id_ -= 1
            print('           to', id_)
        return id_

    cc = init_ccxt(exchange, user)
    try:
        if exchange == 'binance':
            fetch_trades_func = binance_fetch_trades
        elif exchange == 'bybit':
            fetch_trades_func = bybit_fetch_trades
        else:
            print(exchange, 'not found')
            return
        filepath = make_get_filepath(
            f'historical_data/{exchange}/agg_trades_futures/{symbol}/')
        cache_filepath = make_get_filepath(
            f'historical_data/{exchange}/agg_trades_futures/{symbol}_cache/')
        cache_filenames = [
            f for f in os.listdir(cache_filepath) if f.endswith('.csv')
        ]
        ids = set()
        if cache_filenames:
            print('loading cached trades...')
            cached_trades = pd.concat(
                [pd.read_csv(cache_filepath + f) for f in cache_filenames],
                axis=0)
            cached_trades = cached_trades.set_index('trade_id').sort_index()
            cached_trades = cached_trades[~cached_trades.index.duplicated()]
            ids.update(cached_trades.index)
        else:
            cached_trades = None
        age_limit = time() - 60 * 60 * 24 * n_days
        age_limit_millis = age_limit * 1000
        print('age_limit', ts_to_date(age_limit))
        chunk_iterator = iter_chunks(exchange, symbol)
        chunk = next(chunk_iterator)
        chunks = {} if chunk is None else {int(chunk.index[0]): chunk}
        if chunk is not None:
            ids.update(chunk.index)
        min_id = min(ids) if ids else 0
        new_trades = await fetch_trades_func(cc, symbol)
        cached_ids = set()
        k = 0
        while True:
            if new_trades[0]['timestamp'] <= age_limit_millis:
                break
            from_id = new_trades[0]['trade_id'] - 1
            while True:
                if chunk is None:
                    min_id = 0
                    break
                from_id = skip_ids(from_id, ids)
                if from_id < min_id:
                    chunk = next(chunk_iterator)
                    if chunk is None:
                        min_id = 0
                        break
                    else:
                        chunks[int(chunk.index[0])] = chunk
                        ids.update(chunk.index)
                        min_id = min(ids)
                        if chunk.timestamp.max() < age_limit_millis:
                            break
                else:
                    break
            from_id = skip_ids(from_id, ids)
            from_id -= 999
            new_trades = await fetch_trades_func(cc, symbol,
                                                 from_id=from_id) + new_trades
            k += 1
            if k % 20 == 0:
                print('dumping cache')
                cache_df = pd.DataFrame([
                    t for t in new_trades if t['trade_id'] not in cached_ids
                ]).set_index('trade_id')
                cache_df.to_csv(cache_filepath + str(int(time() * 1000)) +
                                '.csv')
                cached_ids.update(cache_df.index)
        new_trades_df = pd.DataFrame(new_trades).set_index('trade_id')
        trades_updated = pd.concat(list(chunks.values()) +
                                   [new_trades_df, cached_trades],
                                   axis=0)
        no_dup = trades_updated[~trades_updated.index.duplicated()]
        no_dup_sorted = no_dup.sort_index()
        chunk_size = 100000
        chunk_ids = no_dup_sorted.index // chunk_size * chunk_size
        for g in no_dup_sorted.groupby(chunk_ids):
            if g[0] not in chunks or len(chunks[g[0]]) != chunk_size:
                print('dumping chunk', g[0])
                g[1].to_csv(f'{filepath}{str(g[0])}.csv')
        for f in [
                f_ for f_ in os.listdir(cache_filepath) if f_.endswith('.csv')
        ]:
            os.remove(cache_filepath + f)
        await cc.close()
        return no_dup_sorted[no_dup_sorted.timestamp >= age_limit_millis]
    except KeyboardInterrupt:
        await cc.close()