Example 1
import hashlib
import hmac
import json
from time import time
from typing import Iterator, List
from urllib.parse import urlencode

import pandas as pd
import requests

# project-local helpers (ts_to_day, ts_to_date, to_f, load_key_secret,
# make_get_filepath, get_filenames, sort_and_drop_duplicates_by_index,
# clear_cache, begin_csv, write_to_cache) are assumed to be defined
# elsewhere in the repository


def iterate_trades(symbols: List[str], n_days: int):
    base_path = 'historical_data/raw_trades/'
    ts = time() - 60 * 60 * 24 * n_days
    day = ts_to_day(ts)
    end_day = ts_to_day(time())
    while day < end_day:
        print(day)
        day_trades = []
        for s in symbols:
            s_ = s.replace('/', '_')
            filepath = f'{base_path}{s_}/{day}.csv'
            with open(filepath) as f:
                lines = f.readlines()
            header = lines[0].strip().split(',')
            lines = [line.split(',') for line in lines[1:]]
            day_trades += [{
                'symbol': s,
                **{header[i]: to_f(line[i])
                   for i in range(len(header))}
            } for line in lines]
        yield sorted(day_trades, key=lambda x: x['timestamp'])
        ts += 60 * 60 * 24
        day = ts_to_day(ts)
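
# Usage sketch (hedged): the symbols below are illustrative; it assumes the
# per-symbol daily CSVs already exist under historical_data/raw_trades/
def _demo_iterate_trades():
    for day_trades in iterate_trades(['BTC/USDT', 'ETH/USDT'], n_days=3):
        print(len(day_trades), 'trades, first timestamp', day_trades[0]['timestamp'])
        break
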
def fetch_my_trades(symbol: str,
                    n_days: float = 30,
                    no_download: bool = False,
                    limit: int = 1000) -> pd.DataFrame:

    def request_my_trades(from_id: int = -1) -> dict:
        timestamp = int(time() * 1000)
        url = 'https://api.binance.com/api/v3/myTrades?'
        params = {'symbol': symbol.replace('/', ''),
                  'limit': limit,
                  'timestamp': timestamp}
        if from_id > 0:
            params['fromId'] = from_id
        # sign the query string with the API secret (Binance HMAC-SHA256 auth)
        query_string = urlencode(params)
        params['signature'] = hmac.new(secret.encode('utf-8'),
                                       query_string.encode('utf-8'),
                                       hashlib.sha256).hexdigest()
        headers = {'X-MBX-APIKEY': key}
        return json.loads(requests.get(url, headers=headers, params=params).text)

    def format_my_trades(my_trades_: List[dict]) -> List[dict]:
        formatted = []
        for t in my_trades_:
            price = float(t['price'])
            amount = float(t['qty'])
            formatted.append(
                {'symbol': symbol,
                 'id': t['id'],
                 'order_id': t['orderId'],
                 'price': price,
                 'amount': amount,
                 'cost': amount * price,
                 'side': 'buy' if t['isBuyer'] else 'sell',
                 'timestamp': t['time'],
                 'datetime': ts_to_date(t['time'] / 1000),
                 'is_maker': t['isMaker'],
                 'fee_cost': float(t['commission']),
                 'fee_currency': t['commissionAsset']}
            )
        return formatted

    def iterate_my_trades(ids_covered: set) -> Iterator[dict]:
        # walk backwards through the account's trade history: start from the
        # most recent batch, then step fromId past ids already cached locally
        my_trades_ = format_my_trades(request_my_trades())
        while True:
            yield my_trades_
            from_id_ = my_trades_[0]['id']
            while from_id_ in ids_covered:
                from_id_ -= 1
            from_id_ -= limit
            my_trades_ = format_my_trades(request_my_trades(from_id_))

    def format_csv_loaded_my_trades(csv: pd.DataFrame) -> pd.DataFrame:
        if csv is None:
            return None
        return sort_and_drop_duplicates_by_index(csv[columns].set_index('id'))

    symbol_no_dash = symbol.replace('/', '_')
    key, secret = load_key_secret('binance')
    dirpath = make_get_filepath('historical_data/my_trades_margin/{}/'.format(symbol_no_dash))
    columns = ['symbol', 'id', 'order_id', 'price', 'amount', 'cost', 'side', 'timestamp',
               'datetime', 'is_maker', 'fee_cost', 'fee_currency']
    cache_path = 'historical_data/my_trades_margin_cache/{}.csv'.format(symbol_no_dash)
    since = ts_to_day(time() - 60 * 60 * 24 * n_days - 24) if n_days > 0 else '0'
    my_trades_loaded = [format_csv_loaded_my_trades(pd.read_csv(dirpath + f))
                        for f in get_filenames(dirpath) if f > since]
    if not no_download:
        clear_cache(cache_path, dirpath, columns, True, format_csv_loaded_my_trades)
        begin_csv(cache_path, columns)
        if my_trades_loaded:
            ids_covered = set(pd.concat(my_trades_loaded).index)
        else:
            ids_covered = set()
        until_ts = (time() - 60 * 60 * 24 * n_days) * 1000
        prev_id = 0
        for my_trades in iterate_my_trades(ids_covered):
            # the same first id twice in a row means no new trades were returned
            if my_trades[0]['id'] == prev_id:
                break
            prev_id = my_trades[0]['id']
            write_to_cache(my_trades, columns, cache_path)
            print('fetched my_trades for {} {}'.format(
                symbol, ts_to_date(my_trades[0]['timestamp'] / 1000)))
            if my_trades[0]['timestamp'] <= until_ts:
                break
        clear_cache(cache_path, dirpath, columns, True, format_csv_loaded_my_trades)
        my_trades_loaded = [format_csv_loaded_my_trades(pd.read_csv(dirpath + f))
                            for f in get_filenames(dirpath) if f > since]
    if len(my_trades_loaded) > 0:
        return sort_and_drop_duplicates_by_index(pd.concat(my_trades_loaded))
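
# Usage sketch, assuming valid Binance API credentials are retrievable via
# load_key_secret('binance'); the symbol is illustrative only
def _demo_fetch_my_trades():
    my_trades = fetch_my_trades('BTC/USDT', n_days=7)
    if my_trades is not None:
        print(my_trades[['price', 'amount', 'side']].tail())
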
def fetch_raw_trades(symbol: str, n_days: float = 7, no_download: bool = False) -> pd.DataFrame:

    def request_historical_trades(from_id: int = 0) -> dict:
        url = 'https://api.binance.com/api/v3/aggTrades?symbol={}&limit=1000'.format(
            symbol.replace('/', ''))
        if from_id > 0:
            url += '&fromId=' + str(from_id)
        return json.loads(requests.get(url).text)

    def format_raw_trades(trades_: List[dict]) -> List[dict]:
        return [{'agg_trade_id': t['a'],
                 'price': float(t['p']),
                 'amount': float(t['q']),
                 'timestamp': t['T'],
                 'is_buyer_maker': t['m']} for t in trades_]

    def iterate_raw_trades(ids_covered: set):
        # walk backwards through public trade history, skipping ids already cached
        trades = format_raw_trades(request_historical_trades())
        while True:
            yield trades
            from_id_ = trades[0]['agg_trade_id']
            while from_id_ in ids_covered:
                from_id_ -= 1
            from_id_ -= (len(trades) - 1)
            from_id_ = max(0, from_id_)
            trades = format_raw_trades(request_historical_trades(from_id_))

    def format_csv_loaded_raw_trades(csv: pd.DataFrame) -> pd.DataFrame:
        if csv is None:
            return None
        return sort_and_drop_duplicates_by_index(csv[columns].set_index('agg_trade_id'))

    symbol_no_dash = symbol.replace('/', '_')
    dirpath = make_get_filepath('historical_data/raw_trades/{}/'.format(symbol_no_dash))
    columns = sorted(['agg_trade_id', 'price', 'amount', 'timestamp', 'is_buyer_maker'])
    cache_path = 'historical_data/raw_trades_cache/{}.csv'.format(symbol_no_dash)
    since = ts_to_day(time() - 60 * 60 * 24 * n_days - 24) if n_days > 0 else '0'
    if not no_download:
        clear_cache(cache_path, dirpath, columns, False, format_csv_loaded_raw_trades)
        begin_csv(cache_path, columns)
        raw_trades_loaded = [format_csv_loaded_raw_trades(pd.read_csv(dirpath + f))
                             for f in get_filenames(dirpath) if f > since]
        if raw_trades_loaded:
            raw_trades_df = sort_and_drop_duplicates_by_index(pd.concat(raw_trades_loaded))
            ids_covered = set(raw_trades_df.index)
        else:
            ids_covered = set()
        until_ts = (time() - 60 * 60 * 24 * n_days) * 1000
        # first-batch timestamps already seen; a repeat means pagination stalled
        rt_tss0 = set()
        for raw_trades in iterate_raw_trades(ids_covered):
            write_to_cache(raw_trades, columns, cache_path)
            print('fetched raw trades for {} {}'.format(
                symbol, ts_to_date(raw_trades[0]['timestamp'] / 1000)))
            if raw_trades[0]['timestamp'] <= until_ts or raw_trades[0]['timestamp'] in rt_tss0:
                break
            rt_tss0.add(raw_trades[0]['timestamp'])
        clear_cache(cache_path, dirpath, columns, False, format_csv_loaded_raw_trades)
    raw_trades_loaded = [format_csv_loaded_raw_trades(pd.read_csv(dirpath + f))
                         for f in get_filenames(dirpath) if f > since]
    if len(raw_trades_loaded) > 0:
        return sort_and_drop_duplicates_by_index(pd.concat(raw_trades_loaded))
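
# Usage sketch: aggTrades is a public endpoint, so no API key is needed;
# the symbol is illustrative, and results are cached under historical_data/
def _demo_fetch_raw_trades():
    raw_trades = fetch_raw_trades('ETH/BTC', n_days=1)
    if raw_trades is not None:
        print(raw_trades.price.describe())
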
def fetch_ohlcvs(symbol: str,
                 n_days: float = 7,
                 timeframe: str = '1m',
                 no_download: bool = False) -> pd.DataFrame:
    '''
    fetches ohlcv data from binance
    allowed timeframes are
    1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 12h, 1d, 1w, 1M
    '''
    def request_klines(start_time: int = -1) -> dict:
        url = 'https://api.binance.com/api/v3/klines?symbol={}&limit=1000&interval={}'.format(
            symbol.replace('/', ''), timeframe)
        if start_time != -1:
            url += '&startTime={}'.format(start_time)
        return json.loads(requests.get(url).text)

    def format_ohlcvs(ohlcvs_: List[list]) -> List[dict]:
        # kline fields: 0 open time, 1 open, 2 high, 3 low, 4 close, 5 base volume,
        # 7 quote volume, 8 trade count, 9 taker-buy base volume
        formatted = [{'timestamp': e[0],
                      'open': float(e[1]),
                      'high': float(e[2]),
                      'low': float(e[3]),
                      'close': float(e[4]),
                      'volume': float(e[5]),
                      'volume_base': float(e[7]),
                      'n_trades': e[8],
                      'volume_bought': float(e[9])} for e in ohlcvs_]
        for e in formatted:
            e['volume_sold'] = e['volume'] - e['volume_bought']
        return formatted

    def iterate_ohlcvs(tss_covered: set):
        max_n_tries = 10
        ohlcvs = format_ohlcvs(request_klines())
        prev_ohlcvs = [{}]
        while True:
            yield ohlcvs
            from_ts = ohlcvs[0]['timestamp']
            while from_ts in tss_covered:
                from_ts -= timeframe_millis
            from_ts -= (len(ohlcvs) - 1) * timeframe_millis
            ohlcvs = format_ohlcvs(request_klines(from_ts))
            k = 0
            # if the exchange returns the same batch again, assume a gap in the
            # data and keep stepping further back, up to max_n_tries times
            while ohlcvs[0] == prev_ohlcvs[0]:
                print('gaps', ts_to_date(ohlcvs[0]['timestamp'] / 1000), ts_to_date(from_ts / 1000))
                from_ts -= (len(ohlcvs) - 1) * timeframe_millis
                ohlcvs = format_ohlcvs(request_klines(from_ts))
                k += 1
                if k >= max_n_tries:
                    yield None
                    break
            prev_ohlcvs = ohlcvs

    def format_csv_loaded_ohlcvs(csv: pd.DataFrame) -> pd.DataFrame:
        if csv is None:
            return None
        return sort_and_drop_duplicates_by_index(csv[columns].set_index('timestamp'))

    symbol_no_dash = symbol.replace('/', '_')
    timeframe_to_millis_map = {'1m': 60 * 1000,
                               '3m': 3 * 60 * 1000,
                               '5m': 5 * 60 * 1000,
                               '15m': 15 * 60 * 1000,
                               '30m': 30 * 60 * 1000,
                               '1h': 60 * 60 * 1000,
                               '2h': 2 * 60 * 60 * 1000,
                               '4h': 4 * 60 * 60 * 1000,
                               '6h': 6 * 60 * 60 * 1000,
                               '12h': 12 * 60 * 60 * 1000,
                               '1d': 24 * 60 * 60 * 1000,
                               '1w': 7 * 24 * 60 * 60 * 1000,
                               '1M': 30 * 24 * 60 * 60 * 1000
                               }
    timeframe_millis = timeframe_to_millis_map[timeframe]
    dirpath = make_get_filepath('historical_data/ohlcvs_{}/{}/'.format(timeframe, symbol_no_dash))
    columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume',
               'volume_base', 'n_trades', 'volume_bought', 'volume_sold']
    cache_path = 'historical_data/ohlcvs_cache/{}_{}.csv'.format(timeframe, symbol_no_dash)
    since = ts_to_day(time() - 60 * 60 * 24 * n_days - 24) if n_days > 0 else '0'
    if not no_download:
        clear_cache(cache_path, dirpath, columns, True, format_csv_loaded_ohlcvs)
        begin_csv(cache_path, columns)
        ohlcvs_loaded = [format_csv_loaded_ohlcvs(pd.read_csv(dirpath + f))
                         for f in get_filenames(dirpath) if f > since]
        if ohlcvs_loaded:
            ohlcvs_df = sort_and_drop_duplicates_by_index(pd.concat(ohlcvs_loaded))
            tss_covered = set(ohlcvs_df.index)
        else:
            tss_covered = set()
        until_ts = (time() - 60 * 60 * 24 * n_days) * 1000
        for ohlcvs in iterate_ohlcvs(tss_covered):
            if ohlcvs is None:
                print('end of ohlcvs')
                break
            write_to_cache(ohlcvs, columns, cache_path)
            print('fetched {} ohlcvs for {} {}'.format(
                timeframe, symbol, ts_to_date(ohlcvs[0]['timestamp'] / 1000)))
            if ohlcvs[0]['timestamp'] <= until_ts:
                break
        clear_cache(cache_path, dirpath, columns, True, format_csv_loaded_ohlcvs)
    ohlcvs_loaded = [format_csv_loaded_ohlcvs(pd.read_csv(dirpath + f))
                     for f in get_filenames(dirpath) if f > since]
    if len(ohlcvs_loaded) > 0:
        return sort_and_drop_duplicates_by_index(pd.concat(ohlcvs_loaded))
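
# Usage sketch: fetch hourly candles and print a 24-period rolling mean of
# closes; the symbol and window are illustrative only
def _demo_fetch_ohlcvs():
    ohlcvs = fetch_ohlcvs('BTC/USDT', n_days=30, timeframe='1h')
    if ohlcvs is not None:
        print(ohlcvs.close.rolling(24).mean().tail())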