def populate_candles():
    """Precompute OHLC and HeikinAshi candles for every ticker/frequency combo.

    Iterates over the cross product of candle class, ticker and frequency
    and asks the crypto feature runner to precompute each feature from the
    ticker's first valid datetime up to now.
    """
    import tqdm
    from sgmtradingcore.analytics.features.crypto.runner import CryptoFeatureRunner

    class_names = ['OHLC', 'HeikinAshi']
    tickers = ['BTCUSD.SPOT.BITS', 'BTCUSD.SPOT.BITF', 'BTCUSD.PERP.BMEX']
    frequencies = ['5T', '15T', '30T', '1H', '4H']

    runner = CryptoFeatureRunner()
    combos = itertools.product(class_names, tickers, frequencies)

    # Cache the first valid datetime per ticker so it is looked up only once.
    valid_dt_dict = {}
    for cls, ticker, freq in tqdm.tqdm(list(combos)):
        if ticker not in valid_dt_dict:
            valid_dt_dict[ticker] = get_first_valid_datetime(ticker)
        # BUG FIX: previously re-queried get_first_valid_datetime(ticker)
        # here, defeating the cache built just above.
        start_date = valid_dt_dict[ticker]
        end_date = datetime.datetime.now(tz=pytz.UTC)
        params = {
            'input': FeatureRequest('CryptoTrades',
                                    {'source': CryptoDataSource.ARCTIC,
                                     'ticker': ticker}),
            'columns': [TSInput.TRADE_PRICE],
            'start': start_date,
            'clock': FeatureRequest('FixedPeriodClock', {'frequency': freq}),
        }
        fr = FeatureRequest(cls, params)
        runner.precompute([fr], start_date, end_date, n_jobs=1)
def _default_parameters(cls, params, runner):
    """Default parameters: trade sizes from ARCTIC on a 1-second clock."""
    # Reuse the ticker of a caller-supplied input request when present.
    if 'input' in params:
        resolved_ticker = params['input'].feature_params(runner)['ticker']
    else:
        resolved_ticker = 'BTCUSD.SPOT.BITS'
    trades_request = FeatureRequest(
        'CryptoTrades',
        {'source': CryptoDataSource.ARCTIC, 'ticker': resolved_ticker})
    clock_request = FeatureRequest('FixedPeriodClock', {'frequency': '1S'})
    return {
        'input': trades_request,
        'clock': clock_request,
        'columns': ['size'],
    }
def _default_parameters(cls, params, runner):
    """Default parameters: hourly trade-price candles for the input ticker."""
    requested_ticker = None
    if 'input' in params:
        input_params = params['input'].feature_params(runner)
        requested_ticker = input_params.get('ticker')
    return {
        'input': FeatureRequest('CryptoTrades',
                                {'ts_name': TSInput.TRADE_PRICE,
                                 'ticker': requested_ticker or 'BTCUSD.SPOT.BITS'}),
        'columns': [TSInput.TRADE_PRICE],
        'clock': FeatureRequest('FixedPeriodClock', {'frequency': '1H'}),
    }
def _default_parameters(cls, params, runner):
    """Default parameters for the L1-price feature.

    This feature is for L1 prices only, so the configuration of the input
    data component is fixed here.
    """
    l1_input = FeatureRequest('CryptoLOB', {
        'ts_name': TSInput.L1_PRICE,
        'ticker': 'BTCUSD.SPOT.BITF',
        'source': CryptoDataSource.ARCTIC,
    })
    one_second_clock = FeatureRequest('FixedPeriodClock', {'frequency': '1S'})
    return {
        'input': l1_input,
        'columns': ['BIDPRC1', 'ASKPRC1'],
        'clock': one_second_clock,
    }
def _compute_by_sticker(self, sticker):
    """Resample MicroPrice odds for *sticker* and apply rolling indicators."""
    if self._source == 'MicroPrice':
        odds_feature_request = FeatureRequest(
            'MicroPrice',
            {'bookmakers': self._bookmakers, 'precache': True},
            {})
        cols = ['MicroPrice']
    else:
        raise ValueError('Unknown Source {}'.format(self._source))

    fixture_cache = self._runner.shared_objects()['fixture_cache']
    # Start six hours before kickoff to include the pre-match market.
    start_time = fixture_cache.get_kickoff(
        str(extract_event(sticker))) - datetime.timedelta(hours=6)

    frames = self._runner.get_dataframes_by_stickers(
        [odds_feature_request], [sticker])
    odds_df = frames[sticker]
    odds_df = odds_df[odds_df.index > start_time].resample(
        '{}s'.format(self._indicator_frequency)).pad()

    indicators = IndicatorSet(indicator_spec=self._indicator_spec,
                              rolling_params=self._rolling_params)
    result = indicators._apply_indicators_rolling_fixture(odds_df, cols)
    del result['MicroPrice']
    return result
def _compute_by_sticker(self, sticker):
    """Compute the rolling VPIN series for *sticker*.

    VPIN is the sum of |back - lay| volume over the last ``bucket_number``
    volume buckets, normalised by the total window volume, updated
    incrementally as the window slides one bucket at a time.

    :return: DataFrame indexed by timestamp with a single 'vpin' column;
        empty when there are fewer buckets than ``bucket_number``.
    """
    requests = [FeatureRequest('VolumeBucketFeature',
                               {'bookmakers': self._bookmakers,
                                'bucket_size': self.bucket_size},
                               {})]
    buckets = list(self._runner.get_dataframes_by_stickers(
        requests, [sticker])[sticker].itertuples())

    # Not enough buckets for a single window: return the empty schema early
    # (previously the first window sum was computed before this check).
    if len(buckets) < self.bucket_number:
        return pd.DataFrame(
            [], columns=['timestamp', 'vpin']).set_index('timestamp')

    total_vol = self.bucket_size * self.bucket_number
    bucket_q = deque(buckets[:self.bucket_number], maxlen=self.bucket_number)
    vpin = sum(abs(b.back - b.lay) for b in bucket_q)
    vpins = [vpin / total_vol]
    times = [buckets[self.bucket_number - 1].Index]

    for bucket in buckets[self.bucket_number:]:
        back = bucket.back
        lay = bucket.lay
        time = bucket.Index
        # Skip buckets that never filled (0.01 tolerance for rounding).
        if back + lay < self.bucket_size - 0.01:
            # BUG FIX: was a Python 2 `print` statement; the function form
            # behaves identically on both Python 2 and 3.
            print('Underfull bucket')
            continue
        removed = bucket_q.popleft()
        bucket_q.append(bucket)
        # Incremental window update: add the new bucket, drop the oldest.
        vpin += abs(back - lay) - abs(removed.back - removed.lay)
        vpins.append(vpin / total_vol)
        times.append(time)

    # Materialise zip() explicitly so this also works where zip is lazy.
    return pd.DataFrame(list(zip(times, vpins)),
                        columns=['timestamp', 'vpin']).set_index('timestamp')
def _compute(self, start_dt, end_dt):
    """Compute ticks for [start_dt, end_dt), continuing from the last
    stored tick so the series stays contiguous across calls."""
    previous_tick = self._storage.load_previous_tick(start_dt)
    if previous_tick is None:
        previous_tick_end = self._params['start']
    else:
        previous_tick_end = previous_tick.name

    # Calculate ticks from the previous ticks until the interval that we
    # are currently interested in.
    request = [FeatureRequest.from_feature(self)]
    if previous_tick_end != start_dt:
        self._runner.compute_dataframes(request, previous_tick_end, start_dt)

    # Should be up-to-date now, so get the updated latest tick; fall back
    # to the original one when nothing new was stored.
    new_previous_tick = self._storage.load_previous_tick(start_dt)
    if new_previous_tick is None or new_previous_tick.empty:
        new_previous_tick = previous_tick

    # If there is still no previous tick then we start from the beginning.
    if new_previous_tick is None:
        start = self._params['start']
    elif new_previous_tick.empty:
        raise ValueError('This should not be possible')
    else:
        start = new_previous_tick.name + dt.timedelta(microseconds=1)
    return self._compute_recursion(previous_tick, start, end_dt)
def _default_parameters(cls, params, runner):
    """Default parameters: 15-minute OHLC candles of the BITF L1 mid."""
    return {
        'input': FeatureRequest('OHLC', {
            'input': FeatureRequest('CryptoLOB', {
                'ts_name': TSInput.L1_MID,
                'ticker': 'BTCUSD.SPOT.BITF',
                'source': CryptoDataSource.ARCTIC,
            }),
            'columns': [TSInput.L1_MID],
            'clock': FeatureRequest('FixedPeriodClock', {'frequency': '15T'}),
        }),
        'columns': ['close'],
    }
def _compute_by_event_id(self, event_id, repopulate=False):
    """Return live orders for *event_id*, falling back to backtest orders.

    :param repopulate: if True you might need to repopulate the features
        you depend from
    """
    logging.info("Fetching orders for {}, {}".format(
        self.feature_id, event_id))
    from sgmtradingcore.analytics.features.request import FeatureRequest

    live_request = FeatureRequest(
        self.LIVE_CLASS.__name__, self._live_params(), {})
    live_df = self._runner.get_dataframes_by_event_ids(
        [live_request], [event_id])[event_id]
    if not live_df.empty:
        return live_df

    # No live orders recorded for this event: use the backtest equivalent.
    backtest_request = FeatureRequest(
        self.BACKTEST_CLASS.__name__, self._backtest_params(), {})
    return self._runner.get_dataframes_by_event_ids(
        [backtest_request], [event_id])[event_id]
def _compute(self, start_dt, end_dt):
    """Build clock ticks between *start_dt* and *end_dt*.

    Continues recursively from the most recent stored tick so tick
    boundaries are stable regardless of how the computation is chunked.

    :return: DataFrame of ticks within [start_dt, end_dt), or None when
        the clock produces no ticks in the range.
    :raises ValueError: if end_dt precedes the configured clock start.
    """
    if end_dt < self._params['start']:
        # BUG FIX: the message was written as 'Can''t …' (SQL-style
        # escaping); Python concatenates the adjacent literals into
        # "Cant …". Use a real apostrophe instead.
        msg = "Can't calculate this range as {} is before the clock start {}"
        raise ValueError(msg.format(end_dt, self._params['start']))

    previous_tick = self._storage.load_previous_tick(start_dt)
    if previous_tick is None:
        previous_tick_end = self._params['start']
    else:
        previous_tick_end = previous_tick.name + datetime.timedelta(
            microseconds=1)

    # Calculate ticks from the previous ticks until the interval that we
    # are currently interested in.
    request = [FeatureRequest.from_feature(self)]
    if previous_tick_end != start_dt:
        self._runner.compute_dataframes(request, previous_tick_end, start_dt)
        # Should be up-to-date now, so get the updated latest tick.
        new_previous_tick = self._storage.load_previous_tick(start_dt)
        if new_previous_tick is None or new_previous_tick.empty:
            new_previous_tick = previous_tick
    else:
        new_previous_tick = previous_tick

    # If there is still no previous tick then start from the beginning.
    if new_previous_tick is None:
        start = self._params['start']
    elif new_previous_tick.empty:
        raise ValueError('This should not be possible')
    else:
        start = new_previous_tick.name + datetime.timedelta(microseconds=1)

    clock = self._clock_feature(start, end_dt)
    if len(clock) == 0:
        # There are no ticks in this range, so nothing to do.
        return None

    # `input` renamed to avoid shadowing the builtin.
    input_df = self._input_feature(clock.iloc[0]['previous'],
                                   clock.iloc[-1].name)
    clock['tick'] = clock.index.to_series(keep_tz=True)
    merged_data = pd.merge_asof(input_df,
                                clock.set_index('previous', drop=False),
                                left_index=True, right_index=True)
    ticks = []
    for tick, df in merged_data.groupby('tick'):
        clock_tick = clock.loc[tick]
        # Each tick is computed from the previous one (recursive state).
        new_previous_tick = pd.Series(
            self._compute_recursive_tick(clock_tick, new_previous_tick, df),
            name=tick)
        ticks.append(new_previous_tick)
    return pd.DataFrame([t for t in ticks if end_dt > t.name >= start_dt])
def _compute_by_event_id(self, event_id, repopulate=False):
    """Compute VPIN for the event's primary match-odds sticker."""
    sport = get_sport_from_event_id(
        event_id, self._runner.shared_objects()['mysql_client'])
    if sport == Sports.TENNIS:
        market, selection = TennisMarkets.MATCH_ODDS, TennisSelections.PLAYER_A
    elif sport == Sports.BASKETBALL:
        market = BasketballMarkets.FULL_TIME_MONEYLINE
        selection = BasketballSelections.HOME_TEAM
    else:
        raise ValueError('Unknown Sport')
    sticker = generate_sticker(
        sport, (MarketScopes.EVENT, event_id), market, selection,
        bookmaker=self._params['bookmakers'][0])
    requests = [FeatureRequest('VPINBasicFeature', self._params, {})]
    return self._runner.get_dataframes_by_stickers(requests, [sticker])[sticker]
def _compute_by_event_id(self, fixture_id, repopulate=False):
    """Collect every available sticker across this feature's bookmakers.

    :param fixture_id: event/fixture identifier
    :param repopulate: repopulate the child features when True
    :return: dict with a flat 'stickers' list
    """
    feature_requests = [
        FeatureRequest('AvailableStickersSingleBookmaker',
                       {'bookmaker': b}, {})
        for b in self._bookmakers
    ]
    result = self._runner.get_event_features(
        feature_requests, [fixture_id], repopulate=repopulate)

    # Flatten the per-bookmaker sticker lists into a single list.
    per_bookmaker = result[fixture_id]['AvailableStickersSingleBookmaker']
    flat_stickers = []
    for sticker_list in per_bookmaker.itervalues():
        flat_stickers.extend(sticker_list)
    return {'stickers': flat_stickers}
def _compute_by_sticker(self, sticker):
    """Aggregate matched LOB orders into fixed-size volume buckets.

    Each bucket accumulates ``bucket_size`` units of effective volume split
    by side; a completed bucket is stamped with the time of the trade that
    filled it. A trailing partially-filled bucket is discarded.

    :return: DataFrame indexed by timestamp, one row per completed bucket.
    """
    requests = [FeatureRequest('LobOrdersFeature', self._params, {})]
    lob_orders = self._runner.get_dataframes_by_stickers(
        requests, [sticker])[sticker]
    if lob_orders.empty:
        return pd.DataFrame([])

    # .copy() so the new 'volume' column is not assigned onto a view of
    # the filtered frame (SettingWithCopy).
    matched_orders = lob_orders[lob_orders['order_type'] == 'matched'].copy()
    matched_orders['volume'] = matched_orders.apply(
        self.calculate_effective_volume, axis=1)

    buckets = []
    current_bucket = self._new_bucket()
    v_to_fill = self.bucket_size  # capacity left in the current bucket

    # Bucketing process: pour each trade's volume into buckets, closing a
    # bucket every time its remaining capacity is exhausted.
    for row in matched_orders.itertuples():
        time = getattr(row, 'timestamp')
        volume = getattr(row, 'volume')
        side = getattr(row, 'side')
        while volume > v_to_fill:
            current_bucket[side] += v_to_fill
            current_bucket['timestamp'] = unixepoch2datetime(
                time, milliseconds=True)
            buckets.append(current_bucket)
            # BUG FIX: subtract the volume actually consumed (the OLD
            # remaining capacity) before resetting it. Previously
            # v_to_fill was reset to bucket_size first, so a full
            # bucket_size was subtracted and volume was silently lost
            # whenever the bucket had been partially filled.
            volume -= v_to_fill
            current_bucket = self._new_bucket()
            v_to_fill = self.bucket_size
        if volume > 0:
            current_bucket[side] += volume
            v_to_fill -= volume

    return pd.DataFrame(buckets).set_index('timestamp')
def _compute_recursion(self, previous_tick, start_dt, end_dt):
    """Extend the volume clock from *previous_tick* over [start_dt, end_dt).

    Aggregated trades are converted to volume in the configured units and
    sliced into fixed-volume ticks, carrying the unfilled remainder forward
    from the previous tick.
    """
    self._logger.info('Computing clock for {} to {}'.format(start_dt, end_dt))
    trades_feature_request = [
        FeatureRequest('CryptoTrades',
                       {'source': self._params['source'],
                        'ticker': self._params['ticker'],
                        'aggregate': True})]
    all_ticks = []
    if previous_tick is None:
        # Fresh clock: the configured offset seeds the first bucket.
        remainder = -self._params['offset']
        previous_time = self._params['start']
    else:
        remainder = previous_tick['remainder']
        previous_time = previous_tick.name

    # Get trades aggregated according to their timestamp.
    trades_df = self._runner.get_merged_dataframes(
        trades_feature_request, start_dt, end_dt)
    if not trades_df.empty:
        if self._params['units'] == CryptoCrossSide.QUOTE_CURRENCY:
            volume = trades_df['price'] * trades_df['size']
        elif self._params['units'] == CryptoCrossSide.ASSET:
            volume = trades_df['size']
        else:
            raise ValueError(
                'Unknown unit type {}'.format(self._params['units']))
        ticks, remainder = self.extract_volume_steps(
            volume, remainder, self._params['volume'])
        if not ticks.empty:
            ticks['previous'] = ticks.index.to_series(keep_tz=True).shift(1)
            # BUG FIX: DataFrame.ix was deprecated and then removed from
            # pandas; use iloc with an explicit column position instead.
            ticks.iloc[0, ticks.columns.get_loc('previous')] = previous_time
            all_ticks.append(ticks)
    if all_ticks:
        return pd.concat(all_ticks)
    return self.get_empty_feature()
def _default_parameters(cls, params, runner):
    """Default to an hourly fixed-period clock."""
    hourly_clock = FeatureRequest('FixedPeriodClock', {'frequency': '1H'})
    return {'clock': hourly_clock}
u'BB-EENP2613973-FTPS-2-4_0', u'BB-EENP2613973-FTPS-2-4_5', u'BB-EENP2613973-FTPS-2-5_0', u'BB-EENP2613973-FTPS-2-5_5', u'BB-EENP2613973-FTPS-2-6_0', u'BB-EENP2613973-FTPS-2-6_5', u'BB-EENP2613973-FTPS-2-7_0', u'BB-EENP2613973-FTPS-2-7_5', u'BB-EENP2613973-FTPS-2-8_0', u'BB-EENP2613973-FTPS-2-8_5', u'BB-EENP2613973-FTPS-2-9_0', u'BB-EENP2613973-FTPS-2-9_5', u'BB-EENP2613973-FTPS-2-n0_5', u'BB-EENP2613973-FTPS-2-n10_0', u'BB-EENP2613973-FTPS-2-n10_5', u'BB-EENP2613973-FTPS-2-n11_0', u'BB-EENP2613973-FTPS-2-n11_5', u'BB-EENP2613973-FTPS-2-n12_0', u'BB-EENP2613973-FTPS-2-n12_5', u'BB-EENP2613973-FTPS-2-n13_0', u'BB-EENP2613973-FTPS-2-n13_5', u'BB-EENP2613973-FTPS-2-n14_0', u'BB-EENP2613973-FTPS-2-n14_5', u'BB-EENP2613973-FTPS-2-n15_0', u'BB-EENP2613973-FTPS-2-n15_5', u'BB-EENP2613973-FTPS-2-n16_0', u'BB-EENP2613973-FTPS-2-n16_5', u'BB-EENP2613973-FTPS-2-n17_0', u'BB-EENP2613973-FTPS-2-n17_5', u'BB-EENP2613973-FTPS-2-n18_0', u'BB-EENP2613973-FTPS-2-n18_5', u'BB-EENP2613973-FTPS-2-n19_0', u'BB-EENP2613973-FTPS-2-n19_5', u'BB-EENP2613973-FTPS-2-n1_0', u'BB-EENP2613973-FTPS-2-n1_5', u'BB-EENP2613973-FTPS-2-n2_0', u'BB-EENP2613973-FTPS-2-n2_5', u'BB-EENP2613973-FTPS-2-n3_0', u'BB-EENP2613973-FTPS-2-n3_5', u'BB-EENP2613973-FTPS-2-n4_0', u'BB-EENP2613973-FTPS-2-n4_5', u'BB-EENP2613973-FTPS-2-n5_0', u'BB-EENP2613973-FTPS-2-n5_5', u'BB-EENP2613973-FTPS-2-n6_0', u'BB-EENP2613973-FTPS-2-n6_5' ] fr = FeatureRequest( 'MicroPrice', {'bookmakers': [Bookmakers.PINNACLE_SPORTS, Bookmakers.BETFAIR]}, {}) # runner.get_dataframes_by_stickers([fr], stickers) runner_.precompute_by_stickers([fr], stickers_, n_jobs=6, batch_size=1)
market = TennisMarkets.MATCH_ODDS selection = TennisSelections.PLAYER_A elif sport == Sports.BASKETBALL: market = BasketballMarkets.FULL_TIME_MONEYLINE selection = BasketballSelections.HOME_TEAM else: raise ValueError('Unknown Sport') sticker = generate_sticker(sport, (MarketScopes.EVENT, event_id), market, selection, bookmaker=self._params['bookmakers'][0]) request = [FeatureRequest('VPINBasicFeature', self._params, {})] return self._runner.get_dataframes_by_stickers(request, [sticker])[sticker] if __name__ == '__main__': from sgmtradingcore.analytics.features.runner import FeatureRunner from sgmtradingcore.analytics.features.request import FeatureRequest # stickers = ['T-EENP2862146-FT12-A.BF'] # runner = FeatureRunner() # requests = [FeatureRequest('VPINBasicFeature', # {'bookmakers': [Bookmakers.BETFAIR], 'bucket_size': 1000, 'bucket_number': 50})] # dfs = runner.get_dataframes_by_stickers(requests, stickers, repopulate=True) runner_ = FeatureRunner() troublesome_sticker = 'T-EENP2733289-FT12-A.BF' request_ = FeatureRequest('LobOrdersFeature', {'bookmakers': [Bookmakers.BETFAIR]}) runner_.get_dataframes_by_stickers([request_], [troublesome_sticker]) pass
indicator_set = IndicatorSet(indicator_spec=self._indicator_spec, rolling_params=self._rolling_params) out_df = indicator_set._apply_indicators_rolling_fixture(in_df, cols) del out_df['MicroPrice'] return out_df if __name__ == "__main__": from runner import FeatureRunner from sgmtradingcore.analytics.features.request import FeatureRequest from sgmbasketball.data.common import get_fixtures_info from sgmbasketball.utils import game_id_to_string runner = FeatureRunner() request = FeatureRequest('MeanReversionIndicators', {}, {}) events = get_fixtures_info(season=2018, comp='NBA') game_ids = [game_id_to_string(e) for e in events['fixture_id'].tolist()] sticker_request = FeatureRequest( 'AvailableStickers', {'bookmakers': [Bookmakers.PINNACLE_SPORTS, Bookmakers.BETFAIR]}, {}) stickers = runner.get_event_features(sticker_request, game_ids) flat_sticker_parts = [ sticker_parts_from_sticker(s) for events, av_stick in stickers.iteritems() for s in av_stick['AvailableStickers']['stickers'] ]
@classmethod
def get_empty_feature(cls):
    """Return an empty frame with the feature's schema (traded_volume)."""
    return pd.DataFrame(columns=['traded_volume'])


if __name__ == '__main__':
    from sgmtradingcore.analytics.features.crypto.runner import CryptoFeatureRunner
    import datetime
    import pytz

    runner_ = CryptoFeatureRunner()
    ticker = 'BTCUSD.PERP.BMEX'
    # Fixed-volume clock driven by aggregated BMEX perpetual trades.
    clock = FeatureRequest('FixedVolumeClock', {
        'start': datetime.datetime(2018, 5, 30, tzinfo=pytz.UTC),
        'volume': 100000,
        'offset': 0,
        'units': CryptoCrossSide.ASSET,
        'ticker': ticker,
        'source': CryptoDataSource.ARCTIC,
    })
    input = FeatureRequest('CryptoTrades', {
        'source': CryptoDataSource.ARCTIC,
        'ticker': 'BTCUSD.PERP.BMEX',
        'aggregate': True,
    })
    fr = FeatureRequest('VolumeTraded', {'clock': clock, 'input': input})
    df = runner_.get_merged_dataframes(fr, '2018-06-01', '2018-06-03',
                                       repopulate=True)
    print(df)
    pass
def _get_stickers_for_match(self, event_id):
    """Return all available stickers for *event_id* for our bookmakers."""
    request = FeatureRequest('AvailableStickers',
                             {'bookmakers': self._bookmakers}, {})
    features = self._runner.get_event_features([request], [event_id])
    return features[event_id]['AvailableStickers']['stickers']
return self.get_empty_feature() @classmethod def get_empty_feature(cls): return pd.DataFrame([], columns=['timestamp', 'previous', 'n_ticked', 'remainder']).set_index('timestamp') @classmethod def recommended_storage_period(cls, params): return dt.timedelta(hours=24) if __name__ == '__main__': from sgmtradingcore.analytics.features.crypto.runner import CryptoFeatureRunner from datetime import datetime import pytz fr = FeatureRequest('FixedVolumeClock', {}) runner_ = CryptoFeatureRunner() params = {'start': datetime(2018, 5, 30, tzinfo=pytz.UTC), 'ticker': 'BTCUSD.PERP.BMEX', } fr = FeatureRequest('FixedVolumeClock', params) runner_ = CryptoFeatureRunner() dfs = runner_.get_merged_dataframes([fr], '2018-06-01', '2018-06-03', repopulate=True) print dfs # fr = FeatureRequest('FixedPeriodClock', {}) # dfs2 = runner_.get_merged_dataframes([fr], '2018-06-12', '2018-06-13') # print dfs2 # pass
# # 'pressure_', # {'precache': True}, ), # FeatureRequest(MicroPrice.__name__, # {'bookmakers': FOOTBALL_BOOKMAKERS}, # {'precache': True}), # FeatureRequest(MicroPrice.__name__, # {'bookmakers': FOOTBALL_BOOKMAKERS, 'flipped': True}, # {'precache': True}), # ] # dfs = runner.get_dataframes_by_stickers(requests, stickers) # # dfs = runner.get_dataframes_by_stickers_multithread(requests, stickers, n_threads=6) # # print 'returned {} dfs'.format(len(dfs)) # print dfs requests = [ FeatureRequest('AvailableStickers', {}, {}), FeatureRequest('AvailableStickers', { 'bookmakers': [ Bookmakers.PINNACLE_SPORTS, Bookmakers.MATCHBOOK, Bookmakers.BETFAIR ] }, {}, prefix='a_'), FeatureRequest('AvailableStickers', { 'bookmakers': [ Bookmakers.PINNACLE_SPORTS, Bookmakers.MATCHBOOK, Bookmakers.BETFAIR ] }, {}, prefix='b_') ]
from sgmtradingcore.analytics.features.request import FeatureRequest from sgmtradingcore.analytics.features.crypto.runner import CryptoFeatureRunner import datetime from stratagemdataprocessing.crypto.enums import get_first_valid_datetime runner_ = CryptoFeatureRunner() tickers_ = ['BTCUSD.SPOT.BITF'] start_dt_ = get_first_valid_datetime()[tickers_[0]] end_dt_ = start_dt_ + datetime.timedelta( days=2) # datetime.datetime(2014, 6, 30, 0, 0, tzinfo=pytz.UTC) requests = [ FeatureRequest( 'CryptoTrades', { 'source': CryptoDataSource.RAW_FILES, 'ticker': tickers_[0], 'aggregate': True }) ] cache_trades = runner_.get_merged_dataframes(requests, start_dt_, end_dt_) requests = [ FeatureRequest('CryptoTrades', { 'source': CryptoDataSource.ARCTIC, 'ticker': tickers_[0] }) ] arctic_trades = runner_.get_merged_dataframes(requests, start_dt_, end_dt_) requests = [ FeatureRequest('CryptoLOB', {
'input': FeatureRequest( 'CryptoLOB', { 'ts_name': TSInput.L1_PRICE, 'ticker': 'BTCUSD.SPOT.BITF', 'source': CryptoDataSource.ARCTIC }), 'columns': ['BIDPRC1', 'ASKPRC1'], 'clock': FeatureRequest('FixedPeriodClock', {'frequency': '1S'}) } def _compute_ticks(self, groups): return groups.last() if __name__ == '__main__': from sgmtradingcore.analytics.features.crypto.runner import CryptoFeatureRunner import pytz runner = CryptoFeatureRunner(env='dev') start_dt = dt.datetime(2018, 1, 1, 0, 0, tzinfo=pytz.UTC) end_dt = dt.datetime(2018, 1, 4, 0, 0, tzinfo=pytz.UTC) requests = [FeatureRequest('BidAskPrice', {}, prefix='1s_')] res = runner.get_merged_dataframes(requests, start_dt, end_dt) # runner.delete_features(requests, tickers, start_dt, end_dt) pass
if __name__ == '__main__': from sgmtradingcore.analytics.features.crypto.runner import CryptoFeatureRunner import pytz import datetime as dt # first valid timestamp BITS 17/3/2014 tickers = ['BTCUSD.SPOT.BITS'] runner = CryptoFeatureRunner(env='dev') start_dt = dt.datetime(2018, 1, 1, 0, 0, tzinfo=pytz.UTC) end_dt = dt.datetime(2018, 2, 28, 0, 0, tzinfo=pytz.UTC) input0 = FeatureRequest('CryptoLOB', { 'ts_name': TSInput.L1_MID, 'ticker': 'BTCUSD.SPOT.BITS' }) inputRequest = FeatureRequest('OHLC', { 'frequency': '15T', 'input': input0 }) requests = [ FeatureRequest('BollingerBands', { 'window': 20, 'input': inputRequest, 'columns': ['close'] }) ]
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - ' '%(levelname)s - %(message)s') event_ids = [ 'ENP2614510', 'ENP2614512', 'ENP2614513', 'ENP2614514', 'ENP2614511', 'ENP2614515' ] requests = [ FeatureRequest( 'HistoricalOrders', { 'strategy_name': 'bball_pbp', 'strategy_desc': 'nba_deadball_lambda_1', 'strategy_code': 'nba_quarters_db', 'trading_user_id': get_environment('PROD')['trading_user_id'], 'backtest_kwargs': { 'mnemonic': 'nba_lambda_prod_quarters_3' }, 'sync_field': 'placed_time' }, {}), FeatureRequest( 'BacktestOrders', { 'strategy_name': 'bball_pbp', 'strategy_desc': 'nba_deadball_lambda_1', 'strategy_code': 'nba_quarters_db', 'trading_user_id': get_environment('PROD')['trading_user_id'], 'backtest_kwargs': { 'mnemonic': 'nba_lambda_prod_quarters_3' }, 'sync_field': 'placed_time'
if __name__ == '__main__': from sgmtradingcore.analytics.features.request import FeatureRequest from stratagemdataprocessing.crypto.enums import generate_crypto_ticker, CryptoMarkets, CryptoExchange import datetime as dt from sgmtradingcore.analytics.features.crypto.market_data import TSInput import pytz start_dt = dt.datetime(2014, 3, 17, 0, 0, tzinfo=pytz.UTC) end_dt = dt.datetime(2014, 3, 20, 0, 0, tzinfo=pytz.UTC) requests = [ FeatureRequest('HeikinAshiCandles', { 'frequency': '15T', 'data_col_name': TSInput.L1_MID, } ) ] # requests = [ # FeatureRequest('OHLC', # { # 'frequency': '15m', # 'data_col_name': 'l1_ask', # 'input_data_cfg': # { # 'is_feature_based': True, # 'base_feature_name': 'BidAskPrice', # 'base_feature_params': {'frequency': '1s'},
def get_empty_candles_report(ticker, start_dt, end_dt, freq,
                             lob_or_trades="trades"):
    """
    Find the candles missing from storage for the given interval/frequency.

    :param ticker: crypto exchange ticker
    :param start_dt (datetime): start date (must be rounded to the nearest day)
    :param end_dt (datetime): end_date (must be rounded to the nearest day)
    :param freq (str): candles' frequency, currently supported:
        '15m', '1h', '4h', '1d'.
    :param lob_or_trades (str): 'trades' if we want the candles based on
        trades, 'lob' if we want candles based on lob
    :return: DataFrame containing the following rows:
        - expected_candles (number)
        - missing_number (how many candles we are missing)
        - missing_candles (the actual intervals/single timestamps missing)
    """
    if lob_or_trades == "lob":
        request = [FeatureRequest('OHLC',
                                  {'frequency': freq,
                                   'base_ts': {'is_feature_base_ts': False,
                                               'base_ts_name': 'l1_mid'}},
                                  {})]
    elif lob_or_trades == "trades":
        request = [FeatureRequest('OHLCTrades', {'frequency': freq}, {})]
    else:
        raise ValueError(
            "lob_or_trades {} not supported yet".format(lob_or_trades))

    runner = CryptoFeatureRunner()
    df = runner.get_merged_dataframes(request, start_dt, end_dt)
    # Drop None/NaN rows, i.e. timestamps with no candle data.
    df = df.dropna()

    # pandas spells the 15-minute frequency '15min'.
    if freq == '15m':
        freq = '15min'

    # Truncate both dates to the hour before building the expected index.
    start_dt = datetime(*start_dt.timetuple()[:4])
    end_dt = datetime(*end_dt.timetuple()[:4])
    expected_timestamps = pd.date_range(start=start_dt, end=end_dt,
                                        freq=freq)[:-1]
    actual_timestamps = df.index

    expected_candles_nr = len(expected_timestamps)
    missing_number = expected_candles_nr - len(actual_timestamps)
    missing_candles = sorted(set(expected_timestamps) - set(actual_timestamps))
    if freq != '1d':
        # Collapse consecutive missing timestamps into ranges.
        missing_candles = get_date_range(missing_candles, freq)

    report_dict = {
        'expected_candles': expected_candles_nr,
        'missing_number': missing_number,
        'missing_candles': [missing_candles],
    }
    report = pd.DataFrame.from_dict(report_dict, orient='index')
    return report.reindex(
        index=['expected_candles', 'missing_candles', 'missing_number'])
def main():
    """Cross-check stored daily OHLC trade candles against candles rebuilt
    directly from raw Arctic trades, for each configured ticker."""
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(name)s - '
                               '%(levelname)s - %(message)s')
    tickers = ['BTCUSD.SPOT.BITS', 'BTCUSD.SPOT.BITF', 'BTCUSD.PERP.BMEX']
    frequency = '1d'
    runner = CryptoFeatureRunner()
    request = [
        FeatureRequest('OHLCTrades', {'frequency': frequency}, {}),
    ]

    _FIRST_VALID_DATETIMES = get_first_valid_datetime()
    _LAST_VALID_DATETIMES = get_last_valid_datetime()

    for ticker in tickers:
        # FIX: a fresh CryptoFeatureRunner was previously constructed on
        # every iteration; the one built above is reused instead.
        start_dt = _FIRST_VALID_DATETIMES[ticker]
        end_dt = _LAST_VALID_DATETIMES[ticker]
        df = runner.get_merged_dataframes(request, start_dt, end_dt)

        column_names = ['open', 'high', 'low', 'close']
        column_names_a = ['a_open', 'a_high', 'a_low', 'a_close']
        # (removed unused local `column_names_t`)

        # Rebuild daily candles straight from the raw Arctic trades.
        mongo_client = MongoPersister.init_from_config('arctic_crypto',
                                                       auto_connect=True)
        arctic = ArcticStorage(mongo_client.client)
        arctic_trades_df = arctic.load_trades(ticker, start_dt, end_dt)
        timestamps = arctic_trades_df.index.to_pydatetime().tolist()
        prices = arctic_trades_df['price'].tolist()
        arctic_candles = make_ohlc_candles(timestamps=timestamps,
                                           values=prices,
                                           period=timedelta(days=1))
        arctic_candles.columns = ['a_' + str(col)
                                  for col in arctic_candles.columns]
        df1 = pd.merge(df, arctic_candles)
        compare_candles(ticker, df1, column_names, column_names_a)
""" def _compute_ticks(self, groups): ret = super(PriceMoveRegression, self)._compute_ticks(groups) ret.index = groups['previous'].first() return pd.DataFrame(ret['close'] - ret['open'], columns=['target']) if __name__ == '__main__': from sgmtradingcore.analytics.features.crypto.runner import CryptoFeatureRunner runner = CryptoFeatureRunner() start_date = datetime.datetime(2018, 6, 1, tzinfo=pytz.utc) end_date = start_date + datetime.timedelta(days=5) ticker = 'BTCUSD.PERP.BMEX' params = { 'input': FeatureRequest('CryptoTrades', { 'source': CryptoDataSource.ARCTIC, 'ticker': ticker }), 'columns': [TSInput.TRADE_PRICE], 'start': start_date, } fr = FeatureRequest('PriceMoveRegression', params) df = runner.get_merged_dataframes([fr], start_date, end_date, repopulate=True)