import datetime
import logging
import time
import zipfile
from collections import defaultdict
from itertools import imap  # Python 2 module: imap/time.clock are used throughout
from os.path import split, splitext

import dateutil.parser
import numpy as np
import pandas as pd
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError

# Project-local helpers (get_first, pandas_to_dicts, configure_root_logger,
# races_from_bars, training_from_races, extract_horse_name, convert_types,
# parse_horse_name, parse_place, parse_date, strategy, etc.) are defined
# elsewhere in the project and assumed to be importable from here.


def vwao_from_bars(bars):
    def format_vwao(d):
        # Expose the VWAO as a single synthetic level of back/lay depth,
        # splitting the total matched volume evenly between the two sides.
        d['back_prices'] = [{'amount': d['total_matched'] / 2.0, 'price': d['vwao'],
                             'depth': 1, 'type': 'L'}]
        d['lay_prices'] = [{'amount': d['total_matched'] / 2.0, 'price': d['vwao'],
                            'depth': 1, 'type': 'B'}]
        d['last_price_matched'] = d['vwao']
        del d['vwao']
        return d

    logging.info('Calculating VWAO from bars...')
    bars['notional'] = bars.volume_matched * bars.odds
    agg_dict = {'country': get_first,
                'event': get_first,
                'course': get_first,
                'scheduled_off': get_first,
                'notional': lambda x: float(np.sum(x)),
                'volume_matched': lambda x: float(np.sum(x)),
                'selection': get_first}
    gb = bars.dropna(subset=['selection']).groupby(['market_id', 'selection_id']).aggregate(agg_dict) \
        .rename(columns={'volume_matched': 'total_matched'})
    # Volume-weighted average odds = sum(volume * odds) / sum(volume).
    gb['vwao'] = gb.notional / gb.total_matched
    del gb['notional']
    return imap(format_vwao, pandas_to_dicts(gb.reset_index()))
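# A minimal usage sketch (not part of the original module). Assuming a toy bars
# frame with just the columns vwao_from_bars touches, and assuming the local
# helper pandas_to_dicts yields one dict per row, the generator emits one
# document per (market_id, selection_id): vwao = (100*4 + 300*5) / 400 = 4.75.
def _example_vwao_usage():
    bars = pd.DataFrame({'market_id': ['1.23', '1.23'],
                         'selection_id': ['42', '42'],
                         'country': ['GB', 'GB'],
                         'event': ['Win', 'Win'],
                         'course': ['Ascot', 'Ascot'],
                         'scheduled_off': [pd.Timestamp('2013-01-01 14:00')] * 2,
                         'selection': ['DANCER', 'DANCER'],
                         'odds': [4.0, 5.0],
                         'volume_matched': [100.0, 300.0]})
    doc = next(iter(vwao_from_bars(bars)))
    # doc['last_price_matched'] == 4.75 and doc['back_prices'] ==
    # [{'amount': 200.0, 'price': 4.75, 'depth': 1, 'type': 'L'}]
    return doc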
def upload(args):
    args, path = args
    parse = lambda x: dateutil.parser.parse(x, dayfirst=True)
    try:
        directory, file_name = split(path)
        file_part, ext = splitext(file_name)

        formatter = logging.Formatter('%(asctime)s - ' + file_name + ' - %(levelname)s: %(message)s')
        configure_root_logger(args.logtty, args.logfile, formatter=formatter)

        db = MongoClient(args.host, args.port)[args.db]

        if ext == '.zip':
            logging.info('Reading zipped csv file into memory')
            fin = zipfile.ZipFile(path, 'r').open(file_part + '.csv')
        else:
            logging.info('Reading csv file into memory')
            fin = path
        bars = pd.read_csv(fin, parse_dates=['SCHEDULED_OFF'], date_parser=parse)
        bars.columns = bars.columns.map(lambda x: x.lower())
        bars = bars.rename(columns={'event_id': 'market_id'})
        for col in ['market_id', 'selection_id']:
            bars[col] = bars[col].map(str)  # Make sure dtype == str

        # Insert other filters here:
        bars = bars[bars.in_play == 'PE']  # keep pre-event ('PE') bars only
        bars['selection'] = bars['selection'].map(extract_horse_name)

        races = races_from_bars(bars).reset_index()
        train = training_from_races(races)
        vwao = vwao_from_bars(bars)

        try:
            db[args.races].insert(pandas_to_dicts(races), continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; if this is a surprise, ABORT! msg=%s' % (db[args.races], e))
        try:
            db[args.train].insert(convert_types(train, {'n_runners': int}), continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; if this is a surprise, ABORT! msg=%s' % (db[args.train], e))
        try:
            db[args.vwao].insert(vwao, continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; if this is a surprise, ABORT! msg=%s' % (db[args.vwao], e))

        logging.info('Successfully uploaded to %s' % db)
    except Exception as e:
        logging.critical(e)
        raise
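# upload takes a single (args, path) tuple, which suggests it is meant to be
# mapped over many Betfair csv/zip files in parallel. A sketch of such a
# driver, under that assumption (the original entry point is not shown here):
def _example_parallel_upload(args, paths):
    from multiprocessing import Pool
    pool = Pool(processes=4)
    # Each worker builds its own MongoClient inside upload, so no connection
    # is shared across process boundaries.
    pool.map(upload, [(args, p) for p in paths])
    pool.close()
    pool.join()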
def run_backtest(context):
    n_bkt, args, mparams = context

    formatter = logging.Formatter('%(asctime)s - n_bkt=' + str(n_bkt) + ' - %(levelname)s: %(message)s')
    configure_root_logger(args.logtty, args.logfile,
                          MongoClient(args.host, args.port)[args.db][args.logmongo]
                          if args.logmongo is not None else None,
                          formatter=formatter)

    db = MongoClient(args.host, args.port)[args.db]

    where_clause = defaultdict(lambda: {})
    country, start_date, end_date = 'GB', parse_date(args.start), parse_date(args.end)
    if start_date is not None:
        where_clause['scheduled_off']['$gte'] = start_date
    if end_date is not None:
        where_clause['scheduled_off']['$lte'] = end_date
    if country is not None:
        where_clause['country'] = country
    sorted_races = db[args.train].find(where_clause, sort=[('scheduled_off', 1)], timeout=False)

    exec_services = HistoricalExecutionService(db)
    strat = strategy.Balius(mu=mparams['mu'], sigma=mparams['sigma'], beta=mparams['beta'],
                            tau=mparams['tau'], draw_probability=mparams['draw_prob'],
                            risk_aversion=mparams['risk_aversion'], min_races=mparams['min_races'],
                            max_exposure=mparams['max_exposure'])

    st = time.clock()
    strategy.backtest(exec_services, strat, sorted_races)
    en = time.clock()
    logging.info('Backtest finished in %.2f seconds' % (en - st))

    strat_dict = strat.to_dict()
    strat_id = db[STRATEGIES_COLL].insert(strat_dict)
    logging.info('Strategy serialised to %s with id=%s' % (db[STRATEGIES_COLL], strat_id))

    bets = price_historical_bets(db, exec_services.get_mu_bets()[0])

    scorecard = make_scorecard(bets)
    now = datetime.datetime.utcnow()
    scorecard.update({'params': {'ts': strat_dict['hm']['ts'], 'risk': strat_dict['risk']},
                      'timestamp': now,
                      'run_seconds': en - st,
                      'strategy_id': strat_id})
    scorecard_id = db[SCORECARDS_COLL].insert(scorecard)
    logging.info('Scorecard inserted in %s with id=%s' % (db[SCORECARDS_COLL], scorecard_id))

    db[BETS_COLL].insert(add_scorecard_id_to_dicts(scorecard_id, bets))
    logging.info('Associated bets inserted in %s' % db[BETS_COLL])

    markets = market_breakdown(bets).reset_index()
    markets = pandas_to_dicts(markets, {'n_runners': int})
    db[MARKETS_COLL].insert(add_scorecard_id_to_dicts(scorecard_id, markets))
    logging.info('Associated market breakdown inserted in %s' % db[MARKETS_COLL])
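# run_backtest unpacks (n_bkt, args, mparams), so a hyper-parameter sweep can
# enumerate contexts and fan them out across workers. A sketch under that
# assumption; the parameter values below are illustrative only, not tuned
# defaults from the original project.
def _example_backtest_sweep(args):
    from itertools import product
    from multiprocessing import Pool
    grid = [{'mu': 25.0, 'sigma': 25.0 / 3, 'beta': 25.0 / 6, 'tau': 25.0 / 300,
             'draw_prob': 0.0001, 'risk_aversion': ra, 'min_races': mr,
             'max_exposure': 50}
            for ra, mr in product([0.05, 0.1], [3, 5])]
    contexts = [(n, args, mparams) for n, mparams in enumerate(grid)]
    Pool(processes=2).map(run_backtest, contexts)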
def upload(args):
    args, path = args
    try:
        _, file_name = split(path)
        formatter = logging.Formatter('%(asctime)s - ' + file_name + ' - %(levelname)s: %(message)s')
        configure_root_logger(formatter=formatter)

        db = MongoClient(args.host, args.port)[args.db]

        logging.info('Reading csv file into memory')
        races = pd.read_csv(path, sep='\t', parse_dates=[[0, 1]], dayfirst=True)
        if len(races) <= 2:
            logging.warning('No races in file. Skipping')
            return
        races.rename(columns={'race_date_race_time': 'scheduled_off',
                              'horse_name': 'selection',
                              'place': 'ranking'}, inplace=True)
        races['selection'] = races['selection'].map(parse_horse_name)
        races['ranking'] = races['ranking'].map(parse_place)

        # Collapse one row per runner into one document per race, keeping only
        # runners with a valid (non-negative) ranking; rankings become 0-based.
        races = pd.DataFrame.from_dict([{'course': k[0],
                                         'scheduled_off': k[1],
                                         'selection': v['selection'][v.ranking >= 0].tolist(),
                                         'ranking': (v['ranking'][v.ranking >= 0] - 1).tolist()}
                                        for k, v in races.groupby(['track', 'scheduled_off'])])

        # Cast any int64 columns to plain int so they serialise cleanly to Mongo.
        dtypes = list(races.dtypes[races.dtypes == np.int64].index)
        type_mappers = dict(zip(dtypes, [int] * len(dtypes)))
        db[args.races].insert(pandas_to_dicts(races, type_mappers))

        logging.info('Successfully uploaded to %s' % db)
    except Exception as e:
        logging.critical(e)
        raise
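# The groupby above collapses one row per runner into one document per race.
# A toy illustration of just that step (assumption: parse_place maps
# non-finishers to a negative ranking, so the `ranking >= 0` mask drops them
# and finishing positions become 0-based):
def _example_race_collapse():
    rows = pd.DataFrame({'track': ['Ascot'] * 3,
                         'scheduled_off': [pd.Timestamp('2013-01-01 14:00')] * 3,
                         'selection': ['ALPHA', 'BRAVO', 'CHARLIE'],
                         'ranking': [1, 2, -1]})
    docs = [{'course': k[0], 'scheduled_off': k[1],
             'selection': v['selection'][v.ranking >= 0].tolist(),
             'ranking': (v['ranking'][v.ranking >= 0] - 1).tolist()}
            for k, v in rows.groupby(['track', 'scheduled_off'])]
    # docs == [{'course': 'Ascot', 'scheduled_off': Timestamp('2013-01-01 14:00'),
    #           'selection': ['ALPHA', 'BRAVO'], 'ranking': [0, 1]}]
    return docs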