def avg_ltp_for_winner():
    """Plot the average LTP per time-to-start bucket and report the lowest one.

    Bucket boundaries run from ``-3 * 60`` up to (excluding) ``10 * 60`` in
    steps of 10 and are handed to
    ``MongoManager.get_avg_ltp_by_time_to_start_bucketed`` for the last 1000
    days of the ``price_scrape_tickdata`` collection.

    The reported value is the boundary of the bucket with the lowest average
    LTP converted to minutes; the original printed the bucket *index* as if it
    were minutes.
    """
    start = -3 * 60
    end = 10 * 60
    step = 10
    x = np.arange(start, end, step)
    buckets = x.tolist()

    m = MongoManager(use_remote=True, use_archive=False)
    from_date = datetime.now() - timedelta(days=1000)
    to_date = datetime.now()
    ltp = m.get_avg_ltp_by_time_to_start_bucketed('LTP', 'avg', buckets,
                                                  from_date, to_date,
                                                  countrycode=['UK', 'IE'],
                                                  col='price_scrape_tickdata')
    print(ltp)

    LTPs = [v['avg_ltp'] for v in ltp]
    # NOTE(review): assumes the boundaries are seconds until start and that
    # the query returns one entry per boundary (the plot below relies on the
    # same alignment) - confirm against MongoManager.
    best_bucket = x[np.argmin(LTPs)]
    print("Best time to place lays is {} minutes before race start.".format(
        best_bucket / 60))

    plt.plot(x, LTPs)
    plt.show()
def load_enriched_ts(self, from_year=2000, to_year=2100, strategy='back', clip=-200, localhost=True,
                     use_archive=True, countrycode=None):
    """Download the enriched time series and prepare features, targets and weights.

    Sets ``self.strategy``, ``self.X`` (feature matrix), ``self.Y`` (the
    ``act`` flag plus the payoff column named by ``strategy``),
    ``self.weights_original`` (payoffs of the chosen strategy) and ``self.cp``.
    """
    self.strategy = strategy
    manager = MongoManager(use_remote=not localhost, use_archive=use_archive)

    log.info("Downloading...")
    period_start = datetime(from_year, 1, 1, 0, 0)
    period_end = datetime(to_year, 12, 31, 23, 59)
    bets = manager.download_enriched_TS(period_start, period_end, countrycode=countrycode)
    log.info("Loaded bets data: {}".format(len(bets)))

    bets = bets.drop_duplicates(['marketid', 'selection_id']).sort_values('marketstarttime')
    log.info("After removing duplicates: {}".format(len(bets)))

    # For lays the "active" outcome is the horse NOT winning.
    bets['act'] = (1 - bets['winner']) if strategy == 'lay' else bets['winner']

    feature_columns = ['LTP t-0', 'average', 'minimum', 'maximum', 'median',
                       'std', 'participants', 'skew', 'kurtosis', 'overrun']
    self.X = bets[feature_columns].values
    self.Y = bets[['act', strategy]].values

    # Clipping happens after Y was extracted, so it only affects the weights.
    bets['lay'] = bets['lay'].clip(clip, 1)  # clip lays at clip value
    self.weights_original = bets[strategy].values  # contains back or lay payoffs
    self.cp = CustomPayoffs(strategy)
def collect_results():
    """Fetch results for scraped markets that have none stored yet.

    Markets present in ``price_scrape`` but missing from ``results_scrape``
    are looked up via ``get_market_result``; every market that already has a
    winner is written to the ``results_scrape`` collection.
    """
    manager = MongoManager()
    scraped_markets = manager.mongodb_price['price_scrape'].distinct('marketid')
    stored_results = manager.mongodb_price['results_scrape'].distinct('marketid')

    to_get = set(scraped_markets) - set(stored_results)
    log.info("To get {} results".format(len(to_get)))
    if not to_get:
        return

    results = {}
    for market_id in to_get:
        results.update(get_market_result(market_id))
    log.info("Got {} results".format(len(results)))

    for market_id, runner_statuses in results.items():
        winner = None
        losers = []
        for runner, status in runner_statuses.items():
            if status == 'WINNER':
                winner = runner
            if status == 'LOSER':
                losers.append(runner)

        # filter out races that have not yet completed
        if winner is None:
            continue

        document = {
            'marketid': market_id,
            'timestamp': datetime.datetime.now(),
            'winner': winner,
            'losers': losers,
        }
        manager.insert_document('results_scrape', document, database='price')
def upcoming_races():
    """Render the upcoming races returned by MongoManager as an HTML table."""
    manager = MongoManager()
    races_frame = pd.DataFrame.from_dict(manager.upcoming_races())
    return render_template('table.html',
                           tables=[races_frame.to_html()],
                           titles=['', 'Upcoming races'])
def test_mongo_get_bets0(self):
    """No races should be about to start at the fixed 2014 timestamp."""
    manager = MongoManager(use_archive=False)
    manager.reconnect(use_archive=False)

    moment = datetime.datetime.strptime('2014-02-28 16:54:15', "%Y-%m-%d %H:%M:%S")
    # The query itself goes through the manager created in setUp.
    races = self.mongodb.get_ts_for_races_about_to_start(now=moment)

    self.assertEqual(0, len(races))
def test_get_last_LTPs(self):
    """Requesting three prices for a known selection returns three LTPs."""
    manager = MongoManager(use_archive=True)
    manager.reconnect(use_archive=False)

    moment = datetime.datetime.strptime('2018-02-28 16:54:15', "%Y-%m-%d %H:%M:%S")
    last_prices = manager.get_last_LTPs(selection_id=16377754,
                                        amount_of_prices=3,
                                        now=moment)

    self.assertEqual(len(last_prices), 3)
def find_race_to_bet(now=None, min_before_start=0, countrycodes=None):
    """Return races that are about to start, reformatted for make_bet_recommendation.

    Args:
        now: reference time; defaults to the current time.
        min_before_start: forwarded to
            ``MongoManager.get_ts_for_races_about_to_start``.
        countrycodes: country codes to include; defaults to ``['GB', 'IE']``.

    Returns:
        The 5-tuple produced by ``reformat_to_array`` (races array, selection
        ids, market ids, event ids, race start times), or five ``None`` values
        when no race is available.
    """
    log.debug("Start downloading races to bet")
    now = dt.datetime.now() if not now else now
    if countrycodes is None:
        # Built per call instead of sharing one mutable default list.
        countrycodes = ['GB', 'IE']

    m = MongoManager()
    data = m.get_ts_for_races_about_to_start(now, countrycodes, min_before_start)

    # Group quotes by race ($marketid) and horse ($selection_id)
    # Fallback to back_prices0 if LTP = None (no traded price)
    races = {}
    for race in data:
        quotes = race['TS']
        if not quotes:
            # Without quotes there is no start time / event id to read.
            log.warning("No quotes found for race %s! Skipping it." % race['_id'])
            continue

        horses = sorted({x['Horse'] for x in quotes})
        race_start_time = sorted({x['RaceStartTime'] for x in quotes})
        race_event_id = sorted({x['EventID'] for x in quotes})
        if len(race_start_time) != 1:
            log.warning(
                "Inconsistent start time for race %s! Double check its quotes!"
                % race['_id'])

        race_dict = {
            'RaceStartTime': race_start_time[0],
            'EventID': race_event_id[0],
            'SelectionIds': horses,
        }

        for horse in horses:
            # todo: this is inconsistent with training
            ts = pd.Series({
                x['Time']: x['LTP'] if x['LTP'] else x['BestBack']
                for x in quotes if x['Horse'] == horse
            })

            # Check/log NaNs ratio if significant
            try:
                nan_ratio = float(np.count_nonzero(np.isnan(ts)) / len(ts))
            except Exception:
                # np.isnan cannot handle a series made up of None values.
                log.warning("All entries None in TS for MarketID: %s - EventID: %s - SelectionID: %s!" % \
                            (race['_id'], race_dict['EventID'], horse))
            else:
                if nan_ratio >= 0.25:
                    log.warning("NaNs ratio >= 25%% found for TS for MarketID: %s - EventID: %s - SelectionID: %s!" % \
                                (race['_id'], race_dict['EventID'], horse))

            race_dict[horse] = ts

        races[race['_id']] = race_dict

    if not races:
        log.debug("No races available for betting!")
        return None, None, None, None, None

    races_arr, selection_ids, market_ids, event_ids, race_start_times = reformat_to_array(
        races)
    return races_arr, selection_ids, market_ids, event_ids, race_start_times
def update_bets_with_settlement_from_betfair():
    """Copy settled Betfair orders into the orders stored in MongoDB."""
    log.info("Update bets with settlements from betfair")
    container = Container()
    settled_df, _fees = container.get_cleared_orders()
    manager = MongoManager()

    for _, order in settled_df.iterrows():
        event_id = str(order['event_id'])
        market_id = str(order['market_id'])
        manager.update_orders(event_id,
                              order['selection_id'],
                              market_id,
                              'SUCCESS',
                              order['price_matched'],
                              order['price_requested'],
                              order['size_settled'],
                              order['profit'])
def __init__(self, source, destination, use_local, use_archive):
    """Log the configuration and connect to the selected MongoDB instance.

    ``use_local`` selects the local instead of the remote server and
    ``use_archive`` selects the archive database.
    """
    settings = (
        ("source {}", source),
        ("destination {}", destination),
        ("use_local {}", use_local),
        ("use_archive_db {}", use_archive),
    )
    for template, value in settings:
        log.info(template.format(value))

    self.use_archive_db = use_archive
    self.m = MongoManager(use_remote=not use_local, use_archive=use_archive)
    self.source_collection = source
    self.destination_collection = destination
def update_place_bets_from_orders():
    """Write the per-race Betfair profit from the orders into ``place_bets``."""
    log.info("Update place_bets from orders collection")
    manager = MongoManager()

    for race_pnl in manager.get_pnl_per_race_from_orders():
        manager.update_document(collection_name='place_bets',
                                id_name='market_id',
                                id_value=race_pnl['_id'],
                                field_name='bf_profit',
                                field_value=race_pnl['bf_profit'])
def query_neural_network_for_bets(X):
    """Query the configured model for betting recommendations.

    X has to be a np array of shape [number_of_requested_predictions, elements]
    with ts starting at t-000. The prediction is returned in the order
    bet,lay,bet,lay... matching X; for 2 bets and 44 horses the shape would be
    (2,88).

    Returns the prediction together with the model name. The process is
    terminated when the mean prediction is NaN.
    """
    config = get_config()
    model_class = getattr(sys.modules[__name__], config.get('Betting', 'model_class'))
    network = model_class()
    network.load_model()

    manager = MongoManager()  # connection is opened but not used below
    model_input = X.tolist()
    requested_at = datetime.now()
    predicted = network.predict(X)
    # Assembled like a log record; nothing in this function persists it.
    record = {
        'input': model_input,
        'timestamp': requested_at,
        'output': predicted.tolist(),
    }

    mean_prediction = np.mean(predicted)
    log.info("Mean predicted profit across all bets: {}".format(mean_prediction))
    if np.isnan(mean_prediction):
        log.error("Tensorflow output: {}".format(predicted))
        log.error("Terminating thread")
        sys.exit()

    return predicted, network.model_name
def race_dataframes(max_runners, slice_hours):
    """Rebuild the ``OddsTS2017`` collection from the ``Backtesting`` races.

    The collection is dropped first. Every race from 2017 onwards with at
    most ``max_runners`` runners contributes its odds for the last
    ``slice_hours`` hours (index formatted as ``HH:MM``) together with its
    winner. Races that fail to process are skipped.
    """
    m = MongoManager()
    collection = m.mongodb_price.get_collection("Backtesting")
    m.mongodb_price.drop_collection("OddsTS2017")
    odds_collection = m.mongodb_price.get_collection("OddsTS2017")

    winner_sps = []
    earliest_market_time = dt.datetime(2017, 1, 1)
    for k, r in get_races(collection):
        try:
            if len(r.runners) > max_runners:
                # Too many runners in this race
                continue
            market_time = dt.datetime.strptime(r.marketTime, "%Y-%m-%dT%H:%M:%S.%fZ")
            if market_time < earliest_market_time:
                continue

            race_odds_df = r.get_hours_before(hours=slice_hours)
            # strftime works on both datetime and pandas Timestamp; the former
            # Timestamp.to_datetime() call is gone from current pandas and its
            # AttributeError was silently swallowed below.
            race_odds_df.index = race_odds_df.index.map(
                lambda x: x.strftime("%H:%M"))
            odds_collection.insert_one({
                'ts': race_odds_df.to_dict(),
                'winner': r.get_winner()
            })
        except Exception:
            # Best effort: skip races that cannot be processed, but let
            # KeyboardInterrupt / SystemExit through.
            continue

    # NOTE(review): winner_sps is never filled and this frame is discarded;
    # kept so the function's (None) return value stays unchanged.
    pd.DataFrame.from_records(winner_sps)
def lay_saftey_check(selection_id, last_price, current_lay_price, best_back):
    """Cap a lay price according to the configured restrictions.

    Args:
        selection_id: selection whose recent LTPs are loaded (for logging only).
        last_price: last scraped price (LTP or back).
        current_lay_price: the price the caller would like to lay at.
        best_back: best available back price (the best competitor).

    Returns:
        The smallest of ``current_lay_price``, the configured
        ``absolute_maximum``, ``last_price * last_price_multiplier`` and
        ``best_back * multiple_to_best_compatitor``.
    """
    m = MongoManager()
    ltps = m.get_last_LTPs(selection_id, amount_of_prices=5)
    ltps.append(last_price)
    log.info("LTPs from db (not used): {}".format(ltps))

    try:
        # cap the value of the list to 500 and clean ltps from any erroneous/non-numerical value
        ltps = [
            min(x, 500) for x in ltps
            if isinstance(x, numbers.Number) and not np.isnan(x)
        ]
        # take the median
        median = np.median(ltps)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        log.warning(
            "Unable to calculate median from ltps: {}. Assigning a safety threshold"
            .format(ltps))
        median = 500

    last_price_multiplier = config.getfloat("Restrictions",
                                            "last_price_multiplier")
    multiple_to_best_competitor = config.getfloat(
        "Restrictions", "multiple_to_best_compatitor")
    absolute_maximum = config.getfloat("Restrictions", "absolute_maximum")

    # The median is informational only; it does not take part in the cap.
    max_lay = min(current_lay_price, absolute_maximum,
                  last_price * last_price_multiplier,
                  best_back * multiple_to_best_competitor)

    if max_lay < current_lay_price:
        log.warning(
            "Restrict lay from {0} to {1}. | LTPs: {2} | best back: {3}".
            format(current_lay_price, max_lay, ltps, best_back))

    log.info(
        "current_lay_price (back level n for replacing_unfilled): {}".format(
            current_lay_price))
    log.info("LTP median (not used): {}".format(median))
    log.info("Last price (ltp or back): {}".format(last_price))
    log.info("best back (=best competitor): {}".format(best_back))

    return max_lay
def save_races(self, col):
    """Flatten the odds of every race in ``self.races`` and store them in ``col``.

    One document is written per (timestamp, selection) with the LTP, race
    metadata, seconds until the start and winner/loser flags. Only quotes from
    the last 70 minutes before the start, with a numeric LTP above 1, of races
    that have a winner are kept.
    """
    import numbers

    m = MongoManager(use_archive=True, use_remote=False)
    for race in tqdm(self.races):
        out = []
        winner = 'na'
        losers = []
        for k, v in race.runners.items():
            if v.status == 'LOSER':
                losers.append(k)
            elif v.status == 'WINNER':
                winner = k

        df = race.get_odds_dataframe(False)
        # Capture the runner columns before the metadata columns are added.
        selection_ids = df.columns
        df['countrycode'] = race.countryCode
        df['marketstarttime'] = race.marketTime
        df['marketid'] = race.marketId
        df['marketstarttime'] = df['marketstarttime'].values.astype('datetime64[s]')

        for selection_id in selection_ids:
            df_new = df[[selection_id, 'marketstarttime', 'countrycode', 'marketid']].reset_index()
            df_new.columns = ['timestamp', 'LTP', 'marketstarttime', 'countrycode', 'marketid']
            df_new['timestamp'] = df_new['timestamp'].values.astype('datetime64[s]')
            df_new['selection_id'] = selection_id
            df_new['seconds_until_start'] = df_new['marketstarttime'] - df_new['timestamp']
            df_new['seconds_until_start'] = df_new['seconds_until_start'].astype('timedelta64[s]')
            df_new['winner'] = bool(selection_id == winner)
            df_new['loser'] = selection_id in losers
            df_new = df_new[df_new['seconds_until_start'] < 70 * 60]

            if winner != 'na':  # only races that have a winner
                dirty = df_new.to_dict(orient='records')
                # remove nan, None and 0 LTPs. The original wrote
                # type(x['LTP'] == float), which is always truthy, so the type
                # check never ran.
                clean = [
                    x for x in dirty
                    if isinstance(x['LTP'], numbers.Number) and x['LTP'] > 1
                ]
                out.extend(clean)

        if len(out) > 0:
            m.insert_list_of_documents(col, out)
def propagate_race_results():
    """Attach stored race results to placed bets that do not have one yet."""
    log.info("Propagating race results")
    manager = MongoManager()

    pending_races = manager.get_races_without_results()
    pending_market_ids = [race['market_id'] for race in pending_races]
    results = manager.load_race_results(pending_market_ids)
    log.info("Found {} results".format(len(results)))

    for result in results:
        manager.update_placed_bet_with_result(result['marketid'],
                                              result['winner'],
                                              result['losers'])
def eval_theoratical_pnl(args):
    """Compute the theoretical back/lay pnl of placed bets from their LTPs.

    For every race returned by ``get_bets_to_calculate_pnl`` the pnl of each
    horse is stored on its order (``theoretical_pnl``) and the back and lay
    totals are stored on the placed bet. ``race['bets']`` is read pairwise:
    index ``2 * i`` is the back flag and ``2 * i + 1`` the lay flag of horse i.
    """
    log.info("Evaluating pnl")
    manager = MongoManager()
    races = manager.get_bets_to_calculate_pnl(
        overwrite_calculated_pnls=args['--overwrite_calculated_pnls'])

    for race in tqdm(races):
        back_pnl_total = 0
        lay_pnl_total = 0
        losers = race['losers']

        for position, horse in enumerate(race['selection_ids']):
            is_winner = horse == race['winner']
            is_loser = horse in losers
            # LTP is used for both sides: back_prices / lay_prices would
            # understate the theoretical pnl
            odds = race['ltps'][position]
            valid = 1 if odds > 1 else 0  # exclude zero odds from theoretical pnl
            horse_pnl = 0
            fees = config.getfloat("Betting", "fees")
            stake = float(race['stake'])
            back_slot = position * 2
            lay_slot = back_slot + 1

            if is_winner and race['bets'][back_slot]:  # back on winner
                change = (odds * stake - stake) * (1 - fees)
                back_pnl_total += change * valid
                horse_pnl += change * valid

            if is_winner and race['bets'][lay_slot]:  # lay on winner
                change = -(odds * stake - stake)
                lay_pnl_total += change * valid
                horse_pnl += change * valid

            if is_loser and race['bets'][back_slot]:  # back on loser
                change = -stake  # pylint: disable=E1130
                back_pnl_total += change * valid
                horse_pnl += change * valid

            if is_loser and race['bets'][lay_slot]:  # lay on loser
                change = stake * (1 - fees)
                lay_pnl_total += change * valid
                horse_pnl += change * valid

            manager.update_document2('orders', 'selection', horse,
                                     'market_id', race['market_id'],
                                     'theoretical_pnl', horse_pnl)

        manager.update_placed_bet_with_pnl(race['market_id'], back_pnl_total,
                                           lay_pnl_total)
def get_data(countries, extra_columns=True):
    """Load last-minute price quotes with bookie prices and join race outcomes.

    Args:
        countries: country codes to include. The original ignored this
            argument and always queried ``['GB', 'IE', 'US', 'AU']``; that
            list is still used when ``countries`` is empty or ``None``.
        extra_columns: also keep the bookie price, selection id, matched
            volume and near/far price fields.

    Returns:
        DataFrame indexed by (marketid, selection_id) with an ``outcome``
        column ('WIN' / 'LOSE', missing where no result is stored).
    """
    m = MongoManager()
    country_filter = list(countries) if countries else ['GB', 'IE', 'US', 'AU']
    results = m.mongodb.price_scrape.find({
        'countrycode': {
            '$in': country_filter
        },
        'max_bookie_price': {
            '$exists': True
        },
        'seconds_until_start': {
            '$lt': 60
        }
    })

    keys = [
        'LTP', 'VWAP', 'back_prices0', 'back_sizes0', 'eventid',
        'lay_prices0', 'lay_sizes0', 'marketid'
    ]
    if extra_columns:
        keys.extend([
            'max_bookie_price', 'mean_bookie_price', 'median_bookie_price',
            'min_bookie_price', 'selection_id', 'total_matched', 'far_price',
            'near_price'
        ])

    records = [{k: v for k, v in r.items() if k in keys} for r in results]
    df = pd.DataFrame.from_records(records)

    results = m.mongodb.results_scrape.find(
        {'marketid': {
            '$in': [x for x in df['marketid'].values]
        }})

    # One {(marketid, selection): outcome} mapping per stored result.
    race_records = []
    for r in results:
        record = {(r['marketid'], r['winner']): 'WIN'}
        for l in r['losers']:
            record[(r['marketid'], l)] = 'LOSE'
        race_records.append(record)

    race_results = pd.concat([pd.Series(x) for x in race_records])
    race_results.name = 'outcome'

    df = df.set_index(['marketid', 'selection_id'])
    race_results.index.names = df.index.names
    return df.join(race_results)
def get_classifier_df():
    """Build the classifier frame: last-minute GB/IE quotes joined with outcomes.

    Rows without a complete set of values (including the outcome) are dropped
    and implied probabilities (1 / price) are added for the back, lay, bookie
    median, VWAP and LTP prices.
    """
    manager = MongoManager()
    quote_filter = {
        'countrycode': {
            '$in': ['GB', 'IE']
        },
        'seconds_until_start': {
            '$lt': 60
        }
    }
    quotes = manager.mongodb.price_scrape.find(quote_filter)

    keys = [
        'LTP', 'VWAP', 'back_prices0', 'back_sizes0', 'eventid',
        'lay_prices0', 'lay_sizes0', 'marketid', 'max_bookie_price',
        'mean_bookie_price', 'median_bookie_price', 'min_bookie_price',
        'selection_id', 'total_matched'
    ]
    records = []
    for quote in quotes:
        records.append({field: value for field, value in quote.items() if field in keys})
    df = pd.DataFrame.from_records(records)

    market_ids = list(df['marketid'].values)
    stored_results = manager.mongodb.results_scrape.find(
        {'marketid': {
            '$in': market_ids
        }})

    outcome_series = []
    for result in stored_results:
        outcomes = {(result['marketid'], result['winner']): 'WIN'}
        for loser in result['losers']:
            outcomes[(result['marketid'], loser)] = 'LOSE'
        outcome_series.append(pd.Series(outcomes))

    race_results = pd.concat(outcome_series)
    race_results.name = 'outcome'

    df = df.set_index(['marketid', 'selection_id'])
    race_results.index.names = df.index.names
    df = df.join(race_results).dropna()

    # Implied probabilities, added in the same column order as before.
    probability_sources = (
        ('back_prob', 'back_prices0'),
        ('lay_prob', 'lay_prices0'),
        ('bookie_prob', 'median_bookie_price'),
        ('vwap_prob', 'VWAP'),
        ('ltp_prob', 'LTP'),
    )
    for probability_column, price_column in probability_sources:
        df[probability_column] = 1 / df[price_column]
    return df
def get_races(self):
    """Extract every market in ``self.markets`` from the tar file into MongoDB.

    Each (market, event) pair is located in ``self.tar_file`` via
    ``self.prefix``, bz2-decompressed, parsed into a ``BetFairRace`` and
    inserted into the ``Backtesting`` collection. Entries that are missing or
    cannot be decompressed are skipped; failed inserts are ignored.

    Returns:
        An empty DataFrame when no markets are set, otherwise ``None``.
    """
    mongo = MongoManager()
    collection = mongo.mongodb_price.get_collection("Backtesting")

    if self.markets.empty:
        print(
            "No markets set, have they been processed? Call .get_runners_and_markets"
        )
        return pd.DataFrame()

    # Both the archive and the progress bar are closed on exit; the loop
    # variables no longer shadow the manager or the exception name.
    with tqdm(total=len(self.markets)) as pbar, \
            tarfile.open(self.tar_file, 'r') as tf:
        for market_id, event_id in self.markets:
            pbar.update()
            try:
                data = bz2.decompress(
                    tf.extractfile(self.prefix.format(event_id, market_id)).read())
            except (ValueError, KeyError):
                continue

            bfr = BetFairRace.create_from_json(data.decode('utf8'))
            try:
                collection.insert_one({**bfr.to_dict(), **{'marketId': market_id}})
            except Exception:
                # Best effort, as in the original: a failed insert must not
                # abort the import of the remaining markets.
                pass
def propagate_race_results_to_price_scrape(col='price_scrape'):
    """Add the winner and loser fields to the given price collection.

    Looks at prices of the last 30 days that have no result yet and updates
    them chunk by chunk with the stored race results.
    """
    manager = MongoManager()
    from_date = datetime.now() - timedelta(days=30)
    log.info("Getting prices without race results since {}".format(from_date))

    races = manager.get_price_scrape_without_results(from_date, col)
    log.info("Got {} races. Loading results and updating {} collection".format(
        len(races), col))
    if len(races) == 0:
        log.info("No races to process")
        return

    chunk_argument = int(np.ceil(len(races) / 100))
    for race_chunk in tqdm(chunk(races, chunk_argument)):
        results = manager.load_race_results(race_chunk)
        if len(results) == 0:
            log.info("No results to process")
            continue
        for result in results:
            manager.update_price_scrape_with_result(col, result['marketid'],
                                                    result['winner'],
                                                    result['losers'])
def update_placed_orders(self, armed=False):
    """Re-price all open orders according to the configured price level.

    For every open order the price level for the current minute to start is
    read from the ``use_level`` config entry (0 = last traded price,
    n = n-th best back price), passed through ``lay_saftey_check`` and
    ``price_adjustment``, and - only when ``armed`` - stored and sent to the
    exchange.

    Args:
        armed: when False the new prices are only computed and logged.
    """
    config = get_config()
    m = MongoManager()
    open_orders = self.container.get_open_orders()
    use_level = config.get("Betting", "use_level").split(',')  # minutes_to_start

    if len(open_orders) > 0:
        log.info("Currently open order: {}".format(len(open_orders)))

    for market, orders in open_orders.items():
        log.info("Updating order for market: {}".format(market))
        new_prices = self.container.get_single_market(market)
        updated_prices = []
        updated_betids = []

        for o in orders:
            selection_id = o.selection_id
            secs_to_start = m.get_secs_to_start(market)
            log.info("Seconds to start: {}".format(secs_to_start))
            if not secs_to_start or secs_to_start < \
                    -config.getint("Betting", "update_until_secs_after_start"):  # pylint: disable=E1130
                log.info("Ignoring order\n----------------")
                continue

            log.info("Selection ID: {}".format(selection_id))
            ltp_price = 0
            lays = [0, 0, 0]
            backs = [0, 0, 0]
            for r in new_prices[0].runners:
                if r.selection_id != selection_id:
                    continue
                backs = [level.price for level in r.ex.available_to_back]
                lays = [level.price for level in r.ex.available_to_lay]
                ltp_price = r.last_price_traded

            log.info("New backs: {}".format(backs))
            log.info("New lays: {}".format(lays))
            if not backs:
                # Without a best back price the safety check cannot be
                # evaluated (the original raised IndexError on backs[0]).
                log.info("No back prices available. Ignoring order\n----------------")
                continue

            min_to_start = math.floor(secs_to_start / 60)
            log.info("Minutes until start: {}".format(min_to_start))
            # NOTE(review): after the start min_to_start is negative and
            # indexes use_level from the end - confirm that this is intended.
            try:
                level_at_minutes_to_start = int(use_level[min_to_start])
                log.info(
                    "Using Level: {}".format(level_at_minutes_to_start))
            except IndexError:
                log.info(
                    "use_level not set for {} minutes to start. Aborting".
                    format(min_to_start))
                return

            # Level n reads backs[n - 1], so the highest usable level is
            # len(backs); the original clamp of len(backs) + 1 allowed an
            # out-of-range index.
            level_at_minutes_to_start = min(level_at_minutes_to_start,
                                            len(backs))
            log.info(
                "Final level to use: {}".format(level_at_minutes_to_start))

            if level_at_minutes_to_start == 0:
                new_price = ltp_price
            else:
                new_price = backs[level_at_minutes_to_start - 1]
            log.info("Updating to new price: {}".format(new_price))

            new_price = lay_saftey_check(selection_id=o.selection_id,
                                         last_price=ltp_price,
                                         current_lay_price=new_price,
                                         best_back=backs[0])
            new_price = price_adjustment(new_price)

            updated_betids.append(o.bet_id)
            updated_prices.append(new_price)
            if armed:
                m.update_order_price(o.market_id, o.selection_id, o.bet_id,
                                     min_to_start, new_price)

        if len(updated_betids) > 0:
            log.info("Updated bet_ids: {}".format(updated_betids))
            log.info("Updated prices: {}".format(updated_prices))
            if armed:
                self.container.replace_orders(market, updated_betids,
                                              updated_prices)
def setUp(cls):
    """Provide the tests with a MongoManager connected to the archive database."""
    archive_manager = MongoManager(use_archive=True)
    cls.mongodb = archive_manager
h2h_test = h2h[h2h['RACEDATE'] >= cutoff_date]

# Reduced Dataset without track-specific details (to be incorporated later)
h2h_red = h2h.drop(track_cols, axis=1)
h2h_train_red = h2h_train.drop(track_cols, axis=1)
h2h_test_red = h2h_test.drop(track_cols, axis=1)

# Save data as json. The files are opened in text mode (json.dump writes str,
# so binary mode fails on Python 3) and closed again right after writing.
# The payload is still the JSON string produced by to_json, dumped once more,
# exactly as before.
for json_name, frame in (('h2h.json', h2h),
                         ('h2h_train.json', h2h_train),
                         ('h2h_test.json', h2h_test),
                         ('h2h_red.json', h2h_red),
                         ('h2h_train_red.json', h2h_train_red),
                         ('h2h_test_red.json', h2h_test_red)):
    with open(path + json_name, 'w') as json_file:
        json.dump(frame.to_json(orient='index'), json_file)

# Upload data into MongoDB
client = MongoManager()
client.upload_dataframe(h2h, 'h2h')
client.upload_dataframe(h2h_red, 'h2h_red')

#----------#
# ANALYSIS #
#----------#
training_set = h2h_train_red.values
test_set = h2h_test_red.values

# NOW LET'S HAVE FUN!
class DEBase():
    """Enrich raw price quotes into one row per horse and race.

    Raw quotes are read from ``source``, pivoted into per-minute LTP columns,
    extended with per-race statistics and back/lay payoffs and uploaded to
    ``destination``.
    """

    def __init__(self, source, destination, use_local, use_archive):
        """Log the configuration and connect to the selected MongoDB instance."""
        log.info("source {}".format(source))
        log.info("destination {}".format(destination))
        log.info("use_local {}".format(use_local))
        log.info("use_archive_db {}".format(use_archive))
        self.use_archive_db = use_archive
        self.m = MongoManager(use_remote=not use_local, use_archive=use_archive)
        self.source_collection = source
        self.destination_collection = destination

    def map_reduce(self, has_bookie=False):
        """Enrich all markets of the source that are missing in the destination.

        Markets are processed in chunks; a chunk that cannot be unstacked
        (ValueError) or lacks required columns (KeyError) is skipped.
        """
        # check for latest entry in historical database
        # self.last_datetime = self.m.get_latest_date_in_enriched_prices(self.destination_collection)
        # if not self.last_datetime:
        self.last_datetime = dt.datetime(2018, 1, 1)

        log.info("Get available marketids")
        new_marketids = self.m.get_distinct('marketid', self.source_collection, self.last_datetime,
                                            must_have_bookie=has_bookie)
        log.info("got {}".format(len(new_marketids)))

        log.info("Get marketids already in enriched database")
        already_available_markeids = self.m.get_distinct('marketid', self.destination_collection,
                                                         self.last_datetime, must_have_bookie=False)
        log.info("got {}".format(len(already_available_markeids)))

        marketids_to_collect = list(new_marketids - already_available_markeids)
        log.info("Start download from date: {}".format(self.last_datetime))
        log.info("Download new marketids to enrich in chunks. Total marketids to process: {}".format(
            len(marketids_to_collect)))
        if not marketids_to_collect:
            # Nothing to enrich; also avoids chunking with a size of zero.
            log.info("No new marketids to enrich")
            return

        # set to higher value if larger amount needs to be processed simulataneiously, but if keys
        # are missing at certain time periods (such as mean_bookie_price t-0) the whole batch will be ignored
        batchsize = 1
        c = int(np.ceil(len(marketids_to_collect) / batchsize))

        for marketids in tqdm(chunk(marketids_to_collect, c)):
            self.historicals = self.m.download_raw_prices(self.last_datetime, marketids,
                                                          self.source_collection,
                                                          max_rows=999999999, has_bookie=has_bookie)
            self.df = pd.DataFrame.from_dict(self.historicals)
            del self.historicals

            l = len(self.df)
            if l == 0:
                # log.warning("No new races found that match the criteria in current chunk")
                continue
            log.info("Total bets enriching: {}".format(l))

            try:
                self.unstack_timeseries_to_closest_minutes()
            except ValueError:
                continue

            try:
                self.calculate_metrics()
            except KeyError:
                log.warning("Key Error. Ignoring the whole batch. Make sure batch size is 1.")
                continue

            self.calculate_payoffs()
            self.output_formatting()

            db = 'archive' if self.use_archive_db else 'price'
            self.df['timestamp'] = dt.datetime.now()
            self.m.upload_dataframe(self.df, self.destination_collection, db)

    def unstack_timeseries_to_closest_minutes(self, fields_to_stack=None):
        """Pivot the quotes so every minute to start becomes its own column.

        Times are converted to labels t-0, t-1, t-2... and the stacked fields
        end up in columns such as ``LTP t-0``.

        Args:
            fields_to_stack: value columns to pivot; defaults to ``['LTP']``.
                The list is copied. The original used a mutable default that
                grew on every call, so from the second call on the duplicated
                column names raised a ValueError that map_reduce silently
                skipped.
        """
        fields = ['LTP'] if fields_to_stack is None else list(fields_to_stack)

        self.df['minutes_to_start'] = 't-' + np.floor(
            self.df['seconds_until_start'] / 60).astype('int').astype('str')

        fields.extend(['countrycode', 'loser', 'marketid', 'marketstarttime',
                       'selection_id', 'winner', 'minutes_to_start'])
        self.df = self.df[fields]

        # Sorting is lexicographic on the label; with keep='last' exactly one
        # quote per horse and minute survives.
        self.df = self.df.sort_values('minutes_to_start', ascending=False)
        self.df = self.df.drop_duplicates(['selection_id', 'marketid', 'minutes_to_start'], keep='last')

        self.df = self.df.set_index(['marketid', 'selection_id', 'countrycode', 'loser',
                                     'marketstarttime', 'winner', 'minutes_to_start'])
        self.df = self.df.unstack(level=6).reset_index()
        # Flatten ('LTP', 't-0') style column tuples into 'LTP t-0'.
        self.df.columns = [' '.join(col).strip() for col in self.df.columns.values]

    @staticmethod
    def _per_market(aggregated, name):
        """Turn a per-market aggregate into a ['marketid', name] frame."""
        frame = aggregated.to_frame().reset_index()
        frame.columns = ['marketid', name]
        return frame

    def calculate_metrics(self):
        """Add per-race statistics of the LTPs to every row.

        The 1d metrics describe the ``LTP t-0`` prices of a race; the 2d
        metrics describe all prices from ``LTP t-0`` to ``LTP t-59`` of a race.

        Raises:
            KeyError: when one of the required LTP columns is missing.
        """
        # NOTE(review): the level= keyword of the reductions below was removed
        # in pandas 2.0; this code relies on an older pandas.
        self.df['LTP'] = self.df['LTP t-0']
        self.df['horse'] = self.df.groupby('marketid').cumcount()  # add horse number in each group
        self.df_indexed = self.df.set_index(['marketid', 'horse'])
        ltp = self.df_indexed['LTP']

        log.info("Calculating 1d metrics")
        metrics_1d = [
            self._per_market(ltp.mean(axis=0, level=0), 'average'),
            self._per_market(ltp.min(axis=0, level=0), 'minimum'),
            self._per_market(ltp.max(axis=0, level=0), 'maximum'),
            self._per_market(ltp.median(axis=0, level=0), 'median'),
            self._per_market(ltp.std(axis=0, level=0), 'std'),
            self._per_market(self.df_indexed.count(level=0)['winner'], 'participants'),
            self._per_market(ltp.skew(axis=0, level=0), 'skew'),
            self._per_market(ltp.kurtosis(axis=0, level=0), 'kurtosis'),
            self._per_market((1 / ltp).sum(axis=0, level=0), 'overrun'),
        ]

        # stat metrics of historical elements
        log.info("Calculating 2d metrics")
        all_price_headings = ['LTP t-' + str(i) for i in range(60)]
        # Stack once and reuse it for every aggregate.
        stacked = self.df_indexed[all_price_headings].stack()
        metrics_2d = []
        for name, aggregate in (('average_2d', stacked.mean),
                                ('min_2d', stacked.min),
                                ('max_2d', stacked.max),
                                ('median_2d', stacked.median),
                                ('std_2d', stacked.std),
                                ('skew_2d', stacked.skew),
                                ('kurtosis_2d', stacked.kurtosis)):
            log.info("Calculating {}".format(name))
            metrics_2d.append(self._per_market(aggregate(level=0), name))

        log.info("Merging 1d metrics")
        for metric in metrics_1d:
            self.df = self.df.merge(metric, on='marketid')

        log.info("Merging 2d metrics")
        for metric in metrics_2d:
            self.df = self.df.merge(metric, on='marketid')

    def calculate_payoffs(self):
        """Add the back and lay payoff per horse for a stake of 1.

        Missing values are replaced by 0 first. Rows whose ``winner`` value is
        -1 get a payoff of 0. ``lay_risk`` holds the (non-positive) loss of a
        lay at the LTP.
        """
        self.df = self.df.fillna(0)
        config = get_config()
        fees = config.getfloat("Betting", "fees")
        stake = 1  # hard coded for training for comparison purposes

        ltp = self.df['LTP'].values
        is_winner = self.df['winner'].values
        has_result = self.df['winner'] != -1

        payoff_back = np.where(is_winner,
                               (stake * ltp - stake) * (1 - fees),  # back winner
                               -np.ones(ltp.shape) * stake)  # back loser
        payoff_back = np.where(has_result, payoff_back, 0)  # replace na with 0 payoff

        # payoff when lay: -odds when win, 1 when loss
        payoff_lay = np.where(is_winner,
                              -(stake * ltp - stake),  # lay winner
                              stake * np.ones(ltp.shape) * (1 - fees))  # lay loser
        payoff_lay = np.where(has_result, payoff_lay, 0)  # replace na with 0 payoff

        # keep the lay odds to analyse the potential risk on the lays we bet on
        winning_lay_risk = np.minimum((ltp - 1) * (-1), np.zeros(payoff_lay.shape))

        self.df['back'] = payoff_back
        self.df['lay'] = payoff_lay
        self.df['lay_risk'] = winning_lay_risk

    def output_formatting(self):
        """Keep only the output columns and one row per market and selection."""
        self.df = self.df[[
            'marketstarttime', 'countrycode', 'marketid', 'selection_id',
            'LTP t-0', 'LTP t-7',
            'average', 'minimum', 'maximum', 'median', 'std', 'participants',
            'skew', 'kurtosis',
            'average_2d', 'min_2d', 'max_2d', 'median_2d', 'std_2d', 'skew_2d',
            'kurtosis_2d',
            'overrun', 'winner', 'back', 'lay', 'lay_risk'
        ]]
        self.df = self.df.drop_duplicates(['marketid', 'selection_id'])
def de_duplicate():
    """Copy ``price_scrape_enriched2`` without duplicated market/selection rows.

    The result is uploaded as ``price_scrape_enriched_deduplicated``.
    """
    archive = MongoManager(use_remote=False, use_archive=True)
    enriched = archive.get_dataframe("price_scrape_enriched2")
    unique_rows = enriched.drop_duplicates(['marketid', 'selection_id'])
    archive.upload_dataframe(unique_rows, "price_scrape_enriched_deduplicated")
def collect_prices(collection_name='price_scrape', single_marketid=False):
    """Scrape current prices for all runners and store one document per runner.

    Betfair prices are always collected. Unless ``single_marketid`` is given,
    Matchbook prices are added where the venue and start time match, and
    bookmaker ("turf") odds are added for the configured countries.

    Args:
        collection_name: collection the price documents are inserted into.
        single_marketid: when set, only this Betfair market is scraped and
            Matchbook is skipped.
    """
    from horse_racing.betfair_manager.engine import Container as BFContainer
    from horse_racing.matchbook_manager.engine import Container as MBContainer

    try:
        bfm = BFContainer()
    except Exception:
        # Nothing can be scraped without Betfair; the original carried on and
        # failed on the unbound name.
        log.error("Unable to log into betfair")
        return

    mbm = None
    if not single_marketid:
        try:
            mbm = MBContainer()
        except Exception:
            log.error("Unable to log into matchbook")

    if single_marketid:
        events, markets = bfm.get_single_race(single_marketid)
    else:
        events, markets = bfm.get_all_races()

    try:
        prices = bfm.update_markets(events, markets)
    except AttributeError:
        log.info("No markets to scrape")
        return

    mb_prices = {}
    if not single_marketid:
        try:
            # Also fails (and is reported) when the matchbook login failed.
            mb_prices = mbm.get_races()
        except Exception:
            mb_prices = {}
            log.error("failed to get mb prices")

    # market id -> (event id, start time, country code, matchbook market)
    event_mapping = {}
    bf_selection_name_mapping = {}
    for e, ms in markets.items():
        for market_id, market in ms:
            mb_market = None
            try:
                mb_market = mb_prices[(
                    market.event.venue,
                    market.market_start_time.strftime("%Y-%m-%dT%H:%M:00.000Z"))]
            except KeyError:
                log.info("Didn't find matchbook market for {}@{}".format(
                    market.market_start_time, market.event.venue))
            event_mapping[market_id] = (e, market.market_start_time,
                                        market.event.country_code, mb_market)
            bf_selection_name_mapping.update(
                {r.selection_id: r.runner_name for r in market['runners']})

    # One connection for the whole run instead of one per runner.
    mongo = MongoManager()

    for p in tqdm(prices):
        # find turf bookmaker odds
        turf_odds_df = pd.DataFrame()
        race_status = {}
        for m_id, m in markets[event_mapping[p.market_id][0]]:
            if m_id != p.market_id:
                continue
            market_start_time = event_mapping[p.market_id][1]
            venue = m.event.venue.lower()
            config = get_config()
            turf_countries = config.get('Scraping', 'turf_countries').split()
            if m.event.country_code not in turf_countries:
                continue
            # TODO, check and process turf odds
            try:
                turf_odds_df, race_status = get_race_odds(
                    venue, market_start_time, m.runners)
            except Exception:
                # Bookmaker odds are optional; continue without them.
                log.debug("No turf odds for {}@{}".format(market_start_time, venue))

        mb_market = event_mapping[p.market_id][3]

        for r in p.runners:
            back_prices, back_sizes = [None, None, None], [None, None, None]
            lay_prices, lay_sizes = [None, None, None], [None, None, None]
            try:
                vwap = get_VWAP(r.ex.traded_volume)
            except ZeroDivisionError:
                vwap = 0.

            # Up to three price levels per side; missing levels stay None.
            for i in range(3):
                try:
                    back_prices[i] = r.ex.available_to_back[i].price
                except IndexError:
                    back_prices[i] = None
                try:
                    lay_prices[i] = r.ex.available_to_lay[i].price
                except IndexError:
                    lay_prices[i] = None
                try:
                    back_sizes[i] = r.ex.available_to_back[i].size
                except IndexError:
                    back_sizes[i] = None
                try:
                    lay_sizes[i] = r.ex.available_to_lay[i].size
                except IndexError:
                    lay_sizes[i] = None

            try:
                sp_dict = {
                    'near_price': r.sp.near_price,
                    'far_price': r.sp.far_price,
                    'actual_SP': r.sp.actual_SP,
                    'back_SP_amounts': [(a.price, a.size)
                                        for a in r.sp.back_stake_taken],
                    # NOTE(review): this reads back_stake_taken again; it
                    # presumably should read the lay side - confirm against
                    # the betfair client before changing it.
                    'lay_SP_amounts': [(a.price, a.size)
                                       for a in r.sp.back_stake_taken]
                }
            except Exception:
                sp_dict = {}

            seconds_until_start = (event_mapping[p.market_id][1] -
                                   datetime.datetime.now()).total_seconds()
            event_id = event_mapping[p.market_id][0]

            d = {
                'marketid': p.market_id,
                'selection_id': r.selection_id,
                'LTP': r.last_price_traded,
                'back_sizes0': back_sizes[0],
                'back_prices0': back_prices[0],
                'back_sizes1': back_sizes[1],
                'back_prices1': back_prices[1],
                'back_sizes2': back_sizes[2],
                'back_prices2': back_prices[2],
                'lay_sizes0': lay_sizes[0],
                'lay_prices0': lay_prices[0],
                'lay_sizes1': lay_sizes[1],
                'lay_prices1': lay_prices[1],
                'lay_sizes2': lay_sizes[2],
                'lay_prices2': lay_prices[2],
                'timestamp': datetime.datetime.now(),
                'eventid': event_id,
                'marketstarttime': event_mapping[p.market_id][1],
                'countrycode': event_mapping[p.market_id][2],
                'seconds_until_start': seconds_until_start,
                'total_matched': r.total_matched,
                'VWAP': vwap,
            }
            d.update(sp_dict)

            if not turf_odds_df.empty:
                # Add the turf odds
                try:
                    bookie_prices = turf_odds_df.loc[
                        r.selection_id].values.astype(float)
                    bookie_prices[bookie_prices == 0] = np.nan  # replace 0 with nan
                    # orient was misspelled as 'records)', which pandas
                    # rejects with a ValueError.
                    d['bookies'] = pd.DataFrame({
                        'name': turf_odds_df.loc[r.selection_id].index,
                        'price': bookie_prices
                    }).to_dict(orient='records')
                    d['mean_bookie_price'] = np.nanmean(bookie_prices)
                    d['median_bookie_price'] = np.nanmedian(bookie_prices)
                    d['min_bookie_price'] = np.nanmin(bookie_prices)
                    d['max_bookie_price'] = np.nanmax(bookie_prices)
                except KeyError:
                    log.info("No turf odds for selection: {}".format(
                        r.selection_id))

            if mb_market:
                d['mb_market_id'] = mb_market['id']
                runner_name = bf_selection_name_mapping[r.selection_id]
                mb_selection_id = None
                for mr in mb_market['runners']:
                    # Matchbook prefixes runner names with their number.
                    if mr['name'].lstrip('1234567890- ') == runner_name:
                        mb_selection_id = mr['id']
                        d['mb_selection_id'] = mb_selection_id
                        d['mb_volume'] = mr['volume']
                        back_index, lay_index = 0, 0
                        for mp in mr['prices']:
                            if mp['side'] == 'back':
                                d['mb_back_prices{}'.format(
                                    back_index)] = mp['decimal-odds']
                                d['mb_back_sizes{}'.format(
                                    back_index)] = mp['available-amount']
                                back_index += 1
                            else:
                                # Lay levels use their own counter; the
                                # original keyed them with back_index.
                                d['mb_lay_prices{}'.format(
                                    lay_index)] = mp['decimal-odds']
                                d['mb_lay_sizes{}'.format(
                                    lay_index)] = mp['available-amount']
                                lay_index += 1
                if not mb_selection_id:
                    log.warning(
                        "Didn't find runner information for bf selection id {}"
                        .format(r.selection_id))

            d.update(race_status)
            log.debug(d)
            success = mongo.insert_document(collection_name, d)
            log.debug(success)
def place_bets(bets, selection_ids, prices, market_ids, event_id,
               race_start_time, container, armed=False, model_name=None,
               reference_price_for_initial_bet=0):
    """Persist a betting decision and, when armed, place the orders.

    The decision (bets, prices, stake, race start) is always written to the
    'place_bets' collection of the 'pnl' database. Only when ``armed`` is
    true is a limit order sent through ``container`` for every truthy entry
    of ``bets``; the outcome of each order is written to the 'orders'
    collection and successful ones are appended to 'successful_bets'.

    Args:
        bets: Flat sequence holding two entries per horse; even positions
            are BACK bets, odd positions are LAY bets. Entries beyond
            ``2 * len(selection_ids)`` are discarded, falsy entries skipped.
        selection_ids: Selection id of each horse, indexed by horse position.
        prices: 7-tuple ``(back_prices, lay_prices, back1, lay1, back2,
            lay2, ltp_prices)``, each indexable by horse position. The
            first two and the last must provide ``.tolist()``.
        market_ids: Market identifier handed to the order call and stored
            with every document (used as a single value despite its name).
        event_id: Event identifier stored with every document.
        race_start_time: ``datetime.datetime``, or a ``datetime.time`` that
            is combined with today's date before saving.
        container: Object providing ``place_limit_order``.
        armed: When false nothing is sent to the exchange.
        model_name: Passed to the order call as ``strategy`` and stored.
        reference_price_for_initial_bet: '0' keeps the price as is; '1',
            '2' or '3' cap it with the corresponding level of the opposite
            side of the book. Integers are accepted as well, so the default
            ``0`` behaves like '0'.

    Returns:
        True, always.
    """
    log.info("Placing bets: {}".format(bets))
    horses = len(selection_ids)
    back_prices, lay_prices, back1, lay1, back2, lay2, ltp_prices = prices
    bets = bets[0:horses * 2]  # cut off padded horses
    log.info("len bets: {}".format(len(bets)))
    log.info("len selection_ids: {}".format(len(selection_ids)))
    unit_stake = config.getfloat("Betting", "stake")

    # convert to datetime for mongodb saving; isinstance also accepts
    # datetime subclasses, which cannot be passed to combine() as a time
    if isinstance(race_start_time, datetime.datetime):
        race_start_datetime = race_start_time
    else:
        race_start_datetime = datetime.datetime.combine(
            datetime.date.today(), race_start_time)

    m = MongoManager()
    m.insert_document(
        'place_bets',
        {
            'bets': bets,
            'selection_ids': selection_ids,
            'timestamp': datetime.datetime.now(),
            'armed': armed,
            'back_prices': back_prices.tolist(),
            'lay_prices': lay_prices.tolist(),
            'market_id': market_ids,
            'event_id': event_id,
            'ltps': ltp_prices.tolist(),
            'stake': float(unit_stake),
            'race_start': race_start_datetime,
        },
        database='pnl')

    if not armed:
        return True

    # The levels are compared as strings; normalising here makes the integer
    # default (0) select the '0' branch instead of the "Invalid value" one.
    reference = str(reference_price_for_initial_bet)
    # reference level -> (back side, lay side, log message)
    reference_levels = {
        '1': (back_prices, lay_prices, "Using level 1 as reference"),
        '2': (back1, lay1, "Using level2 as reference"),
        '3': (back2, lay2, "Using level3 as reference"),
    }

    for i, b in enumerate(bets):
        if not b:
            continue
        log.info("++++++++++++++++++++++++++++++++++++++++++++")
        runner = i // 2  # two bet slots (BACK, LAY) per horse
        selection_id = selection_ids[runner]
        bet_type = 'BACK' if i % 2 == 0 else 'LAY'
        price = back_prices[runner] if bet_type == 'BACK' else lay_prices[runner]
        ask_price = price
        stake = unit_stake

        if bet_type == 'LAY':
            price = lay_saftey_check(selection_id=selection_id,
                                     last_price=ltp_prices[runner],
                                     current_lay_price=price,
                                     best_back=back_prices[runner])
        theoretical_ltp_price = price

        # additional reduction for early orders
        if reference == '0':
            log.info("Using ltp as reference")
        elif reference in reference_levels:
            back_level, lay_level, message = reference_levels[reference]
            # a LAY is capped by the back side, a BACK by the lay side
            opposite_side = back_level if bet_type == 'LAY' else lay_level
            price = min(price, opposite_side[runner])
            log.info(message)
        else:
            log.error("Invalid value for reference_price_for_initial_bet")

        price = price_adjustment(price)
        log.info(
            "Placing order on betfair: marketid: {}, selection: {}, price: {}"
            .format(market_ids, selection_id, price))
        result = container.place_limit_order(market_ids,
                                             selection_id,
                                             stake,
                                             price,
                                             side=bet_type,
                                             strategy=model_name)

        # Book-keeping is best effort: a failed write must not stop the
        # remaining orders from being placed.
        try:
            m.insert_document(
                'orders',
                {
                    'market_id': market_ids,
                    'event_id': event_id,
                    # store the converted value, as in 'place_bets' above
                    'race_start': race_start_datetime,
                    'selection': selection_id,
                    'timestamp': datetime.datetime.now(),
                    'stake': float(stake),
                    'price': price,
                    'theoretical_ltp_price': theoretical_ltp_price,
                    'ltp': ltp_prices[runner],
                    'original_ask_price': ask_price,
                    'side': bet_type,
                    'result': result.status,
                    'error_code': result.error_code,
                    'size_matched':
                        result.instruction_reports[0].size_matched,
                    'average_price_matched':
                        result.instruction_reports[0].average_price_matched,
                    'betid': result.instruction_reports[0].bet_id,
                    'model_name': model_name,
                },
                database='pnl')
        except Exception:
            log.warning(
                "inserting orders document failed, marketid: {}".format(
                    market_ids),
                exc_info=True)

        log.info("Result: {}, {}, matched {} on avg of {}".format(
            result.status, result.error_code,
            result.instruction_reports[0].size_matched,
            result.instruction_reports[0].average_price_matched))

        try:
            if result.status == "SUCCESS":
                m.update_document('successful_bets',
                                  'market_id',
                                  market_ids,
                                  "successful_bets",
                                  selection_id,
                                  as_array=True)
        except Exception:
            log.warning(
                "Updating place_bets collection failed, marketid: {}".
                format(market_ids),
                exc_info=True)

    return True
def _hide_x_ticks(ax):
    """Enable minor ticks and remove x ticks and x tick labels from ``ax``."""
    ax.minorticks_on()
    ax.set_xticks([])
    plt.setp(ax.get_xticklabels(), visible=False)


def _save_and_close(fig, path):
    """Write ``fig`` to ``path`` at 180 dpi and release the figure."""
    fig.savefig(path, dpi=180)
    # pyplot keeps every figure alive until it is closed explicitly
    plt.close(fig)


def _plot_overall(x, realized, theoretical, width, title, ylabel, ylim, path):
    """Save one chart with realized (red) next to theoretical (green) bars."""
    fig, ax = plt.subplots()
    ax.bar(x, realized, width=width, color='r', align='center')
    ax.bar(x + width, theoretical, width=width, color='g', align='center')
    ax.autoscale(tight=True)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    _hide_x_ticks(ax)
    ax.legend(['Realized', 'Theoretical'])
    if ylim:
        ax.set_ylim(ylim)
    _save_and_close(fig, path)


def _plot_individual(x, df, columns, ylabels, colors, ylim, path, title=None):
    """Save a two-row chart with one bar plot per entry of ``columns``."""
    fig, axs = plt.subplots(nrows=2)
    for ax, column, ylabel, color in zip(axs, columns, ylabels, colors):
        ax.bar(x, df[column], color=color)
        ax.set_ylabel(ylabel)
        _hide_x_ticks(ax)
        if ylim:
            ax.set_ylim(ylim)
    if title:
        axs[0].set_title(title)
    fig.subplots_adjust(hspace=0)
    fig.subplots_adjust(wspace=0)
    _save_and_close(fig, path)


def list_all_bets():
    """Render the order charts into ``../static`` next to this module.

    Loads all orders through ``MongoManager.get_all_orders`` (missing values
    are replaced by 0.0), splits realized ('bf_profit') and theoretical
    ('theoretical_pnl') results into wins and losses, and writes five PNG
    files: orders_wins_overall, orders_wins, orders_losses_overall,
    orders_losses and bets. Realized wins are reduced by the configured
    "Betting"/"fees" rate.

    Exits the interpreter via ``sys.exit()`` when there are no orders.
    """
    m = MongoManager()
    df = pd.DataFrame.from_dict(m.get_all_orders())
    df.fillna(value=0.0, inplace=True)
    log.info("Processing through orders: {}".format(len(df)))

    # Sanity Check
    if len(df) == 0:
        log.warning("No Pnl can be calculated as dataframe is empty")
        sys.exit()

    # Common variables
    fees = config.getfloat("Betting", "fees")
    x = np.arange(len(df))
    xlabels = df['selection'].tolist()
    static_dir = os.path.dirname(os.path.realpath(__file__))

    df['bf_win'] = df['bf_profit'].clip(lower=0) * (1 - fees)
    df['bf_loss'] = df['bf_profit'].clip(upper=0)
    df['theoretical_win'] = df['theoretical_pnl'].clip(lower=0)
    df['theoretical_loss'] = df['theoretical_pnl'].clip(upper=0)

    # Shared y-limits so realized and theoretical plots are comparable;
    # left as None (matplotlib autoscale) when a PnL column is all NaN.
    win_ylim = None
    loss_ylim = None
    if not np.all(np.isnan(df['bf_profit'])) and not np.all(
            np.isnan(df['theoretical_pnl'])):
        highest_win = int(
            np.nanmax([np.nanmax(df['bf_win']),
                       np.nanmax(df['theoretical_win'])]))
        lowest_loss = int(
            np.nanmin([np.nanmin(df['bf_loss']),
                       np.nanmin(df['theoretical_loss'])]))
        win_ylim = [0, 1.05 * highest_win]
        loss_ylim = [lowest_loss * 1.05, 0]

    # Plot Actual/Theoretical Wins
    _plot_overall(x, df['bf_win'], df['theoretical_win'], 0.3,
                  'Wins in the last 24h', 'Profits', win_ylim,
                  os.path.join(static_dir,
                               '../static/orders_wins_overall.png'))
    _plot_individual(x, df, ['bf_win', 'theoretical_win'],
                     ['Realized Profit', 'Theoretical Profit'], ['r', 'g'],
                     win_ylim,
                     os.path.join(static_dir, '../static/orders_wins.png'),
                     title='Wins in the last 24 hours')

    # Plot Actual/Theoretical Losses (limits guarded by the loss limits)
    _plot_overall(x, df['bf_loss'], df['theoretical_loss'], 0.6,
                  'Losses in the last 24h', 'Losses', loss_ylim,
                  os.path.join(static_dir,
                               '../static/orders_losses_overall.png'))
    _plot_individual(x, df, ['bf_loss', 'theoretical_loss'],
                     ['Realized Loss', 'Theoretical Loss'], ['r', 'g'],
                     loss_ylim,
                     os.path.join(static_dir, '../static/orders_losses.png'))

    # Plot Bet Prices
    fig, ax = plt.subplots()
    ax.bar(x, df['price'], color='b')
    ax.set_ylabel('Price')
    ax.set_xlabel('Bets (Selection IDs)')
    ax.set_xticks(range(len(df)))
    ax.set_xticklabels(xlabels, rotation=90, fontsize=2)
    fig.subplots_adjust(hspace=0)
    fig.subplots_adjust(wspace=0)
    _save_and_close(fig, os.path.join(static_dir, '../static/bets.png'))
def find_outliers(selection_id=7767293):
    """Load the scraped price records of one selection into a DataFrame.

    Debugging helper: queries the ``price_scrape`` collection for all
    documents whose 'selection_id' equals ``selection_id``.

    Args:
        selection_id: Selection to look up; defaults to the id that was
            previously hard-coded.

    Returns:
        pandas.DataFrame built from the matching documents (previously the
        frame was built and discarded).
    """
    m = MongoManager()
    prices = m.mongodb.price_scrape.find({'selection_id': selection_id})
    return pd.DataFrame.from_records(prices)
def pnl_charts(start=0, end=0):
    """Render the PnL charts into ``../static`` next to this module.

    Writes three PNG files:

    * chart.png - bar chart of the rows returned by ``get_all_pnl()``
    * chart_cumulative.png - cumulative sum of the same rows
    * chart_cumulative_year.png - cumulative sum of the rows since the
      configured "PNL"/"from_date", titled with the p-value of a t-test of
      'bf_profit' against zeros and the current account balance

    Rows with a positive 'bf_profit' are scaled by ``1 - fees`` in the first
    two charts (all columns of those rows, as before). When a query returns
    no rows, empty placeholder files are written instead of charts.

    Args:
        start: Unused; kept for backward compatibility.
        end: Unused; kept for backward compatibility.
    """
    # Function-scope imports: pyplot is only needed to release figures, and
    # the class import keeps strptime working whether this file binds
    # `datetime` to the module (as datetime.datetime.now() elsewhere in the
    # file suggests) or to the class.
    import matplotlib.pyplot as plt
    from datetime import datetime as _datetime

    log.info("Create charts")
    dir_path = os.path.dirname(os.path.realpath(__file__))
    chart_path = os.path.join(dir_path, '../static/chart.png')
    cumulative_path = os.path.join(dir_path,
                                   '../static/chart_cumulative.png')
    year_path = os.path.join(dir_path,
                             '../static/chart_cumulative_year.png')

    def _save(ax, path):
        # pyplot keeps figures alive until closed; release after saving
        fig = ax.get_figure()
        fig.savefig(path, dpi=180)
        plt.close(fig)

    def _write_placeholder(path):
        with open(path, 'w'):
            pass

    m = MongoManager()
    fees = config.getfloat("Betting", "fees")

    # 24h bar chart
    df = pd.DataFrame(m.get_all_pnl())
    if len(df) > 0:
        # Adjust for fees only once we know there are rows: an empty frame
        # has no 'bf_profit' column. `.loc` replaces the removed `.ix`.
        profitable = df['bf_profit'] > 0
        df.loc[profitable] = df.loc[profitable] * (1 - fees)

        ax = df.plot.bar()
        ax.set_title("Today's bets")
        ax.set_ylabel('Return')
        ax.set_xlabel('Races')
        ax.axhline(y=0, color='r', linestyle='-')
        _save(ax, chart_path)

        # cumulative line chart
        ax = df.cumsum().plot()
        ax.set_title('Horse racing return last 24h')
        ax.set_ylabel('Return')
        ax.set_xlabel('Races')
        ax.axhline(y=0, color='r', linestyle='-')
        _save(ax, cumulative_path)
    else:
        # create empty files
        log.warning("No bets in the last 24 hours")
        _write_placeholder(chart_path)
        _write_placeholder(cumulative_path)

    # cumulative line chart since last year
    from_date = _datetime.strptime(config.get("PNL", "from_date"),
                                   "%Y-%m-%d")
    df = pd.DataFrame(m.get_all_pnl(from_date))
    if len(df) == 0:
        # same fallback as the 24h charts instead of a KeyError below
        log.warning("No bets since {}".format(from_date))
        _write_placeholder(year_path)
        return

    profits = df['bf_profit'].fillna(0).values
    _, p_stat = ttest_ind(profits, np.zeros(len(profits)))

    ax = df.cumsum().plot()
    balance_dict = get_account_balance()
    balance = (balance_dict['available_to_bet_balance'] -
               balance_dict['exposure'])
    ax.set_title(
        """Horse racing return last 365 days\np-stat: {:.2f} - {}\nCurrent Balance: {}"""
        .format(p_stat, p_stat < 0.05, balance))
    ax.set_ylabel('Return')
    ax.set_xlabel('Races')
    ax.axhline(y=0, color='r', linestyle='-')
    _save(ax, year_path)