def upload(args):
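    """Load one Betfair historical prices file (plain or zipped csv) and insert
    the derived races, train and vwao documents into MongoDB."""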
    args, path = args
    parse = lambda x: dateutil.parser.parse(x, dayfirst=True)

    try:
        directory, file_name = split(path)
        file_part, ext = splitext(file_name)

        formatter = logging.Formatter('%(asctime)s - ' + file_name + ' - %(levelname)s: %(message)s')
        configure_root_logger(args.logtty, args.logfile, formatter=formatter)
        db = MongoClient(args.host, args.port)[args.db]

        if ext == '.zip':
            logging.info('Reading zipped csv file into memory')
            fin = zipfile.ZipFile(path, 'r').open(file_part + '.csv')
        else:
            logging.info('Reading csv file into memory')
            fin = path

        bars = pd.read_csv(fin, parse_dates=['SCHEDULED_OFF'], date_parser=parse)
        bars.columns = bars.columns.map(lambda x: x.lower())
        bars = bars.rename(columns={'event_id': 'market_id'})
        for col in ['market_id', 'selection_id']:
            bars[col] = bars[col].map(str)  # Make sure dtype==str

        # Insert other filters here:
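        # Keep only pre-event prices ('PE' in the in_play column), i.e. bars matched before the off.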
        bars = bars[bars.in_play == 'PE']
        bars['selection'] = bars['selection'].map(extract_horse_name)

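        # Derive the documents for the three collections: races, training rows and volume-weighted average odds (vwao).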
        races = races_from_bars(bars).reset_index()
        train = training_from_races(races)
        vwao = vwao_from_bars(bars)

        try:
            db[args.races].insert(pandas_to_dicts(races), continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; If this is a surprise, ABORT! msg=%s' % (db[args.races], e))

        try:
            db[args.train].insert(convert_types(train, {'n_runners': int}), continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; If this is a surprise, ABORT! msg=%s' % (db[args.train], e))

        try:
            db[args.vwao].insert(vwao, continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; If this is a surprise, ABORT! msg=%s' % (db[args.vwao], e))

        logging.info('Successfully uploaded to %s' % db)
    except Exception as e:
        logging.critical(e)
        raise


def run_backtest(context):
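    """Run a single backtest of the Balius strategy over the training races and
    persist the strategy, scorecard, bets and market breakdown to MongoDB."""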
    n_bkt, args, mparams = context

    formatter = logging.Formatter('%(asctime)s - n_bkt=' + str(n_bkt) + ' - %(levelname)s: %(message)s')
    configure_root_logger(args.logtty, args.logfile,
                          MongoClient(args.host, args.port)[args.db][args.logmongo] if args.logmongo is not None
                          else None, formatter=formatter)

    db = MongoClient(args.host, args.port)[args.db]

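    # Build the Mongo filter: GB races only, restricted to the requested scheduled_off date range.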
    where_clause = defaultdict(lambda: {})
    country, start_date, end_date = 'GB', parse_date(args.start), parse_date(args.end)
    if start_date is not None:
        where_clause['scheduled_off']['$gte'] = start_date
    if end_date is not None:
        where_clause['scheduled_off']['$lte'] = end_date
    if country is not None:
        where_clause['country'] = country
    sorted_races = db[args.train].find(where_clause, sort=[('scheduled_off', 1)], timeout=False)

    exec_services = HistoricalExecutionService(db)
    strat = strategy.Balius(mu=mparams['mu'], sigma=mparams['sigma'], beta=mparams['beta'], tau=mparams['tau'],
                            draw_probability=mparams['draw_prob'], risk_aversion=mparams['risk_aversion'],
                            min_races=mparams['min_races'], max_exposure=mparams['max_exposure'])
    st = time.clock()
    strategy.backtest(exec_services, strat, sorted_races)
    en = time.clock()
    logging.info('Backtest finished in %.2f seconds' % (en - st))

    strat_dict = strat.to_dict()
    strat_id = db[STRATEGIES_COLL].insert(strat_dict)
    logging.info('Strategy serialised to %s with id=%s' % (db[STRATEGIES_COLL], strat_id))

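    # Price the bets from the backtest and summarise the run in a scorecard.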
    bets = price_historical_bets(db, exec_services.get_mu_bets()[0])
    scorecard = make_scorecard(bets)
    now = datetime.datetime.utcnow()
    scorecard.update({'params': {'ts': strat_dict['hm']['ts'], 'risk': strat_dict['risk']},
                      'timestamp': now,
                      'run_seconds': en - st,
                      'strategy_id': strat_id})
    scorecard_id = db[SCORECARDS_COLL].insert(scorecard)
    logging.info('Scorecard inserted in %s with id=%s' % (db[SCORECARDS_COLL], scorecard_id))

    db[BETS_COLL].insert(add_scorecard_id_to_dicts(scorecard_id, bets))
    logging.info('Associated bets inserted in %s' % db[BETS_COLL])

    markets = market_breakdown(bets).reset_index()
    markets = pandas_to_dicts(markets, {'n_runners': int})
    db[MARKETS_COLL].insert(add_scorecard_id_to_dicts(scorecard_id, markets))
    logging.info('Associated market breakdown inserted in %s' % db[MARKETS_COLL])


def upload(args):
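    """Load one horseracebase.com results file (tab-separated) and insert one
    document per race into MongoDB."""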
    args, path = args

    try:
        _, file_name = split(path)

        formatter = logging.Formatter('%(asctime)s - ' + file_name + ' - %(levelname)s: %(message)s')
        configure_root_logger(formatter=formatter)
        db = MongoClient(args.host, args.port)[args.db]

        logging.info('Reading csv file into memory')
        races = pd.read_csv(path, sep='\t', parse_dates=[[0, 1]], dayfirst=True)
        if len(races) <= 2:
            logging.warning('No races in file. Skipping')
            return

        races.rename(columns={'race_date_race_time': 'scheduled_off',
                              'horse_name': 'selection',
                              'place': 'ranking'}, inplace=True)

        races['selection'] = races['selection'].map(parse_horse_name)
        races['ranking'] = races['ranking'].map(parse_place)

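        # Collapse to one row per race: lists of runners and zero-based finishing positions, keyed by course and off time.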
        races = pd.DataFrame.from_dict([{'course': k[0],
                                         'scheduled_off': k[1],
                                         'selection': v['selection'][v.ranking >= 0].tolist(),
                                         'ranking': (v['ranking'][v.ranking >= 0] - 1).tolist()}
                                        for k, v in races.groupby(['track', 'scheduled_off'])])

        # int64 columns don't serialise to BSON directly; map them to plain Python ints first.
        dtypes = list(races.dtypes[races.dtypes == np.int64].index)
        type_mappers = dict(zip(dtypes, [int] * len(dtypes)))
        db[args.races].insert(pandas_to_dicts(races, type_mappers))

        logging.info('Successfully uploaded to %s' % db)
    except Exception as e:
        logging.critical(e)
        raise


if __name__ == '__main__':
    import zipfile
    from os.path import split, splitext
    import argparse
    from multiprocessing import Pool, cpu_count
    from pymongo import MongoClient

    parser = argparse.ArgumentParser(description='Uploads Betfair historical data to a MongoDB database')
    parser.add_argument('files', metavar='FILES', type=str, nargs='+', help='zip/csv/pd files to upload')
    parser.add_argument('--host', type=str, action='store', default='localhost', help='MongoDB host (default=localhost)')
    parser.add_argument('--port', type=int, action='store', default=33000, help='MongoDB port (default=33000)')
    parser.add_argument('--db', type=str, action='store', default='betfair', help='db (default=betfair)')
    parser.add_argument('--jobs', type=int, action='store', default=-1, help='how many jobs to use')
    parser.add_argument('--races', type=str, action='store', default='races', help='races collection (default=races)')
    parser.add_argument('--train', type=str, action='store', default='train',
                        help='training set collection (default=train)')
    parser.add_argument('--vwao', type=str, action='store', default='vwao',
                        help='volume-weighted-average-odds (vwao) collection (default=vwao)')
    parser.add_argument('--logfile', type=str, action='store', default=None, help='specifies what log file to use')
    parser.add_argument('--logtty', help='prints logging info to the terminal', action='store_true')
    args = parser.parse_args()

    configure_root_logger(args.logtty, args.logfile)
    if len(args.files) > 1:
        cpus = min(cpu_count(), len(args.files)) if args.jobs < 0 else args.jobs

        logging.info('Creating a pool with %d worker processes..' % cpus)
        pool = Pool(processes=cpus)
        pool.map(upload, zip([args] * len(args.files), args.files))
    else:
        upload((args, args.files[0]))



if __name__ == '__main__':
    import zipfile
    from os.path import split, splitext
    import argparse
    from multiprocessing import Pool, cpu_count
    from pymongo import MongoClient

    parser = argparse.ArgumentParser(description='Uploads horseracebase.com historical data to a MongoDB database')
    parser.add_argument('files', metavar='FILES', type=str, nargs='+', help='zip/csv/pd files to upload')
    parser.add_argument('--host', type=str, action='store', default='localhost', help='MongoDB host (default=localhost)')
    parser.add_argument('--port', type=int, action='store', default=33000, help='MongoDB port (default=33000)')
    parser.add_argument('--db', type=str, action='store', default='betfair', help='db (default=betfair)')
    parser.add_argument('--jobs', type=int, action='store', default=-1, help='how many jobs to use')
    parser.add_argument('--races', type=str, action='store', default='horseracebase',
                        help='races collection (default=horseracebase)')
    args = parser.parse_args()

    configure_root_logger()
    cpus = min(cpu_count(), len(args.files)) if args.jobs < 0 else args.jobs

    logging.info('Creating a pool with %d worker processes..' % cpus)
    pool = Pool(processes=cpus)
    pool.map(upload, zip([args] * len(args.files), args.files))


    parser.add_argument('--draw-prob', type=arg_linspace, action='store', default=[DEFAULT_DRAW],
                        help='draw probability (default=%.2f)' % DEFAULT_DRAW)
    parser.add_argument('--risk-aversion', type=arg_linspace, action='store', default=[0.1], metavar='RA',
                        help='risk aversion')
    parser.add_argument('--min-races', type=arg_linspace, action='store', default=[3], metavar='N',
                        help='minimum no. of races required per horse before betting')
    parser.add_argument('--max-exposure', type=arg_linspace, action='store', default=[50], metavar='EXP',
                        help='maximum exposure')
    parser.add_argument('--logfile', type=str, action='store', default=None, help='specifies what log file to use')
    parser.add_argument('--logmongo', type=str, action='store', default=None,
                        help='specifies what collection to use for logging to MongoDB')
    parser.add_argument('--logtty', help='prints logging info to the terminal', action='store_true')
    parser.add_argument('train', type=str, action='store', help='training set collection')
    args = parser.parse_args()

    configure_root_logger(args.logtty, args.logfile,
                          MongoClient(args.host, args.port)[args.db][args.logmongo] if args.logmongo is not None
                          else None)

    keys = ['mu', 'sigma', 'beta', 'tau', 'draw_prob', 'risk_aversion', 'min_races', 'max_exposure']
    mparams = [args.mu, args.sigma, args.beta, args.tau, args.draw_prob,
               args.risk_aversion, args.min_races, args.max_exposure]
    n_backtests = reduce(lambda x, y: x * y, map(len, mparams))
    logging.info('The specified ranges of parameters yield %d different backtests.' % n_backtests)

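    # One backtest per point in the cartesian product of the parameter ranges.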
    packed_args = ((n_bkt, args, dict(zip(keys, values))) for n_bkt, values in enumerate(product(*mparams)))
    if n_backtests > 1:
        n_processes = min(cpu_count(), n_backtests) if args.jobs < 0 else args.jobs
        logging.info('Creating a pool with %d worker processes..' % n_processes)
        pool = Pool(processes=n_processes)

        pool.map(run_backtest, packed_args)
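

# Index-initialisation script for the betfair database collections.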

HOST = 'localhost'
PORT = 33000
DB = 'betfair'


def ensure_index(collection, index, unique=False):
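    """Build the given index on the collection; for unique indexes, duplicate documents are dropped (drop_dups)."""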
    if unique:
        logging.info('Ensuring unique index on %s: %s' % (collection, index))
    else:
        logging.info('Ensuring index on %s: %s' % (collection, index))
    collection.ensure_index(index, unique=unique, drop_dups=unique)


configure_root_logger(True)

db = MongoClient(HOST, PORT)[DB]
logging.info('Initializing indexes in database %s' % db)

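# Unique keys: one vwao document per (market, runner) and one races/train document per market.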
ensure_index(db[VWAO_COLL], [('scheduled_off', 1)])
ensure_index(db[VWAO_COLL], [('market_id', 1), ('selection', 1)], unique=True)
ensure_index(db[VWAO_COLL], [('market_id', 1), ('selection_id', 1)], unique=True)

ensure_index(db[RACES_COLL], [('market_id', 1)], unique=True)

ensure_index(db[TRAIN_COLL], [('scheduled_off', 1)])
ensure_index(db[TRAIN_COLL], [('market_id', 1)], unique=True)