def test_two_skew():
    """Convolving two skew-normal densities preserves the sum of their means."""
    d1 = skew_normal_density(L=50, unit=1.0, scale=5.0, a=1.0)
    d2 = skew_normal_density(L=50, unit=1.0, scale=5.0, loc=1.2, a=-1.0)
    combined = convolve_two(density1=d1, density2=d2, L=100)
    expected_mean = mean_of_density(d1, unit=1) + mean_of_density(d2, unit=1)
    actual_mean = mean_of_density(combined, unit=1)
    assert abs(actual_mean - expected_mean) < 1e-4
def demo():
    """Plot three shifted skew-normal runners with the winner density and multiplicity.

    Densities are divided by ``unit`` so the plotted curves are on a
    per-unit (pdf) scale; ``multiplicity`` is already a lattice series.
    """
    skew1 = skew_normal_density(L=L, unit=unit, a=1.5)
    skew2 = skew_normal_density(L=L, unit=unit, a=1.5, loc=-0.5)
    skew3 = skew_normal_density(L=L, unit=unit, a=1.5, loc=-1.0)
    best, multiplicity = winner_of_many([skew1, skew2, skew3])
    densitiesPlot(
        [skew1 / unit, skew2 / unit, skew3 / unit, best / unit, multiplicity],
        unit,
        # Fixed stray comma in the second legend label (was '2,')
        legend=['1', '2', '3', 'best', 'multiplicity'])
def test_implicit_payoffs():
    """Smoke test: implicit_state_prices runs on a three-runner field."""
    runners = [
        skew_normal_density(L=L, unit=unit, a=1.5),
        skew_normal_density(L=L, unit=unit, a=1.5, loc=-0.5),
        skew_normal_density(L=L, unit=unit, a=1.5, loc=-1.0),
    ]
    densityAll, multiplicityAll = winner_of_many(runners)
    payoffs = implicit_state_prices(density=runners[0],
                                    densityAll=densityAll,
                                    multiplicityAll=multiplicityAll,
                                    cdf=None,
                                    cdfAll=None,
                                    offsets=None)
def demo():
    """Plot the implicit state prices for the strongest of three runners."""
    runners = [
        skew_normal_density(L=L, unit=unit, a=1.5),
        skew_normal_density(L=L, unit=unit, a=1.5, loc=-0.5),
        skew_normal_density(L=L, unit=unit, a=1.5, loc=-1.0),
    ]
    densityAll, multiplicityAll = winner_of_many(runners)
    payoffs = implicit_state_prices(density=runners[0],
                                    densityAll=densityAll,
                                    multiplicityAll=multiplicityAll,
                                    cdf=None,
                                    cdfAll=None,
                                    offsets=None)
    import matplotlib.pyplot as plt
    plt.plot(payoffs)
    plt.show()
def test_monte_carlo():
    """Monte Carlo winner density stays close to the analytic calculation."""
    runner = skew_normal_density(L=25, unit=unit, a=1.5)
    field = [runner, runner, runner]
    densityAll, multiplicityAll = winner_of_many(field)
    sampled = sample_winner_of_many(field, nSamples=5000)
    for analytic_p, sampled_p in zip(densityAll, sampled):
        assert abs(analytic_p - sampled_p) < 3e-2
def demo():
    """Visually compare the Monte Carlo winner density to the analytic one."""
    runner = skew_normal_density(L=25, unit=unit, a=1.5)
    field = [runner, runner, runner]
    densityAll, multiplicityAll = winner_of_many(field)
    sampled = sample_winner_of_many(field, nSamples=50000)
    if PLOTS:
        densitiesPlot([densityAll, sampled], unit=0.1)
def add_skew_normal_ability_to_dataframe(df, by: str, prob_col='p', new_col='ability', L=STD_L, scale=STD_SCALE, unit=STD_UNIT, a=STD_A, loc=0.0):
    """ Append an ability column implied by win probabilities, assuming
        skew-normal performance distributions.

    :param df:        pd.DataFrame with probability columns
    :param by:        Categorical variable column indicating groupings
    :param prob_col:  Name of column holding selection (win) probabilities
    :param new_col:   Name of new column to store ability in
    :param L:         Lattice size
    :param scale:     Width of performance distribution in absolute terms
    :param unit:      Distance between lattice points
    :param a:         Skew parameter of the performance density
    :param loc:       Location parameter of the performance density
    :return: New data frame with ability column added
    """
    performance_density = skew_normal_density(L=L, unit=unit, loc=loc, scale=scale, a=a)
    return add_centered_ability_to_dataframe(df=df,
                                             prob_col=prob_col,
                                             by=by,
                                             new_col=new_col,
                                             density=performance_density,
                                             unit=unit)
def demo():
    """Plot the winner density for twenty-five increasingly skewed runners."""
    field = [skew_normal_density(L=L, unit=unit, a=0.3 * i) for i in range(25)]
    best, multiplicity = winner_of_many(field)
    curves = [d / unit for d in field[:5]] + [best / unit, multiplicity]
    densitiesPlot(curves, unit)
def test_five_skew():
    """Gaussian copula rank probabilities for five heterogeneous runners."""
    locations = [-0.5, -0.25, 0, 1, 1.5]
    widths = [1.0, 1.5, 1.2, 1.3, 2.0]
    densities = [
        skew_normal_density(L=100, unit=0.1, scale=w, loc=m, a=1.0)
        for m, w in zip(locations, widths)
    ]
    rank_probs = gaussian_copula_five(densities, rho=0.01)
    return densities, rank_probs
def test_many_skew():
    """Convolving ten skew-normals yields a full lattice and preserves the mean."""
    field = [skew_normal_density(L=50, unit=1.0, scale=5.0, a=1.0) for _ in range(10)]
    total = convolve_many(densities=field, L=500, do_padding=True)
    assert len(total) == 1001
    summed_means = sum(mean_of_density(d, unit=1) for d in field)
    assert abs(mean_of_density(total, unit=1) - summed_means) < 1e-4
def test_calibration():
    """Round trip: state prices -> implied offsets -> densities -> winner."""
    performance = skew_normal_density(L=L, unit=unit, a=1.5)
    prices = [0.2, 0.3, 0.5]
    implied_offsets = solve_for_implied_offsets(prices=prices, density=performance, nIter=2)
    inferred_prices = state_prices_from_offsets(performance, implied_offsets)
    print(inferred_prices)
    offset_densities = densities_from_offsets(performance, implied_offsets)
    densityAllAgain, multiplicityAll = winner_of_many(offset_densities)
def demo():
    """Calibrate offsets to example prices, then plot the implied densities."""
    performance = skew_normal_density(L=L, unit=unit, a=1.5)
    prices = [0.2, 0.3, 0.5]
    implied_offsets = solve_for_implied_offsets(prices=prices, density=performance, nIter=2)
    inferred_prices = state_prices_from_offsets(performance, implied_offsets)
    print(str(inferred_prices))
    offset_densities = densities_from_offsets(performance, implied_offsets)
    densityAllAgain, multiplicityAll = winner_of_many(offset_densities)
    if PLOTS:
        # NOTE(review): five legend labels for what looks like four curves — verify intent
        densitiesPlot([densityAllAgain] + offset_densities,
                      unit=0.1,
                      legend=['guess', 'analytic', '1', '2', '3'])
def test_many_many_normal():
    """Convolving one hundred normal densities preserves the summed mean."""
    field = [skew_normal_density(L=50, unit=1.0, scale=5.0, a=0.0) for _ in range(100)]
    lattice_size = 5000
    total = convolve_many(densities=field, L=lattice_size)
    assert len(total) == 2 * lattice_size + 1
    summed_means = sum(mean_of_density(d, unit=1) for d in field)
    assert abs(mean_of_density(total, unit=1) - summed_means) < 1e-4
def demo():
    """ Time the inversion of state prices into implied offsets as race size grows.

    For each race size n, known offsets are turned into state prices, the
    prices are inverted back into offsets, and the inversion CPU time is
    recorded. Once at least three sizes have run, a power law is fitted to
    log(cpu) vs log(n) and the timings are plotted on log-log axes.
    """
    density = skew_normal_density(L=1000, unit=unit, a=1.5)
    cpu_times = list()
    errors = list()
    race_sizes = [5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000, 50000, 100000, 150000, 180000]
    for k, n in enumerate(race_sizes):
        print(n)
        # Ground-truth offsets. NOTE(review): the comprehension's k shadows the
        # outer loop variable (harmless in Py3 comprehension scope), and int()
        # truncation collapses many offsets to the same value when unit < 1 —
        # confirm intended.
        true_offsets = [int(unit * k) for k in range(n)]
        state_prices = state_prices_from_offsets(density=density, offsets=true_offsets)
        print("State prices are " + str(state_prices))
        offset_samples = list(range(-100, 100))[::-1]
        # Now try to infer offsets from state prices
        start_time = time.time()
        implied_offsets = solve_for_implied_offsets(prices=state_prices,
                                                    density=density,
                                                    offset_samples=offset_samples,
                                                    nIter=5)
        cpu_times.append(time.time() - start_time)
        # Offsets are only identified up to translation: re-base to the first one
        recentered_offsets = [io - implied_offsets[0] for io in implied_offsets]
        differences = [o1 - o2 for o1, o2 in zip(recentered_offsets, true_offsets)]
        avg_l1_in_offset = np.mean(np.abs(differences))
        errors.append(avg_l1_in_offset)
        print(avg_l1_in_offset)
        print(cpu_times)
        log_cpu = [math.log(cpu) for cpu in cpu_times]
        log_n = [math.log(n_) for n_ in race_sizes[:k + 1]]
        if k >= 2:
            # Slope of the fit estimates the complexity exponent of the inversion.
            # (Reconstructed indentation: plotting grouped with the fit once k >= 2.)
            print('Fitting ...')
            print(np.polyfit(log_n, log_cpu, 1))
            import matplotlib.pyplot as plt
            plt.clf()
            plt.scatter(race_sizes[:k + 1], cpu_times)
            plt.yscale('log')
            plt.xscale('log')
            plt.xlabel('Number of participants (n)')
            plt.ylabel('Inversion time in seconds')
            plt.show()
def skew_normal_place_pricing(dividends, n_samples=N_SAMPLES, longshot_expon: float = 1.0, a: float = STD_A, scale=STD_SCALE, nan_value=NAN_DIVIDEND) -> dict: """ Price place/show and exotics from win market by Monte Carlo of performances :param dividends [ float ] decimal prices :param longshot_expon power law to apply to dividends, if you want to try to correct for longshot bias. :param a skew parameter in skew-normal running time distribution :param scale scale parameter in skew-normal running time distribution :returns {'win':[1.6,4.5,...], 'place':[ ] , ... } """ # TODO: Add control variates unit = STD_UNIT L = STD_L density = skew_normal_density(L=L, unit=unit, scale=scale, a=a) adj_dividends = longshot_adjusted_dividends(dividends=dividends, longshot_expon=longshot_expon) offsets = dividend_implied_ability(dividends=adj_dividends, density=density, nan_value=nan_value) densities = densities_from_offsets(density=density, offsets=offsets) performances = simulate_performances(densities=densities, n_samples=n_samples, add_noise=True, unit=unit) placegetters = placegetters_from_performances(performances=performances, n=4) the_counts = exotic_count(placegetters, do_exotics=False) n_runners = len(adj_dividends) prices = dict() for bet_type, multiplicity in zip(PLACING_NAMES, range(1, 5)): prices[bet_type] = dividends_from_prices( [the_counts[bet_type][j] for j in range(n_runners)], multiplicity=multiplicity) return prices
def demo():
    """ Study calibration round-trip accuracy as the number of participants grows.

    For each race size n, a decaying set of win probabilities is inverted into
    implied offsets and mapped back to prices; the mean relative error of the
    round trip is accumulated and plotted against n on a log scale.
    """
    density = skew_normal_density(L=500, unit=unit, a=1.5)
    errors = list()
    race_sizes = [int(j) for j in np.logspace(base=10., start=1, stop=5, num=20)]
    for k, n in enumerate(race_sizes):
        print(n)
        print(errors[-1:])  # previous size's error (empty list on the first pass)
        # Probabilities decaying from 5/n to 5/(n log n), normalized to sum to 1
        unnormalized_probs = np.linspace(start=5 / n, stop=5 / (n * math.log(n)), num=n)
        state_prices = [p_ / sum(unnormalized_probs) for p_ in unnormalized_probs]
        assert abs(sum(state_prices) - 1) < 1e-6
        implied_offsets = solve_for_implied_offsets(prices=state_prices, density=density, nIter=5)
        implied_dividends = ability_implied_dividends(ability=implied_offsets, density=density)
        # Decimal dividends are reciprocal state prices
        implied_state_prices = [1 / dvd for dvd in implied_dividends]
        relative_differences = [abs(p1 - p2) / (p1) for p1, p2 in zip(state_prices, implied_state_prices)]
        avg_l1_error = np.mean(np.abs(relative_differences))
        errors.append(avg_l1_error)
        import matplotlib.pyplot as plt
        plt.clf()
        plt.scatter(race_sizes[:k + 1], errors)
        plt.xscale('log')
        plt.xlabel('Number of participants (n)')
        plt.ylabel('Mean relative error in win probability')
        plt.show()
def test_minimumPdf():
    """Smoke test: winner_of_many runs on three shifted skew-normals."""
    runners = [
        skew_normal_density(L=L, unit=unit, a=1.5),
        skew_normal_density(L=L, unit=unit, a=1.5, loc=-0.5),
        skew_normal_density(L=L, unit=unit, a=1.5, loc=-1.0),
    ]
    best, multiplicity = winner_of_many(runners)
    # (Fragment: the enclosing def's header is outside this chunk; indentation
    # below is a best-effort reconstruction from the flattened source.)
    # Build trifecta cube and accumulate second/third-place probabilities
    # from Harville-style conditional win probabilities.
    trifecta = [[[0.] * n for _ in range(n)] for _ in range(n)]
    win = probabilities
    second = [0.] * n
    third = [0.] * n
    for k1, p1 in enumerate(probabilities):
        for k2, p2 in enumerate(probabilities):
            if k1 != k2:
                exacta[k1][k2] = harville_exacta(p1=p1, p2=p2)
                second[k2] += exacta[k1][k2]
                # NOTE(review): 'k2 > k2' is always False, so quinella is never
                # populated here — almost certainly meant 'k2 > k1'; verify.
                if k2 > k2:
                    quinella[k1][k2] = harville_exacta(p1=p1, p2=p2) + harville_exacta(p1=p2, p2=p1)
                for k3, p3 in enumerate(probabilities):
                    trifecta[k1][k2][k3] = harville_trifecta(p1=p1, p2=p2, p3=p3)
                    third[k3] += trifecta[k1][k2][k3]
    # Place pays top two finishers; show pays top three
    show = [f + s + t for f, s, t in zip(win, second, third)]
    place = [f + s for f, s in zip(win, second)]
    return exacta, quinella, trifecta, win, place, show


# Kentucky Derby style fractional odds, sorted ascending
DERBY = sorted([150, 6, 66, 10, 30, 55, 33, 50, 80, 125, 15 / 2, 100, 25, 80, 40, 125, 28, 66, 100, 150, 40, 100, 20, 10 / 13, 20])

if __name__ == '__main__':
    from winning.lattice_conventions import STD_UNIT, STD_SCALE, STD_L, STD_A
    # Decimal dividends are fractional odds plus the stake
    dividends = [o + 1.0 for o in DERBY]
    bookmaker_ratios(nSamples=100, dividends=dividends, density=skew_normal_density(L=STD_L, unit=STD_UNIT, scale=STD_SCALE, a=2.0))
# The horse racing problem import numpy as np import math from winning.lattice import skew_normal_density from winning.lattice_calibration import implied_ability, ability_implied_dividends global OFFSETS OFFSETS = None HORSE_DIM = 500 # Maximum dimension global DIVIDENDS DIVIDENDS = None unit = 0.01 L = 500 DENSITY = skew_normal_density(L=500, unit=unit, a=1.5) def make_offsets(): global OFFSETS if OFFSETS is None: from datetime import datetime day_of_year = datetime.now().timetuple().tm_yday np.random.seed(day_of_year) OFFSETS = sorted(np.random.randn(HORSE_DIM) / unit) OFFSETS = [0] + [o - OFFSETS[0] for o in OFFSETS[1:]] return OFFSETS def make_dividends(n_dim): global DIVIDENDS if DIVIDENDS is None: DIVIDENDS = dict()
def test_five_skew():
    """Gaussian copula marginal for the first of five heterogeneous runners."""
    locations = [-0.5, -0.25, 0, 1, 1.5]
    widths = [1.0, 1.5, 1.2, 1.3, 2.0]
    densities = [
        skew_normal_density(L=500, unit=0.01, scale=w, loc=m, a=1.0)
        for m, w in zip(locations, widths)
    ]
    margin_0 = gaussian_copula_margin_0(densities, rho=0.9)
    return densities[0], margin_0
def centered_std_density(loc=0.0, L=STD_L, unit=STD_UNIT, scale=STD_SCALE):
    """Symmetric (a=0) skew-normal density on the standard lattice, re-centered."""
    raw = skew_normal_density(L=L, unit=unit, loc=loc, scale=scale, a=0)
    return center_density(raw)
def test_pandas():
    """ End-to-end pandas helpers: win probabilities -> abilities -> state prices.

    Builds a small survey-style frame, infers a skew-normal ability column
    from the 'p1' probabilities grouped by 'survey_id', then maps abilities
    back to state prices in a new 'p3' column.
    """
    df = pd.DataFrame.from_records(
        [(0, 0, 0, 2538111, 359005, 0.05347642, 0.10070493, 0, 5.36531324e+00),
         (1, 1, 1, 3409581, 359005, 0.07840445, 0.0625, 0, 3.31150750e+00),
         (2, 2, 2, 3819246, 359005, 0.06438515, 0.04342162, 0, 4.38275355e+00),
         (3, 3, 3, 3918030, 359005, 0.11929426, 0.13157895, 0, 8.96644205e-01),
         (4, 4, 4, 6445648, 359005, 0.25561541, 0.14184397, 0, -4.07017362e+00),
         (5, 5, 5, 7253682, 359005, 0.04656697, 0.0551572, 1, 6.09217096e+00),
         (6, 6, 6, 7662326, 359005, 0.10802503, 0.08873114, 0, 1.47559509e+00),
         (7, 7, 7, 7821483, 359005, 0.27423229, 0.37593985, 0, -4.58405876e+00),
         (8, 8, 8, 8556771, 359005, 0.06453406, 0.17636684, 1, 4.37122690e+00),
         (9, 9, 9, 8587406, 359005, 0.0929625, 0.04448399, 0, 2.34762741e+00),
         (10, 10, 10, 8612570, 359005, 0.3112959, 0.32786885, 0, -5.50559722e+00),
         (11, 11, 11, 8659871, 359005, 0.2446232, 0.21691974, 0, -3.76675358e+00),
         (12, 12, 12, 8729667, 359005, 0.28658434, 0.2247191, 0, -4.89233675e+00),
         (13, 13, 13, 7535716, 359005, 0.29875097, 0.06839945, 0, -5.19427295e+00),
         (14, 14, 14, 7828354, 359005, 0.10779594, 0.08688097, 0, 1.48885804e+00),
         (15, 15, 15, 8536492, 359005, 0.08720067, 0.37878788, 0, 2.72439676e+00),
         (16, 16, 16, 8619562, 359005, 0.23639059, 0.23640662, 1, -3.53225559e+00),
         (17, 17, 17, 8782045, 359005, 0.05593858, 0.00974849, 0, 5.14294432e+00),
         (18, 18, 18, 8821235, 359005, 0.07596882, 0.0736377, 0, 3.48611432e+00),
         (19, 19, 19, 9045356, 359005, 0.13795443, 0.13888889, 0, 8.84023342e-03),
         (20, 20, 20, 4417089, 359005, 0.78657452, 0.69444444, 0, -1.32599941e+01),
         (21, 21, 21, 5036342, 359005, 0.07009541, 0.03617945, 0, 3.94074830e+00),
         (22, 22, 22, 7135207, 359005, 0.14333008, 0.27855153, 1, -2.29298260e-01)],
        columns=['ndx1', 'ndx2', 'ndx3', 'product_id', 'survey_id', 'p1', 'p2', 'y', 'a_check'])
    df = add_skew_normal_ability_to_dataframe(df=df, by='survey_id', prob_col='p1', new_col='a')
    from winning.lattice import skew_normal_density
    unit = 0.01
    density = skew_normal_density(L=700, a=1.0, unit=unit)
    df = add_ability_implied_state_price_to_dataframe(df=df, by='survey_id', density=density, unit=unit, ability_col='a', new_col='p3')
def demo():
    """Print the mean of a unit-skew density and plot it."""
    density = skew_normal_density(L=L, unit=unit, a=1.0)
    print("mean is " + str(mean_of_density(density, unit=unit)))
    densitiesPlot([density], unit=unit)
def test_two_skew():
    """Gaussian copula win prices for two skew-normal runners."""
    leader = skew_normal_density(L=50, unit=0.1, scale=1.0, loc=1.0, a=1.0)
    chaser = skew_normal_density(L=50, unit=0.1, scale=1.0, loc=0, a=1.0)
    pair = [leader, chaser]
    state_prices = gaussian_copula_win(densities=pair, rho=0.85)
    return pair, state_prices