def main():
    """Plot actual and observer-biased running speeds as a PMF and as CDFs.

    Loads the race results through ``relay``, passes the speeds through
    ``relay.BinData(..., 3, 12, 100)``, and builds two PMFs: the actual
    speeds and the speeds returned by ``ObservedPmf`` for an observer speed
    of 7.5. Two figures are then saved via ``thinkplot.Save``:

    * root ``observed_speeds``: the biased PMF only;
    * root ``observed_speeds_cdf``: the actual and biased CDFs together.
    """
    # Load and bin the speeds in one expression; the calls run in the same
    # order as when each intermediate result is named separately.
    speeds = relay.BinData(relay.GetSpeeds(relay.ReadResults()), 3, 12, 100)

    # PMF of the actual speeds. It is not drawn in the first figure; it is
    # the input to ObservedPmf and to the CDF comparison below.
    actual_pmf = thinkstats2.Pmf(speeds, 'actual speeds')

    # Distribution as seen by the running observer.
    observed_pmf = ObservedPmf(actual_pmf, 7.5, label='observed speeds')

    # Figure 1: the biased PMF.
    thinkplot.Pmf(observed_pmf)
    thinkplot.Save(
        root='observed_speeds',
        title='PMF of running speed',
        xlabel='speed (mph)',
        ylabel='PMF',
    )

    # Figure 2: actual CDF first, then biased CDF. Both are built before
    # PrePlot is called, as in the original statement order.
    cdfs = [thinkstats2.Cdf(dist) for dist in (actual_pmf, observed_pmf)]

    thinkplot.PrePlot(2)
    thinkplot.Cdfs(cdfs)
    thinkplot.Save(
        root='observed_speeds_cdf',
        title='CDF of running speed',
        xlabel='speed (mph)',
        ylabel='CDF',
    )
#
# As a result, runners were spread out along the course with little relationship between speed and location. When I joined the race, the runners near me were (pretty much) a random sample of the runners in the race.
#
# So where does the bias come from? During my time on the course, the chance of overtaking a runner, or being overtaken, is proportional to the difference in our speeds. I am more likely to catch a slow runner, and more likely to be caught by a fast runner. But runners at the same speed are unlikely to see each other.
#
# Write a function called `ObservedPmf` that takes a `Pmf` representing the actual distribution of runners’ speeds, and the speed of a running observer, and returns a new `Pmf` representing the distribution of runners’ speeds as seen by the observer.
#
# To test your function, you can use `relay.py`, which reads the results from the James Joyce Ramble 10K in Dedham MA and converts the pace of each runner to mph.
#
# Compute the distribution of speeds you would observe if you ran a relay race at 7 mph with this group of runners.

#%%
import relay

# Load the race results, then convert them to speeds and bin them in a
# single step; `results` and `speeds` stay available to later cells.
results = relay.ReadResults()
speeds = relay.BinData(relay.GetSpeeds(results), 3, 12, 100)

#%%
# PMF of the actual speeds, drawn with labelled axes.
pmf = thinkstats2.Pmf(speeds, 'actual speeds')
thinkplot.Pmf(pmf)
thinkplot.Config(xlabel='Speed (mph)', ylabel='PMF')

#%%
# Solution goes here

#%%
# Solution goes here

#%%