def PlotMarathon(data):
    """Plots marathon record data with a least-squares projection.

    Splits the data at a fixed index, fits a line to the later segment,
    plots that segment, its fitted line and a horizontal target line, and
    saves the figure through myplot.Save.

    data: sequence of (x, y) pairs; presumably (year, speed in mph),
          judging by the tick values and the ylabel -- TODO confirm
    """
    # Diagnostic output; written so the same text is produced on
    # Python 2 and Python 3.
    print(len(data))
    xs, ys = zip(*data)
    print('%s %s' % (xs, ys))

    # Only the points from index n onward are fitted and plotted.
    n = 29
    late_xs, late_ys = xs[n:], ys[n:]

    inter, slope = correlation.LeastSquares(late_xs, late_ys)
    print('(inter, slope): %s %s' % (inter, slope))

    # Both lines are drawn over the same x range.
    end = 2043
    # Presumably the speed (mph) of a two-hour marathon: 26.21875 miles
    # in 2 hours -- TODO confirm.
    target = 26.21875 / 2

    # Horizontal target line (slope 0), drawn in red.
    lxs, lys = MakeLine(target, 0, [1965, end])
    pyplot.plot(lxs, lys, 'r')

    # Fitted line, drawn in blue.
    lxs, lys = MakeLine(inter, slope, [1965, end])
    pyplot.plot(lxs, lys, 'b')

    pyplot.plot(late_xs, late_ys, 'bo')

    # list() keeps the tick values a plain list on Python 3 as well.
    xticks = list(range(1980, 2060, 20))
    pyplot.xticks(xticks)

    myplot.Save(root='world_record_predict',
                title='Marathon record projection',
                ylabel='mph',
                formats=['pdf', 'eps', 'png'])
def NormalPlot(ys, label, color='b', jitter=0.0, **line_options):
    """Draws a normal probability plot of ys.

    The sorted (optionally jittered) values are plotted against a sorted
    random sample from the standard normal distribution, together with a
    least-squares line fitted through those points.

    Args:
      ys: sequence of values
      label: string label for the plotted points
      color: color string passed along to pyplot.plot
      jitter: float magnitude of the uniform jitter added to each y
      line_options: extra keyword options forwarded to pyplot.plot
    """
    count = len(ys)

    # Draw the normal sample first, then the jitter, so the random stream
    # is consumed in that order.
    normal_sample = sorted(random.gauss(0.0, 1.0) for _ in range(count))
    jittered = sorted(y + random.uniform(-jitter, +jitter) for y in ys)

    inter, slope = correlation.LeastSquares(normal_sample, jittered)
    fitted = correlation.FitLine(normal_sample, inter, slope)

    # Fitted line first, then the (already sorted) points on top.
    pyplot.plot(*fitted, color=color, linewidth=0.5, alpha=0.5)
    pyplot.plot(normal_sample,
                jittered,
                color=color,
                marker='.',
                label=label,
                markersize=3,
                alpha=0.1,
                **line_options)
def Pvalue(filename='heri.0', delta=0.033, n=100000):
    """Estimates a p-value for a fitted slope by shuffling the ys.

    Reads (x, y) pairs with ReadData, then n times shuffles the ys
    relative to the xs, refits a least-squares line, and counts how often
    the absolute value of the slope exceeds delta.

    filename: passed to ReadData
    delta: slope magnitude to compare against; only strictly greater
           values are counted
    n: number of shuffled trials

    returns: float fraction of trials with abs(slope) > delta

    Raises ZeroDivisionError if n is 0.
    """
    data = ReadData(filename)

    # Splitting the pairs does not depend on the trial, so do it once
    # rather than on every iteration.
    xs, ys = zip(*data)

    count = 0
    for _ in range(n):
        # Shuffle a fresh copy so every trial starts from the original
        # order of the ys.
        shuffled = list(ys)
        random.shuffle(shuffled)

        _inter, slope = correlation.LeastSquares(xs, shuffled)
        if abs(slope) > delta:
            count += 1

    return float(count) / n
def FitCdf(cdf):
    """Fits a line to the log of the complementary CDF.

    cdf: Cdf of RDT; its xs and ps attributes are read

    returns: the negated slope of the least-squares line
    """
    complement = [1 - p for p in cdf.ps]

    # The first and last points are left out of the fit; presumably
    # because the final complementary probability is 0 and has no log
    # -- TODO confirm.
    inner_xs = cdf.xs[1:-1]
    log_complement = [math.log(cp) for cp in complement[1:-1]]

    _inter, slope = correlation.LeastSquares(inner_xs, log_complement)
    return -slope
def testLeastSquares(self):
    """Checks LeastSquares, Residuals and CoefDetermination on three points."""
    xs = [1, 2, 3]
    ys = [3, 6, 8]

    # assertAlmostEqual is used throughout: the assertAlmostEquals alias
    # is deprecated and no longer exists in Python 3.12.
    inter, slope = correlation.LeastSquares(xs, ys)
    self.assertAlmostEqual(inter, 0.66666666)
    self.assertAlmostEqual(slope, 2.5)

    res = correlation.Residuals(xs, ys, inter, slope)
    for got, exp in zip(res, [-0.166666666, 0.33333333, -0.16666666666]):
        self.assertAlmostEqual(got, exp)

    R2 = correlation.CoefDetermination(ys, res)
    self.assertAlmostEqual(R2, 0.986842105263)
def Fit(xs, ys):
    """Find the linear least squares fit between xs and ys.

    Prints the intercept, slope and coefficient of determination.

    xs: sequence of x values
    ys: sequence of y values

    returns: tuple of (inter, slope, R2)
    """
    # Each print takes one pre-formatted string so the output is the same
    # on Python 2 and Python 3.
    inter, slope = correlation.LeastSquares(xs, ys)
    print('(inter, slope): %s %s' % (inter, slope))

    res = correlation.Residuals(xs, ys, inter, slope)
    R2 = correlation.CoefDetermination(ys, res)

    print('inter %s' % (inter,))
    print('slope %s' % (slope,))
    print('R^2 %s' % (R2,))
    print('')  # blank separator line
    return inter, slope, R2
def Fit(halfs, fulls):
    """Find the linear least squares fit between halfs and fulls.

    Prints the intercept, slope and coefficient of determination, and the
    fitted value for the time '1:34:05'.

    halfs: sequence of values used as the x variable
    fulls: sequence of values used as the y variable

    returns: tuple of (inter, slope, R2)
    """
    # Each print takes one pre-formatted string so the output is the same
    # on Python 2 and Python 3.
    inter, slope = correlation.LeastSquares(halfs, fulls)
    print('(inter, slope): %s %s' % (inter, slope))

    res = correlation.Residuals(halfs, fulls, inter, slope)
    R2 = correlation.CoefDetermination(fulls, res)

    print('inter %s' % (inter,))
    print('slope %s' % (slope,))
    print('R^2 %s' % (R2,))
    print('')  # blank separator line

    # Evaluate the fitted line at one fixed input time.
    prediction = inter + slope * ConvertTimeToMinutes('1:34:05')
    print('prediction %s' % (prediction,))
    return inter, slope, R2
def FitLine(xs, ys, fxs):
    """Fits ys against log(xs) and evaluates the fitted line at fxs.

    The natural log is applied to xs before fitting, and to fxs before
    the fitted line is evaluated.

    xs: diameter in cm
    ys: age in years
    fxs: diameter in cm

    returns: list of fitted values, one per element of fxs
    """
    log_xs = [math.log(x) for x in xs]
    inter, slope = correlation.LeastSquares(log_xs, ys)
    return [inter + slope * math.log(fx) for fx in fxs]
def ComputeLeastSquares(ages, weights):
    """Computes least squares fit for ages and weights.

    Prints the Pearson and Spearman correlations, the fitted intercept
    and slope, and the coefficient of determination.

    ages: sequence of values used as the x variable
    weights: sequence of values used as the y variable

    returns: tuple of (inter, slope, R2)
    """
    # Each print takes one pre-formatted string so the output is the same
    # on Python 2 and Python 3.

    # compute the correlation between age and weight
    print('Pearson correlation %s' % (correlation.Corr(ages, weights),))
    print('Spearman correlation %s'
          % (correlation.SpearmanCorr(ages, weights),))

    # compute least squares fit
    inter, slope = correlation.LeastSquares(ages, weights)
    print('(inter, slope): %s %s' % (inter, slope))

    res = correlation.Residuals(ages, weights, inter, slope)
    R2 = correlation.CoefDetermination(weights, res)

    print('R^2 %s' % (R2,))
    print('')  # blank separator line
    return inter, slope, R2
def ComputeCorrelations():
    """Prints correlation and least-squares statistics for height vs weight.

    Reads respondent records through brfss_scatter.Respondents, then
    reports the Pearson correlation of height with weight and with
    Log(weights), the Spearman correlation of height with weight, and a
    least-squares fit of the log weights against height together with its
    coefficient of determination and the square root of that value.
    """
    respondents = brfss_scatter.Respondents()
    respondents.ReadRecords()
    print('Number of records:', len(respondents.records))

    heights, weights = respondents.GetHeightWeight()

    # Pearson correlation on the raw weights.
    pearson_raw = correlation.Corr(heights, weights)
    print('Pearson correlation (weights):', pearson_raw)

    # Pearson correlation again, after transforming the weights.
    log_weights = Log(weights)
    pearson_log = correlation.Corr(heights, log_weights)
    print('Pearson correlation (log weights):', pearson_log)

    spearman = correlation.SpearmanCorr(heights, weights)
    print('Spearman correlation (weights):', spearman)

    # Linear fit of the transformed weights against height.
    inter, slope = correlation.LeastSquares(heights, log_weights)
    print('Least squares inter, slope (log weights):', inter, slope)

    residuals = correlation.Residuals(heights, log_weights, inter, slope)
    r_squared = correlation.CoefDetermination(log_weights, residuals)
    print('Coefficient of determination:', r_squared)
    print('sqrt(R^2):', math.sqrt(r_squared))