def testCov(self):
    """Check Cov, Corr and SpearmanCorr against known expected values.

    A plain list is paired with a NumPy copy of itself, with the negated
    copy, and with a second, differently ordered list. Every result is
    compared using assertAlmostEqual.
    """
    values = [0, 4, 7, 3, 8, 1, 6, 2, 9, 5]
    as_array = np.array(values)
    negated = -as_array
    reordered = [5, 4, 3, 0, 8, 9, 7, 6, 2, 1]

    # (function under test, second sequence, expected result)
    cases = [
        (thinkstats2.Cov, as_array, 8.25),
        (thinkstats2.Cov, negated, -8.25),
        (thinkstats2.Corr, as_array, 1),
        (thinkstats2.Corr, negated, -1),
        (thinkstats2.Corr, reordered, -0.1878787878),
        (thinkstats2.SpearmanCorr, negated, -1),
        (thinkstats2.SpearmanCorr, reordered, -0.1878787878),
    ]
    for func, other, expected in cases:
        self.assertAlmostEqual(func(values, other), expected)
def scatter(x):
    """Scatter-plot total crimes against column ``x`` and print statistics.

    Reads the ``df`` object from the enclosing scope, plots ``df[x]``
    against ``df.Total_crimes``, shows the figure, and then prints the
    Spearman correlation and covariance of the two columns.

    Args:
        x: name of the column in ``df`` to compare with total crimes.
    """
    totals = df.Total_crimes
    thinkplot.Scatter(df[x], totals, alpha=.5)

    # The 'month' column is labelled as time rather than as a crime category.
    if x == 'month':
        title, xlabel = "Total Crimes vs Time", "Year"
    else:
        xlabel = x + " Crimes"
        title = "Total Crimes vs " + xlabel
    thinkplot.Show(title=title, xlabel=xlabel, ylabel="Total Crimes")

    print(x + " crime stats")
    spearman = thinkstats2.SpearmanCorr(totals, df[x])
    print("Spearman's correlation:", spearman)
    covariance = thinkstats2.Cov(totals, df[x])
    print("Covariance:", covariance)
    print()
def main():
    """Generate correlated pairs, print summary statistics, and plot them.

    Seeds the random module for reproducibility, draws 1000 pairs from
    CorrelatedGenerator with rho = -0.8, applies the affine transform
    ``a * x + b`` to the x values, then prints mean/variance, covariance,
    Pearson and Spearman correlation before showing a scatter plot.
    """
    random.seed(17)

    rho = -0.8
    pairs = CorrelatedGenerator(1000, rho)
    xs, ys = zip(*pairs)

    # Affine transform of xs; with a=1, b=0 the values are left unchanged.
    a = 1.0
    b = 0.0
    xs = [a * x + b for x in xs]

    # print() calls replace the Python 2 print statements, which are a
    # SyntaxError on Python 3; the text written is the same.
    print('mean, var of x', thinkstats2.MeanVar(xs))
    print('mean, var of y', thinkstats2.MeanVar(ys))

    print('covariance', thinkstats2.Cov(xs, ys))
    print('Pearson corr', thinkstats2.Corr(xs, ys))
    print('Spearman corr', thinkstats2.SpearmanCorr(xs, ys))

    thinkplot.Scatter(xs, ys)
    thinkplot.Show()
def Correlations(df):
    """Print covariance and correlation of ``df.htm3`` and ``df.wtkg2``.

    Each statistic is computed with pandas and with thinkstats2 so the
    results can be compared side by side. Spearman and Pearson
    correlations are also printed against the natural log of ``wtkg2``.

    Args:
        df: object exposing ``htm3`` and ``wtkg2`` columns that provide
            pandas-style ``cov`` and ``corr`` methods.
    """
    xs = df.htm3
    ys = df.wtkg2

    print('pandas cov', xs.cov(ys))
    print('thinkstats2 Cov', thinkstats2.Cov(xs, ys))
    print()

    print('pandas corr', xs.corr(ys))
    print('thinkstats2 Corr', thinkstats2.Corr(xs, ys))
    print()

    # Log-transform once and reuse it for both log-based statistics.
    log_ys = np.log(ys)

    print('pandas corr spearman', xs.corr(ys, method='spearman'))
    print('thinkstats2 SpearmanCorr', thinkstats2.SpearmanCorr(xs, ys))
    # Labels name wtkg2, the column actually used (previously "wtkg3").
    print('thinkstats2 SpearmanCorr log wtkg2',
          thinkstats2.SpearmanCorr(xs, log_ys))
    print()

    print('thinkstats2 Corr log wtkg2', thinkstats2.Corr(xs, log_ys))
    print()
def ComputeAirlineArrivalDelayCorrelations(flights):
    """Print covariance and correlations of airline code vs. arrival delay.

    This is similar to Correlations() in scatter.py: each statistic is
    printed once from pandas and once from thinkstats2.

    Args:
        flights: DataFrame with 'AIRLINE', 'AIRLINE_CODE' and
            'ARRIVAL_DELAY' columns. Rows with a missing 'AIRLINE' or
            'ARRIVAL_DELAY' are dropped from a copy; the caller's frame
            is not modified.
    """
    # NOTE(review): rows are filtered on 'AIRLINE' while the statistics use
    # 'AIRLINE_CODE' - confirm that a missing AIRLINE_CODE cannot survive.
    complete = flights.dropna(subset=['AIRLINE', 'ARRIVAL_DELAY'])
    codes = complete.AIRLINE_CODE
    delays = complete.ARRIVAL_DELAY

    print('pandas cov', codes.cov(delays))
    print('thinkstats2 Cov', thinkstats2.Cov(codes, delays))
    print()

    print('pandas corr Pearson', codes.corr(delays))
    print('thinkstats2 Corr Pearson', thinkstats2.Corr(codes, delays))
    print()

    print('pandas corr spearman', codes.corr(delays, method='spearman'))
    print('thinkstats2 SpearmanCorr', thinkstats2.SpearmanCorr(codes, delays))
    print()