def test_weightstats_2(self):
    """Check weighted 1-d statistics against the ``asrepeats()`` expansion.

    Results obtained from ``DescrStatsW`` with weights are compared with the
    numpy/scipy results computed on the arrays returned by ``asrepeats()``.
    """
    sample_a, sample_b = self.x1, self.x2
    weights_a, weights_b = self.w1, self.w2

    plain_a = DescrStatsW(sample_a)
    weighted_a = DescrStatsW(sample_a, weights=weights_a)
    weighted_b = DescrStatsW(sample_b, weights=weights_b)
    repeats_a = weighted_a.asrepeats()
    repeats_b = weighted_b.asrepeats()

    # Two-sample t-test: weighted version vs. scipy on the repeated samples.
    weighted_ttest = ttest_ind(sample_a, sample_b,
                               weights=(weights_a, weights_b))
    scipy_ttest = stats.ttest_ind(repeats_a, repeats_b)
    assert_almost_equal(weighted_ttest[:2], scipy_ttest, 14)

    # NOTE: the number of repeated rows is deliberately not compared with
    # sum_weights; per the original author this is "not the same as new
    # version with random weights/replication".

    # Descriptive statistics of the second sample.
    assert_almost_equal(repeats_b.mean(0), weighted_b.mean, 14)
    assert_almost_equal(repeats_b.var(), weighted_b.var, 14)
    assert_almost_equal(repeats_b.std(), weighted_b.std, 14)
    # The covariance comparison below is written for the 1-d case.
    assert_almost_equal(np.cov(repeats_b, bias=1), weighted_b.cov, 14)
    # TODO: the corrcoef comparison is disabled: exception in corrcoef
    # (scalar case).

    # One-sample t-tests against the value 3, unweighted and weighted.
    assert_almost_equal(plain_a.ttest_mean(3)[:2],
                        stats.ttest_1samp(sample_a, 3), 11)
    assert_almost_equal(weighted_a.ttest_mean(3)[:2],
                        stats.ttest_1samp(repeats_a, 3), 11)
def test_weightstats_2(self):
    """Compare weighted 1-d results with results on the repeated samples.

    The weighted ``DescrStatsW`` / ``ttest_ind`` results must agree with the
    numpy/scipy results computed on the arrays returned by ``asrepeats()``.
    """
    first, second = self.x1, self.x2
    first_w, second_w = self.w1, self.w2

    descr_first = DescrStatsW(first)
    descr_first_w = DescrStatsW(first, weights=first_w)
    descr_second_w = DescrStatsW(second, weights=second_w)
    first_rep = descr_first_w.asrepeats()
    second_rep = descr_second_w.asrepeats()

    # Weighted two-sample t-test vs. scipy's t-test on the repeated data.
    res_weighted = ttest_ind(first, second, weights=(first_w, second_w))
    res_repeated = stats.ttest_ind(first_rep, second_rep)
    assert_almost_equal(res_weighted[:2], res_repeated, 14)

    # NOTE: row counts of the repeated arrays are not checked against
    # sum_weights; the original remark is "not the same as new version with
    # random weights/replication".

    # Moments of the second sample.
    assert_almost_equal(second_rep.mean(0), descr_second_w.mean, 14)
    assert_almost_equal(second_rep.var(), descr_second_w.var, 14)
    assert_almost_equal(second_rep.std(), descr_second_w.std, 14)
    # Covariance check as written applies to 1-d data.
    assert_almost_equal(np.cov(second_rep, bias=1), descr_second_w.cov, 14)
    # TODO: corrcoef is not compared: exception in corrcoef (scalar case).

    # One-sample t-tests of the mean against 3.
    expected_plain = stats.ttest_1samp(first, 3)
    assert_almost_equal(descr_first.ttest_mean(3)[:2], expected_plain, 11)
    expected_weighted = stats.ttest_1samp(first_rep, 3)
    assert_almost_equal(descr_first_w.ttest_mean(3)[:2], expected_weighted, 11)
def compute_summary_statistics(dbm: database_manager.DatabaseManager, tbl_name: str) -> Optional[Dict[str, Tuple]]:
    """
    Computes summary statistics for given table.

    The statistics are derived from the ``Monthly_Return`` column of the
    frame returned by ``finance_metrics.compute_monthly_returns``.

    :param dbm: A DatabaseManager instance.
    :param tbl_name: name of the table to compute summary statistics for.
    :return: dictionary containing various statistics, or None when
        ``compute_monthly_returns`` yields no data frame or no info record.
        NOTE(review): the annotated value type ``Tuple`` does not match the
        str/scalar values stored here; the annotation is kept unchanged for
        interface compatibility.
    """
    df, info, start_date = finance_metrics.compute_monthly_returns(dbm, tbl_name)
    if df is None or info is None:
        return None

    monthly_returns = df['Monthly_Return']
    # Run the one-sided t-test (alternative='larger', ttest_mean's default
    # null value) once and reuse both outputs instead of recomputing it for
    # each field.
    t_stat, p_value = DescrStatsW(monthly_returns.values).ttest_mean(alternative='larger')[:2]

    return {
        'table_name': tbl_name,
        'contract_name': info[1],
        # info[3] / info[4] are stored as-is, including None.
        'type': info[3],
        'subtype': info[4],
        'start-date': start_date,
        # Mean monthly return scaled by 12 months.
        'ar': monthly_returns.mean() * 12,
        # Monthly standard deviation scaled by sqrt(12).
        'vol': monthly_returns.std() * np.sqrt(12),
        't-stat': t_stat,
        'p-value': p_value,
        'kurt': monthly_returns.kurt(),
        'skew': monthly_returns.skew(),
    }
def test_weightstats_2(self):
    """Check weighted 1-d t-tests and spread against the repeated samples.

    Weighted results are compared with numpy/scipy results computed on the
    arrays returned by ``asrepeats()``.
    """
    obs1, obs2 = self.x1, self.x2
    wts1, wts2 = self.w1, self.w2

    stats_plain1 = DescrStatsW(obs1)
    stats_weighted1 = DescrStatsW(obs1, weights=wts1)
    stats_weighted2 = DescrStatsW(obs2, weights=wts2)
    expanded1 = stats_weighted1.asrepeats()
    expanded2 = stats_weighted2.asrepeats()

    # Two-sample t-test: weighted call vs. scipy on the expanded samples.
    from_weights = ttest_ind(obs1, obs2, weights=(wts1, wts2))
    from_expanded = stats.ttest_ind(expanded1, expanded2)
    assert_almost_equal(from_weights[:2], from_expanded, 14)

    # NOTE: the expanded row counts are not compared with sum_weights; the
    # original remark is "not the same as new version with random
    # weights/replication".

    # Variance and standard deviation of the second sample.
    assert_almost_equal(expanded2.var(), stats_weighted2.var, 14)
    assert_almost_equal(expanded2.std(), stats_weighted2.std, 14)

    # One-sample t-tests against 3, unweighted then weighted.
    assert_almost_equal(stats_plain1.ttest_mean(3)[:2],
                        stats.ttest_1samp(obs1, 3), 11)
    assert_almost_equal(stats_weighted1.ttest_mean(3)[:2],
                        stats.ttest_1samp(expanded1, 3), 11)
def test_weightstats_3(self):
    """Check weighted 2-d statistics against the ``asrepeats()`` expansion.

    Column-wise moments, covariance, correlation and t-tests from the
    weighted ``DescrStatsW`` objects must agree with numpy/scipy results on
    the arrays returned by ``asrepeats()``.
    """
    data_a, data_b = self.x1_2d, self.x2_2d
    weights_a, weights_b = self.w1, self.w2

    weighted_a = DescrStatsW(data_a, weights=weights_a)
    weighted_b = DescrStatsW(data_b, weights=weights_b)
    repeats_a = weighted_a.asrepeats()
    repeats_b = weighted_b.asrepeats()

    # Moments of the second sample, taken along axis 0.
    assert_almost_equal(repeats_b.mean(0), weighted_b.mean, 14)
    assert_almost_equal(repeats_b.var(0), weighted_b.var, 14)
    assert_almost_equal(repeats_b.std(0), weighted_b.std, 14)
    # np.cov / np.corrcoef are given the transposed array.
    assert_almost_equal(np.cov(repeats_b.T, bias=1), weighted_b.cov, 14)
    assert_almost_equal(np.corrcoef(repeats_b.T), weighted_b.corrcoef, 14)

    # One-sample t-test against 3; the original author notes that the scipy
    # t-test is vectorized as well. The third returned value is not compared.
    tstat, pvalue, _dof = weighted_a.ttest_mean(3)
    assert_almost_equal([tstat, pvalue], stats.ttest_1samp(repeats_a, 3), 11)

    # Two-sample t-test through CompareMeans vs. scipy on the repeated data.
    comparison = CompareMeans(weighted_a, weighted_b)
    sm_result = comparison.ttest_ind()
    scipy_result = stats.ttest_ind(repeats_a, repeats_b)
    assert_almost_equal(sm_result[:2], scipy_result, 14)
def test_weightstats_3(self):
    """Compare weighted 2-d results with results on the repeated samples.

    Moments, covariance, correlation and t-tests from the weighted
    ``DescrStatsW`` objects are checked against numpy/scipy applied to the
    arrays returned by ``asrepeats()``.
    """
    first, second = self.x1_2d, self.x2_2d
    first_w, second_w = self.w1, self.w2

    descr_first = DescrStatsW(first, weights=first_w)
    descr_second = DescrStatsW(second, weights=second_w)
    first_rep = descr_first.asrepeats()
    second_rep = descr_second.asrepeats()

    # Axis-0 moments of the second sample.
    assert_almost_equal(second_rep.mean(0), descr_second.mean, 14)
    assert_almost_equal(second_rep.var(0), descr_second.var, 14)
    assert_almost_equal(second_rep.std(0), descr_second.std, 14)
    # Covariance and correlation use the transposed repeated array.
    assert_almost_equal(np.cov(second_rep.T, bias=1), descr_second.cov, 14)
    assert_almost_equal(np.corrcoef(second_rep.T), descr_second.corrcoef, 14)

    # One-sample t-test against 3 (scipy's t-test is vectorized too, per the
    # original note); only statistic and p-value are compared.
    t_value, p_value, _unused = descr_first.ttest_mean(3)
    expected_one_sample = stats.ttest_1samp(first_rep, 3)
    assert_almost_equal([t_value, p_value], expected_one_sample, 11)

    # Two-sample t-test via CompareMeans.
    means_cmp = CompareMeans(descr_first, descr_second)
    res_statsmodels = means_cmp.ttest_ind()
    res_scipy = stats.ttest_ind(first_rep, second_rep)
    assert_almost_equal(res_statsmodels[:2], res_scipy, 14)
def test_weightstats_ddof_tests(self):
    """Check that t-test and confidence interval do not depend on ``ddof``.

    One-sample case: the same weighted 2-d data is wrapped with ddof 0, 1
    and 2, and the ddof=1 / ddof=2 results must match the ddof=0 results.
    """
    data, weights = self.x1_2d, self.w1
    # Index 0 (ddof=0) is the reference the other two are compared with.
    descr_by_ddof = [DescrStatsW(data, weights=weights, ddof=ddof)
                     for ddof in (0, 1, 2)]

    # t-test of the mean: first with the default null value, then against
    # 0.5.
    for ttest_args in ((), (0.5,)):
        results = [descr.ttest_mean(*ttest_args) for descr in descr_by_ddof]
        for other in results[1:]:
            # np.r_ concatenates each result tuple into one array.
            assert_almost_equal(np.r_[other], np.r_[results[0]], 14)

    # Confidence interval of the mean must be independent of ddof as well.
    intervals = [descr.tconfint_mean() for descr in descr_by_ddof]
    for other in intervals[1:]:
        assert_almost_equal(other, intervals[0], 14)
def test_weightstats_ddof_tests(self):
    """Verify ``ttest_mean`` and ``tconfint_mean`` ignore the user ``ddof``.

    One-sample case only. Three ``DescrStatsW`` objects are built from the
    same weighted 2-d data with ddof 0, 1 and 2; results for ddof 1 and 2
    are compared with those for ddof 0.
    """
    sample = self.x1_2d
    sample_weights = self.w1
    reference, with_ddof1, with_ddof2 = [
        DescrStatsW(sample, weights=sample_weights, ddof=k) for k in range(3)
    ]

    # t-test with the default null value; np.r_ flattens each result tuple
    # into a single array so one comparison covers every element.
    expected = reference.ttest_mean()
    got_ddof1 = with_ddof1.ttest_mean()
    got_ddof2 = with_ddof2.ttest_mean()
    assert_almost_equal(np.r_[got_ddof1], np.r_[expected], 14)
    assert_almost_equal(np.r_[got_ddof2], np.r_[expected], 14)

    # t-test against the value 0.5.
    expected = reference.ttest_mean(0.5)
    got_ddof1 = with_ddof1.ttest_mean(0.5)
    got_ddof2 = with_ddof2.ttest_mean(0.5)
    assert_almost_equal(np.r_[got_ddof1], np.r_[expected], 14)
    assert_almost_equal(np.r_[got_ddof2], np.r_[expected], 14)

    # Confidence interval of the mean.
    expected = reference.tconfint_mean()
    got_ddof1 = with_ddof1.tconfint_mean()
    got_ddof2 = with_ddof2.tconfint_mean()
    assert_almost_equal(got_ddof1, expected, 14)
    assert_almost_equal(got_ddof2, expected, 14)
def one_t_test(pdf, data_measlevs, var_name, test_value=0):
    """One sample t-test

    arguments:
    pdf: table indexed by variable name; ``pdf[var_name].dropna()`` supplies
        the data to test.
    data_measlevs: mapping from variable name to measurement level; the test
        is run only for the levels 'int' and 'unk'.
    var_name (str): Name of the variable to test.
    test_value (numeric): Test against this value.

    return:
    ci_text (str): Confidence interval of the mean as '[low, high]', a
        notice when statsmodels is older than 0.5, or '' when the test is
        not computed.
    text_result (html str): Result in APA format.
    image (matplotlib): Bar chart with mean and confidence interval, or None
        when the test is not computed.

    NOTE(review): for a constant variable the function returns a 2-tuple
    ``(message, None)`` instead of the usual 3-tuple. This is kept as-is for
    backward compatibility; confirm what the callers expect.
    """
    text_result = ''
    # Bound up front so the non-interval branch can reach the final return;
    # previously that branch raised UnboundLocalError on ci_text.
    ci_text = ''
    data = pdf[var_name].dropna()
    if data_measlevs[var_name] in ['int', 'unk']:
        if data_measlevs[var_name] == 'unk':
            text_result += warn_unknown_variable
        if len(set(data)) == 1:
            return _('One sample t-test cannot be run for constant variable.\n'), None

        descr = DescrStatsW(data)
        t, p, df = descr.ttest_mean(float(test_value))
        if LooseVersion(csc.versions['statsmodels']) >= LooseVersion('0.5'):
            # Or we could use confidence_interval_t
            cil, cih = descr.tconfint_mean()
            # Half-width of the interval, passed on to the chart.
            ci = (cih - cil) / 2
            prec = cs_util.precision(data) + 1
            ci_text = '[%0.*f, %0.*f]' % (prec, cil, prec, cih)
        else:
            ci = 0  # only with statsmodels
            ci_text = _('Sorry, newer statsmodels module is required for confidence interval.\n')
        text_result += _('One sample t-test against %g') % float(test_value) + \
            ': <i>t</i>(%d) = %0.3g, %s\n' % (df, t, cs_util.print_p(p))

        # Graph
        image = cs_chart.create_variable_population_chart(data, var_name, ci)
    else:
        text_result += _('One sample t-test is computed only for interval variables.')
        image = None
    return ci_text, text_result, image
def test_weightstats_3(self):
    """Check weighted 2-d t-tests against scipy on the repeated samples.

    The one-sample and two-sample t-tests from the weighted ``DescrStatsW``
    objects are compared with scipy's t-tests applied to the arrays returned
    by ``asrepeats()``.
    """
    data_a, data_b = self.x1_2d, self.x2_2d
    weights_a, weights_b = self.w1, self.w2

    weighted_a = DescrStatsW(data_a, weights=weights_a)
    weighted_b = DescrStatsW(data_b, weights=weights_b)
    repeats_a = weighted_a.asrepeats()
    repeats_b = weighted_b.asrepeats()

    # One-sample t-test against 3; the original author notes that the scipy
    # t-test is vectorized as well. The third returned value is not compared.
    tstat, pvalue, _dof = weighted_a.ttest_mean(3)
    expected_one_sample = stats.ttest_1samp(repeats_a, 3)
    assert_almost_equal([tstat, pvalue], expected_one_sample, 11)

    # Two-sample t-test through CompareMeans vs. scipy on the repeated data.
    comparison = CompareMeans(weighted_a, weighted_b)
    sm_result = comparison.ttest_ind()
    scipy_result = stats.ttest_ind(repeats_a, repeats_b)
    assert_almost_equal(sm_result[:2], scipy_result, 14)
def compute_rule(self):
    """Build a frame of clipped rolling t-statistics, one column per asset.

    For every column of ``self.daily_ret`` the log return ``log(1 + r)`` is
    taken, and for each row ``i`` a one-sample t-test of the mean (null
    value 0, alternative 'larger') is run on the preceding ``self.lookback``
    rows. The t-statistic is clipped to [-1.0, 1.0]. Rows that are NaN, rows
    inside the warm-up period, and windows whose ``std_mean`` is 0 get a
    score of 0.

    Progress and all-zero windows are reported with ``print``.

    :return: DataFrame indexed by ``self.daily_ret.index`` (index name
        'index') with one score column per asset.
    """
    daily_ret_log = np.log(self.daily_ret + 1)
    n_rows = self.daily_ret.shape[0]
    n_assets = daily_ret_log.shape[1]
    df = pd.DataFrame()

    # enumerate replaces the hand-maintained counter that shadowed the
    # builtin ``iter``.
    for asset_no, asset in enumerate(daily_ret_log.columns):
        print('TREND Progress: {}%'.format(int(asset_no * 100 / n_assets)))
        data = daily_ret_log[asset]
        # Counts every NaN seen so far in this column (not only leading
        # ones) and pushes the warm-up threshold back accordingly.
        first_not_null = 0
        t_scores = []
        for i in range(n_rows):
            if np.isnan(data.iloc[i]):
                first_not_null += 1  # TODO verify
                t_scores.append(0)
                continue
            if i <= first_not_null + self.lookback:
                # Not enough rows before i for a lookback window yet.
                t_scores.append(0)
                continue
            # Named window_stats so the conventional ``stats`` module name is
            # not shadowed.
            window_stats = DescrStatsW(data.iloc[i - self.lookback:i])
            if window_stats.std_mean == 0:
                print('Period of all zeroes for asset {} from {}'.format(
                    asset, data.index[i - self.lookback]))
                t_scores.append(0)
                continue
            t_scores.append(window_stats.ttest_mean(0, 'larger')[0])
        df[asset] = np.clip(t_scores, -1.0, 1.0)

    df['index'] = self.daily_ret.index
    df.set_index('index', inplace=True)
    return df
my_knn_socres = cross_val_score(KNeighborsRegressor(n_neighbors=5), X, y, cv=LeaveOneOut(), scoring='neg_mean_squared_error') (-my_lm_scores.mean())**0.5 #> 15.697306009399101 # 線形回帰分析 (-my_knn_socres.mean())**0.5 #> 16.07308308943869 # K最近傍法 my_df = pd.DataFrame({'lm': -my_lm_scores, 'knn': -my_knn_socres}) my_df.head() #> lm knn #> 0 18.913720 108.16 #> 1 179.215044 0.64 #> 2 41.034336 64.00 #> 3 168.490212 184.96 #> 4 5.085308 0.00 my_df.boxplot().set_ylabel("$r^2$") from statsmodels.stats.weightstats import DescrStatsW d = DescrStatsW(my_df.lm - my_df.knn) d.ttest_mean()[1] # p値 #> 0.6952755720536115 d.tconfint_mean(alpha=0.05, alternative='two-sided') # 信頼区間 #> (-72.8275283312228, 48.95036023665703)
# Two samples of 15 measurements each.
X = [
    32.1, 26.2, 27.5, 31.8, 32.1, 31.2, 30.1, 32.4,
    32.3, 29.9, 29.6, 26.6, 31.2, 30.9, 29.3
]
Y = [
    35.4, 34.6, 31.1, 32.4, 33.3, 34.7, 35.3, 34.3,
    32.1, 28.3, 33.3, 30.5, 32.6, 33.3, 32.2
]

a = 0.05          # significance level (default) = 1 - confidence coefficient
alt = 'two-sided' # two-sided test (default)
# for a left-tailed test use 'smaller'
# for a right-tailed test use 'larger'

d = DescrStatsW(np.array(X) - np.array(Y)) # paired samples
d.ttest_mean(alternative=alt)[1]           # p-value
#> 0.0006415571512322235

d.tconfint_mean(alpha=a, alternative=alt) # confidence interval
#> (-3.9955246743198867, -1.3644753256801117)

c = CompareMeans(DescrStatsW(X), DescrStatsW(Y)) # unpaired (independent) samples

ve = 'pooled' # assume equal variances (default); use 'unequal' otherwise.
c.ttest_ind(alternative=alt, usevar=ve)[1] # p-value
#> 0.000978530937238609

c.tconfint_diff(alpha=a, alternative=alt, usevar=ve) # confidence interval
#> (-4.170905570517185, -1.1890944294828283)

### 4.4.4 Test of independence (chi-squared test)
# p-value criterion
# Upper one-tailed test
# Reject H_0 if the p-value \leq \alpha
p_valor = t_student.sf(t, df=24)
p_valor <= significancia

# Another way to obtain the answer
from statsmodels.stats.weightstats import DescrStatsW

test = DescrStatsW(amostra)
# ttest_mean returns (t statistic, p-value, degrees of freedom). The third
# value is bound to graus_liberdade rather than df, so that the df object
# used as a DataFrame further down is not overwritten.
t, p_valor, graus_liberdade = test.ttest_mean(value=media, alternative='larger')
print('Valor de t', t[0])
print('P-valor', p_valor[0])
# Label corrected: this value is the degrees of freedom, not a data frame
# mean.
print('Graus de liberdade ', graus_liberdade)
p_valor[0] <= significancia

# Conclusion: at a 95% confidence level we cannot reject H_0,
# i.e. the manufacturer's claim is true

# Two-variable test
# Sample selection
df.head()
df['Species'].unique()