def test_weightstats_2(self):
        """Compare weighted 1d statistics with numpy/scipy on repeated data.

        Weighted results from ``DescrStatsW`` and ``ttest_ind`` are checked
        against plain numpy/scipy results computed on the arrays returned by
        ``asrepeats()``.
        """
        sample1, sample2 = self.x1, self.x2
        weights1, weights2 = self.w1, self.w2

        unweighted1 = DescrStatsW(sample1)
        weighted1 = DescrStatsW(sample1, weights=weights1)
        weighted2 = DescrStatsW(sample2, weights=weights2)
        repeats1 = weighted1.asrepeats()
        repeats2 = weighted2.asrepeats()

        # two-sample test: weighted ttest_ind vs. scipy on the repeated arrays
        weighted_ttest = ttest_ind(sample1, sample2,
                                   weights=(weights1, weights2))
        scipy_ttest = stats.ttest_ind(repeats1, repeats2)
        assert_almost_equal(weighted_ttest[:2], scipy_ttest, 14)
        # The length of the repeated arrays is not compared with sum_weights:
        # not the same as new version with random weights/replication.

        # descriptive statistics of the second sample
        assert_almost_equal(repeats2.mean(0), weighted2.mean, 14)
        assert_almost_equal(repeats2.var(), weighted2.var, 14)
        assert_almost_equal(repeats2.std(), weighted2.std, 14)
        # note: the following is for 1d
        biased_cov = np.cov(repeats2, bias=1)
        assert_almost_equal(biased_cov, weighted2.cov, 14)
        # TODO: corrcoef is not compared here, exception in corrcoef
        # (scalar case)

        # one-sample tests against the value 3
        assert_almost_equal(unweighted1.ttest_mean(3)[:2],
                            stats.ttest_1samp(sample1, 3), 11)
        assert_almost_equal(weighted1.ttest_mean(3)[:2],
                            stats.ttest_1samp(repeats1, 3), 11)
    def test_weightstats_2(self):
        """Weighted 1d results must match scipy/numpy on the repeated arrays."""
        obs_a, obs_b = self.x1, self.x2
        wts_a, wts_b = self.w1, self.w2

        stats_a_plain = DescrStatsW(obs_a)
        stats_a = DescrStatsW(obs_a, weights=wts_a)
        stats_b = DescrStatsW(obs_b, weights=wts_b)
        rep_a = stats_a.asrepeats()
        rep_b = stats_b.asrepeats()

        # Independent two-sample t-test, statistic and p-value only.
        result_weighted = ttest_ind(obs_a, obs_b, weights=(wts_a, wts_b))[:2]
        result_repeated = stats.ttest_ind(rep_a, rep_b)
        assert_almost_equal(result_weighted, result_repeated, 14)
        # rep_a.shape[0] / rep_b.shape[0] are not checked against
        # sum_weights: not the same as new version with random
        # weights/replication.

        # Moments of the second sample.
        assert_almost_equal(rep_b.mean(0), stats_b.mean, 14)
        assert_almost_equal(rep_b.var(), stats_b.var, 14)
        assert_almost_equal(rep_b.std(), stats_b.std, 14)
        # note: the following is for 1d
        assert_almost_equal(np.cov(rep_b, bias=1), stats_b.cov, 14)
        # TODO: exception in corrcoef (scalar case), so corrcoef is skipped

        # One-sample t-tests of the mean against 3.
        plain_1samp = stats_a_plain.ttest_mean(3)[:2]
        assert_almost_equal(plain_1samp, stats.ttest_1samp(obs_a, 3), 11)
        weighted_1samp = stats_a.ttest_mean(3)[:2]
        assert_almost_equal(weighted_1samp, stats.ttest_1samp(rep_a, 3), 11)
Example #3
0
def compute_summary_statistics(dbm: database_manager.DatabaseManager, tbl_name: str) -> Optional[Dict[str, Tuple]]:
    """
    Computes summary statistics of the monthly returns of the given table.
    :param dbm: A DatabaseManager instance.
    :param tbl_name: name of the table to compute the statistics for.
    :return: dictionary containing various statistics, or None when
        finance_metrics.compute_monthly_returns returns no data frame or no
        info record.
    """
    df, info, start_date = finance_metrics.compute_monthly_returns(dbm, tbl_name)

    if df is None or info is None:
        return None

    monthly_returns = df['Monthly_Return']

    # Run the one-sided ('larger') t-test of the mean once and reuse the
    # result for both the statistic (element 0) and the p-value (element 1).
    ttest_result = DescrStatsW(monthly_returns.values).ttest_mean(alternative='larger')

    return {
        'table_name': tbl_name,
        'contract_name': info[1],
        # info[3] / info[4] are passed through unchanged (None stays None).
        'type': info[3],
        'subtype': info[4],
        'start-date': start_date,
        # Monthly mean scaled by 12 and monthly std scaled by sqrt(12).
        'ar': monthly_returns.mean() * 12,
        'vol': monthly_returns.std() * np.sqrt(12),
        't-stat': ttest_result[0],
        'p-value': ttest_result[1],
        'kurt': monthly_returns.kurt(),
        'skew': monthly_returns.skew(),
    }
Example #4
0
    def test_weightstats_2(self):
        """Check weighted t-tests, var and std against the repeated arrays."""
        first, second = self.x1, self.x2
        first_w, second_w = self.w1, self.w2

        descr_first = DescrStatsW(first)
        descr_first_w = DescrStatsW(first, weights=first_w)
        descr_second_w = DescrStatsW(second, weights=second_w)
        first_rep = descr_first_w.asrepeats()
        second_rep = descr_second_w.asrepeats()

        # weighted two-sample t-test vs. scipy on the repeated arrays
        got = ttest_ind(first, second, weights=(first_w, second_w))[:2]
        want = stats.ttest_ind(first_rep, second_rep)
        assert_almost_equal(got, want, 14)
        # shape[0] of the repeated arrays is not compared with sum_weights:
        # not the same as new version with random weights/replication

        # spread of the second sample
        assert_almost_equal(second_rep.var(), descr_second_w.var, 14)
        assert_almost_equal(second_rep.std(), descr_second_w.std, 14)

        # one-sample tests
        got = descr_first.ttest_mean(3)[:2]
        assert_almost_equal(got, stats.ttest_1samp(first, 3), 11)
        got = descr_first_w.ttest_mean(3)[:2]
        assert_almost_equal(got, stats.ttest_1samp(first_rep, 3), 11)
    def test_weightstats_3(self):
        """Column-wise weighted statistics vs. numpy/scipy on repeated data.

        Uses the ``x1_2d`` / ``x2_2d`` fixtures (presumably 2d arrays, as the
        names and the axis-0 reductions suggest).
        """
        data1, data2 = self.x1_2d, self.x2_2d
        wts1, wts2 = self.w1, self.w2

        descr1 = DescrStatsW(data1, weights=wts1)
        descr2 = DescrStatsW(data2, weights=wts2)
        rep1 = descr1.asrepeats()
        rep2 = descr2.asrepeats()

        # moments along axis 0 of the second sample
        assert_almost_equal(rep2.mean(0), descr2.mean, 14)
        assert_almost_equal(rep2.var(0), descr2.var, 14)
        assert_almost_equal(rep2.std(0), descr2.std, 14)
        # np.cov / np.corrcoef expect variables in rows, hence the transpose
        rep2_rows = rep2.T
        assert_almost_equal(np.cov(rep2_rows, bias=1), descr2.cov, 14)
        assert_almost_equal(np.corrcoef(rep2.T), descr2.corrcoef, 14)

        # one-sample test against 3; scipy.stats.ttest is also vectorized
        tstat, pvalue, _dof = descr1.ttest_mean(3)
        assert_almost_equal([tstat, pvalue], stats.ttest_1samp(rep1, 3), 11)

        # two-sample test through CompareMeans
        compare = CompareMeans(descr1, descr2)
        res_weighted = compare.ttest_ind()
        res_scipy = stats.ttest_ind(rep1, rep2)
        assert_almost_equal(res_weighted[:2], res_scipy, 14)
    def test_weightstats_3(self):
        """Weighted statistics of the ``*_2d`` fixtures must match repeats."""
        arr_a, arr_b = self.x1_2d, self.x2_2d
        w_a, w_b = self.w1, self.w2

        dsw_a = DescrStatsW(arr_a, weights=w_a)
        dsw_b = DescrStatsW(arr_b, weights=w_b)
        expanded_a = dsw_a.asrepeats()
        expanded_b = dsw_b.asrepeats()

        # Descriptive statistics, reduced over axis 0.
        mean_b = expanded_b.mean(0)
        assert_almost_equal(mean_b, dsw_b.mean, 14)
        var_b = expanded_b.var(0)
        assert_almost_equal(var_b, dsw_b.var, 14)
        std_b = expanded_b.std(0)
        assert_almost_equal(std_b, dsw_b.std, 14)
        cov_b = np.cov(expanded_b.T, bias=1)
        assert_almost_equal(cov_b, dsw_b.cov, 14)
        corr_b = np.corrcoef(expanded_b.T)
        assert_almost_equal(corr_b, dsw_b.corrcoef, 14)

        # One-sample t-test against 3 (scipy.stats.ttest is also vectorized).
        t_val, p_val, _unused = dsw_a.ttest_mean(3)
        reference_1samp = stats.ttest_1samp(expanded_a, 3)
        assert_almost_equal([t_val, p_val], reference_1samp, 11)

        # Two-sample t-test.
        comparison = CompareMeans(dsw_a, dsw_b)
        from_statsmodels = comparison.ttest_ind()
        from_scipy = stats.ttest_ind(expanded_a, expanded_b)
        assert_almost_equal(from_statsmodels[:2], from_scipy, 14)
    def test_weightstats_ddof_tests(self):
        """ttest_mean and tconfint_mean must not depend on the user ddof.

        One sample case: the same weighted data is wrapped with ddof 0, 1
        and 2, and the ddof=1 / ddof=2 results are compared with ddof=0.
        """
        data = self.x1_2d
        weights = self.w1

        ddof0 = DescrStatsW(data, weights=weights, ddof=0)
        ddof1 = DescrStatsW(data, weights=weights, ddof=1)
        ddof2 = DescrStatsW(data, weights=weights, ddof=2)

        # check ttest (default null value) independent of user ddof
        base = ddof0.ttest_mean()
        alt1 = ddof1.ttest_mean()
        alt2 = ddof2.ttest_mean()
        for alt in (alt1, alt2):
            # concatenate into one array with np.r_
            assert_almost_equal(np.r_[alt], np.r_[base], 14)

        # same check with null value 0.5
        base = ddof0.ttest_mean(0.5)
        alt1 = ddof1.ttest_mean(0.5)
        alt2 = ddof2.ttest_mean(0.5)
        for alt in (alt1, alt2):
            assert_almost_equal(np.r_[alt], np.r_[base], 14)

        # check confint independent of user ddof
        base = ddof0.tconfint_mean()
        alt1 = ddof1.tconfint_mean()
        alt2 = ddof2.tconfint_mean()
        for alt in (alt1, alt2):
            assert_almost_equal(alt, base, 14)
    def test_weightstats_ddof_tests(self):
        """Explicitly test that ttest and confint are independent of ddof.

        One sample case only.
        """
        x1_2d = self.x1_2d
        w1 = self.w1

        # One DescrStatsW per user-supplied ddof, in the order 0, 1, 2.
        by_ddof = [DescrStatsW(x1_2d, weights=w1, ddof=k) for k in (0, 1, 2)]

        # t-test of the mean with the default null value
        ref, with_d1, with_d2 = [d.ttest_mean() for d in by_ddof]
        # np.r_ concatenates each result tuple into one array
        assert_almost_equal(np.r_[with_d1], np.r_[ref], 14)
        assert_almost_equal(np.r_[with_d2], np.r_[ref], 14)

        # t-test of the mean against 0.5
        ref, with_d1, with_d2 = [d.ttest_mean(0.5) for d in by_ddof]
        assert_almost_equal(np.r_[with_d1], np.r_[ref], 14)
        assert_almost_equal(np.r_[with_d2], np.r_[ref], 14)

        # confidence interval of the mean
        ref, with_d1, with_d2 = [d.tconfint_mean() for d in by_ddof]
        assert_almost_equal(with_d1, ref, 14)
        assert_almost_equal(with_d2, ref, 14)
Example #9
0
def one_t_test(pdf, data_measlevs, var_name, test_value=0):
    """One sample t-test

    arguments:
    pdf:
        Data container indexed by variable name; ``pdf[var_name].dropna()``
        must work (presumably a pandas DataFrame -- confirm against callers).
    data_measlevs (dict-like):
        Maps variable names to measurement levels. The test is run only for
        the levels 'int' and 'unk'.
    var_name (str):
        Name of the variable to test.
    test_value (numeric):
        Test against this value.

    return:
    ci_text (str):
        Confidence interval of the mean as text; '' when the test is not run
        because the variable is neither 'int' nor 'unk'.
    text_result (html str):
        Result in APA format.
    image (matplotlib):
        Bar chart with mean and confidence interval, or None when the test
        is not run.

    NOTE(review): for a constant variable only two values, (message, None),
    are returned. That shape is kept unchanged for existing callers; confirm
    whether it should be aligned with the three-value return.
    """
    text_result = ''
    # Default value: the non-interval branch below never computes an
    # interval, and the final return would otherwise hit an unbound local.
    ci_text = ''
    data = pdf[var_name].dropna()
    if data_measlevs[var_name] in ['int', 'unk']:
        if data_measlevs[var_name] == 'unk':
            text_result += warn_unknown_variable
        if len(set(data)) == 1:
            return _('One sample t-test cannot be run for constant variable.\n'
                     ), None

        descr = DescrStatsW(data)
        t, p, df = descr.ttest_mean(float(test_value))
        if LooseVersion(csc.versions['statsmodels']) >= LooseVersion('0.5'):
            # Or we could use confidence_interval_t
            cil, cih = descr.tconfint_mean()
            # Half-width of the interval, passed on to the chart.
            ci = (cih - cil) / 2
            prec = cs_util.precision(data) + 1
            ci_text = '[%0.*f, %0.*f]' % (prec, cil, prec, cih)
        else:
            ci = 0  # only with statsmodels
            ci_text = _(
                'Sorry, newer statsmodels module is required for confidence interval.\n'
            )
        text_result += _('One sample t-test against %g') % float(
            test_value) + ': <i>t</i>(%d) = %0.3g, %s\n' % (df, t,
                                                            cs_util.print_p(p))

        # Graph
        image = cs_chart.create_variable_population_chart(data, var_name, ci)
    else:
        text_result += _(
            'One sample t-test is computed only for interval variables.')
        image = None
    return ci_text, text_result, image
Example #10
0
    def test_weightstats_3(self):
        """Weighted t-tests on the ``*_2d`` fixtures vs. scipy on repeats."""
        left, right = self.x1_2d, self.x2_2d
        left_w, right_w = self.w1, self.w2

        left_stats = DescrStatsW(left, weights=left_w)
        right_stats = DescrStatsW(right, weights=right_w)
        left_rep = left_stats.asrepeats()
        right_rep = right_stats.asrepeats()

        # one-sample t-test against 3; scipy.stats.ttest is also vectorized
        t_stat, p_val, _dof = left_stats.ttest_mean(3)
        expected_1samp = stats.ttest_1samp(left_rep, 3)
        assert_almost_equal([t_stat, p_val], expected_1samp, 11)

        # two-sample t-test, statistic and p-value only
        means_cmp = CompareMeans(left_stats, right_stats)
        res_sm = means_cmp.ttest_ind()
        res_sp = stats.ttest_ind(left_rep, right_rep)
        assert_almost_equal(res_sm[:2], res_sp, 14)
Example #11
0
    def compute_rule(self):
        """Compute a clipped rolling t-score for every asset column.

        Reads ``self.daily_ret`` (one column per asset) and ``self.lookback``
        (window length in rows). For each row ``i`` of an asset, the score is
        the first element of ``DescrStatsW(window).ttest_mean(0, 'larger')``
        where ``window`` is the ``lookback`` rows of ``log(daily_ret + 1)``
        preceding row ``i``. Rows that are NaN, rows inside the warm-up
        period, and windows whose ``std_mean`` is 0 get a score of 0. Scores
        are clipped to [-1, 1].

        Progress is printed per asset.

        :return: DataFrame of scores with one column per asset, indexed by
            ``self.daily_ret.index`` (the index is named 'index').
        """
        daily_ret_log = np.log(self.daily_ret + 1)
        df = pd.DataFrame()
        n_rows = self.daily_ret.shape[0]
        n_assets = daily_ret_log.shape[1]

        for asset_pos, asset in enumerate(daily_ret_log.columns):
            print('TREND Progress: {}%'.format(
                int(asset_pos * 100 / n_assets)))

            data = daily_ret_log[asset]
            # Counts every NaN row seen so far; it pushes the end of the
            # warm-up period back by one row per NaN.
            first_not_null = 0
            t_scores = []

            for i in range(n_rows):
                if np.isnan(data.iloc[i]):
                    first_not_null += 1
                    # TODO verify
                    t_scores.append(0)
                    continue

                # Not enough history yet for a full lookback window.
                if i <= first_not_null + self.lookback:
                    t_scores.append(0)
                    continue

                window_stats = DescrStatsW(data.iloc[i - self.lookback:i])

                # A zero standard error would make the t-statistic undefined.
                if window_stats.std_mean == 0:
                    print('Period of all zeroes for asset {} from {}'.format(
                        asset, data.index[i - self.lookback]))
                    t_scores.append(0)
                    continue

                t_scores.append(window_stats.ttest_mean(0, 'larger')[0])

            df[asset] = np.clip(t_scores, -1.0, 1.0)

        df['index'] = self.daily_ret.index
        df.set_index('index', inplace=True)

        return df
Example #12
0
# Leave-one-out cross-validation of a 5-nearest-neighbours regressor on
# (X, y), scored with negated mean squared error.
my_knn_socres = cross_val_score(KNeighborsRegressor(n_neighbors=5),
                                X,
                                y,
                                cv=LeaveOneOut(),
                                scoring='neg_mean_squared_error')

# Square root of the mean squared error (RMSE). my_lm_scores is defined
# outside this snippet.
(-my_lm_scores.mean())**0.5
#> 15.697306009399101 # linear regression

(-my_knn_socres.mean())**0.5
#> 16.07308308943869 # K-nearest neighbours

# Sign-flipped scores of both models, side by side.
my_df = pd.DataFrame({'lm': -my_lm_scores, 'knn': -my_knn_socres})
my_df.head()
#>            lm     knn
#> 0   18.913720  108.16
#> 1  179.215044    0.64
#> 2   41.034336   64.00
#> 3  168.490212  184.96
#> 4    5.085308    0.00

# NOTE(review): the axis label reads r^2, but the plotted columns are the
# sign-flipped neg-MSE scores -- confirm the intended label.
my_df.boxplot().set_ylabel("$r^2$")

# t-test and confidence interval for the mean of the per-row difference
# between the two columns.
from statsmodels.stats.weightstats import DescrStatsW
d = DescrStatsW(my_df.lm - my_df.knn)
d.ttest_mean()[1]  # p-value
#> 0.6952755720536115

d.tconfint_mean(alpha=0.05, alternative='two-sided')  # confidence interval
#> (-72.8275283312228, 48.95036023665703)
Example #13
0
# Two samples of equal length; compared below first as paired data, then as
# two separate samples.
X = [
    32.1, 26.2, 27.5, 31.8, 32.1, 31.2, 30.1, 32.4, 32.3, 29.9, 29.6, 26.6,
    31.2, 30.9, 29.3
]
Y = [
    35.4, 34.6, 31.1, 32.4, 33.3, 34.7, 35.3, 34.3, 32.1, 28.3, 33.3, 30.5,
    32.6, 33.3, 32.2
]

a = 0.05  # significance level (default) = 1 - confidence coefficient
alt = 'two-sided'  # two-sided test (default)
# for a left one-sided test use 'smaller'
# for a right one-sided test use 'larger'

# Statistics of the element-wise differences X - Y.
d = DescrStatsW(np.array(X) - np.array(Y))  # paired samples
d.ttest_mean(alternative=alt)[1]  # p-value
#> 0.0006415571512322235

d.tconfint_mean(alpha=a, alternative=alt)  # confidence interval
#> (-3.9955246743198867, -1.3644753256801117)

c = CompareMeans(DescrStatsW(X), DescrStatsW(Y))  # unpaired samples

ve = 'pooled'  # assume equal variances (default); use 'unequal' otherwise
c.ttest_ind(alternative=alt, usevar=ve)[1]  # p-value
#> 0.000978530937238609

c.tconfint_diff(alpha=a, alternative=alt, usevar=ve)  # confidence interval
#> (-4.170905570517185, -1.1890944294828283)

### 4.4.4 Test of independence (chi-squared test)
Example #14
0
# p-value criterion

# Upper one-tailed test
# Reject H_0 if the p-value \leq \alpha

# t, significancia, amostra and media are defined outside this snippet.
p_valor = t_student.sf(t, df=24)
p_valor <= significancia

# Another way to obtain the answer

from statsmodels.stats.weightstats import DescrStatsW

test = DescrStatsW(amostra)

# The third value returned by ttest_mean is the degrees of freedom. It gets
# its own name so that `df`, which is used as a data frame further down, is
# not overwritten by a number.
t, p_valor, graus_liberdade = test.ttest_mean(value=media, alternative='larger')
print('Valor de t', t[0])
print('P-valor', p_valor[0])
print('Graus de liberdade', graus_liberdade)

p_valor[0] <= significancia

# Conclusion: at the 95% confidence level we cannot reject H_0,
# i.e. the data are consistent with the manufacturer's claim

# Two-variable test

# Sample selection

df.head()
df['Species'].unique()