Ejemplo n.º 1
0
def test_tost_asym():
    """Check ``ttost_ind`` with asymmetric equivalence bounds on ``clinic``.

    The single-endpoint estimate and p-value are compared with
    ``tost_clinic_1_asym`` and the vectorized multi-endpoint p-values with
    ``tost_clinic_all_no_multi``.  The remaining calls are smoke tests: they
    only have to run, their return values are not inspected.
    """
    reference, treatment = clinic[:15, 2], clinic[15:, 2]
    # Note: the two samples are reversed by definition in multeq.dif
    assert_almost_equal(treatment.mean() - reference.mean(),
                        tost_clinic_1_asym.estimate, 13)
    single = smws.ttost_ind(treatment, reference, -1.5, 0.6, usevar='unequal')
    assert_almost_equal(single[0], tost_clinic_1_asym.p_value, 13)

    # Inputs shared by the multi-endpoint calls below.
    endpoints_new = clinic[15:, 2:7]
    endpoints_ref = clinic[:15, 2:7]
    lower_bounds = [-1.0, -1.0, -1.5, -1.5, -1.5]
    upper_bound = 0.6

    # multi-endpoints, asymmetric bounds, vectorized
    vectorized = smws.ttost_ind(endpoints_new, endpoints_ref,
                                lower_bounds, upper_bound, usevar='unequal')
    assert_almost_equal(vectorized[0], tost_clinic_all_no_multi.p_value, 13)

    # SMOKE tests: for multi-endpoint vectorized, k on k
    smws.ttost_ind(endpoints_new, endpoints_ref,
                   np.exp(lower_bounds), upper_bound,
                   usevar='unequal', transform=np.log)
    smws.ttost_ind(endpoints_new, endpoints_ref,
                   lower_bounds, upper_bound,
                   usevar='unequal', transform=np.exp)

    smws.ttost_paired(endpoints_new, endpoints_ref,
                      lower_bounds, upper_bound, transform=np.log)
    smws.ttost_paired(endpoints_new, endpoints_ref,
                      lower_bounds, upper_bound, transform=np.exp)

    smws.ttest_ind(endpoints_new, endpoints_ref, value=lower_bounds)

    # k on 1: compare all with reference
    single_ref = clinic[:15, 2:3]
    all_vs_ref = smws.ttost_ind(endpoints_new, single_ref,
                                lower_bounds, upper_bound, usevar='unequal')
    second_vs_ref = smws.ttost_ind(clinic[15:, 3:4], single_ref,
                                   lower_bounds, upper_bound,
                                   usevar='unequal')
    assert_almost_equal(all_vs_ref[0][1], second_vs_ref[0][1], decimal=13)

    # Single endpoint against a vector of lower bounds, with a scalar and
    # then an array upper bound (smoke only).
    varied_lower = [-1.0, -0.5, -0.7, -1.5, -1.5]
    smws.ttost_ind(treatment, reference, varied_lower, 0.6, usevar='unequal')
    smws.ttost_ind(treatment, reference, varied_lower, np.repeat(0.6, 5),
                   usevar='unequal')
Ejemplo n.º 2
0
def test_tost_log():
    """Compare log-transformed paired and independent TOST p-values.

    Both tests use the first 15 and the remaining rows of column 2 of
    ``clinic`` with bounds ``(0.8, 1.25)``; the p-values are checked against
    ``tost_clinic_1_paired`` and ``tost_clinic_1_indep``.
    """
    first, second = clinic[:15, 2], clinic[15:, 2]
    bounds = (0.8, 1.25)

    paired = smws.ttost_paired(first, second, *bounds, transform=np.log)
    assert_almost_equal(paired[0], tost_clinic_1_paired.p_value, 13)

    indep = smws.ttost_ind(first, second, *bounds,
                           transform=np.log, usevar='unequal')
    assert_almost_equal(indep[0], tost_clinic_1_indep.p_value, 13)
Ejemplo n.º 3
0
def test_tost_asym():
    """Exercise ``ttost_ind`` with asymmetric bounds on the ``clinic`` data.

    Asserted results: the mean difference and p-value for column 2 (against
    ``tost_clinic_1_asym``), the vectorized p-values for columns 2-6 (against
    ``tost_clinic_all_no_multi``), and agreement between the "k on 1" call
    and the matching single-column call.  Everything else is a smoke test.
    """
    x1, x2 = clinic[:15, 2], clinic[15:, 2]
    low5 = [-1.0, -1.0, -1.5, -1.5, -1.5]
    new5, ref5 = clinic[15:, 2:7], clinic[:15, 2:7]

    # Note: x1, x2 reversed by definition in multeq.dif
    mean_diff = x2.mean() - x1.mean()
    assert_almost_equal(mean_diff, tost_clinic_1_asym.estimate, 13)
    pval_single = smws.ttost_ind(x2, x1, -1.5, 0.6, usevar='unequal')[0]
    assert_almost_equal(pval_single, tost_clinic_1_asym.p_value, 13)

    # multi-endpoints, asymmetric bounds, vectorized
    pvals_multi = smws.ttost_ind(new5, ref5, low5, 0.6, usevar='unequal')[0]
    assert_almost_equal(pvals_multi, tost_clinic_all_no_multi.p_value, 13)

    # SMOKE tests: for multi-endpoint vectorized, k on k
    for func, low in ((np.log, np.exp(low5)), (np.exp, low5)):
        smws.ttost_ind(new5, ref5, low, 0.6, usevar='unequal', transform=func)

    for func in (np.log, np.exp):
        smws.ttost_paired(new5, ref5, low5, 0.6, transform=func)

    smws.ttest_ind(new5, ref5, value=low5)

    # k on 1: compare all with reference
    ref1 = clinic[:15, 2:3]
    pvals_all = smws.ttost_ind(new5, ref1, low5, 0.6, usevar='unequal')[0]
    pvals_col3 = smws.ttost_ind(clinic[15:, 3:4], ref1, low5, 0.6,
                                usevar='unequal')[0]
    assert_almost_equal(pvals_all[1], pvals_col3[1], decimal=13)

    # One column against five lower bounds; upper bound first as a scalar,
    # then as a length-5 array (smoke only).
    low_mixed = [-1.0, -0.5, -0.7, -1.5, -1.5]
    for upp in (0.6, np.repeat(0.6, 5)):
        smws.ttost_ind(clinic[15:, 2], clinic[:15, 2], low_mixed, upp,
                       usevar='unequal')
Ejemplo n.º 4
0
def test_tost_log():
    """Verify TOST p-values under a log transform for paired and
    independent samples taken from column 2 of ``clinic``."""
    low, upp = 0.8, 1.25
    sample_a = clinic[:15, 2]
    sample_b = clinic[15:, 2]

    # Paired design.
    p_paired = smws.ttost_paired(sample_a, sample_b, low, upp,
                                 transform=np.log)[0]
    assert_almost_equal(p_paired, tost_clinic_1_paired.p_value, 13)

    # Independent samples without the equal-variance assumption.
    p_indep = smws.ttost_ind(sample_a, sample_b, low, upp,
                             transform=np.log, usevar='unequal')[0]
    assert_almost_equal(p_indep, tost_clinic_1_indep.p_value, 13)
Ejemplo n.º 5
0
def tost_transform_paired():
    """Check log-transformed paired TOST against SAS and R reference values.

    NOTE(review): the name lacks the ``test_`` prefix, so default pytest
    collection would presumably skip this function -- confirm whether that
    is intentional.
    """
    tokens = '''\
       103.4 90.11  59.92 77.71  68.17 77.71  94.54 97.51
       69.48 58.21  72.17 101.3  74.37 79.84  84.44 96.06
       96.74 89.30  94.26 97.22  48.52 61.62  95.68 85.80'''.split()
    raw = np.array(tokens, float)

    # Consecutive values form one (x, y) pair.
    x, y = raw.reshape(-1, 2).T

    res1 = smws.ttost_paired(x, y, 0.8, 1.25, transform=np.log)

    # SAS results: overall p-value, then (statistic, p-value) per one-sided test
    sas_pvalue = 0.0031
    sas_one_sided = ((3.38, 0.0031), (-5.90, 0.00005))
    assert_almost_equal(res1[0], sas_pvalue, 3)
    assert_almost_equal(res1[1:], sas_one_sided, 2)
    # result R tost
    assert_almost_equal(res1[0], tost_s_paired.p_value, 13)
Ejemplo n.º 6
0
def test_tost_transform_paired():
    """Paired TOST with a log transform, compared with SAS and R output."""
    text = '''\
       103.4 90.11  59.92 77.71  68.17 77.71  94.54 97.51
       69.48 58.21  72.17 101.3  74.37 79.84  84.44 96.06
       96.74 89.30  94.26 97.22  48.52 61.62  95.68 85.80'''
    pairs = np.array(text.split(), float).reshape(-1, 2)

    # First column is x, second column is y.
    x, y = pairs.T

    outcome = smws.ttost_paired(x, y, 0.8, 1.25, transform=np.log)
    expected_sas = (0.0031, (3.38, 0.0031), (-5.90, 0.00005))

    # The SAS numbers are only available rounded, hence the low precision.
    assert_almost_equal(outcome[0], expected_sas[0], 3)
    assert_almost_equal(outcome[1:], expected_sas[1:], 2)

    # result R tost
    assert_almost_equal(outcome[0], tost_s_paired.p_value, 13)
Ejemplo n.º 7
0
    def setup_class(cls):
        """Attach reference results and freshly computed results to the class.

        ``res2``/``res2b`` take the stored ``tost_clinic_paired_1`` and
        ``ttest_clinic_paired_1`` objects; ``res1`` is a ``Holder`` filled
        from ``ttost_paired`` and ``DescrStatsW`` on column 2 of ``clinic``
        (first 15 rows against the remaining rows).
        """
        cls.res2 = tost_clinic_paired_1
        first, second = clinic[:15, 2], clinic[15:, 2]

        computed = Holder()
        cls.res1 = computed

        tost = smws.ttost_paired(first, second, -0.6, 0.6, transform=None)
        computed.pvalue = tost[0]
        # computed.df = tost[1][-1] not yet

        descr = smws.DescrStatsW(first - second, weights=None, ddof=0)
        # tost confint 2*alpha TODO: check again
        computed.tconfint_diff = descr.tconfint_mean(0.1)
        computed.confint_05 = descr.tconfint_mean(0.05)
        computed.mean_diff = descr.mean
        computed.std_mean_diff = descr.std_mean

        cls.res2b = ttest_clinic_paired_1
Ejemplo n.º 8
0
    def setup_class(cls):
        """Prepare ``res1`` (computed) and ``res2``/``res2b`` (reference).

        The computed side combines the paired TOST p-value with summary
        statistics of the pairwise differences of column 2 of ``clinic``.
        """
        cls.res2 = tost_clinic_paired_1
        sample_a, sample_b = clinic[:15, 2], clinic[15:, 2]
        cls.res1 = Holder()

        cls.res1.pvalue = smws.ttost_paired(sample_a, sample_b, -0.6, 0.6,
                                            transform=None)[0]
        # cls.res1.df = res[1][-1] not yet

        diff_stats = smws.DescrStatsW(sample_a - sample_b, weights=None,
                                      ddof=0)
        # tost confint 2*alpha TODO: check again
        derived = (
            ("tconfint_diff", diff_stats.tconfint_mean(0.1)),
            ("confint_05", diff_stats.tconfint_mean(0.05)),
            ("mean_diff", diff_stats.mean),
            ("std_mean_diff", diff_stats.std_mean),
        )
        for attr_name, attr_value in derived:
            setattr(cls.res1, attr_name, attr_value)

        cls.res2b = ttest_clinic_paired_1
Ejemplo n.º 9
0
 def setup_class(cls):
     """Store the reference result and the paired TOST p-value for column 3
     of ``clinic`` (first 15 rows against the remaining rows)."""
     cls.res2 = tost_clinic_paired
     first, second = clinic[:15, 3], clinic[15:, 3]
     computed = Holder()
     cls.res1 = computed
     computed.pvalue = smws.ttost_paired(first, second, -0.6, 0.6,
                                         transform=None)[0]
Ejemplo n.º 10
0
# Reference values for a pooled two-sample t-test, stored attribute by
# attribute on a Holder.  The "#>" line below presumably records the command
# that generated these assignments -- TODO confirm.
#> cat_items(tt2, prefix="ttest_clinic_indep_1_two_mu_pooled.")
ttest_clinic_indep_1_two_mu_pooled = Holder()
ttest_clinic_indep_1_two_mu_pooled.statistic = -3.299592184135305
ttest_clinic_indep_1_two_mu_pooled.parameter = 28
ttest_clinic_indep_1_two_mu_pooled.p_value = 0.002643203760742494
ttest_clinic_indep_1_two_mu_pooled.conf_int = (-0.35391340938235,
                                               0.6832467427156834)
ttest_clinic_indep_1_two_mu_pooled.estimate = (3.498, 3.333333333333333)
ttest_clinic_indep_1_two_mu_pooled.null_value = 1
ttest_clinic_indep_1_two_mu_pooled.alternative = 'two.sided'
ttest_clinic_indep_1_two_mu_pooled.method = ' Two Sample t-test'
ttest_clinic_indep_1_two_mu_pooled.data_name = 'clinic$var1[1:15] and clinic$var1[16:30]'

# Module-level TOST calls with bounds (-0.6, 0.6): paired on column 2, paired
# on column 3, and independent with unequal variances on column 3.  They run
# at import time; the results are not used in the code visible here.
res1 = smws.ttost_paired(clinic[:15, 2],
                         clinic[15:, 2],
                         -0.6,
                         0.6,
                         transform=None)
res2 = smws.ttost_paired(clinic[:15, 3],
                         clinic[15:, 3],
                         -0.6,
                         0.6,
                         transform=None)
res = smws.ttost_ind(clinic[:15, 3],
                     clinic[15:, 3],
                     -0.6,
                     0.6,
                     usevar='unequal')


class CheckTostMixin:
Ejemplo n.º 11
0
import numpy as np


from statsmodels.stats.weightstats import ttost_paired

# NOTE(review): ``pd`` and ``plt`` are used below but their imports are not
# visible in this part of the file -- presumably ``import pandas as pd`` and
# ``import matplotlib.pyplot as plt``; confirm they exist at the top.

# Hand pandas the path so it opens and closes the file itself; the previous
# ``open(...)`` handle was never closed.
data = pd.read_csv('combined_data.csv')

# 'Baseline' says which of the two runs was the baseline: when it is 0 the
# '... Features 0' column is the baseline and '... Features 1' the experiment,
# otherwise the roles are swapped.  Computed for all rows at once instead of
# one ``.loc`` assignment per row.
baseline_is_run_0 = data['Baseline'].astype(int) == 0
features_run_0 = data['Succesfully Tracked Features 0']
features_run_1 = data['Succesfully Tracked Features 1']
data['STF Baseline'] = np.where(baseline_is_run_0,
                                features_run_0, features_run_1)
data['STF Experiment'] = np.where(baseline_is_run_0,
                                  features_run_1, features_run_0)

# Paired TOST of experiment against baseline with bounds 0 and 10000.
pvalue, stats1, stats2 = ttost_paired(data['STF Experiment'],
                                      data['STF Baseline'], 0, 10000)

# Function-call form works on Python 3 and, for a single argument, prints
# the same thing on Python 2.
print(pvalue)
print(stats1)
print(stats2)

# Scatter of both series against the row index.
plt.scatter(data.index, data['STF Baseline'], label='baseline')
plt.scatter(data.index, data['STF Experiment'], color="green",
            label='experiment')
plt.legend(loc='upper right')
plt.draw()

# Shared histogram bins spanning 0 .. the largest value of either series.
dataMax = max(data['STF Baseline'].max(), data['STF Experiment'].max())
bins = np.linspace(0, dataMax)

plt.figure()
plt.hist(data['STF Baseline'], alpha=0.5, bins=bins, label="baseline")
Ejemplo n.º 12
0
 def setup_class(cls):
     """Set ``res2`` to the stored reference and ``res1`` to a ``Holder``
     carrying the paired TOST p-value computed from column 3 of ``clinic``."""
     cls.res2 = tost_clinic_paired
     sample_a = clinic[:15, 3]
     sample_b = clinic[15:, 3]
     cls.res1 = Holder()
     tost_result = smws.ttost_paired(sample_a, sample_b, -0.6, 0.6,
                                     transform=None)
     # Only the overall p-value (first element) is kept.
     cls.res1.pvalue = tost_result[0]
Ejemplo n.º 13
0
# Reference values for a pooled two-sample t-test, kept as attributes of a
# Holder.  The "#>" line below presumably shows the command that produced
# these assignments -- TODO confirm.
#> cat_items(tt2, prefix="ttest_clinic_indep_1_two_mu_pooled.")
ttest_clinic_indep_1_two_mu_pooled = Holder()
ttest_clinic_indep_1_two_mu_pooled.statistic = -3.299592184135305
ttest_clinic_indep_1_two_mu_pooled.parameter = 28
ttest_clinic_indep_1_two_mu_pooled.p_value = 0.002643203760742494
ttest_clinic_indep_1_two_mu_pooled.conf_int = (-0.35391340938235, 0.6832467427156834)
ttest_clinic_indep_1_two_mu_pooled.estimate = (3.498, 3.333333333333333)
ttest_clinic_indep_1_two_mu_pooled.null_value = 1
ttest_clinic_indep_1_two_mu_pooled.alternative = 'two.sided'
ttest_clinic_indep_1_two_mu_pooled.method = ' Two Sample t-test'
ttest_clinic_indep_1_two_mu_pooled.data_name = 'clinic$var1[1:15] and clinic$var1[16:30]'




# Module-level TOST calls with bounds (-0.6, 0.6): paired on column 2, paired
# on column 3, and independent with unequal variances on column 3.  They run
# at import time; the results are not used in the code visible here.
res1 = smws.ttost_paired(clinic[:15, 2], clinic[15:, 2], -0.6, 0.6, transform=None)
res2 = smws.ttost_paired(clinic[:15, 3], clinic[15:, 3], -0.6, 0.6, transform=None)
res = smws.ttost_ind(clinic[:15, 3], clinic[15:, 3], -0.6, 0.6, usevar='unequal')


class CheckTostMixin(object):
    """Shared p-value check for TOST test cases.

    The method reads ``self.res1.pvalue`` and ``self.res2.p_value``, so
    classes using this mixin have to provide both attributes.
    """

    def test_pval(self):
        """Require both p-values to agree to 13 decimal places."""
        computed, expected = self.res1.pvalue, self.res2.p_value
        assert_almost_equal(computed, expected, decimal=13)
        # assert_almost_equal(self.res1.df, self.res2.df, decimal=13)

class TestTostp1(CheckTostMixin):
    #paired var1
    @classmethod
    def setup_class(cls):
        cls.res2 = tost_clinic_paired_1
Ejemplo n.º 14
0
 def __init__(self):
     """Store the reference result and compute the paired TOST p-value for
     column 3 of ``clinic`` (first 15 rows against the remaining rows)."""
     self.res2 = tost_clinic_paired
     first, second = clinic[:15, 3], clinic[15:, 3]
     computed = Holder()
     self.res1 = computed
     computed.pvalue = smws.ttost_paired(first, second, -0.6, 0.6,
                                         transform=None)[0]
Ejemplo n.º 15
0
 def __init__(self):
     """Set ``res2`` to the stored reference and ``res1`` to a ``Holder``
     with the paired TOST p-value computed from column 3 of ``clinic``."""
     self.res2 = tost_clinic_paired
     sample_a = clinic[:15, 3]
     sample_b = clinic[15:, 3]
     self.res1 = Holder()
     tost_result = smws.ttost_paired(sample_a, sample_b, -0.6, 0.6,
                                     transform=None)
     # Only the overall p-value (first element) is kept.
     self.res1.pvalue = tost_result[0]
Ejemplo n.º 16
0
def test_discr_stat():
    """Regression-test ``benchmarking.discr_stat`` on synthetic datasets.

    A nested dictionary of statistics is built over a grid of
    ``make_classification`` settings (informative/redundant feature counts,
    feature count, sample count).  Groups of those statistics are then
    compared pairwise with ``ttost_paired(d1, d2, 0, 1)`` and the resulting
    p-values are checked against stored values.

    The p-values are compared with a relative tolerance instead of exact
    float equality, so harmless last-digit differences between platforms or
    library versions do not fail the test.  The former bare
    ``assert ttost_paired(...)`` statements were dropped: they only asserted
    the truthiness of the returned result and doubled the computation.
    """
    mega_dict = {}

    dissimilarity = "euclidean"
    # NOTE(review): ``remove_isolates`` is only used as a dictionary key; it
    # is never forwarded to ``discr_stat``, so both passes call it with
    # identical arguments -- confirm whether it was meant to be passed on.
    for remove_isolates in [True, False]:
        by_informative = mega_dict[remove_isolates] = {}
        for n_informative in [5, 25, 49]:
            by_redundant = {}
            by_informative[f"n_informative_{n_informative}"] = by_redundant
            for n_redundant in [5, 25, 49]:
                by_features = {}
                by_redundant[f"n_redundant_{n_redundant}"] = by_features
                for n_features in [100, 1000]:
                    by_samples = {}
                    by_features[f"n_features_{n_features}"] = by_samples
                    for n_samples in [100, 1000]:
                        # Everything is passed by keyword: presumably this is
                        # scikit-learn's make_classification, where recent
                        # releases accept only n_samples and n_features
                        # positionally.
                        X, Y = make_classification(
                            n_samples=n_samples,
                            n_features=n_features,
                            n_informative=n_informative,
                            n_redundant=n_redundant,
                            n_repeated=0,
                            n_classes=2,
                            n_clusters_per_class=2,
                            weights=None,
                            flip_y=0.01,
                            class_sep=1.0,
                            hypercube=True,
                            shift=0.0,
                            scale=1.0,
                            shuffle=True,
                            random_state=42)
                        disc = benchmarking.discr_stat(
                            X, Y, dissimilarity=dissimilarity)[0]
                        by_samples[f"n_samples_{n_samples}"] = disc

    def get_recursively(search_dict, field):
        """
        Takes a dict with nested lists and dicts,
        and searches all dicts for a key of the field
        provided.
        """
        fields_found = []

        for key, value in search_dict.items():
            if key == field:
                fields_found.append(value)
            elif isinstance(value, dict):
                fields_found.extend(get_recursively(value, field))
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        fields_found.extend(get_recursively(item, field))

        return fields_found

    def first_group(field):
        """Array of what ``NestedDictValues`` yields for the first sub-dict
        stored under *field*."""
        first_match = get_recursively(mega_dict, field)[0]
        return np.array(list(NestedDictValues(first_match)))

    def check_pvalue(d1, d2, expected):
        """Run the paired TOST once and compare its p-value to *expected*."""
        pvalue = ttost_paired(d1, d2, 0, 1)[0]
        assert np.isclose(pvalue, expected, rtol=1e-7, atol=0.0), (
            pvalue, expected)

    # Test for mean difference based on n samples
    d1 = np.array(get_recursively(mega_dict, 'n_samples_100'))
    d2 = np.array(get_recursively(mega_dict, 'n_samples_1000'))
    check_pvalue(d1, d2, 0.4162766457698946)

    # Test for mean difference based on n informative
    check_pvalue(first_group('n_informative_49'),
                 first_group('n_informative_25'),
                 0.7244168201421846)
    check_pvalue(first_group('n_informative_25'),
                 first_group('n_informative_5'),
                 0.9999143618449223)
    check_pvalue(first_group('n_informative_49'),
                 first_group('n_informative_5'),
                 0.9999959561894431)

    # Test for mean difference based on n redundant
    check_pvalue(first_group('n_redundant_49'),
                 first_group('n_redundant_25'),
                 0.6177770457546709)
    # Same pair with the arguments swapped.
    check_pvalue(first_group('n_redundant_25'),
                 first_group('n_redundant_49'),
                 0.3822229542453291)
    check_pvalue(first_group('n_redundant_49'),
                 first_group('n_redundant_5'),
                 0.04145449595382497)

    # Test for mean difference based on n features
    check_pvalue(first_group('n_features_100'),
                 first_group('n_features_1000'),
                 0.016759998812705155)