def test_tost_asym():
    """Exercise TOST with asymmetric equivalence bounds on the clinic data.

    The single-endpoint and the vectorized multi-endpoint p-values are
    compared with stored reference results. The remaining calls are smoke
    tests: they only have to run without raising.
    """
    upp = 0.6
    low_multi = [-1.0, -1.0, -1.5, -1.5, -1.5]
    low_mixed = [-1.0, -0.5, -0.7, -1.5, -1.5]

    # Rows 0-14 and rows 15+ of ``clinic`` are the two samples compared.
    head_1, tail_1 = clinic[:15, 2], clinic[15:, 2]
    head_all, tail_all = clinic[:15, 2:7], clinic[15:, 2:7]
    head_ref = clinic[:15, 2:3]

    # Note: the two samples are reversed by definition in multeq.dif
    assert_almost_equal(tail_1.mean() - head_1.mean(),
                        tost_clinic_1_asym.estimate, 13)
    single = smws.ttost_ind(tail_1, head_1, -1.5, upp, usevar='unequal')
    assert_almost_equal(single[0], tost_clinic_1_asym.p_value, 13)

    # multi-endpoints, asymmetric bounds, vectorized
    multi = smws.ttost_ind(tail_all, head_all, low_multi, upp,
                           usevar='unequal')
    assert_almost_equal(multi[0], tost_clinic_all_no_multi.p_value, 13)

    # SMOKE tests: for multi-endpoint vectorized, k on k
    smws.ttost_ind(tail_all, head_all, np.exp(low_multi), upp,
                   usevar='unequal', transform=np.log)
    smws.ttost_ind(tail_all, head_all, low_multi, upp,
                   usevar='unequal', transform=np.exp)
    smws.ttost_paired(tail_all, head_all, low_multi, upp, transform=np.log)
    smws.ttost_paired(tail_all, head_all, low_multi, upp, transform=np.exp)
    smws.ttest_ind(tail_all, head_all, value=low_multi)

    # k on 1: compare all with reference
    k_on_1 = smws.ttost_ind(tail_all, head_ref, low_multi, upp,
                            usevar='unequal')
    one_on_1 = smws.ttost_ind(clinic[15:, 3:4], head_ref, low_multi, upp,
                              usevar='unequal')
    assert_almost_equal(k_on_1[0][1], one_on_1[0][1], decimal=13)

    # 1-d samples against a vector of lower bounds, with a scalar and with
    # a vector upper bound (smoke only).
    smws.ttost_ind(tail_1, head_1, low_mixed, upp, usevar='unequal')
    smws.ttost_ind(tail_1, head_1, low_mixed, np.repeat(upp, 5),
                   usevar='unequal')
def test_tost_log():
    """Compare log-transformed TOST p-values with stored reference values.

    Both the paired and the independent (unequal variance) variants are run
    on column 2 of ``clinic`` with equivalence bounds 0.8 and 1.25.
    """
    low, upp = 0.8, 1.25
    first_rows = clinic[:15, 2]
    remaining_rows = clinic[15:, 2]

    paired_pvalue = smws.ttost_paired(first_rows, remaining_rows, low, upp,
                                      transform=np.log)[0]
    assert_almost_equal(paired_pvalue, tost_clinic_1_paired.p_value, 13)

    indep_pvalue = smws.ttost_ind(first_rows, remaining_rows, low, upp,
                                  transform=np.log, usevar='unequal')[0]
    assert_almost_equal(indep_pvalue, tost_clinic_1_indep.p_value, 13)
def test_tost_asym():
    """Exercise TOST with asymmetric equivalence bounds on the clinic data.

    The single-endpoint and the vectorized multi-endpoint p-values are
    compared with stored reference results. The remaining calls are smoke
    tests: they only have to run without raising.
    """
    upp = 0.6
    low_multi = [-1.0, -1.0, -1.5, -1.5, -1.5]
    low_mixed = [-1.0, -0.5, -0.7, -1.5, -1.5]

    # Rows 0-14 and rows 15+ of ``clinic`` are the two samples compared.
    head_1, tail_1 = clinic[:15, 2], clinic[15:, 2]
    head_all, tail_all = clinic[:15, 2:7], clinic[15:, 2:7]
    head_ref = clinic[:15, 2:3]

    # Note: the two samples are reversed by definition in multeq.dif
    assert_almost_equal(tail_1.mean() - head_1.mean(),
                        tost_clinic_1_asym.estimate, 13)
    single = smws.ttost_ind(tail_1, head_1, -1.5, upp, usevar='unequal')
    assert_almost_equal(single[0], tost_clinic_1_asym.p_value, 13)

    # multi-endpoints, asymmetric bounds, vectorized
    multi = smws.ttost_ind(tail_all, head_all, low_multi, upp,
                           usevar='unequal')
    assert_almost_equal(multi[0], tost_clinic_all_no_multi.p_value, 13)

    # SMOKE tests: for multi-endpoint vectorized, k on k
    smws.ttost_ind(tail_all, head_all, np.exp(low_multi), upp,
                   usevar='unequal', transform=np.log)
    smws.ttost_ind(tail_all, head_all, low_multi, upp,
                   usevar='unequal', transform=np.exp)
    smws.ttost_paired(tail_all, head_all, low_multi, upp, transform=np.log)
    smws.ttost_paired(tail_all, head_all, low_multi, upp, transform=np.exp)
    smws.ttest_ind(tail_all, head_all, value=low_multi)

    # k on 1: compare all with reference
    k_on_1 = smws.ttost_ind(tail_all, head_ref, low_multi, upp,
                            usevar='unequal')
    one_on_1 = smws.ttost_ind(clinic[15:, 3:4], head_ref, low_multi, upp,
                              usevar='unequal')
    assert_almost_equal(k_on_1[0][1], one_on_1[0][1], decimal=13)

    # 1-d samples against a vector of lower bounds, with a scalar and with
    # a vector upper bound (smoke only).
    smws.ttost_ind(tail_1, head_1, low_mixed, upp, usevar='unequal')
    smws.ttost_ind(tail_1, head_1, low_mixed, np.repeat(upp, 5),
                   usevar='unequal')
def tost_transform_paired():
    """Check the log-transformed paired TOST against SAS and R results.

    NOTE(review): the name has no ``test_`` prefix, so default pytest
    discovery presumably does not collect this function -- confirm whether
    that is intended.
    """
    measurements = '''\
    103.4 90.11 59.92 77.71 68.17 77.71
    94.54 97.51 69.48 58.21 72.17 101.3
    74.37 79.84 84.44 96.06 96.74 89.30
    94.26 97.22 48.52 61.62 95.68 85.80'''
    # Consecutive values form one (x, y) pair.
    pairs = np.array(measurements.split(), float).reshape(-1, 2)
    x, y = pairs[:, 0], pairs[:, 1]

    outcome = smws.ttost_paired(x, y, 0.8, 1.25, transform=np.log)

    # Reference printed by SAS: overall p-value, then the two one-sided
    # (statistic, p-value) pairs; only a few digits are available.
    sas_pvalue = 0.0031
    sas_one_sided = ((3.38, 0.0031), (-5.90, 0.00005))
    assert_almost_equal(outcome[0], sas_pvalue, 3)
    assert_almost_equal(outcome[1:], sas_one_sided, 2)

    # result R tost
    assert_almost_equal(outcome[0], tost_s_paired.p_value, 13)
def test_tost_transform_paired():
    """Check the log-transformed paired TOST against SAS and R results."""
    measurements = '''\
    103.4 90.11 59.92 77.71 68.17 77.71
    94.54 97.51 69.48 58.21 72.17 101.3
    74.37 79.84 84.44 96.06 96.74 89.30
    94.26 97.22 48.52 61.62 95.68 85.80'''
    # Consecutive values form one (x, y) pair.
    pairs = np.array(measurements.split(), float).reshape(-1, 2)
    x, y = pairs[:, 0], pairs[:, 1]

    outcome = smws.ttost_paired(x, y, 0.8, 1.25, transform=np.log)

    # Reference printed by SAS: overall p-value, then the two one-sided
    # (statistic, p-value) pairs; only a few digits are available.
    sas_pvalue = 0.0031
    sas_one_sided = ((3.38, 0.0031), (-5.90, 0.00005))
    assert_almost_equal(outcome[0], sas_pvalue, 3)
    assert_almost_equal(outcome[1:], sas_one_sided, 2)

    # result R tost
    assert_almost_equal(outcome[0], tost_s_paired.p_value, 13)
def setup_class(cls):
    """Compute the paired TOST results for column 2 and attach references.

    Sets ``cls.res1`` (computed values), ``cls.res2`` and ``cls.res2b``
    (stored reference results).
    """
    first_rows, remaining_rows = clinic[:15, 2], clinic[15:, 2]

    cls.res2 = tost_clinic_paired_1
    cls.res2b = ttest_clinic_paired_1

    computed = Holder()
    computed.pvalue = smws.ttost_paired(first_rows, remaining_rows,
                                        -0.6, 0.6, transform=None)[0]
    # degrees of freedom (res[1][-1]) are not stored yet

    # Descriptive statistics of the paired differences.
    diff_stats = smws.DescrStatsW(first_rows - remaining_rows,
                                  weights=None, ddof=0)
    # tost confint 2*alpha TODO: check again
    computed.tconfint_diff = diff_stats.tconfint_mean(0.1)
    computed.confint_05 = diff_stats.tconfint_mean(0.05)
    computed.mean_diff = diff_stats.mean
    computed.std_mean_diff = diff_stats.std_mean

    cls.res1 = computed
def setup_class(cls):
    """Store the reference result and the computed paired TOST p-value.

    ``cls.res2`` is the stored reference; ``cls.res1.pvalue`` is computed
    from column 3 of ``clinic`` with equivalence bounds -0.6 and 0.6.
    """
    cls.res2 = tost_clinic_paired

    first_rows = clinic[:15, 3]
    remaining_rows = clinic[15:, 3]

    computed = Holder()
    computed.pvalue = smws.ttost_paired(first_rows, remaining_rows,
                                        -0.6, 0.6, transform=None)[0]
    cls.res1 = computed
#> cat_items(tt2, prefix="ttest_clinic_indep_1_two_mu_pooled.") ttest_clinic_indep_1_two_mu_pooled = Holder() ttest_clinic_indep_1_two_mu_pooled.statistic = -3.299592184135305 ttest_clinic_indep_1_two_mu_pooled.parameter = 28 ttest_clinic_indep_1_two_mu_pooled.p_value = 0.002643203760742494 ttest_clinic_indep_1_two_mu_pooled.conf_int = (-0.35391340938235, 0.6832467427156834) ttest_clinic_indep_1_two_mu_pooled.estimate = (3.498, 3.333333333333333) ttest_clinic_indep_1_two_mu_pooled.null_value = 1 ttest_clinic_indep_1_two_mu_pooled.alternative = 'two.sided' ttest_clinic_indep_1_two_mu_pooled.method = ' Two Sample t-test' ttest_clinic_indep_1_two_mu_pooled.data_name = 'clinic$var1[1:15] and clinic$var1[16:30]' res1 = smws.ttost_paired(clinic[:15, 2], clinic[15:, 2], -0.6, 0.6, transform=None) res2 = smws.ttost_paired(clinic[:15, 3], clinic[15:, 3], -0.6, 0.6, transform=None) res = smws.ttost_ind(clinic[:15, 3], clinic[15:, 3], -0.6, 0.6, usevar='unequal') class CheckTostMixin:
import numpy as np
from statsmodels.stats.weightstats import ttost_paired

# NOTE(review): `pd` and `plt` are used below but are not imported in this
# excerpt -- presumably pandas and matplotlib.pyplot are imported earlier
# in the file; confirm.

# Read the CSV through a context manager so the file handle is closed as
# soon as parsing is done (the bare open() call never closed it).
with open('combined_data.csv') as csv_file:
    data = pd.read_csv(csv_file)

# The 'Baseline' flag says which of the two numbered feature columns holds
# the baseline run for a row; the other column is the experiment run.
for t in data.index:
    if int(data.loc[t, 'Baseline']) == 0:
        data.loc[t, 'STF Baseline'] = data.loc[t, 'Succesfully Tracked Features 0']
        data.loc[t, 'STF Experiment'] = data.loc[t, 'Succesfully Tracked Features 1']
    else:
        data.loc[t, 'STF Baseline'] = data.loc[t, 'Succesfully Tracked Features 1']
        data.loc[t, 'STF Experiment'] = data.loc[t, 'Succesfully Tracked Features 0']

# Paired TOST on experiment vs. baseline with bounds 0 and 10000.
pvalue, stats1, stats2 = ttost_paired(data['STF Experiment'],
                                      data['STF Baseline'], 0, 10000)
# Single-argument print(...) prints the same text on Python 2 and 3,
# whereas the statement form is a syntax error on Python 3.
print(pvalue)
print(stats1)
print(stats2)

# Scatter of both series against the row index.
plt.scatter(data.index, data['STF Baseline'], label='baseline')
plt.scatter(data.index, data['STF Experiment'], color="green",
            label='experiment')
plt.legend(loc='upper right')
plt.draw()

# Common bin edges from 0 to the largest observed value, so histograms
# drawn with `bins` share the same x-axis binning.
dataMax = max(data['STF Baseline'].max(), data['STF Experiment'].max())
bins = np.linspace(0, dataMax)
plt.figure()
plt.hist(data['STF Baseline'], alpha=0.5, bins=bins, label="baseline")
#> cat_items(tt2, prefix="ttest_clinic_indep_1_two_mu_pooled.") ttest_clinic_indep_1_two_mu_pooled = Holder() ttest_clinic_indep_1_two_mu_pooled.statistic = -3.299592184135305 ttest_clinic_indep_1_two_mu_pooled.parameter = 28 ttest_clinic_indep_1_two_mu_pooled.p_value = 0.002643203760742494 ttest_clinic_indep_1_two_mu_pooled.conf_int = (-0.35391340938235, 0.6832467427156834) ttest_clinic_indep_1_two_mu_pooled.estimate = (3.498, 3.333333333333333) ttest_clinic_indep_1_two_mu_pooled.null_value = 1 ttest_clinic_indep_1_two_mu_pooled.alternative = 'two.sided' ttest_clinic_indep_1_two_mu_pooled.method = ' Two Sample t-test' ttest_clinic_indep_1_two_mu_pooled.data_name = 'clinic$var1[1:15] and clinic$var1[16:30]' res1 = smws.ttost_paired(clinic[:15, 2], clinic[15:, 2], -0.6, 0.6, transform=None) res2 = smws.ttost_paired(clinic[:15, 3], clinic[15:, 3], -0.6, 0.6, transform=None) res = smws.ttost_ind(clinic[:15, 3], clinic[15:, 3], -0.6, 0.6, usevar='unequal') class CheckTostMixin(object): def test_pval(self): assert_almost_equal(self.res1.pvalue, self.res2.p_value, decimal=13) #assert_almost_equal(self.res1.df, self.res2.df, decimal=13) class TestTostp1(CheckTostMixin): #paired var1 @classmethod def setup_class(cls): cls.res2 = tost_clinic_paired_1
def __init__(self):
    """Store the reference result and the computed paired TOST p-value.

    ``self.res2`` is the stored reference; ``self.res1.pvalue`` is computed
    from column 3 of ``clinic`` with equivalence bounds -0.6 and 0.6.

    NOTE(review): if the enclosing class is meant to be collected by
    pytest, a class that defines ``__init__`` is skipped by its collector
    -- confirm which test runner is in use.
    """
    self.res2 = tost_clinic_paired

    first_rows = clinic[:15, 3]
    remaining_rows = clinic[15:, 3]

    computed = Holder()
    computed.pvalue = smws.ttost_paired(first_rows, remaining_rows,
                                        -0.6, 0.6, transform=None)[0]
    self.res1 = computed
def test_discr_stat():
    """Regression-test ``benchmarking.discr_stat`` on synthetic data.

    A grid of ``make_classification`` datasets (fixed ``random_state``) is
    scored with ``discr_stat`` and the first element of each result is
    stored in a nested dict keyed by the grid parameters. Groups of scores
    are then compared with a paired TOST (bounds 0 and 1), and each
    p-value is checked against a recorded value.
    """
    dissimilarity = "euclidean"
    # Recorded p-values are compared with a tight relative tolerance
    # rather than exact float equality, which breaks on last-bit
    # differences between platforms or library builds.
    rel_tol = 1e-7

    mega_dict = {}
    # NOTE(review): ``remove_isolates`` only keys the result tree; it is
    # not forwarded to ``discr_stat``, so both branches are computed from
    # identical inputs. Confirm whether it was meant to be passed through.
    for remove_isolates in [True, False]:
        by_informative = mega_dict[remove_isolates] = {}
        for n_informative in [5, 25, 49]:
            by_redundant = {}
            by_informative[f"n_informative_{n_informative}"] = by_redundant
            for n_redundant in [5, 25, 49]:
                by_features = {}
                by_redundant[f"n_redundant_{n_redundant}"] = by_features
                for n_features in [100, 1000]:
                    by_samples = {}
                    by_features[f"n_features_{n_features}"] = by_samples
                    for n_samples in [100, 1000]:
                        X, Y = make_classification(
                            n_samples, n_features, n_informative,
                            n_redundant, n_repeated=0, n_classes=2,
                            n_clusters_per_class=2, weights=None,
                            flip_y=0.01, class_sep=1.0, hypercube=True,
                            shift=0.0, scale=1.0, shuffle=True,
                            random_state=42)
                        disc = benchmarking.discr_stat(
                            X, Y, dissimilarity=dissimilarity)[0]
                        by_samples[f"n_samples_{n_samples}"] = disc

    def get_recursively(search_dict, field):
        """Collect every value stored under key ``field`` in a nested dict.

        Nested dicts, and dicts found inside list values, are searched
        too. A value whose own key matches is collected and not searched
        further.
        """
        fields_found = []
        for key, value in search_dict.items():
            if key == field:
                fields_found.append(value)
            elif isinstance(value, dict):
                fields_found.extend(get_recursively(value, field))
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, dict):
                        fields_found.extend(get_recursively(item, field))
        return fields_found

    def first_subtree_values(key):
        """Flatten the first subtree stored under ``key`` into an array."""
        subtree = get_recursively(mega_dict, key)[0]
        return np.array(list(NestedDictValues(subtree)))

    def check_pvalue(d1, d2, expected):
        """Run the paired TOST once and compare its p-value to ``expected``."""
        # Asserting on the returned tuple itself can never fail, so only
        # the p-value (first element) is tested.
        pvalue = ttost_paired(d1, d2, 0, 1)[0]
        assert np.isclose(pvalue, expected, rtol=rel_tol, atol=0.0), (
            pvalue, expected)

    # Test for mean difference based on n samples: every leaf with 100
    # samples against every leaf with 1000 samples.
    check_pvalue(np.array(get_recursively(mega_dict, 'n_samples_100')),
                 np.array(get_recursively(mega_dict, 'n_samples_1000')),
                 0.4162766457698946)

    # Each remaining case compares the first subtree found under one key
    # with the first subtree found under another.
    subtree_cases = [
        # mean difference based on n informative
        ('n_informative_49', 'n_informative_25', 0.7244168201421846),
        ('n_informative_25', 'n_informative_5', 0.9999143618449223),
        ('n_informative_49', 'n_informative_5', 0.9999959561894431),
        # mean difference based on n redundant
        ('n_redundant_49', 'n_redundant_25', 0.6177770457546709),
        ('n_redundant_25', 'n_redundant_49', 0.3822229542453291),
        ('n_redundant_49', 'n_redundant_5', 0.04145449595382497),
        # mean difference based on n features
        ('n_features_100', 'n_features_1000', 0.016759998812705155),
    ]
    for first_key, second_key, expected in subtree_cases:
        check_pvalue(first_subtree_values(first_key),
                     first_subtree_values(second_key),
                     expected)