def test_equal_mean_median(self): x = np.linspace(-1, 1, 21) y = x ** 3 W1, pval1 = stats.levene(x, y, center="mean") W2, pval2 = stats.levene(x, y, center="median") assert_almost_equal(W1, W2) assert_almost_equal(pval1, pval2)
def test_trimmed1(self):
    """center='trimmed' with proportiontocut=0 must match center='mean'."""
    samples = (g1, g2, g3)
    reference = stats.levene(*samples, center='mean')
    untrimmed = stats.levene(*samples, center='trimmed', proportiontocut=0.0)
    w_ref, p_ref = reference
    w_trim, p_trim = untrimmed
    assert_almost_equal(w_ref, w_trim)
    assert_almost_equal(p_ref, p_trim)
def test_equal_mean_median(self): x = np.linspace(-1,1,21) np.random.seed(1234) x2 = np.random.permutation(x) y = x**3 W1, pval1 = stats.levene(x, y, center='mean') W2, pval2 = stats.levene(x2, y, center='median') assert_almost_equal(W1, W2) assert_almost_equal(pval1, pval2)
def test_trimmed2(self): x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0] y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0] # Use center='trimmed' W1, pval1 = stats.levene(x, y, center="trimmed", proportiontocut=0.125) # Trim the data here, and use center='mean' W2, pval2 = stats.levene(x[1:-1], y[1:-1], center="mean") # Result should be the same. assert_almost_equal(W1, W2) assert_almost_equal(pval1, pval2)
def main():
    """Compare weekday and weekend comment counts and print the p-values.

    Reads the line-delimited JSON file named by ``sys.argv[1]``, splits it
    with ``filterAndTransform`` and fills ``OUTPUT_TEMPLATE`` with the
    p-values of: the initial t-test / normality / Levene tests, the same
    checks after a square-root transform ("Fix 1"), the checks on per-week
    means ("Fix 2"), and a two-sided Mann-Whitney U test.
    """
    df = pd.read_json(sys.argv[1], lines=True)
    # Only the weekend and weekday frames are used below.
    _, weekends_df, weekdays_df = filterAndTransform(df)
    weekend_counts = weekends_df['comment_count']
    weekday_counts = weekdays_df['comment_count']

    # T-test, normality test and variance test
    initial_ttest_p = stats.ttest_ind(weekend_counts, weekday_counts).pvalue
    initial_weekday_normality_p = stats.normaltest(weekday_counts).pvalue
    initial_weekend_normality_p = stats.normaltest(weekend_counts).pvalue
    initial_levene_p = stats.levene(weekday_counts, weekend_counts).pvalue

    # Fix 1: square-root transform
    transformed_weekday_counts = np.sqrt(weekday_counts)
    transformed_weekday_normality_p = stats.normaltest(
        transformed_weekday_counts).pvalue
    transformed_weekend_counts = np.sqrt(weekend_counts)
    transformed_weekend_normality_p = stats.normaltest(
        transformed_weekend_counts).pvalue
    transformed_levene_p = stats.levene(transformed_weekend_counts,
                                        transformed_weekday_counts).pvalue

    # Fix 2: per-(year, week) means.  Select the column before aggregating so
    # unrelated (possibly non-numeric) columns are never averaged.
    weekly_weekday_counts = weekdays_df.groupby(
        ['year', 'week'])['comment_count'].mean()
    weekly_weekday_normality_p = stats.normaltest(weekly_weekday_counts).pvalue
    weekly_weekend_counts = weekends_df.groupby(
        ['year', 'week'])['comment_count'].mean()
    weekly_weekend_normality_p = stats.normaltest(weekly_weekend_counts).pvalue
    weekly_levene_p = stats.levene(weekly_weekday_counts,
                                   weekly_weekend_counts).pvalue
    weekly_ttest_p = stats.ttest_ind(weekly_weekday_counts,
                                     weekly_weekend_counts).pvalue

    utest_p = stats.mannwhitneyu(weekday_counts, weekend_counts,
                                 alternative='two-sided').pvalue
    # ...
    print(
        OUTPUT_TEMPLATE.format(
            initial_ttest_p=initial_ttest_p,
            initial_weekday_normality_p=initial_weekday_normality_p,
            initial_weekend_normality_p=initial_weekend_normality_p,
            initial_levene_p=initial_levene_p,
            transformed_weekday_normality_p=transformed_weekday_normality_p,
            transformed_weekend_normality_p=transformed_weekend_normality_p,
            transformed_levene_p=transformed_levene_p,
            weekly_weekday_normality_p=weekly_weekday_normality_p,
            weekly_weekend_normality_p=weekly_weekend_normality_p,
            weekly_levene_p=weekly_levene_p,
            weekly_ttest_p=weekly_ttest_p,
            utest_p=utest_p,
        ))
def stats_tests():
    """Run four equal-variance tests for every (security, indicator) pair.

    For each cell of the module-level ``container`` the 'all' and 'signal'
    samples are coerced to numeric and compared with Levene (median-centred,
    stored as 'Brown-Forsythe'), Bartlett, Levene (mean-centred) and
    Fligner-Killeen.  Failing tests are recorded in the module-level
    ``errors`` list as ``[security, indicator, test_name]``.  The p-values of
    rows where every test succeeded are pickled to ``p_values_full`` (wide)
    and ``p_values_container_full`` (melted).
    """
    global errors
    tests = ['Brown-Forsythe', 'Bartlett', 'Levene', 'Fligner-Killeen']
    # Callables in the same order as ``tests``.
    runners = [
        lambda a, b: stats.levene(a, b, center='median'),
        stats.bartlett,
        lambda a, b: stats.levene(a, b, center='mean'),
        stats.fligner,
    ]
    securities = list(container.index)
    indicators = list(container.columns)
    output = pd.DataFrame(
        index=pd.MultiIndex.from_product([securities, indicators]),
        columns=tests)
    for security in securities:
        for indicator in indicators:
            cell = container.loc[security][indicator]
            full_sample = pd.to_numeric(pd.Series(cell['all']),
                                        errors='coerce')
            signal_sample = pd.to_numeric(pd.Series(cell['signal']),
                                          errors='coerce')
            for test_name, run_test in zip(tests, runners):
                try:
                    # NOTE(review): chained assignment relies on .loc
                    # returning a view of the row; confirm it still writes
                    # through on the pandas version in use.
                    output.loc[security, indicator][test_name] = run_test(
                        full_sample, signal_sample)
                except Exception:
                    # Best effort: remember the failure and carry on, but let
                    # KeyboardInterrupt / SystemExit propagate.
                    errors.append([security, indicator, test_name])
    # Extract the p-values once and reuse them for both output shapes.
    p_values = output.dropna().applymap(lambda x: x.pvalue).unstack()
    p_values_container = p_values.melt()
    p_values.to_pickle('p_values_full')
    p_values_container.to_pickle('p_values_container_full')
def _varAnalysis(df, labels):
    """Test every column of *df* for a difference between label groups.

    For each column Levene's test picks the variant: with two groups a
    Student (p >= 0.05) or Welch (p < 0.05) t-test; with more than two groups
    a one-way ANOVA or pingouin's Welch ANOVA.

    Args:
        df: DataFrame with one row per sample and one column per feature.
        labels: group label for each row of *df*.

    Returns:
        Tuple ``(F, P)`` of DataFrames indexed by ``df.columns`` holding the
        test statistic and p-value of each column.

    Raises:
        ValueError: if ``len(labels)`` differs from the number of rows, or
            fewer than two distinct labels are present.
    """
    from scipy.stats import f_oneway, levene, ttest_ind

    if df.shape[0] != len(labels):
        raise ValueError(
            "The number of input samples is not equal to labels size")
    label_ = np.unique(labels)
    groups = _split(df, labels)
    if len(label_) < 2:
        raise ValueError("Groups for comparison are less than 2!")

    two_groups = len(label_) == 2
    if two_groups:
        print('Performing t-test analysis...')
    else:
        print('Performing anova analysis...')

    F, P = [], []
    for i in range(df.shape[1]):
        sample = [item[:, i] for item in groups]
        _, levene_p = levene(*sample)
        unequal_var = levene_p < 0.05
        if two_groups:
            f, p = ttest_ind(*sample, equal_var=not unequal_var)
        elif unequal_var:
            # Imported lazily: pingouin is only needed on this path.
            from pingouin import welch_anova
            meta = pd.DataFrame(df.iloc[:, i])
            meta.columns = ['feature']
            meta['labels'] = labels
            result = welch_anova(data=meta, dv='feature', between='labels')
            f = result['F'].values[0]
            p = result['p-unc'].values[0]
        else:
            f, p = f_oneway(*sample)
        F.append(f)
        P.append(p)

    F = pd.DataFrame(F)
    P = pd.DataFrame(P)
    F.index = df.columns
    P.index = df.columns
    return F, P
def test_trimmed2(self): x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0] y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0] np.random.seed(1234) x2 = np.random.permutation(x) # Use center='trimmed' W0, pval0 = stats.levene(x, y, center='trimmed', proportiontocut=0.125) W1, pval1 = stats.levene(x2, y, center='trimmed', proportiontocut=0.125) # Trim the data here, and use center='mean' W2, pval2 = stats.levene(x[1:-1], y[1:-1], center='mean') # Result should be the same. assert_almost_equal(W0, W2) assert_almost_equal(W1, W2) assert_almost_equal(pval1, pval2)
def anova_oneway():
    '''One-way ANOVA: test if results from 3 groups are equal.

    Loads 'altman_910.txt' through ``getData``, warns when Levene's test
    rejects equal variances, prints the ``f_oneway`` result and finally the
    equivalent statsmodels ANOVA table.
    '''
    # Get the data
    data = getData('altman_910.txt')

    # Sort them into groups, according to column 1
    group1 = data[data[:, 1] == 1, 0]
    group2 = data[data[:, 1] == 2, 0]
    group3 = data[data[:, 1] == 3, 0]

    # First, check if the variances are equal, with the "Levene"-test
    (W, p) = stats.levene(group1, group2, group3)
    if p < 0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)

    # Print the results.  print() is called with a single argument each time
    # so the output is identical under Python 2 and Python 3.
    print('Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    print(anova_lm(model))
def apply_test(a1, a2, type):
    """Run the named test on two samples and report whether p > 0.05.

    Supported names: "shapiro" (both samples must pass), "levene", "anova",
    "welch" and "kruskal".  Any other name prints a message and returns -1.
    """
    # All tests are run at a 95% confidence level.
    if type == "shapiro":
        p_first = stats.shapiro(a1)[1]
        p_second = stats.shapiro(a2)[1]
        return (p_first > 0.05 and p_second > 0.05)
    if type == "levene":
        return stats.levene(a1, a2)[1] > 0.05
    if type == "anova":
        return stats.f_oneway(a1, a2)[1] > 0.05
    if type == "welch":
        return stats.ttest_ind(a1, a2, equal_var=False)[1] > 0.05
    if type == "kruskal":
        return stats.kruskal(a1, a2)[1] > 0.05
    print("Test no identificado.")
    return -1
def cep_anova(samples_dict):
    '''
    Run an ANOVA over the samples in samples_dict, with post-hoc tests.

    Levene's test (mean-centred) selects a standard or a Welch ANOVA.  When
    the ANOVA is significant every pair of samples is compared with Tukey
    (after a standard ANOVA) or Games-Howell (after Welch).  Returns a dict
    with 'test', 'anova_p', 'posthoc' and one entry per sample-name pair.
    '''
    samples_list = samples_dict.values()
    result_dict = {}

    # Homogeneity of variance; element 1 of the result is the p-value.
    equal_var_test_sig = levene(*samples_list, center='mean')[1]

    # No usable Levene result: stop here.
    if pd.isnull(equal_var_test_sig):
        result_dict['test'] = 'N/A'
        return result_dict

    if equal_var_test_sig < SIG_LEVEL:
        result_dict['test'] = 'Welch'
        anova_result = welch_anova(*samples_list)
    elif equal_var_test_sig >= SIG_LEVEL:
        result_dict['test'] = 'Standard'
        anova_result = f_oneway(*samples_list)

    anova_result_sig = anova_result[1]
    result_dict['anova_p'] = anova_result_sig

    # Post-hoc tests only follow a significant ANOVA.
    if not anova_result_sig < SIG_LEVEL:
        return result_dict

    # Map (name_a, name_b) -> (sample_a, sample_b) for every pair of samples.
    pairs_dict = {
        (first[0], second[0]): (first[1], second[1])
        for first, second in combinations(samples_dict.items(), 2)
    }

    # msw, r and df feed the post-hoc tests.
    msw, r, df = get_msw_et_al(*samples_list)
    kwargs_dict = {'r': r}
    if result_dict['test'] == 'Standard':
        result_dict['posthoc'] = 'Tukey'
        posthoc = tukey
        kwargs_dict['msw'] = msw
        kwargs_dict['df'] = df
    elif result_dict['test'] == 'Welch':
        result_dict['posthoc'] = 'Games-Howell'
        posthoc = gh

    for key, (sample_a, sample_b) in pairs_dict.items():
        mean_diff, pval = posthoc(sample_a, sample_b, **kwargs_dict)
        # Translate the result into verdict, sign and cohens_d and store that
        # tuple under the pair's key.
        result_dict[key] = translate_result(pval, mean_diff, sample_a,
                                            sample_b)
    return result_dict
def get_p_value_by_feature(pd_train, pd_test, feature_name):
    """Test whether a feature differs between two data sets.

    Features with more than two distinct values in both sets are treated as
    continuous: a t-test is used when both sets pass a KS normality test and
    Levene's test (all p >= 0.05), otherwise a Mann-Whitney U test.  Features
    with exactly two distinct values in both sets are compared with a
    chi-square test on the category counts.

    :param pd_train: first data set (e.g. train, or label == 1)
    :param pd_test: second data set (e.g. test, or label == 0)
    :param feature_name: name of the feature column
    :return: the p-value rounded to 3 decimals (< 0.05 means the feature
        differs), or None when the feature fits neither case above
    """
    train_feature = pd_train[feature_name]
    test_feature = pd_test[feature_name]
    train_feature_class = len(set(train_feature))
    test_feature_class = len(set(test_feature))

    if train_feature_class > 2 and test_feature_class > 2:
        # Continuous variable: t-test or U-test.
        train_feature_mean = np.mean(train_feature)
        test_feature_mean = np.mean(test_feature)
        train_feature_std = np.std(train_feature)
        test_feature_std = np.std(test_feature)
        # Homogeneity of variance and normality checks.
        _, p_value = levene(train_feature, test_feature)
        _, p_value_train = stats.kstest(
            train_feature, "norm", (train_feature_mean, train_feature_std))
        _, p_value_test = stats.kstest(
            test_feature, "norm", (test_feature_mean, test_feature_std))
        if p_value_train >= 0.05 and p_value_test >= 0.05 and p_value >= 0.05:
            _, pvalue_t = ttest_ind(train_feature, test_feature)
            return round(pvalue_t, 3)
        _, p_m_value = mannwhitneyu(train_feature, test_feature)
        return round(p_m_value, 3)

    if train_feature_class == 2 and test_feature_class == 2:
        # Chi-square test.  Rows are aligned on the category value itself;
        # ranking each set by frequency would pair different categories
        # whenever the most common one differs between the two sets.
        train_counts = Counter(train_feature)
        test_counts = Counter(test_feature)
        categories = list(dict.fromkeys([*train_counts, *test_counts]))
        kf_data = np.array(
            [[train_counts[cat], test_counts[cat]] for cat in categories])
        _, p_value, _, _ = chi2_contingency(kf_data)
        return round(p_value, 3)

    return None
def apply_anova(data):
    """Run a one-way ANOVA plus assumption checks for every gene column.

    Each column other than 'group' is split with ``split_into_groups`` and
    gets an ANOVA p-value, a Shapiro-Wilk p-value on the pooled values and a
    mean-centred Levene p-value.  Prints progress and two summary counts and
    returns the table of p-values.
    """
    genes = data.drop(columns=['group']).columns
    gene_count = genes.shape[0]
    rows = []
    for position, gene in enumerate(genes):
        _, groups = split_into_groups(data, gene)
        anova = stats.f_oneway(*groups)
        normality = stats.shapiro(np.concatenate(groups))  # normality test
        homoscedasticity = stats.levene(*groups, center='mean')  # equal-variance test
        rows.append(
            (gene, anova.pvalue, normality[1], homoscedasticity.pvalue))
        if position % 100 == 0:
            print('Progress {:2.0%}'.format((position / (gene_count))),
                  end='\r')

    anova_table = pd.DataFrame(
        rows,
        columns=['gene', 'p_value', 'shapiro_p_value', 'levene_p_value'])

    significant = anova_table['p_value'] < ALPHA
    print(
        'Found {} genes that influence'.format(significant.sum()),
        'health conditions according to ANOVA tests.')
    assumptions_hold = ((anova_table['shapiro_p_value'] > ALPHA) &
                        (anova_table['levene_p_value'] > ALPHA))
    print(
        'Found {} genes that influence'.format(
            (significant & assumptions_hold).sum()),
        'health conditions according to ANOVA Shapiro-Wilks and Levene tests.')
    return anova_table
def _homogeneity_tests(self):
    """Return Levene and Bartlett results for the first two columns.

    The result is a DataFrame indexed by test name with the columns
    "Test Statistic" and "P-value", rounded to 3 decimals.
    """
    df = self.__df
    first, second = df.iloc[:, 0], df.iloc[:, 1]
    # Run each test once and read both the statistic and the p-value from it.
    outcomes = [stats.levene(first, second), stats.bartlett(first, second)]
    homogeneityTests = pd.DataFrame(
        {
            "Test Statistic": [outcome[0] for outcome in outcomes],
            "P-value": [outcome[1] for outcome in outcomes],
        },
        index=["Levene", "Bartlett"])
    return round(homogeneityTests, 3)
def ttestForTwoChoiceQuestions(xValues, yValues):
    """Compare two samples with a t-test or, failing its assumptions, Kruskal-Wallis.

    The independent-samples t-test is used only when both samples are normal
    (per ``isNormal``) and Levene's test does not reject equal variances
    (p >= 0.05).  Otherwise the Kruskal-Wallis H statistic is used, divided
    by 5.

    Returns:
        Tuple ``(parametric, t, tp)``: whether the t-test was used, the test
        statistic and its p-value.
    """
    npArrayX = np.array(xValues)
    npArrayY = np.array(yValues)

    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.normaltest.html#scipy.stats.normaltest
    xIsNormal = isNormal(npArrayX)
    yIsNormal = isNormal(npArrayY)

    parametric = False
    if xIsNormal and yIsNormal:
        # Levene test for equal variances
        # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.levene.html#scipy.stats.levene
        _, lp = stats.levene(npArrayX, npArrayY)
        # Was `lp >- 0.05`, i.e. `lp > -0.05`, which is true for every
        # p-value and silently disabled the equal-variance check.
        parametric = bool(lp >= 0.05)

    if parametric:
        # if levene test comes out well and samples are normal, can use standard t-test for independent samples
        # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html#scipy.stats.ttest_ind
        t, tp = stats.ttest_ind(xValues, yValues, axis=0)
    else:
        # if not, use Kruskal-Wallis H-test instead
        # http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html#scipy.stats.kruskal
        t, tp = stats.kruskal(npArrayX, npArrayY)
        t = t / 5.0  # these come out bigger than the t-test stats

    return parametric, t, tp
def Simulate(self):
    """Generate trees, sample clades and compare original vs. surrogate W2.

    For each of ``nb_tree`` generated trees, ``nb_clades`` nodes are sampled
    from those whose position in the descending age order lies within
    [cladeThrMin, cladeThrMax].  The original and surrogate W2 values are
    collected and summarised in ``res.stats`` (Spearman correlation,
    means, standard deviations and Levene p-value).
    """
    res = Results()
    res.origW2 = []
    res.surrW2 = []
    res.stats = {}

    for _ in range(self.nb_tree):
        # Generate tree
        tree = self.treeGenerator.generate(self.tree_size)

        # Keep the nodes whose rank falls inside the clade thresholds.
        candidates = []
        ordered_nodes = tree.ageorder_node_iter(include_leaves=True,
                                                descending=True)
        for rank, node in enumerate(ordered_nodes):
            if self.cladeThrMin <= rank <= self.cladeThrMax:
                candidates.append(node)

        # Sample clades
        for clade in random.sample(candidates, self.nb_clades):
            res.origW2.append(computeW2(tree, clade))
            res.surrW2.append(self.surrogateStrat.generate(tree, clade))

    # Compute stats
    summary = res.stats
    summary['correl'], summary['correlPval'] = spearmanr(res.origW2,
                                                         res.surrW2)
    summary['origW2Mean'] = np.mean(res.origW2)
    summary['origW2Std'] = np.std(res.origW2)
    summary['surrW2Mean'] = np.mean(res.surrW2)
    summary['surrW2Std'] = np.std(res.surrW2)
    _, summary['LevenePVal'] = levene(res.origW2, res.surrW2)
    return res
def calc_ttest(data, exp_set, control_set, tags=()):
    """Per-probeset Welch and Student t-tests plus a Levene diagnostic.

    For every row label of *data* the columns named by
    ``exp_set.filenames`` are compared with those named by
    ``control_set.filenames``.  The returned DataFrame (indexed like *data*)
    holds, for both t-test variants, the statistic, the nominal p-value and
    its Bonferroni and Benjamini-Hochberg corrections, plus the Levene
    p-value.  Column names are produced by ``tm.e`` from *tags* and the
    descriptor pairs.
    """
    exp_cols = list(exp_set.filenames)
    control_cols = list(control_set.filenames)
    multipletests = statsmodels.sandbox.stats.multicomp.multipletests

    def key(*pairs):
        # Column name for the given descriptor pairs (order is preserved).
        return tm.e(tags + pairs)

    def per_probeset(test, **kwargs):
        # .loc replaces the .ix indexer that was removed in pandas 1.0; both
        # the row (probeset) and the columns (filenames) are labels.
        return [test(data.loc[probeset, exp_cols],
                     data.loc[probeset, control_cols], **kwargs)
                for probeset in data.index]

    # Welch t-test
    d = per_probeset(st.ttest_ind, equal_var=False)
    rs = pandas.DataFrame(
        index=data.index, data=d,
        columns=[
            key(("st", "t"), ("tt", "welch ttest")),
            key(("st", "pval"), ("tt", "welch ttest"), ("mc", "nominal")),
        ])
    rs[key(("tt", "welch ttest"), ("st", "pval"), ("mc", "bonf"))] = multipletests(
        rs.loc[:, key(("st", "pval"), ("tt", "welch ttest"), ("mc", "nominal"))],
        method="bonferroni")[1]
    rs[key(("tt", "welch ttest"), ("st", "pval"), ("mc", "bh"))] = multipletests(
        rs.loc[:, key(("st", "pval"), ("tt", "welch ttest"), ("mc", "nominal"))],
        method="fdr_bh")[1]

    # Student t-test
    d = per_probeset(st.ttest_ind, equal_var=True)
    rs[key(("st", "t"), ("tt", "student ttest"))] = [v[0] for v in d]
    rs[key(("st", "pval"), ("tt", "student ttest"), ("mc", "nominal"))] = [v[1] for v in d]
    rs[key(("st", "pval"), ("tt", "student ttest"), ("mc", "bonf"))] = multipletests(
        rs.loc[:, key(("st", "pval"), ("tt", "student ttest"), ("mc", "nominal"))],
        method="bonferroni")[1]
    rs[key(("st", "pval"), ("tt", "student ttest"), ("mc", "bh"))] = multipletests(
        rs.loc[:, key(("st", "pval"), ("tt", "student ttest"), ("mc", "nominal"))],
        method="fdr_bh")[1]

    # do diagnostic tests for heteroskedasticity
    d = per_probeset(st.levene)
    rs[key(("tt", "levene"), ("st", "pval"))] = [z[1] for z in d]

    # omnibus test for normality
    # d = [st.normaltest( data.loc[probeset, list(exp_set.filenames)]) for probeset in data.index ]
    # rs[ tm.e( tags + (("tt", "d-p omnibus"), ("st", "pval"), ("cg", "exp") ))] = [z[1] for z in d]
    # d = [st.normaltest( data.loc[probeset, list(control_set.filenames)]) for probeset in data.index ]
    # rs[ tm.e( tags + (("tt", "d-p omnibus"), ("st", "pval"), ("cg", "ctrl") ))] = [z[1] for z in d]
    return rs
def test_data(self):
    """Levene's test on the samples g1..g10 must match the reference values."""
    # Look the samples up by name instead of eval()-ing a built string.
    # NOTE(review): assumes g1..g10 are globals of this module — confirm.
    args = [globals()['g%d' % k] for k in range(1, 11)]
    W, pval = stats.levene(*args)
    assert_almost_equal(W, 1.7059176930008939, 7)
    assert_almost_equal(pval, 0.0990829755522, 7)
def test(self, arr1, arr2): p_value = 0 if self.statistics == "auto": # проверяем Левеном на равенство дисперсий. Если равны if stats.levene(arr1, arr2)[1] > 0.05: # Шапир на нормальность выборок. Если нормальные if stats.shapiro(arr1)[1] > 0.05 and stats.shapiro(arr2)[1] > 0.05: # p = Student p_value = stats.ttest_ind(arr1, arr2)[1] else: # p = Mann if equal(arr1, arr2): p_value = 1 else: p_value = stats.mannwhitneyu(arr1, arr2)[1] else: p_value = stats.ttest_ind(arr1, arr2, False)[1] elif self.statistics == "student": p_value = stats.ttest_ind(arr1, arr2)[1] elif self.statistics == "welch": p_value = stats.ttest_ind(arr1, arr2, False)[1] elif self.statistics == "mann": if equal(arr1, arr2): p_value = 1 else: p_value = stats.mannwhitneyu(arr1, arr2)[1] return p_value
def check_different_feature_with_anova_limit2():
    """Print Levene and one-way ANOVA results for every pair of clusters.

    For each pair (i, j) of clusters in ``cluster_result_transferred`` and
    each feature column except "unknownCount", the feature values of the
    users in both clusters are compared and printed as CSV-like lines.
    """
    _student_data, headerArray = loadFeatureData()
    cluster_count = len(cluster_result_transferred)
    for i in range(cluster_count - 1):
        for j in range(i + 1, cluster_count):
            print(i, j)
            print("feature,w,p,f,p_f")
            for feature_index in range(1, len(headerArray)):
                feature_name = headerArray[feature_index]
                if feature_name == "unknownCount":
                    continue
                # Column 0 of each record is the user id.
                uid_to_feature = {
                    item[0]: item[feature_index] for item in _student_data
                }
                # One list of feature values per cluster; unknown uids are
                # skipped.
                cluster_with_feature = [
                    [uid_to_feature[uid]
                     for uid in cluster_result_transferred[cluster_index]
                     if uid in uid_to_feature]
                    for cluster_index in (i, j)
                ]
                w, p = stats.levene(*cluster_with_feature)
                f, p_f = stats.f_oneway(*cluster_with_feature)
                print(feature_name, ",", w, ",", p, ",", f, ",", p_f)
def levene(tamannoMuestras, poblacion):
    """Draw four samples from the population and print Levene's test result."""
    muestras = [muestra(poblacion, tamannoMuestras) for _ in range(4)]
    results = st.levene(*muestras)
    estadistico, valor_p = results[0], results[1]
    print("Levene Valor Estadistico %f" % estadistico)
    print("Levene Valor p %f" % valor_p)
def homogeneity_class_covariances(X_train, y_train):
    """Run Levene's test across classes for every feature column.

    Returns ``(stat_avg, pval_avg, success_ratio)``: the self-weighted
    average of the statistics of the significant (p < 0.05) columns (NaN if
    there are none), the self-weighted average of all p-values, and the
    fraction of columns that were significant.
    """
    dims = X_train.shape[1]
    # Rows of X_train split by class label.
    class_blocks = [X_train[y_train == cls] for cls in np.unique(y_train)]

    levene_stat = []
    levene_pval = []
    levene_success = 0
    for j in range(dims):
        columns = [rows[:, j] for rows in class_blocks]
        l_stat, l_pval = levene(*columns)
        levene_pval.append(l_pval)
        if l_pval < 0.05:
            levene_success += 1
            # NOTE(review): the statistic is recorded only for significant
            # columns — confirm this matches the intended nesting.
            levene_stat.append(l_stat)

    levene_stat_avg = np.nan
    if levene_success > 0:
        levene_stat_avg = np.average(levene_stat, weights=levene_stat)
    levene_pval_avg = np.average(levene_pval, weights=levene_pval)
    levene_success_ratio = levene_success / dims
    return levene_stat_avg, levene_pval_avg, levene_success_ratio
def test_for_side_levene(df_side, lvl=3, hue='value'):
    """Levene's test of 'left' vs. 'right' for every level and measure.

    Returns a DataFrame indexed by (level name, measure) with the columns
    'statistic' and 'p-value'.
    """
    from scipy import stats

    columns = ['statistic', 'p-value']
    level_names = ["lvl_{}".format(i) for i in range(lvl)]
    measures = [
        "start_cm", "rel_pt", "amp_max_cop", 'amp_max_pel', 'amp_max_c7',
        "vel_max_cop", 'vel_max_pel', 'vel_max_c7', "overshoot", "dcm",
        "dtml", "rcm"
    ]
    index = pd.MultiIndex.from_product([level_names, measures])
    # Start from an all-NaN table and fill it row by row.
    df = pd.DataFrame(np.full((len(index), len(columns)), np.nan),
                      index=index, columns=columns)
    for level, level_name in enumerate(level_names):
        for measure in measures:
            v_df = get_data_group_by_player_mean(df_side, level, measure,
                                                 hue=hue)
            df.loc[(level_name, measure)] = stats.levene(v_df['left'],
                                                         v_df['right'])
    return df
def main():
    """Run Levene's test on the groups of a CSV file and print the p-value.

    Command-line options: ``--input`` (path of the CSV file, default
    testData.csv) and ``--level`` (significance level, default 0.05).  The
    p-value is followed by '*' when it is below the level.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input', type=str, default='testData.csv',
        help='path to the input csv file (default: testData.csv)')
    parser.add_argument('--level', type=float, default=0.05,
                        help='level of significance (default: 0.05)')
    args = parser.parse_args()
    filename = args.input
    level = args.level

    # Hand the groups to levene() directly: stacking them into one 2-D array
    # first only works when every group has the same number of observations.
    data_list = [group_xi.data_array for group_xi in data_reader(filename)]
    statistic, p_value = stats.levene(*data_list)

    if p_value < level:
        print('p-value = ' + str(p_value) + '*')
    else:
        print('p-value = ' + str(p_value))
def levene(cls, xa, xb):
    """Print the outcome of a mean-centred Levene test on two samples.

    The null hypothesis (equal variances) is reported as retained when
    p >= 0.05 and as rejected otherwise.
    """
    print('2群間: 母平均の95%ルビーン検定による等分散性の検定-------------------start')
    outcome = st.levene(xa, xb, center='mean')
    p = outcome[1]
    if not p >= 0.05:
        print(f'p値 = {p:.3f} // 検定結果: 帰無仮説を棄却して、2つの標本には等分散性なし')
        return
    print(f'p値 = {p:.3f} // 検定結果: 帰無仮説を採択して、2つの標本には等分散性なしとは言えない')
def find_feature_Ttest(Data, label, use_all=True):
    """Compare a feature between label == 1 and label == 0 samples.

    When both groups pass ``check_normality`` (p > 0.05) a t-test is used:
    Student's if Levene's p >= 0.05 ('ttest'), Welch's otherwise
    ('ttes_adj').  Otherwise, if *use_all* is true, Mann-Whitney U is used
    ('mannwhitneyu').  Returns ``[statistic, p_value, method]``.

    NOTE(review): with non-normal data and use_all=False no result is
    assigned and the return statement raises UnboundLocalError — confirm the
    intended behaviour for that case.
    """
    # Both inputs are lists.
    Data = np.array(Data)
    label = np.array(label)
    positives = Data[label == 1]
    negatives = Data[label == 0]
    normality_pos = check_normality(positives)
    normality_neg = check_normality(negatives)

    both_normal = (normality_pos[1] > 0.05) and (normality_neg[1] > 0.05)
    if both_normal:
        # Check homogeneity of variance.
        levene_p = stats.levene(positives, negatives)[1]
        if levene_p >= 0.05:
            outcome = stats.ttest_ind(positives, negatives)
            static_value, p_value = outcome[0], outcome[1]
            method = 'ttest'
        elif levene_p < 0.05:
            outcome = stats.ttest_ind(positives, negatives, equal_var=False)
            static_value, p_value = outcome[0], outcome[1]
            method = 'ttes_adj'
    elif use_all:
        outcome = stats.mannwhitneyu(positives, negatives)
        static_value, p_value = outcome[0], outcome[1]
        method = 'mannwhitneyu'
    return [static_value, p_value, method]
def oneway_anova(df, x, y, W, H, use_hsd=True, plot=True):
    """One-way ANOVA of column *y* across the groups of column *x*.

    Prints the per-group means, whether Levene's test finds the variances
    homogeneous, the ANOVA p-value and (optionally) a Tukey HSD summary.

    Args:
        df: input DataFrame.
        x: name of the grouping column.
        y: name of the value column.
        W, H: figure width and height used when *plot* is true.
        use_hsd: print the pairwise Tukey HSD summary.
        plot: draw a violin plot of *y* by *x*.

    Returns:
        DataFrame with the mean of *y* for each value of *x*.
    """
    # mean_compare_table
    mean_compare_table = df.groupby(x, as_index=False)[[y]].mean()
    print(mean_compare_table)

    if plot:
        plt.figure(figsize=(W, H))
        # x and y must be keywords: seaborn >= 0.12 accepts only `data`
        # positionally.
        sns.violinplot(x=x, y=y, data=df)

    # One list of values per group.
    groups = [df.loc[df[x] == val, y].tolist() for val in list(set(df[x]))]

    # The ANOVA itself is f_oneway in both cases; Levene's result is only
    # reported.
    levene_test = levene(*groups)
    if levene_test.pvalue >= 0.05:
        print("方差齐")
    else:
        print("方差不齐")
    f_value, p_value = f_oneway(*groups)

    # Conclusion
    print(p_value)
    if use_hsd:
        hsd = pairwise_tukeyhsd(endog=df[y], groups=df[x], alpha=0.05)
        print(hsd.summary())
    return mean_compare_table
def _anova_assumptions(self, cl):
    """Check the ANOVA assumptions and store them in ``self.anova_assump_df``.

    Row block 0/1 holds the Shapiro-Wilk statistic and p-value of the model
    residuals (first column).  Row block 2/3 holds, per independent
    variable, an equal-variance test across its levels: Bartlett when the
    residuals look normal (Shapiro p > *cl*), Levene otherwise.

    Args:
        cl: threshold the Shapiro-Wilk p-value is compared against.
    """
    arrays = [['Normality (Shapiro-Wilk)', 'Normality (Shapiro-Wilk)',
               'Variance', 'Variance'],
              ['test stats', 'p-value', 'test stats', 'p-value']]
    temp = np.zeros((4, 1 + len(self.indep_var)))
    # Column labels are needed whichever variance test runs; previously they
    # were only extended on the Bartlett path, so the Levene path failed
    # with a shape mismatch when building the DataFrame.
    index = [self.dep_var] + list(self.indep_var)

    # Experimental errors are normally distributed
    temp[0, 0], temp[1, 0] = ss.shapiro(self.ols_model.resid)

    if temp[1, 0] > cl:
        variance_test, test_label = ss.bartlett, ' (Bartlett)'
    else:
        variance_test, test_label = ss.levene, ' (Levene)'

    for i, factor in enumerate(self.indep_var):
        list_unique = self.df[factor].unique()
        # NOTE(review): the 'accuracy' column is hard-coded here rather than
        # taken from self.dep_var — confirm.
        args = [self.df.loc[self.df[factor] == x].accuracy
                for x in list_unique]
        temp[2, i + 1], temp[3, i + 1] = variance_test(*args)
    arrays[0][2] = arrays[0][2] + test_label
    arrays[0][3] = arrays[0][3] + test_label

    self.anova_assump_df = pd.DataFrame(temp, index=arrays, columns=index)
    if self.print_output == True:
        print(' ------------------\n', 'ANOVA assumptions',
              '\n ------------------')
        print(self.anova_assump_df, '\n')
    return
def ttest(ds1, ds2, p = 0.05):
    """Return the p-value of an independent t-test chosen via Levene's test.

    Student's t-test is used when Levene's p-value exceeds *p*; otherwise
    Welch's t-test (equal_var=False) is performed.
    """
    levene_outcome = sp.levene(ds1, ds2)
    variances_equal = levene_outcome[1] > p
    ttest_outcome = sp.ttest_ind(ds1, ds2, equal_var = variances_equal)
    return ttest_outcome[1]
def ANOVA_assumptions_test(R, N, H):
    """Run ANOVA on three groups if its assumptions hold, else Kruskal-Wallis.

    The assumptions are checked with Shapiro-Wilk on each group and Levene
    across the groups; all four p-values must exceed 0.05.

    Returns:
        Tuple ``(statistic, pvalue, test)`` with ``test`` being 'ANOVA' or
        'kruskal'.
    """
    # Normality of each group, then equality of variances.
    assumption_ps = [stats.shapiro(group).pvalue for group in (R, N, H)]
    assumption_ps.append(levene(R, N, H)[1])

    if (np.array(assumption_ps) > 0.05).all():
        # f_oneway takes the groups and returns F and the p-value.
        fvalue, pvalue = stats.f_oneway(R, N, H)
        return fvalue, pvalue, 'ANOVA'
    fvalue, pvalue = stats.kruskal(R, N, H)
    return fvalue, pvalue, 'kruskal'
def compute(self, model):
    """Score the model by Levene's test of its covariances vs. the experiment.

    The experimental cross-covariance is computed from ``sts_exp`` with a
    150 ms bin size; the resulting p-value is wrapped in a ``LeveneScore``,
    stored on ``self.score`` and returned.
    """
    Cexp = ad.cross_covariance(sts_exp, binsize = 150*ms)
    # Flatten both matrices before feeding them to the test.
    model_values = model.covar.ravel()
    experiment_values = Cexp.ravel()
    outcome = levene(model_values, experiment_values)
    self.score = LeveneScore(outcome.pvalue)
    return self.score
def checkParametricConditions(accuracies, alpha):
    """Report whether the conditions for a parametric test hold.

    Independence is assumed.  Normality is checked with Shapiro-Wilk on all
    of *accuracies*, equality of variances with Levene's test across the
    groups in *accuracies*; each holds when its p-value is at least *alpha*.

    Args:
        accuracies: sequence of groups of observations.
        alpha: significance level for both tests.

    Returns:
        True when independence, normality and homoscedasticity all hold.
    """
    print("Checking independence ")
    print("Ok")
    independence = True

    # The messages report the alpha actually used instead of a fixed 0.05.
    print("Checking normality using Shapiro-Wilk's test for normality, alpha=%g" % alpha)
    (W, p) = shapiro(accuracies)
    print("W: %f, p:%f" % (W, p))
    if p < alpha:
        print("The null hypothesis (normality) is rejected")
        normality = False
    else:
        print("The null hypothesis (normality) is accepted")
        normality = True

    # Levene's null hypothesis is equal variances (homoscedasticity).
    print("Checking homoscedasticity using Levene's test, alpha=%g" % alpha)
    (W, p) = levene(*accuracies)
    print("W: %f, p:%f" % (W, p))
    if p < alpha:
        print("The null hypothesis (homoscedasticity) is rejected")
        homoscedasticity = False
    else:
        print("The null hypothesis (homoscedasticity) is accepted")
        homoscedasticity = True

    parametric = independence and normality and homoscedasticity
    return parametric
def compute_anova(df, clusters):
    """Levene's test of the 'SMSin' column across the clusters of *df*.

    ``clusters['labels']`` is handed to ``set_cluster`` and its distinct
    values select the rows of each group via the '#Cluster' column.
    """
    labels = clusters['labels']
    set_cluster(df, labels)
    # print(stats.wilcoxon(list(df[df['#Cluster'] == '0']['SMSin'])))
    samples = []
    for cluster_id in set(labels.values()):
        samples.append(df[df['#Cluster'] == cluster_id]['SMSin'])
    return stats.levene(*samples)
def levene(data):
    """Test of equal variance across the groups in *data*.

    H0 = same variance.

    @W: the test statistic
    @pval: the p-value
    """
    outcome = st.levene(*data)
    return (outcome[0], outcome[1])
def levene_by_column(df, dummy):
    """Run Levene's test for equality of variances on each column of a
    DataFrame, comparing the observations with dummy == 0 against those
    with dummy == 1.

    Args:
        df (pd.DataFrame): The dataframe on which to perform the test.
        dummy (string): Name of the *df* column (e.g. "Treatment") holding
            the dummy variable (value 0 or 1). Rows with any other value
            are left out of both groups.

    Returns:
        pd.DataFrame: One row per tested column with the Levene's test
            statistic and p-value.
    """
    treated = df[df[dummy] == 1].drop(dummy, axis=1)
    control = df[df[dummy] == 0].drop(dummy, axis=1)
    tested_columns = treated.columns
    # Missing values are dropped per column and per group.
    levene_outcome = [
        stats.levene(control[name].dropna(), treated[name].dropna())
        for name in tested_columns
    ]
    return pd.DataFrame(levene_outcome, index=tested_columns,
                        columns=["test stat.", "p-value"])
def levenes_test(target, feature):
    '''
    Run Levene's Test on the target, grouped by a categorical feature

    PARAMETERS
    ----------
    target: {pandas.Series} the response variable
    feature: {pandas.Series} the categorical feature

    RETURNS
    -------
    results: {pandas.DataFrame} one-row dataframe with the columns
        'Statistic' and 'p-value'
    '''
    # One slice of the target per category, in order of first appearance.
    samples = [target[feature == category] for category in feature.unique()]
    outcome = levene(*samples)
    return pd.DataFrame({'Statistic': [outcome[0]], 'p-value': [outcome[1]]})
def variance_test(self, group_a, group_b):
    """Print Levene's statistic and p-value for two entries of ``self.df``."""
    print('-----Variance test--------------------------------------------------')
    outcome = levene(self.df[group_a], self.df[group_b])
    statistic, p_value = outcome[0], outcome[1]
    print('Statistic: {} and p-value: {} of variance comparison'.format(statistic, p_value))
    print('-----END Variance test----------------------------------------------\n')
def two_sample_ttest(df, x, y, val_1, val_2, W, H, plot=True):
    """Independent two-sample t-test of *y* between two values of *x*.

    Levene's test selects Student's (p >= 0.05) or Welch's t-test.  Prints
    the per-group means, the t-test p-value and a message saying whether the
    difference is significant (p <= 0.05).

    Args:
        df: input DataFrame.
        x: name of the grouping column.
        y: name of the value column.
        val_1, val_2: the two values of *x* to compare.
        W, H: figure width and height used when *plot* is true.
        plot: draw a violin plot and the two density estimates.

    Returns:
        DataFrame with the mean of *y* for each value of *x*.
    """
    # mean_compare_table
    mean_compare_table = df.groupby(x, as_index=False)[[y]].mean()
    print(mean_compare_table)

    a = df.loc[df[x] == val_1, y].tolist()
    b = df.loc[df[x] == val_2, y].tolist()

    if plot:
        # plot-1: x and y must be keywords, seaborn >= 0.12 accepts only
        # `data` positionally.
        plt.figure(figsize=(W, H))
        sns.violinplot(x=x, y=y, data=df)
        # plot-2
        # NOTE(review): `shade` is deprecated in newer seaborn in favour of
        # `fill`; kept for compatibility with the version this was written
        # for — confirm.
        plt.figure(figsize=(W, H))
        sns.kdeplot(a, shade=True, label=val_1)
        sns.kdeplot(b, shade=True, label=val_2)

    # T-test: standard when the variances look equal, Welch's otherwise.
    levene_test = levene(a, b)
    equal_var = bool(levene_test.pvalue >= 0.05)
    t_test = ttest_ind(a, b, equal_var=equal_var)
    p_value = t_test.pvalue

    # Conclusion
    print(p_value)
    if p_value <= 0.05:
        print("%s 在 %s 上存在显著性差异" % (y, x))
    else:
        print("%s 在 %s 上不存在显著性差异" % (y, x))
    return mean_compare_table
def tTest(data, checking, group, group1, group2, nameGroup1, nameGroup2, x, output):
    """Fill ``output[x]`` with a two-group comparison of column *checking*.

    Stores the variable name, per-group N / Mean / SD / SE (rounded to 2
    decimals) from ``rp.ttest``, the mean-centred Levene p-value (suffixed
    with "****" when below 0.05), and the formatted t-test p-value and
    Cohen effect size.
    """
    output[x]['Variable'] = checking
    sample1 = data[checking][data[group] == group1]
    sample2 = data[checking][data[group] == group2]
    leveneResult = stats.levene(sample1, sample2, center='mean')
    summary, results = rp.ttest(group1=sample1, group1_name=nameGroup1,
                                group2=sample2, group2_name=nameGroup2)

    # Row 0 of the summary belongs to group 1, row 1 to group 2.
    for statistic in ('N', 'Mean', 'SD', 'SE'):
        for row, group_name in ((0, nameGroup1), (1, nameGroup2)):
            output[x][group_name + ' ' + statistic] = round(
                summary.iloc[row][statistic], 2)

    levene_text = str(round(leveneResult.pvalue, 2))
    if leveneResult.pvalue < 0.05:
        levene_text = levene_text + "****"
    output[x]['Leneve Value'] = levene_text

    values = results.results
    output[x]["T-Test P Value"] = signifiant(float(values.loc[[3]]))
    output[x]["Cohen Effect Size"] = effectSize(float(values.loc[[6]]))
def test_equal_var():
    '''Run Levene's test on the data of ``self.d1`` and ``self.d2``.

    NOTE(review): ``self`` is not a parameter, so it has to come from an
    enclosing scope — confirm how this function is defined and called.
    '''
    #rewrite this, for now just use scipy.stats
    first, second = self.d1, self.d2
    return stats.levene(first.data, second.data)
def return_test_results(self, arr1, arr2):
    """Run the two-sample test chosen by ``self.statistics`` and describe it.

    "auto" picks the test from the data: Welch's t-test when Levene rejects
    equal variances, otherwise Student's t-test if both samples pass
    Shapiro-Wilk, otherwise Mann-Whitney U.  "student", "welch" and "mann"
    force the respective test.  For Mann-Whitney, samples that ``equal``
    reports as equal yield statistic None and p = 1 without running the
    test.

    Returns:
        ``[test_name, statistic, p_value, df, levene_p]`` where ``df`` is
        ``len(arr1) + len(arr2) - 2``.
    """
    def student():
        result = stats.ttest_ind(arr1, arr2)
        return "Student", result[0], result[1]

    def welch():
        # equal_var must be passed by keyword: the third positional
        # parameter of ttest_ind is `axis`, so a bare False silently ran
        # Student's t-test instead of Welch's.
        result = stats.ttest_ind(arr1, arr2, equal_var=False)
        return "Welch", result[0], result[1]

    def mann():
        if equal(arr1, arr2):
            return "Mann", None, 1
        result = stats.mannwhitneyu(arr1, arr2)
        return "Mann", result[0], result[1]

    test_name = ""
    p_value = 0
    t_value = 0
    levene = stats.levene(arr1, arr2)[1]

    if self.statistics == "auto":
        # Levene checks equality of variances.
        if levene > 0.05:
            # Shapiro-Wilk checks normality of both samples.
            if stats.shapiro(arr1)[1] > 0.05 and stats.shapiro(arr2)[1] > 0.05:
                test_name, t_value, p_value = student()
            else:
                test_name, t_value, p_value = mann()
        else:
            test_name, t_value, p_value = welch()
    elif self.statistics == "student":
        test_name, t_value, p_value = student()
    elif self.statistics == "welch":
        test_name, t_value, p_value = welch()
    elif self.statistics == "mann":
        test_name, t_value, p_value = mann()

    df = len(arr1) + len(arr2) - 2
    return [test_name, t_value, p_value, df, levene]
def anova_oneway():
    '''One-way ANOVA: test if results from 3 groups are equal.

    Twenty-two patients undergoing cardiac bypass surgery were randomized to
    one of three ventilation groups:

    Group I: Patients received 50% nitrous oxide and 50% oxygen mixture
        continuously for 24 h.
    Group II: Patients received a 50% nitrous oxide and 50% oxygen mixture
        only during the operation.
    Group III: Patients received no nitrous oxide but received 35-50% oxygen
        for 24 h.

    The data show red cell folate levels for the three groups after 24h'
    ventilation.

    Reads 'altman_910.txt' (comma separated: value, group) from the current
    directory, prints the results, and returns ``(F_statistic, pVal)`` of
    the one-way ANOVA.
    '''

    # Get the data
    print('One-way ANOVA: -----------------')
    inFile = 'altman_910.txt'
    data = np.genfromtxt(inFile, delimiter=',')

    # Sort them into groups, according to column 1
    group1 = data[data[:, 1] == 1, 0]
    group2 = data[data[:, 1] == 2, 0]
    group3 = data[data[:, 1] == 3, 0]

    # --- >>> START stats <<< ---
    # First, check if the variances are equal, with the "Levene"-test
    (W, p) = stats.levene(group1, group2, group3)
    if p < 0.05:
        print(('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p)))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)
    # --- >>> STOP stats <<< ---

    # Print the results
    print('Data form Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)

    # Check if the two results are equal. If they are, there is no output.
    # The ANOVA table is indexed by term name, so the first row is taken
    # positionally with .iloc (integer keys on a label index are deprecated).
    np.testing.assert_almost_equal(F_statistic, anovaResults['F'].iloc[0])

    return (F_statistic, pVal)  # should be (3.711335988266943, 0.043589334959179327)
def run(self): if len(self._data) < self._min_size: pass if len(self._data.groups.values()) <= 1: raise NoDataError("Equal variance test requires at least two numeric vectors.") if NormTest(self._data, display=False, alpha=self._alpha).p_value > self._alpha: statistic, p_value = bartlett(*self._data.groups.values()) r = 'Bartlett' self._results.update({'p value': p_value, self._statistic_name[r]: statistic, 'alpha': self._alpha}) else: statistic, p_value = levene(*self._data.groups.values()) r = 'Levene' self._results.update({'p value': p_value, self._statistic_name[r]: statistic, 'alpha': self._alpha}) self._test = r self._name = self._names[r]
def cep_ttest(sample_a, sample_b):
    '''Run an independent two-sample t-test, choosing Student or Welch.

    Sample A and Sample B are array-like data stores. They must offer
    ``mean()`` and ``count()`` (e.g. pandas Series).

    Levene's test (centred on the mean) decides which t-test is run: the
    standard test when equal variances are not rejected at SIG_LEVEL,
    Welch's test otherwise.

    The function returns a dictionary with the following entries:
    "test": "Standard" (equal variance), "Welch" (not equal variance) or
        "N/A" when Levene's test produced no p-value
    "pval": P-value of the test performed
    "verdict", "sign", "cohen": verdict text, significance marker and
        Cohen's d as returned by ``translate_result``
    "g1_n", "g2_n": response count in sample_a / sample_b
    "g1_mean", "g2_mean": mean of sample_a / sample_b

    When "test" is "N/A" no t-test can be chosen, so the dictionary holds
    only the "test" entry.
    '''
    # Construct a result_dict
    result_dict = {}

    # First, perform a Levene's test to determine whether the samples have equal variances
    equal_var_test = levene(sample_a, sample_b, center='mean')
    # The significance stat is the second element in the result tuple
    equal_var_test_sig = equal_var_test[1]

    # If there's no result, end the test here. Falling through would reach
    # ttest_ind with no equal_var decision made and crash.
    if pd.isnull(equal_var_test_sig):
        result_dict['test'] = 'N/A'
        return result_dict

    # Then, depending on the result, perform either a standard or a Welch's test
    if equal_var_test_sig >= SIG_LEVEL:
        equal_var_arg = True
        result_dict['test'] = 'Standard'
    else:
        equal_var_arg = False
        result_dict['test'] = 'Welch'

    ttest_result = ttest_ind(sample_a, sample_b, axis=0, equal_var=equal_var_arg)
    ttest_result_sig = ttest_result[1]
    result_dict['pval'] = ttest_result_sig

    # Translate result here
    mean_diff = sample_a.mean() - sample_b.mean()
    verdict, sign, cohens_d = translate_result(ttest_result_sig, mean_diff, sample_a, sample_b)
    result_dict['cohen'] = cohens_d
    result_dict['verdict'] = verdict
    result_dict['sign'] = sign
    result_dict['g1_n'] = sample_a.count()
    result_dict['g2_n'] = sample_b.count()
    result_dict['g1_mean'] = sample_a.mean()
    result_dict['g2_mean'] = sample_b.mean()

    return result_dict
def apply_test(data, group, test):
    '''Apply a two-sample test to every column of ``data``.

    For each column the rows with ``group == 0`` are tested against the rows
    with ``group == 1``.

    data - 2d data array (rows are observations, one test per column)
    group - group identity of each row (0 or 1)
    test - 'levene', or a callable taking the two samples and returning a
        ``(statistic, pvalue)`` pair

    Returns two arrays with one entry per column: statistics and p-values.
    Raises NotImplementedError for any other ``test``.
    '''
    n_samples = data.shape[1]

    if test == 'levene':
        test_func = stats.levene
    elif callable(test):
        test_func = test
    else:
        raise NotImplementedError('Only levene is implemented currently...')

    statistic = np.zeros(n_samples)
    p_value = np.zeros(n_samples)
    # The row masks do not depend on the column, so build them once.
    first_rows = group == 0
    second_rows = group == 1
    for t_ind in range(n_samples):
        statistic[t_ind], p_value[t_ind] = test_func(
            data[first_rows, t_ind], data[second_rows, t_ind])
    return statistic, p_value
def anova_oneway():
    '''One-way ANOVA: test if results from 3 groups are equal.

    Loads 'altman_910.txt' through ``getData``, checks the equal-variance
    assumption with Levene's test, runs the one-way ANOVA, cross-checks the
    F statistic against a statsmodels OLS fit, and prints the results.

    Returns ``(F_statistic, pVal)`` of the one-way ANOVA.
    '''

    # Get the data
    print('One-way ANOVA: -----------------')
    # Raw string: the backslashes are path separators, not escape sequences.
    data = getData('altman_910.txt', subDir=r'..\Data\data_altman')

    # Sort them into groups, according to column 1
    group1 = data[data[:, 1] == 1, 0]
    group2 = data[data[:, 1] == 2, 0]
    group3 = data[data[:, 1] == 3, 0]

    # First, check if the variances are equal, with the "Levene"-test
    (W, p) = stats.levene(group1, group2, group3)
    if p < 0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)

    # Print the results
    print('Data form Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)

    # Check if the two results are equal. If they are, there is no output.
    # The ANOVA table is indexed by term name, so the first row is taken
    # positionally with .iloc (integer keys on a label index are deprecated).
    np.testing.assert_almost_equal(F_statistic, anovaResults['F'].iloc[0])

    return (F_statistic, pVal)  # should be (3.711335988266943, 0.043589334959179327)
import os
def main():
    """Run one ``scipy.stats`` routine on every row of a tab-separated file.

    The command line selects the routine (``--test_id``), the 1-based
    columns that form the first and second sample (``--sample_one_cols``,
    ``--sample_two_cols``) or a list of samples (``--sample_cols``, samples
    separated by ``;``), plus the routine-specific options.  Every input
    row is written to ``--outfile`` with the results appended as additional
    tab-separated columns; a row is copied unchanged when ``--test_id``
    matches no known routine.

    Routines working on ``--sample_cols`` obtain their samples from
    ``columns_to_values``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", required=True, help="Tabular file.")
    parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.")
    parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi")
    parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;")
    parser.add_argument("--test_id", help="statistical test method")
    parser.add_argument(
        "--mwu_use_continuity",
        action="store_true",
        default=False,
        help="Whether a continuity correction (1/2.) should be taken into account.",
    )
    parser.add_argument(
        "--equal_var",
        action="store_true",
        default=False,
        help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.",
    )
    parser.add_argument(
        "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values."
    )
    parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used")
    parser.add_argument(
        "--bias",
        action="store_true",
        default=False,
        help="if false,then the calculations are corrected for statistical bias",
    )
    parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored")
    parser.add_argument(
        "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored"
    )
    parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored")
    parser.add_argument(
        "--printextras",
        action="store_true",
        default=False,
        help="If True, if there are extra points a warning is raised saying how many of those points there are",
    )
    # The default used to be the string "False", which is truthy and made
    # the flag look permanently set.
    parser.add_argument(
        "--initial_lexsort",
        action="store_true",
        default=False,
        help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.",
    )
    parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ")
    parser.add_argument(
        "--axis",
        type=int,
        default=0,
        help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)",
    )
    parser.add_argument(
        "--n",
        type=int,
        default=0,
        help="the number of trials. This is ignored if x gives both the number of successes and failures",
    )
    parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram")
    parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction")
    parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.")
    parser.add_argument("--m", type=float, default=0.0, help="limits")
    parser.add_argument("--mf", type=float, default=2.0, help="lower limit")
    parser.add_argument("--nf", type=float, default=99.9, help="higher_limit")
    parser.add_argument(
        "--p",
        type=float,
        default=0.5,
        help="The hypothesized probability of success. 0 <= p <= 1. The default value is p = 0.5",
    )
    parser.add_argument("--alpha", type=float, default=0.9, help="probability")
    parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds")
    parser.add_argument(
        "--proportiontocut",
        type=float,
        default=0.0,
        help="Proportion (in range 0-1) of total data set to trim of each end.",
    )
    parser.add_argument(
        "--lambda_",
        type=float,
        default=1.0,
        help="lambda_ gives the power in the Cressie-Read power divergence statistic",
    )
    parser.add_argument(
        "--imbda",
        type=float,
        default=0,
        help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.",
    )
    parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e")
    parser.add_argument("--dtype", help="dtype")
    parser.add_argument("--med", help="med")
    parser.add_argument("--cdf", help="cdf")
    parser.add_argument("--zero_method", help="zero_method options")
    parser.add_argument("--dist", help="dist options")
    parser.add_argument("--ties", help="ties options")
    parser.add_argument("--alternative", help="alternative options")
    parser.add_argument("--mode", help="mode options")
    parser.add_argument("--method", help="method options")
    parser.add_argument("--md", help="md options")
    parser.add_argument("--center", help="center options")
    parser.add_argument("--kind", help="kind options")
    parser.add_argument("--tail", help="tail options")
    parser.add_argument("--interpolation", help="interpolation options")
    parser.add_argument("--statistic", help="statistic options")
    args = parser.parse_args()

    def floats(values):
        """Return *values* as a concrete list of floats.

        A list (not a lazy ``map`` object) is required so that scipy
        receives real data on Python 3 as well.
        """
        return [float(value) for value in values]

    infile = args.infile
    test_id = args.test_id
    nf = args.nf
    mf = args.mf
    imbda = args.imbda
    inclusive1 = args.inclusive1
    inclusive2 = args.inclusive2
    limits = (mf, nf)
    inclusive_flags = (inclusive1, inclusive2)
    # Numeric comparisons: ``is 0`` tests object identity and is never true
    # for the float values produced by argparse.
    no_limits = nf == 0 and mf == 0

    # Column lists are materialised once so they can be reused for every row.
    barlett_samples = None
    if args.sample_cols is not None:
        barlett_samples = [
            [int(column) for column in sample.split(",")] for sample in args.sample_cols.split(";")
        ]
    sample_one_cols = None
    if args.sample_one_cols is not None:
        sample_one_cols = args.sample_one_cols.split(",")
    sample_two_cols = None
    if args.sample_two_cols is not None:
        sample_two_cols = args.sample_two_cols.split(",")
    has_sample_two = sample_two_cols is not None

    with open(args.outfile, "w+") as outfile, open(infile) as rows:
        for line in rows:
            sample_one = []
            sample_two = []
            cols = line.strip().split("\t")
            if barlett_samples is not None:
                b_samples = columns_to_values(barlett_samples, line)
            if sample_one_cols is not None:
                sample_one = [cols[int(index) - 1] for index in sample_one_cols]
            if sample_two_cols is not None:
                sample_two = [cols[int(index) - 1] for index in sample_two_cols]

            test = test_id.strip()
            if test == "describe":
                size, min_max, mean, uv, bs, bk = stats.describe(floats(sample_one))
                cols.extend([size, min_max, mean, uv, bs, bk])
            elif test == "mode":
                vals, counts = stats.mode(floats(sample_one))
                cols.extend([vals, counts])
            elif test == "nanmean":
                cols.append(stats.nanmean(floats(sample_one)))
            elif test == "nanmedian":
                cols.append(stats.nanmedian(floats(sample_one)))
            elif test == "kurtosistest":
                z_value, p_value = stats.kurtosistest(floats(sample_one))
                cols.extend([z_value, p_value])
            elif test == "variation":
                cols.append(stats.variation(floats(sample_one)))
            elif test == "itemfreq":
                freq = stats.itemfreq(floats(sample_one))
                for item in freq:
                    cols.append(",".join(map(str, item)))
            elif test == "boxcox_llf":
                cols.append(stats.boxcox_llf(imbda, floats(sample_one)))
            elif test == "tiecorrect":
                cols.append(stats.tiecorrect(floats(sample_one)))
            elif test == "rankdata":
                cols.append(stats.rankdata(floats(sample_one), method=args.md))
            elif test == "nanstd":
                cols.append(stats.nanstd(floats(sample_one), bias=args.bias))
            elif test == "anderson":
                A2, critical, sig = stats.anderson(floats(sample_one), dist=args.dist)
                cols.append(A2)
                cols.extend(critical)
                # A lone comma separates the critical values from the
                # significance levels in the output row.
                cols.append(",")
                cols.extend(sig)
            elif test == "binom_test":
                cols.append(stats.binom_test(floats(sample_one), n=args.n, p=args.p))
            elif test == "gmean":
                cols.append(stats.gmean(floats(sample_one), dtype=args.dtype))
            elif test == "hmean":
                cols.append(stats.hmean(floats(sample_one), dtype=args.dtype))
            elif test == "kurtosis":
                cols.append(
                    stats.kurtosis(floats(sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias)
                )
            elif test == "moment":
                cols.append(stats.moment(floats(sample_one), n=args.n))
            elif test == "normaltest":
                k2, p_value = stats.normaltest(floats(sample_one))
                cols.extend([k2, p_value])
            elif test == "skew":
                cols.append(stats.skew(floats(sample_one), bias=args.bias))
            elif test == "skewtest":
                z_value, p_value = stats.skewtest(floats(sample_one))
                cols.extend([z_value, p_value])
            elif test == "sem":
                cols.append(stats.sem(floats(sample_one), ddof=args.ddof))
            elif test == "zscore":
                cols.extend(stats.zscore(floats(sample_one), ddof=args.ddof))
            elif test == "signaltonoise":
                cols.append(stats.signaltonoise(floats(sample_one), ddof=args.ddof))
            elif test == "percentileofscore":
                cols.append(stats.percentileofscore(floats(sample_one), score=args.score, kind=args.kind))
            elif test == "bayes_mvs":
                c_mean, c_var, c_std = stats.bayes_mvs(floats(sample_one), alpha=args.alpha)
                cols.extend([c_mean, c_var, c_std])
            elif test == "sigmaclip":
                c, c_low, c_up = stats.sigmaclip(floats(sample_one), low=args.m, high=args.n)
                cols.extend([c, c_low, c_up])
            elif test == "kstest":
                d, p_value = stats.kstest(
                    floats(sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode
                )
                cols.extend([d, p_value])
            elif test == "chi2_contingency":
                chi2, p, dof, ex = stats.chi2_contingency(
                    floats(sample_one), correction=args.correction, lambda_=args.lambda_
                )
                cols.extend([chi2, p, dof, ex])
            elif test == "tmean":
                if no_limits:
                    mean = stats.tmean(floats(sample_one))
                else:
                    mean = stats.tmean(floats(sample_one), limits, inclusive_flags)
                cols.append(mean)
            elif test == "tmin":
                if mf == 0:
                    minimum = stats.tmin(floats(sample_one))
                else:
                    minimum = stats.tmin(floats(sample_one), lowerlimit=mf, inclusive=args.inclusive)
                cols.append(minimum)
            elif test == "tmax":
                if nf == 0:
                    maximum = stats.tmax(floats(sample_one))
                else:
                    maximum = stats.tmax(floats(sample_one), upperlimit=nf, inclusive=args.inclusive)
                cols.append(maximum)
            elif test == "tvar":
                if no_limits:
                    var = stats.tvar(floats(sample_one))
                else:
                    var = stats.tvar(floats(sample_one), limits, inclusive_flags)
                cols.append(var)
            elif test == "tstd":
                if no_limits:
                    std = stats.tstd(floats(sample_one))
                else:
                    std = stats.tstd(floats(sample_one), limits, inclusive_flags)
                cols.append(std)
            elif test == "tsem":
                if no_limits:
                    s = stats.tsem(floats(sample_one))
                else:
                    s = stats.tsem(floats(sample_one), limits, inclusive_flags)
                cols.append(s)
            elif test == "scoreatpercentile":
                if no_limits:
                    s = stats.scoreatpercentile(
                        floats(sample_one), floats(sample_two), interpolation_method=args.interpolation
                    )
                else:
                    s = stats.scoreatpercentile(
                        floats(sample_one), floats(sample_two), limits, interpolation_method=args.interpolation
                    )
                cols.extend(s)
            elif test == "relfreq":
                if no_limits:
                    rel, low_range, binsize, ex = stats.relfreq(floats(sample_one), args.b)
                else:
                    rel, low_range, binsize, ex = stats.relfreq(floats(sample_one), args.b, limits)
                cols.extend(rel)
                cols.extend([low_range, binsize, ex])
            elif test == "binned_statistic":
                if no_limits:
                    st, b_edge, b_n = stats.binned_statistic(
                        floats(sample_one), floats(sample_two), statistic=args.statistic, bins=args.b
                    )
                else:
                    st, b_edge, b_n = stats.binned_statistic(
                        floats(sample_one),
                        floats(sample_two),
                        statistic=args.statistic,
                        bins=args.b,
                        range=limits,
                    )
                cols.extend([st, b_edge, b_n])
            elif test == "threshold":
                if no_limits:
                    o = stats.threshold(floats(sample_one), newval=args.new)
                else:
                    o = stats.threshold(floats(sample_one), mf, nf, newval=args.new)
                cols.extend(o)
            elif test == "trimboth":
                cols.extend(stats.trimboth(floats(sample_one), proportiontocut=args.proportiontocut))
            elif test == "trim1":
                cols.extend(
                    stats.trim1(floats(sample_one), proportiontocut=args.proportiontocut, tail=args.tail)
                )
            elif test == "histogram":
                if no_limits:
                    hi, low_range, binsize, ex = stats.histogram(floats(sample_one), args.b)
                else:
                    hi, low_range, binsize, ex = stats.histogram(floats(sample_one), args.b, limits)
                cols.extend([hi, low_range, binsize, ex])
            elif test == "cumfreq":
                if no_limits:
                    cum, low_range, binsize, ex = stats.cumfreq(floats(sample_one), args.b)
                else:
                    cum, low_range, binsize, ex = stats.cumfreq(floats(sample_one), args.b, limits)
                cols.extend([cum, low_range, binsize, ex])
            elif test == "boxcox_normmax":
                if no_limits:
                    ma = stats.boxcox_normmax(floats(sample_one))
                else:
                    ma = stats.boxcox_normmax(floats(sample_one), limits, method=args.method)
                cols.append(ma)
            elif test == "boxcox":
                if imbda == 0:
                    box, ma, ci = stats.boxcox(floats(sample_one), alpha=args.alpha)
                    cols.extend([box, ma, ci])
                else:
                    box = stats.boxcox(floats(sample_one), imbda, alpha=args.alpha)
                    cols.append(box)
            elif test == "histogram2":
                cols.extend(stats.histogram2(floats(sample_one), floats(sample_two)))
            elif test == "ranksums":
                z_statistic, p_value = stats.ranksums(floats(sample_one), floats(sample_two))
                cols.extend([z_statistic, p_value])
            elif test == "ttest_1samp":
                t, prob = stats.ttest_1samp(floats(sample_one), floats(sample_two))
                cols.extend(t)
                cols.extend(prob)
            elif test == "ansari":
                AB, p_value = stats.ansari(floats(sample_one), floats(sample_two))
                cols.extend([AB, p_value])
            elif test == "linregress":
                slope, intercept, r_value, p_value, stderr = stats.linregress(
                    floats(sample_one), floats(sample_two)
                )
                cols.extend([slope, intercept, r_value, p_value, stderr])
            elif test == "pearsonr":
                cor, p_value = stats.pearsonr(floats(sample_one), floats(sample_two))
                cols.extend([cor, p_value])
            elif test == "pointbiserialr":
                r, p_value = stats.pointbiserialr(floats(sample_one), floats(sample_two))
                cols.extend([r, p_value])
            elif test == "ks_2samp":
                d, p_value = stats.ks_2samp(floats(sample_one), floats(sample_two))
                cols.extend([d, p_value])
            elif test == "mannwhitneyu":
                mw_stats_u, p_value = stats.mannwhitneyu(
                    floats(sample_one), floats(sample_two), use_continuity=args.mwu_use_continuity
                )
                cols.extend([mw_stats_u, p_value])
            elif test == "zmap":
                cols.extend(stats.zmap(floats(sample_one), floats(sample_two), ddof=args.ddof))
            elif test == "ttest_ind":
                mw_stats_u, p_value = stats.ttest_ind(
                    floats(sample_one), floats(sample_two), equal_var=args.equal_var
                )
                cols.extend([mw_stats_u, p_value])
            elif test == "ttest_rel":
                t, prob = stats.ttest_rel(floats(sample_one), floats(sample_two), axis=args.axis)
                cols.extend([t, prob])
            elif test == "mood":
                z, p_value = stats.mood(floats(sample_one), floats(sample_two), axis=args.axis)
                cols.extend([z, p_value])
            elif test == "shapiro":
                W, p_value, a = stats.shapiro(floats(sample_one), floats(sample_two), args.reta)
                cols.extend([W, p_value])
                cols.extend(a)
            elif test == "kendalltau":
                k, p_value = stats.kendalltau(
                    floats(sample_one), floats(sample_two), initial_lexsort=args.initial_lexsort
                )
                cols.extend([k, p_value])
            elif test == "entropy":
                cols.append(stats.entropy(floats(sample_one), floats(sample_two), base=args.base))
            elif test == "spearmanr":
                if has_sample_two:
                    rho, p_value = stats.spearmanr(floats(sample_one), floats(sample_two))
                else:
                    rho, p_value = stats.spearmanr(floats(sample_one))
                cols.extend([rho, p_value])
            elif test == "wilcoxon":
                if has_sample_two:
                    T, p_value = stats.wilcoxon(
                        floats(sample_one),
                        floats(sample_two),
                        zero_method=args.zero_method,
                        correction=args.correction,
                    )
                else:
                    T, p_value = stats.wilcoxon(
                        floats(sample_one), zero_method=args.zero_method, correction=args.correction
                    )
                cols.extend([T, p_value])
            elif test == "chisquare":
                if has_sample_two:
                    rho, p_value = stats.chisquare(floats(sample_one), floats(sample_two), ddof=args.ddof)
                else:
                    rho, p_value = stats.chisquare(floats(sample_one), ddof=args.ddof)
                cols.extend([rho, p_value])
            elif test == "power_divergence":
                if has_sample_two:
                    stat, p_value = stats.power_divergence(
                        floats(sample_one), floats(sample_two), ddof=args.ddof, lambda_=args.lambda_
                    )
                else:
                    stat, p_value = stats.power_divergence(
                        floats(sample_one), ddof=args.ddof, lambda_=args.lambda_
                    )
                cols.extend([stat, p_value])
            elif test == "theilslopes":
                if has_sample_two:
                    mpe, met, lo, up = stats.theilslopes(
                        floats(sample_one), floats(sample_two), alpha=args.alpha
                    )
                else:
                    mpe, met, lo, up = stats.theilslopes(floats(sample_one), alpha=args.alpha)
                cols.extend([mpe, met, lo, up])
            elif test == "combine_pvalues":
                if has_sample_two:
                    stat, p_value = stats.combine_pvalues(
                        floats(sample_one), method=args.med, weights=floats(sample_two)
                    )
                else:
                    stat, p_value = stats.combine_pvalues(floats(sample_one), method=args.med)
                cols.extend([stat, p_value])
            elif test == "obrientransform":
                ob = stats.obrientransform(*b_samples)
                for item in ob:
                    cols.append(",".join(map(str, item)))
            elif test == "f_oneway":
                f_value, p_value = stats.f_oneway(*b_samples)
                cols.extend([f_value, p_value])
            elif test == "kruskal":
                h, p_value = stats.kruskal(*b_samples)
                cols.extend([h, p_value])
            elif test == "friedmanchisquare":
                fr, p_value = stats.friedmanchisquare(*b_samples)
                cols.extend([fr, p_value])
            elif test == "fligner":
                xsq, p_value = stats.fligner(
                    *b_samples, center=args.center, proportiontocut=args.proportiontocut
                )
                cols.extend([xsq, p_value])
            elif test == "bartlett":
                T, p_value = stats.bartlett(*b_samples)
                cols.extend([T, p_value])
            elif test == "levene":
                w, p_value = stats.levene(
                    *b_samples, center=args.center, proportiontocut=args.proportiontocut
                )
                cols.extend([w, p_value])
            elif test == "median_test":
                stat, p_value, m, table = stats.median_test(
                    *b_samples, ties=args.ties, correction=args.correction, lambda_=args.lambda_
                )
                cols.extend([stat, p_value, m, table])
                for item in table:
                    cols.append(",".join(map(str, item)))

            outfile.write("%s\n" % "\t".join(map(str, cols)))
ejecucion['medRHV'] = numpy.mean(RHVs) ejecucion['stdRHV'] = numpy.std(RHVs) ejecucion['ksRHVpval'] = stats.kstest(RHVs,'norm', args=(ejecucion['medRHV'],ejecucion['stdRHV'])).pvalue if ejecucion['stdRHV'] else 0 ejecucion['shapiroRHVpval'] = stats.shapiro(RHVs)[1] if ejecucion['stdRHV'] else 0 ejecucion['medGD'] = numpy.mean(gds) ejecucion['stdGD'] = numpy.std(gds) ejecucion['ksGDpval'] = stats.kstest(gds,'norm', args=(ejecucion['medGD'],ejecucion['stdGD'])).pvalue if ejecucion['stdGD'] else 0 ejecucion['shapiroGDpval'] = stats.shapiro(gds)[1] if ejecucion['stdGD'] else 0 if RHVsAnteriores: ejecucion["tstudentRHV"] = stats.ttest_ind(RHVsAnteriores, RHVs, equal_var=False).pvalue ejecucion["leveneRHV"] = stats.levene(RHVsAnteriores, RHVs).pvalue else: ejecucion["tstudentRHV"] = "-" ejecucion["leveneRHV"] = "-" if gdsAnteriores: ejecucion["tstudentGD"] = stats.ttest_ind(gdsAnteriores, gds, equal_var=False).pvalue ejecucion["leveneGD"] = stats.levene(gdsAnteriores, gds).pvalue else: ejecucion["tstudentGD"] = "-" ejecucion["leveneGD"] = "-" RHVsAnteriores = RHVs gdsAnteriores = gds with open(PATH_JSON_EJECUCION,'w') as f:
def levene(xy):
    """Run Levene's test for equal variances on a pair of samples.

    ``xy`` is a two-item sequence ``(x, y)`` holding the two samples; the
    result of ``scipy.stats.levene(x, y)`` is returned.  Taking the pair as
    one argument lets the function be mapped over a sequence of pairs.
    """
    # Unpack in the body: tuple parameters in the signature are a syntax
    # error on Python 3.
    x, y = xy
    return stats.levene(x, y)
def test_result_attributes(self):
    # The Levene result must expose its fields under the names
    # 'statistic' and 'pvalue'.
    expected_fields = ('statistic', 'pvalue')
    result = stats.levene(g1, g2, g3, g4, g5, g6, g7, g8, g9, g10)
    check_named_results(result, expected_fields)
#hfmt = dates.DateFormatter('%H:%M')
#ax.xaxis.set_major_formatter(hfmt)
# y_formatter = mpl.ticker.ScalarFormatter(useOffset=False)
# ax.yaxis.set_major_formatter(y_formatter)
# ax.grid(True)
f.suptitle("Dichte der Leistungsgradienten")
f.autofmt_xdate()
plt.savefig("images/sonnenfinsternis-dichte-gradienten.png")#, bbox_inches='tight')

# Start a fresh figure for the ECDF comparison of the two data sets.
plt.clf()
friday_series, friday_vals = ecdf.get_ecdf(friday_momentum_df.momentum)
ecdf.plot_ecdf_curve(friday_series, friday_vals, color="b",
                     label="Typischer Freitag")
eclipse_series, eclipse_vals = ecdf.get_ecdf(eclipse_momentum_df.momentum)
ecdf.plot_ecdf_curve(eclipse_series, eclipse_vals, color="r",
                     label="Sonnenfinsternis")

# NOTE(review): the label says "Mittelwert" (mean) but the median is printed.
print("Mittelwert alle Freitage: %f" % np.median(friday_momentum_df.momentum))
print("Mittelwert Sonnenfinsternis: %f" % np.median(eclipse_momentum_df.momentum))

# http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.levene.html#scipy.stats.levene
# The Levene result gets its own names so the Fligner test below cannot
# overwrite the p-value reported in the figure title.
levene_W, levene_p = stats.levene(friday_momentum_df.momentum,
                                  eclipse_momentum_df.momentum, center='median')
# A small p-value rejects the hypothesis of equal variances.
print("Levenes Test auf Gleichheit der Varianz: P=%s (ungleiche Varianz für p<=0.05)" % levene_p)
W, p_val = stats.fligner(friday_momentum_df.momentum,
                         eclipse_momentum_df.momentum)
print("Fliegners Test auf Gleichheit der Varianz: P=%s" % p_val)

f.suptitle("ECDF der Leistungsgradienten: Ungleiche Varianzen (Levene, p=%f)" % levene_p)
plt.savefig("images/sonnenfinsternis-ecdf-gradienten.png")#, bbox_inches='tight')
def test_data(self):
    # Levene's test over the ten reference samples must reproduce the
    # stored statistic and p-value to seven decimals.
    result = stats.levene(g1, g2, g3, g4, g5, g6, g7, g8, g9, g10)
    W, pval = result
    assert_almost_equal(W, 1.7059176930008939, 7)
    assert_almost_equal(pval, 0.0990829755522, 7)
#Report some descriptive statistics print stroop.describe() #visualizations (with seaborn style) import matplotlib.pyplot as plot import seaborn as sns # Do the boxplot plot.show(sns.boxplot(stroop)) # Do the violinplot plot.show(sns.violinplot(stroop, widths = 0.5)) """ # Do the distribution plot sns.distplot(stroop['Congruent'],kde=False, color= "b") """ from scipy.stats import levene print levene(stroop.Congruent, stroop.Incongruent) from scipy.stats import kstest print 'ks_con', kstest(stroop.Congruent, 'norm') print 'ks_inc', kstest(stroop.Incongruent, 'norm') from scipy.stats import ks_2samp ks_2samp(stroop.Congruent, stroop.Incongruent) # Do the t-test import scipy.stats as ss print ss.ttest_ind(stroop.Congruent, stroop.Incongruent)
def f_test(self, test_series):
    """Test ``self.series`` and ``test_series`` for equal variances.

    Despite the method name, the test performed is Levene's test; the
    result of ``scipy.stats.levene`` is returned unchanged.
    """
    # print(stats.bartlett(self.series,test_series))
    # return stats.f.sf(F, df1, df2)
    outcome = stats.levene(self.series, test_series)
    return outcome
def main():
    """Report statistics for the validation results of one instance.

    For every variant in ``VARIANTS`` the per-seed minimum treewidths of
    ``instance`` are read from the ``validationresults`` table of the SQLite
    database ``db_fn``.  Normality, equal-variance, equal-mean and pairwise
    test results are printed to stderr, and a LaTeX table with the best and
    average treewidth per variant is written to
    ``validation-validationset-<instance>-results.tex``.

    ``db_fn``, ``instance`` and ``VARIANTS`` are read from the enclosing
    module scope.
    """
    def n_digits(num):
        # Number of integer digits used to size the LaTeX table columns.
        # NOTE(review): for exact powers of ten (10, 100, ...) the formula
        # yields one less than the digit count - confirm this is acceptable.
        if num <= 1:
            return 1
        return math.ceil(math.log(num) / math.log(10))

    db = sqlite.connect(db_fn)
    dbc = db.cursor()
    rows = []
    # Widest integer part seen per table column (filled in the loop below).
    integer_digits = {'best': 0, 'best_time': 0, 'mean': 0, 'stddev': 0}
    allvals = []
    allvals_dict = {}
    for variant in VARIANTS:
        # One value per seed: the smallest treewidth that seed reached.
        # The query text is built with % formatting from module-level values.
        query = ("select tw from (select min(treewidth) as tw from validationresults where variant='%(variant)s' and instance='%(instance)s' group by seed)")
        result = dbc.execute(query % {'variant': variant, 'instance': instance})
        vals = NP.array([row[0] for row in result])
        # Note: ``min`` shadows the builtin for the rest of this function.
        min, mean, stddev = vals.min(), vals.mean(), vals.std()
        # print('%s: vals=%r' % (variant, vals), file=sys.stderr)
        W, p = STATS.shapiro(vals)
        # The yes/no verdicts compare W against hard-coded critical values.
        print('%s: normal distribution? shapiro-wilk: W=%s (p=%s) %s@5%% %s@2%%' % (variant, W, p, 'no' if W <= .905 else 'yes', 'no' if W <= .884 else 'yes'), file=sys.stderr)
        z, p = STATS.skewtest(vals)
        # NOTE(review): the threshold here is .5, not .05 - confirm.
        print('%s: normal distribution? skew test: (z=%s) p=%s => %s' % (variant, z, p, 'no' if p < .5 else 'yes'), file=sys.stderr)
        allvals.append(vals)
        allvals_dict[variant] = vals
        # Shortest runtime among the runs that reached the best treewidth.
        query = ("select min(runtime_s)"
                 " from validationresults"
                 " where variant='%(variant)s' and instance='%(instance)s' and treewidth='%(treewidth)s'")
        result = dbc.execute(query % {'variant': variant, 'instance': instance, 'treewidth': min})
        best_time = [row[0] for row in result][0]
        # print("%s: best=%s @ %ss, avg=%s +- %s" % (variant, min, best_time, mean, stddev), file=sys.stderr)
        row = {'variant': variant, 'best': min, 'best_time': round(best_time, 1), 'mean': round(mean, 1), 'stddev': round(stddev, 1)}
        rows.append(row)
        integer_digits['best'] = max(integer_digits['best'], n_digits(row['best']))
        integer_digits['best_time'] = max(integer_digits['best_time'], n_digits(row['best_time']))
        integer_digits['mean'] = max(integer_digits['mean'], n_digits(row['mean']))
        integer_digits['stddev'] = max(integer_digits['stddev'], n_digits(row['stddev']))
    db.close()

    # Equal-variance tests across all variants.
    T, p = STATS.bartlett(*allvals)
    print('equal variances? bartlett: T=%s (p=%s) [vs Chi-Quadrat_{k-1=%s, alpha=.5}]' % (T, p, len(allvals) - 1), file=sys.stderr)
    W, p = STATS.levene(*allvals, center='mean')
    print('equal variances? levene (mean): (W=%s) p=%s' % (W, p), file=sys.stderr)
    W, p = STATS.levene(*allvals, center='median')
    print('equal variances? levene (median): (W=%s) p=%s' % (W, p), file=sys.stderr)

    # Equal-mean tests across all variants.
    F, p = STATS.f_oneway(*allvals)
    print('equal means? one-way ANOVA: F=%s, p=%s [vs F_{k-1=%s,n-k=%s}]' % (F, p, len(allvals) - 1, sum([len(x) for x in allvals]) - len(allvals)), file=sys.stderr)
    try:
        W, p = STATS.kruskal(*allvals)
        print('equal means? kruskal wallis: W=%s, p=%s' % (W, p), file=sys.stderr)
    except Exception as e:
        # Unlike the other diagnostics, this message goes to stdout.
        print(e)
    lsd = LSD.LSD(allvals, .05)
    print('LSD: %r' % lsd, file=sys.stderr)
    # The group labels assume exactly 20 values per variant.
    print(statsmodels.stats.multicomp.pairwise_tukeyhsd(NP.array(allvals).ravel(), NP.array([[x] * 20 for x in VARIANTS]).ravel(), alpha=.10), file=sys.stderr)
    print(statsmodels.stats.multicomp.pairwise_tukeyhsd(NP.array(allvals).ravel(), NP.array([[x] * 20 for x in VARIANTS]).ravel(), alpha=.05), file=sys.stderr)

    def welch(var1, var2):
        # Welch's t-test between two variants; 'NE' marks a p-value below
        # the hard-coded threshold for the 10% / 5% level.
        # presumably the thresholds are corrected for the nine pairwise
        # comparisons below - TODO confirm
        res = STATS.ttest_ind(allvals_dict[var1], allvals_dict[var2], equal_var=False)
        print('%4s vs %s t,p=%r => \t%s @a=10%%, %s @a=5%%' % (var1, var2, res, 'NE' if res[1] < .01116 else ' E', 'NE' if res[1] < .00568 else ' E'), file=sys.stderr)
    print('pairwise Welch\'s t-test with Bonferroni correction:', file=sys.stderr)
    welch('IHA', 'MA1')
    welch('IHA', 'MA2')
    welch('IHA', 'MA3')
    welch('GAtw', 'MA1')
    welch('GAtw', 'MA2')
    welch('GAtw', 'MA3')
    welch('MA1', 'MA2')
    welch('MA1', 'MA3')
    welch('MA2', 'MA3')

    def mannwhitneyu(var1, var2):
        # Same report as welch() but with the Mann-Whitney U test; a failing
        # test is reported on stdout instead of aborting the run.
        try:
            res = STATS.mannwhitneyu(allvals_dict[var1], allvals_dict[var2])
            print('%4s vs %s u,p=%r => \t%s @a=10%%, %s @a=5%%' % (var1, var2, res, 'NE' if res[1] < .01116 else ' E', 'NE' if res[1] < .00568 else ' E'), file=sys.stderr)
        except Exception as e:
            print('%4s vs %s failed: %r' % (var1, var2, e))
    print('pairwise Mann-Whitney U test with Bonferroni correction:', file=sys.stderr)
    mannwhitneyu('IHA', 'MA1')
    mannwhitneyu('IHA', 'MA2')
    mannwhitneyu('IHA', 'MA3')
    mannwhitneyu('GAtw', 'MA1')
    mannwhitneyu('GAtw', 'MA2')
    mannwhitneyu('GAtw', 'MA3')
    mannwhitneyu('MA1', 'MA2')
    mannwhitneyu('MA1', 'MA3')
    mannwhitneyu('MA2', 'MA3')

    # Table header: the adjacent literals form one template, which is filled
    # with the column widths and the escaped instance name.
    #latex = [r'\begin{sidefigure}{caption={Results for instance \Instance{%(instanceTexEsc)s}},label={fig:%(instanceFileEsc)s-results},place={htbp}}''\n'
    #r' \begin{center}''\n'
    latex = [r'\begin{table}[hbtp]''\n'
             r' \caption{Results for instance \Instance{%(instanceTexEsc)s}}''\n'
             r' \label{fig:%(instanceFileEsc)s-results}''\n'
             r' \centering\small''\n'
             r' \begin{tabular}{l S[table-format=%(best)s] S[table-format=%(best_time)s.1]%%''\n'
             r' S[table-format=%(mean)s.1,table-number-alignment=right] @{$\,\pm\,$} S[table-format=%(stddev)s.1,table-number-alignment=left]''\n'
             r' S[table-format=2]} \toprule''\n'
             r' & \multicolumn{2}{c}{\header{Best}} & \multicolumn{2}{c}{\header{Average}} & \\ \cmidrule(lr){2-3}\cmidrule(lr){4-5}''\n'
             r' & \header{treewidth} & \header{seconds} & \multicolumn{2}{c}{\header{treewidth}} & \header{samples} \\ \midrule'
             % dict(integer_digits.items() | dict(instanceTexEsc=instance.replace('_', r'\textunderscore{}'), instanceFileEsc=instance.replace('_', '-')).items())]
    # One table row per variant; the sample count is the literal "20".
    for row in rows:
        latex.append(' ' * (3 * 3) + ' & '.join([row['variant'], str(row['best']), str(row['best_time']), str(row['mean']), str(row['stddev']), "20"]) + r'\\')
    latex.append(r' \bottomrule''\n'
                 r' \end{tabular}''\n'
                 r'\end{table}')
    #r' \end{center}''\n'
    #r'\end{sidefigure}')
    with open('validation-validationset-%s-results.tex' % instance.replace('_', '-'), 'w') as f:
        print('\n'.join(latex), file=f)
def get_levene(group1, group2):
    """Run Levene's test on two groups.

    Returns ``(p_value, statistic)`` - note that the p-value comes first,
    the reverse of the order used by ``scipy.stats.levene``.
    """
    outcome = levene(group1, group2)
    return (outcome[1], outcome[0])
# Straight-line model, fitted per fiber to the static firing rate as a
# function of displacement and of force; the slope ``a`` is the sensitivity.
mod = Model(lambda x, a, b: a * x + b)
for fiber in fiber_list:
    slope_displ = mod.fit(fiber.binned_exp['static_fr_mean'],
                          x=fiber.binned_exp['displ_mean'],
                          a=1, b=1).best_values['a']
    slope_force = mod.fit(fiber.binned_exp['static_fr_mean'],
                          x=fiber.binned_exp['force_mean'],
                          a=1, b=1).best_values['a']
    slope_displ_list.append(slope_displ)
    slope_force_list.append(slope_force)
slope_displ_arr = np.array(slope_displ_list)
slope_force_arr = np.array(slope_force_list)

# One row per fiber ('#1', '#2', ...), one column per sensitivity.
sensitivity_df = pd.DataFrame(
    np.c_[slope_displ_arr, slope_force_arr],
    index=['#' + str(i+1) for i in range(slope_displ_arr.size)],
    columns=['Displacement sensitivity (Hz/mm)', 'Force sensitivity (Hz/mN)'])
# Add 'Displ_normalized' / 'Force_normalized': each sensitivity divided by
# its median across fibers.
for column in sensitivity_df.columns:
    sensitivity_df[column[:5] + '_normalized'] = sensitivity_df[column] /\
        sensitivity_df[column].median()
sensitivity_df.transpose().to_excel('./csvs/sensitivity.xlsx')

# Compare the spread of the two normalized sensitivities.
print(sensitivity_df.var())
from scipy.stats import f, bartlett, levene
# Variance-ratio F-test: a sample variance over n observations has n - 1
# degrees of freedom.
dof = sensitivity_df.shape[0] - 1
print(f.cdf(sensitivity_df['Displ_normalized'].var() /
            sensitivity_df['Force_normalized'].var(),
            dof, dof))
print(bartlett(sensitivity_df['Displ_normalized'],
               sensitivity_df['Force_normalized']))
print(levene(sensitivity_df['Displ_normalized'],
             sensitivity_df['Force_normalized']))