def _matched_p_value(self, statmatch):
    """
    Compute p-values for the difference of matched means test for every
    matching variable using vectorized operations

    :param statmatch: StatisticalMatching instance that has been fitted
    :return: NumPy array containing p-values for each matching variable
    """
    has_match = np.isfinite(statmatch.matches)
    match_index = np.asarray(statmatch.matches[has_match], dtype=np.int32)
    # np.unique returns the sorted unique match indices together with the
    # number of times each control observation was matched; the counts are
    # aligned with unique_matches, so they serve directly as frequency
    # weights. (The original helper built the weights from a defaultdict,
    # whose ordering is not guaranteed to line up with np.unique, and
    # dict.values() is not indexable under Python 3.)
    unique_matches, weights = np.unique(match_index, return_counts=True)

    treated = np.array(statmatch.design_matrix[statmatch.names][has_match])
    # .ix is long deprecated in pandas; .loc does the label-based lookup
    control = np.array(
        statmatch.design_matrix[statmatch.names].loc[unique_matches])

    (_, pvalue, _) = ttest_ind(treated, control, weights=(None, weights))
    return pvalue
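# A hedged sketch with made-up data, not part of the module above: the test
# snippets later in this collection show that integer frequency weights in
# statsmodels' ttest_ind reproduce a plain t-test on the expanded (repeated)
# sample, which is exactly what the matched comparison above relies on.
import numpy as np
from scipy import stats
from statsmodels.stats.weightstats import ttest_ind

control = np.array([1.0, 2.0, 3.0])
weights = np.array([2, 1, 3])           # how often each control obs was matched
expanded = np.repeat(control, weights)  # the same controls, replicated
treated = np.array([1.5, 2.5, 3.5, 2.0, 1.0, 3.0])

t_w, p_w, _ = ttest_ind(treated, control, weights=(None, weights))
t_r, p_r = stats.ttest_ind(treated, expanded)
assert np.allclose([t_w, p_w], [t_r, p_r])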
def independent_t_test(pdf, var_name, grouping_name):
    """Independent samples t-test

    arguments:
    var_name (str): name of the tested (dependent) variable
    grouping_name (str): name of the grouping variable
    """
    from statsmodels.stats.weightstats import ttest_ind
    text_result = ''

    dummy_groups, [var1, var2] = _split_into_groups(pdf, var_name, grouping_name)
    var1 = var1.dropna()
    var2 = var2.dropna()
    t, p, df = ttest_ind(var1, var2)
    # CI http://onlinestatbook.com/2/estimation/difference_means.html
    # However, there are other computational methods:
    # http://dept.stat.lsa.umich.edu/~kshedden/Python-Workshop/stats_calculations.html
    # http://www.statisticslectures.com/topics/ciindependentsamplest/
    mean_diff = np.mean(var1) - np.mean(var2)
    sse = np.sum((np.mean(var1) - var1)**2) + np.sum((np.mean(var2) - var2)**2)
    mse = sse / df
    nh = 2.0 / (1.0 / len(var1) + 1.0 / len(var2))
    s_m1m2 = np.sqrt(2 * mse / nh)
    t_cl = stats.t.ppf(1 - (0.05 / 2), df)  # two-tailed
    lci = mean_diff - t_cl * s_m1m2
    hci = mean_diff + t_cl * s_m1m2
    # Series.append was removed in pandas 2.0; concatenate instead
    prec = cs_util.precision(pd.concat([var1, var2])) + 1
    text_result += _('Difference between the two groups:') + ' %0.*f, ' % (prec, mean_diff) + \
                   _('95%% confidence interval [%0.*f, %0.*f]') % (prec, lci, prec, hci) + '\n'
    text_result += _('Result of independent samples t-test:') + ' <i>t</i>(%0.3g) = %0.3g, %s\n' % \
                   (df, t, cs_util.print_p(p))

    return text_result
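# A hedged cross-check, not part of the original module: the hand-rolled
# pooled-variance interval above (2*mse/nh equals mse*(1/n1 + 1/n2)) should
# agree with statsmodels' CompareMeans.tconfint_diff with usevar='pooled'.
# Variable names and data here are illustrative only.
import numpy as np
from statsmodels.stats.weightstats import DescrStatsW, CompareMeans

rng = np.random.default_rng(0)
g1 = rng.normal(10.0, 2.0, size=30)
g2 = rng.normal(11.0, 2.0, size=25)

cm = CompareMeans(DescrStatsW(g1), DescrStatsW(g2))
lci, hci = cm.tconfint_diff(alpha=0.05, usevar='pooled')
print('95% CI for mean(g1) - mean(g2): [%.3f, %.3f]' % (lci, hci))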
def test_ttest_ind_with_uneq_var(): # from scipy # check vs. R a = (1, 2, 3) b = (1.1, 2.9, 4.2) pr = 0.53619490753126731 tr = -0.68649512735572582 t, p, df = ttest_ind(a, b, usevar='unequal') assert_almost_equal([t, p], [tr, pr], 13) a = (1, 2, 3, 4) pr = 0.84354139131608286 tr = -0.2108663315950719 t, p, df = ttest_ind(a, b, usevar='unequal') assert_almost_equal([t, p], [tr, pr], 13)
def t_test(
    x1: np.ndarray,
    x2: np.ndarray,
) -> Dict[str, float]:
    """Conducts Welch's t-test on two samples.

    Args:
        x1: Array of data from group 1.
        x2: Array of data from group 2.

    Returns:
        Dictionary containing the t-statistic, p-value, degrees of freedom,
        difference in means between the groups, confidence interval lower
        bound, and confidence interval upper bound.
    """
    t_stat, p_value, df = ttest_ind(
        x1=x1,
        x2=x2,
        usevar='unequal',
    )
    # NOTE: the t-statistic is oriented as x1 - x2, while the mean difference
    # and confidence interval below are oriented as x2 - x1, so their signs
    # will be opposite.
    diff_means = x2.mean() - x1.mean()
    cm = CompareMeans(DescrStatsW(x2), DescrStatsW(x1))
    ci_lower, ci_upper = cm.tconfint_diff(usevar='unequal')
    results = {
        't_stat': t_stat,
        'p_value': p_value,
        'df': df,
        'diff_means': diff_means,
        'ci_lower': ci_lower,
        'ci_upper': ci_upper,
    }
    return results
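# Hypothetical usage of the wrapper above; the data are synthetic and the
# return keys come from the snippet itself.
import numpy as np

rng = np.random.default_rng(42)
baseline = rng.normal(100.0, 15.0, size=200)
treatment = rng.normal(104.0, 15.0, size=180)

res = t_test(baseline, treatment)
print('t = %.3f, p = %.4f, df = %.1f' % (res['t_stat'], res['p_value'], res['df']))
print('diff = %.3f, 95%% CI [%.3f, %.3f]'
      % (res['diff_means'], res['ci_lower'], res['ci_upper']))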
def get_result_data_scores_of_reviews(trait, reviews, authors_higher, authors_lower):
    all_data = []
    all_reviews_for_group_higher = [[author, anime, grade] for author, anime, grade
                                    in reviews if author in authors_higher]
    all_reviews_for_group_lower = [[author, anime, grade] for author, anime, grade
                                   in reviews if author in authors_lower]
    for genre in all_genres:
        reviews_for_genre_higher = [
            grade for author, anime, grade in all_reviews_for_group_higher
            if genre in anime_genres[str(anime)]
        ]
        reviews_for_genre_lower = [
            grade for author, anime, grade in all_reviews_for_group_lower
            if genre in anime_genres[str(anime)]
        ]
        # skip genres with no reviews in either group to avoid a
        # ZeroDivisionError when averaging
        if not reviews_for_genre_higher or not reviews_for_genre_lower:
            continue
        avg_score_higher = sum(reviews_for_genre_higher) / len(reviews_for_genre_higher)
        avg_score_lower = sum(reviews_for_genre_lower) / len(reviews_for_genre_lower)
        t, p, df = ttest_ind(reviews_for_genre_higher, reviews_for_genre_lower)
        cd = cohend(
            avg_score_higher,
            avg_score_lower,
            reviews_for_genre_higher,
            reviews_for_genre_lower,
        )
        diff = avg_score_higher - avg_score_lower
        all_data.append([
            genre, avg_score_higher, avg_score_lower, diff, t, df, p, cd, trait
        ])
    return all_data
def test_tost_asym():
    x1, x2 = clinic[:15, 2], clinic[15:, 2]
    # Note: x1, x2 reversed by definition in multeq.dif
    assert_almost_equal(x2.mean() - x1.mean(), tost_clinic_1_asym.estimate, 13)
    resa = smws.ttost_ind(x2, x1, -1.5, 0.6, usevar='unequal')
    assert_almost_equal(resa[0], tost_clinic_1_asym.p_value, 13)

    # multi-endpoints, asymmetric bounds, vectorized
    resall = smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7],
                            [-1.0, -1.0, -1.5, -1.5, -1.5], 0.6,
                            usevar='unequal')
    assert_almost_equal(resall[0], tost_clinic_all_no_multi.p_value, 13)

    # SMOKE tests: for multi-endpoint vectorized, k on k
    resall = smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7],
                            np.exp([-1.0, -1.0, -1.5, -1.5, -1.5]), 0.6,
                            usevar='unequal', transform=np.log)
    resall = smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:7],
                            [-1.0, -1.0, -1.5, -1.5, -1.5], 0.6,
                            usevar='unequal', transform=np.exp)
    resall = smws.ttost_paired(clinic[15:, 2:7], clinic[:15, 2:7],
                               [-1.0, -1.0, -1.5, -1.5, -1.5], 0.6,
                               transform=np.log)
    resall = smws.ttost_paired(clinic[15:, 2:7], clinic[:15, 2:7],
                               [-1.0, -1.0, -1.5, -1.5, -1.5], 0.6,
                               transform=np.exp)
    resall = smws.ttest_ind(clinic[15:, 2:7], clinic[:15, 2:7],
                            value=[-1.0, -1.0, -1.5, -1.5, -1.5])

    # k on 1: compare all with reference
    resall = smws.ttost_ind(clinic[15:, 2:7], clinic[:15, 2:3],
                            [-1.0, -1.0, -1.5, -1.5, -1.5], 0.6,
                            usevar='unequal')
    resa3_2 = smws.ttost_ind(clinic[15:, 3:4], clinic[:15, 2:3],
                             [-1.0, -1.0, -1.5, -1.5, -1.5], 0.6,
                             usevar='unequal')
    assert_almost_equal(resall[0][1], resa3_2[0][1], decimal=13)
    resall = smws.ttost_ind(clinic[15:, 2], clinic[:15, 2],
                            [-1.0, -0.5, -0.7, -1.5, -1.5], 0.6,
                            usevar='unequal')
    resall = smws.ttost_ind(clinic[15:, 2], clinic[:15, 2],
                            [-1.0, -0.5, -0.7, -1.5, -1.5],
                            np.repeat(0.6, 5), usevar='unequal')
def test_weightstats_2(self): x1, x2 = self.x1, self.x2 w1, w2 = self.w1, self.w2 d1 = DescrStatsW(x1) d1w = DescrStatsW(x1, weights=w1) d2w = DescrStatsW(x2, weights=w2) x1r = d1w.asrepeats() x2r = d2w.asrepeats() # print 'random weights' # print ttest_ind(x1, x2, weights=(w1, w2)) # print stats.ttest_ind(x1r, x2r) assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2], stats.ttest_ind(x1r, x2r), 14) # not the same as new version with random weights/replication # assert x1r.shape[0] == d1w.sum_weights # assert x2r.shape[0] == d2w.sum_weights assert_almost_equal(x2r.mean(0), d2w.mean, 14) assert_almost_equal(x2r.var(), d2w.var, 14) assert_almost_equal(x2r.std(), d2w.std, 14) # note: the following is for 1d assert_almost_equal(np.cov(x2r, bias=1), d2w.cov, 14) # assert_almost_equal(np.corrcoef(np.x2r), d2w.corrcoef, 19) # TODO: exception in corrcoef (scalar case) # one-sample tests # print d1.ttest_mean(3) # print stats.ttest_1samp(x1, 3) # print d1w.ttest_mean(3) # print stats.ttest_1samp(x1r, 3) assert_almost_equal(d1.ttest_mean(3)[:2], stats.ttest_1samp(x1, 3), 11) assert_almost_equal(d1w.ttest_mean(3)[:2], stats.ttest_1samp(x1r, 3), 11)
def find_sample_size(sample_sizes, mean, sd):
    for i in range(0, len(sample_sizes)):
        N = sample_sizes[i]
        # create our control data from our normal distribution
        control_data = norm.rvs(loc=mean, scale=sd, size=N)
        # multiply the control data by the relative effect
        variant_data = control_data * MDE
        significance_results = []
        for j in range(0, simulations):
            # randomly allocate the sample data to the control and variant
            rv = binom.rvs(1, 0.5, size=N)
            control_sample = control_data[rv == 1]
            variant_sample = variant_data[rv == 0]
            # Welch's t-test
            test_result = ttest_ind(control_sample, variant_sample,
                                    alternative=alternative, usevar='unequal')
            # test for significance
            significance_results.append(test_result[1] <= alpha)
        # the power is the fraction of simulations with a significant result
        power = np.mean(significance_results)
        if power > target_power:
            return N
    # target power never reached; fall back to the largest candidate size
    return sample_sizes[-1]
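# A hedged analytic cross-check for the simulation above. `mean`, `sd`, MDE,
# alpha, alternative and target_power are assumed to be the same globals the
# snippet uses; since N there is the total sample split roughly 50/50, the
# per-group size from the solver should land near N/2.
import numpy as np
from statsmodels.stats.power import tt_ind_solve_power

effect_size = (mean * MDE - mean) / sd  # Cohen's d implied by the relative effect
n_per_group = tt_ind_solve_power(effect_size=effect_size, alpha=alpha,
                                 power=target_power, ratio=1.0,
                                 alternative=alternative)
print('analytic per-group n: %.0f' % np.ceil(n_per_group))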
def unmatched_t_statistic(self):
    """
    Calculate the t-statistic of the unmatched difference-in-means test
    """
    treated = self.outcome[self.treated]
    controlled = self.outcome[~self.treated]
    (tstat, _, _) = ttest_ind(treated, controlled)
    return tstat
def execute():
    pd.set_option('display.max_columns', None)
    # Hypothesis Test 1
    career_stats = pd.read_csv('PJ Phase1\\5.player_career_stats_cleaned.csv')
    pros = career_stats[career_stats['PTS'] > 10]
    norms = career_stats[career_stats['PTS'] <= 10]
    pros_fg_percentage = np.asarray(pros['FG%'])
    norms_fg_percentage = np.asarray(norms['FG%'])
    ttest = ttest_ind(pros_fg_percentage, norms_fg_percentage)
    print(ttest)

    # Hypothesis Test 2
    player_stats = pd.read_csv('PJ Phase1\\2.player_stats_cleaned.csv')
    df = player_stats.groupby(['Name'], sort=False, as_index=False).agg(
        lambda x: x.value_counts().index[0])
    df = df[['Name', 'Pos']]
    career_stats_with_pos = pd.merge(career_stats, df, sort=False)
    print(career_stats_with_pos.head())
    print(career_stats_with_pos['Pos'].value_counts())
    c = np.asarray(career_stats_with_pos[career_stats_with_pos['Pos'] == 'C']['PTS'])
    sf = np.asarray(career_stats_with_pos[career_stats_with_pos['Pos'] == 'SF']['PTS'])
    sg = np.asarray(career_stats_with_pos[career_stats_with_pos['Pos'] == 'SG']['PTS'])
    pg = np.asarray(career_stats_with_pos[career_stats_with_pos['Pos'] == 'PG']['PTS'])
    pf = np.asarray(career_stats_with_pos[career_stats_with_pos['Pos'] == 'PF']['PTS'])
    pos = [c, sf, sg, pg, pf]
    names = ['c', 'sf', 'sg', 'pg', 'pf']
    # compare each pair of positions once: testing a sample against itself
    # (i == j) is meaningless, and (j, i) only flips the sign of (i, j)
    for i in range(5):
        for j in range(i + 1, 5):
            ttest = ttest_ind(pos[i], pos[j])
            print(names[i], names[j], ttest)

    print(career_stats.head())
    y = np.asarray(career_stats['PTS'])
    x = np.asarray(career_stats['FG%'])
    x2 = sm.add_constant(x)
    est = sm.OLS(y, x2)
    est2 = est.fit()
    print(est2.summary())
def unmatched_p_value(self):
    """
    Calculate the p-value of the unmatched difference-in-means test
    """
    treated = self.outcome[self.treated]
    controlled = self.outcome[~self.treated]
    (_, pvalue, _) = ttest_ind(treated, controlled)
    return pvalue
def fit(self, pos, neg): tstat, pvalue, df = ttest_ind(pos[self.col], neg[self.col]) diff = np.mean(pos[self.col]) - np.mean(neg[self.col]) return { 'feature': self.col, 'tstats': tstat, 'pvalue': pvalue, 'diff(pos-neg)': diff }
def p_value_analytically(group_a_scores, group_b_scores): from statsmodels.stats.weightstats import ttest_ind t, p, dof = ttest_ind( group_a_scores, group_b_scores, alternative='larger', usevar='unequal' ) # ... But what question is this answering? return p
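# Hedged sanity check with synthetic data: in statsmodels, alternative='larger'
# tests H1: mean(x1) > mean(x2), so the function above asks whether group A
# scores higher than group B.
import numpy as np
from statsmodels.stats.weightstats import ttest_ind

rng = np.random.default_rng(1)
a = rng.normal(1.0, 1.0, size=100)  # genuinely larger mean
b = rng.normal(0.0, 1.0, size=100)
print(ttest_ind(a, b, alternative='larger', usevar='unequal')[1])  # small p
print(ttest_ind(b, a, alternative='larger', usevar='unequal')[1])  # p near 1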
def compare_wines(wines_a, wines_b, results_filename="_comparison.csv"):
    print(f"comparing '{wines_a.name}' and '{wines_b.name}'")
    index = ["var", "t", "P", "DoF"]
    # DataFrame.append was removed in pandas 2.0; build the rows first and
    # construct the frame in one go
    rows = [pd.Series([var, *ttest_ind(wines_a[var], wines_b[var])], index=index)
            for var in variables]
    t_test_results = pd.DataFrame(rows, columns=index)
    print(t_test_results)
    t_test_results.to_csv(wines_a.name + wines_b.name + results_filename)
def _unmatched_p_value(self, statmatch):
    """
    Compute p-values for the difference of means test for every matching
    variable using vectorized operations

    :param statmatch: StatisticalMatching instance that has been fitted
    :return: NumPy array containing p-values for each matching variable
    """
    treated = np.array(
        statmatch.design_matrix[statmatch.names][statmatch.treated])
    control = np.array(
        statmatch.design_matrix[statmatch.names][~statmatch.treated])

    (_, pvalue, _) = ttest_ind(treated, control)
    return pvalue
def roiwise_stats(epi_data, nonepi_data): atlas_labels = '/ImagePTE1/ajoshi/code_farm/svreg/USCLobes/BCI-DNI_brain.label.nii.gz' at_labels = np.asanyarray(ni.load_img(atlas_labels).dataobj) # roi_list = [ # 3, 100, 101, 184, 185, 200, 201, 300, 301, 400, 401, 500, 501, 800, # 850, 900, 950 # ] #roi_list = [301, 300, 401, 400, 101, 100, 201, 200, 501, 500, 900] roi_list = [ 100, 101, 184, 185, 200, 201, 300, 301, 400, 401, 500, 501, 900 ] #roi_list = [300,301] #roi_list = np.unique(at_labels.flatten()) #roi_list = [3, 100, 101, 184, 185, 200, 201, 300, # 301, 400, 401, 500, 501, 800, 850, 900] epi_roi_lesion_vols = np.zeros((37, len(roi_list))) nonepi_roi_lesion_vols = np.zeros((37, len(roi_list))) for i, roi in enumerate(roi_list): msk = at_labels == roi epi_roi_lesion_vols[:, i] = np.sum(epi_data[:, msk], axis=1) nonepi_roi_lesion_vols[:, i] = np.sum(nonepi_data[:, msk], axis=1) ''' For the whole brain comparison msk = at_labels > 0 epi_roi_lesion_vols[:, len(roi_list)] = np.sum(epi_data[:, msk], axis=1) nonepi_roi_lesion_vols[:, len(roi_list)] = np.sum(nonepi_data[:, msk], axis=1) ''' t, p, _ = ttest_ind(epi_roi_lesion_vols, nonepi_roi_lesion_vols) F = epi_roi_lesion_vols.var(axis=0) / (nonepi_roi_lesion_vols.var(axis=0) + 1e-6) pval = 1 - ss.f.cdf(F, 37 - 1, 37 - 1) roi_list = np.array(roi_list) print('significant rois in t-test are') print(roi_list[p < 0.05]) print('significant rois in f-test are') print(roi_list[pval < 0.05]) _, pval_fdr = fdrcorrection(pval) print('significant rois in f-test after FDR correction are') print(roi_list[pval_fdr < 0.05]) w, s = shapiro(epi_roi_lesion_vols) print(w, s) return epi_roi_lesion_vols, nonepi_roi_lesion_vols
def test_ttest(): x1, x2 = clinic[:15, 2], clinic[15:, 2] all_tests = [] t1 = smws.ttest_ind(x1, x2, alternative='larger', usevar='unequal') all_tests.append((t1, ttest_clinic_indep_1_g)) t2 = smws.ttest_ind(x1, x2, alternative='smaller', usevar='unequal') all_tests.append((t2, ttest_clinic_indep_1_l)) t3 = smws.ttest_ind(x1, x2, alternative='smaller', usevar='unequal', value=1) all_tests.append((t3, ttest_clinic_indep_1_l_mu)) for res1, res2 in all_tests: assert_almost_equal(res1[0], res2.statistic, decimal=13) assert_almost_equal(res1[1], res2.p_value, decimal=13) #assert_almost_equal(res1[2], res2.df, decimal=13) cm = smws.CompareMeans(smws.DescrStatsW(x1), smws.DescrStatsW(x2)) ci = cm.tconfint_diff(alternative='two-sided', usevar='unequal') assert_almost_equal(ci, ttest_clinic_indep_1_two_mu.conf_int, decimal=13) ci = cm.tconfint_diff(alternative='two-sided', usevar='pooled') assert_almost_equal(ci, ttest_clinic_indep_1_two_mu_pooled.conf_int, decimal=13) ci = cm.tconfint_diff(alternative='smaller', usevar='unequal') assert_almost_equal_inf(ci, ttest_clinic_indep_1_l.conf_int, decimal=13) ci = cm.tconfint_diff(alternative='larger', usevar='unequal') assert_almost_equal_inf(ci, ttest_clinic_indep_1_g.conf_int, decimal=13) #test get_compare cm = smws.CompareMeans(smws.DescrStatsW(x1), smws.DescrStatsW(x2)) cm1 = cm.d1.get_compare(cm.d2) cm2 = cm.d1.get_compare(x2) cm3 = cm.d1.get_compare(np.hstack((x2, x2))) #all use the same d1, no copying assert_(cm.d1 is cm1.d1) assert_(cm.d1 is cm2.d1) assert_(cm.d1 is cm3.d1)
def groupTest(g1, g2, test_type, direction='two-sided'):
    g1 = np.array(g1)
    g2 = np.array(g2)
    g1 = g1[~np.isnan(g1)]
    g2 = g2[~np.isnan(g2)]
    delta_psi = np.nanmean(g1) - np.nanmean(g2)
    tumor_foc = calcTumorFormFoc(delta_psi, np.nanmean(g1))
    if test_type == 'sig':
        t1 = stats.ttest_ind(g1, g2)[1]
    elif test_type == 'equ':
        # NOTE: despite the 'equ' label this is a plain two-sided t-test; the
        # commented-out line below is the actual TOST equivalence test.
        t1 = smw.ttest_ind(g1, g2, alternative='two-sided')[1]
        # t1 = smw.ttost_ind(g1, g2, -threshold_tost, threshold_tost, usevar='unequal')[0]
    else:
        raise ValueError("test_type must be 'sig' or 'equ'")
    return [t1, delta_psi, tumor_foc]
def mean_ttest(list_1, list_2, significance=0.05):
    """In the limit, the t-distribution converges to the Z-distribution."""
    T, p_value, _ = ttest_ind(list_2, list_1, alternative='larger', usevar='unequal')
    # The one-sided p-value drives a three-way decision at overall level
    # `significance`: a very small p means mean(list_2) is larger, a very
    # large p means it is smaller, anything in between is inconclusive.
    if p_value <= significance / 2:
        decision = 'M(list_2) > M(list_1)'
    elif p_value >= 1 - significance / 2:
        decision = 'M(list_2) < M(list_1)'
    else:
        decision = 'M(list_2) ~ M(list_1)'
    return p_value, np.mean(list_2) - np.mean(list_1), decision
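# Hedged check of the decision rule above: with alternative='larger' the
# one-sided p equals half the two-sided p whenever the observed difference is
# positive, so thresholding at significance/2 reproduces a two-sided test
# with a direction attached. Deterministic toy data keep the sign fixed.
import numpy as np
from statsmodels.stats.weightstats import ttest_ind

a = np.array([0.1, 0.4, 0.2, 0.5, 0.3])
b = np.array([0.6, 0.9, 0.7, 1.0, 0.8])
_, p_one, _ = ttest_ind(b, a, alternative='larger', usevar='unequal')
_, p_two, _ = ttest_ind(b, a, alternative='two-sided', usevar='unequal')
assert np.isclose(p_one, p_two / 2)  # holds because mean(b) > mean(a) here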
def independent_sample_test(x, y, confidence=0.95):
    # Levene's test for homogeneity of variances
    F, Sig = stats.levene(x, y)
    # t statistic and two-tailed p-value, pooled variances
    t, p_two, df = st.ttest_ind(x, y, usevar='pooled')
    alpha = 1 - confidence
    t_score = stats.t.isf(alpha / 2, df)
    # difference of the sample means
    sample_mean = x.mean() - y.mean()
    # pooled standard deviation (sample sds, ddof=1)
    sp = np.sqrt(((x.shape[0] - 1) * np.square(x.std(ddof=1))
                  + (y.shape[0] - 1) * np.square(y.std(ddof=1)))
                 / (x.shape[0] + y.shape[0] - 2))
    # effect size, Cohen's d
    d = sample_mean / sp
    # standard error of the mean difference; the original squared the raw
    # arrays here, which returns an array rather than the scalar SE
    SE = np.sqrt(x.var(ddof=1) / x.shape[0] + y.var(ddof=1) / y.shape[0])
    lower_limit = sample_mean - t_score * SE
    upper_limit = sample_mean + t_score * SE

    t2, p_two2, df2 = st.ttest_ind(x, y, usevar='unequal')
    t_score2 = stats.t.isf(alpha / 2, df2)
    lower_limit2 = sample_mean - t_score2 * SE
    upper_limit2 = sample_mean + t_score2 * SE

    result = pd.DataFrame(
        [['F', 'Sig', 't', 'df', 'Sig (two-tailed)', 'effect size',
          'mean difference', 'confidence level', 'CI lower', 'CI upper'],
         [F, Sig, t, df, p_two, d, sample_mean, confidence,
          lower_limit, upper_limit],
         [None, None, t2, df2, p_two2, d, sample_mean, confidence,
          lower_limit2, upper_limit2]],
        index=['independent samples test', 'equal variances assumed',
               'equal variances not assumed'])
    return result
def ttest_cat_cont_var(df, feature1, feature2):
    sample_0 = np.array(df.loc[df[feature1] == 0, feature2].dropna())
    sample_1 = np.array(df.loc[df[feature1] == 1, feature2].dropna())
    test_stat, pval, _ = stests.ttest_ind(sample_0, sample_1, usevar='unequal')
    test_stat_eq, pval_eq, _ = stests.ttest_ind(sample_0, sample_1, usevar='pooled')
    print("Two independent samples, equal variances assumed (pooled)")
    print("test_statistic :{:.3f}".format(test_stat_eq))
    print("p-value        :{:.3f}".format(pval_eq))
    print("\nTwo independent samples, unequal variances (Welch)")
    print("test_statistic :{:.3f}".format(test_stat))
    print("p-value        :{:.3f}".format(pval))
    # Decision making
    print('-------------------------------------')
    alpha = 0.05
    if pval_eq <= alpha:
        print("Reject H0: there is a relationship between the {} and {} variables".format(feature1, feature2))
    else:
        print("Retain H0: the two variables are not related, as the difference between means is not significant")
    return test_stat_eq, pval_eq, test_stat, pval
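# Illustrative call with a toy DataFrame; the column names are made up.
import numpy as np
import pandas as pd

rng = np.random.default_rng(7)
toy = pd.DataFrame({
    'treated': rng.integers(0, 2, size=200),
    'outcome': rng.normal(0.0, 1.0, size=200),
})
ttest_cat_cont_var(toy, 'treated', 'outcome')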
def graph(x, y, xLabel, yLabel, title, figname):
    plt.clf()
    plt.hist(x, color="c", edgecolor="k", alpha=0.5)
    plt.axvline(np.array(x).mean(), color="k", linestyle="dashed",
                linewidth=3, label="average")
    plt.xlabel(xLabel)
    plt.ylabel(yLabel)
    plt.title(title)
    yAxis = np.arange(0, 10, 1)
    z = np.array([y] * 10)
    plt.plot(z, yAxis, label="model accuracy")
    # NOTE: a two-sample t-test against a single-element sample is fragile;
    # a one-sample test such as DescrStatsW(x).ttest_mean(y) is the usual
    # way to compare a sample against a fixed value.
    p_value = ttest_ind(x, [y])[1]
    plt.plot([], [], label=f"p-value: {np.round(p_value, 4)}", color="w")
    plt.legend()
    plt.savefig(figname)
def test_weightstats_1(self): x1, x2 = self.x1, self.x2 w1, w2 = self.w1, self.w2 w1_ = 2. * np.ones(len(x1)) w2_ = 2. * np.ones(len(x2)) d1 = DescrStatsW(x1) # print ttest_ind(x1, x2) # print ttest_ind(x1, x2, usevar='unequal') # #print ttest_ind(x1, x2, usevar='unequal') # print stats.ttest_ind(x1, x2) # print ttest_ind(x1, x2, usevar='unequal', alternative='larger') # print ttest_ind(x1, x2, usevar='unequal', alternative='smaller') # print ttest_ind(x1, x2, usevar='unequal', weights=(w1_, w2_)) # print stats.ttest_ind(np.r_[x1, x1], np.r_[x2,x2]) assert_almost_equal(ttest_ind(x1, x2, weights=(w1_, w2_))[:2], stats.ttest_ind(np.r_[x1, x1], np.r_[x2, x2]))
def test_ttest_2sample(self): x1, x2 = self.x1, self.x2 x1r, x2r = self.x1r, self.x2r w1, w2 = self.w1, self.w2 #Note: stats.ttest_ind handles 2d/nd arguments res_sp = stats.ttest_ind(x1r, x2r) assert_almost_equal(ttest_ind(x1, x2, weights=(w1, w2))[:2], res_sp, 14) #check correct ttest independent of user ddof cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=1)) assert_almost_equal(cm.ttest_ind()[:2], res_sp, 14) cm = CompareMeans(DescrStatsW(x1, weights=w1, ddof=1), DescrStatsW(x2, weights=w2, ddof=2)) assert_almost_equal(cm.ttest_ind()[:2], res_sp, 14) cm0 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=0)) cm1 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=0), DescrStatsW(x2, weights=w2, ddof=1)) cm2 = CompareMeans(DescrStatsW(x1, weights=w1, ddof=1), DescrStatsW(x2, weights=w2, ddof=2)) res0 = cm0.ttest_ind(usevar='unequal') res1 = cm1.ttest_ind(usevar='unequal') res2 = cm2.ttest_ind(usevar='unequal') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14) #check confint independent of user ddof res0 = cm0.tconfint_diff(usevar='pooled') res1 = cm1.tconfint_diff(usevar='pooled') res2 = cm2.tconfint_diff(usevar='pooled') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14) res0 = cm0.tconfint_diff(usevar='unequal') res1 = cm1.tconfint_diff(usevar='unequal') res2 = cm2.tconfint_diff(usevar='unequal') assert_almost_equal(res1, res0, 14) assert_almost_equal(res2, res0, 14)
def makettest(xperiment_dir):
    datasets, methods, classes, classifiers = ('ngrams', 'nongrams'), ('kfolds', 'eensemble', 'ros', 'hparamt'), \
                                              ('binary', 'multi'), ('Poly-2 Kernel', 'AdaBoost', 'GradientBoosting')
    all_metrics = {}
    for type_data in datasets:
        for class_type in classes:
            for method in methods:
                for clf in classifiers:
                    try:
                        clasifier_name = '{}_{}_{}_{}'.format(type_data, class_type, method, clf)
                        # To select only: accuracy, precision, recall, f-score, support classes and time
                        if class_type == 'multi':
                            limit_cols = 14
                        else:
                            limit_cols = 10
                        metrics_vect = contenido_csv('{}/{}_{}_{}_{}.csv'.format(xperiment_dir, type_data,
                                                                                 class_type, method, clf))
                        metrics_vect = np.array(metrics_vect)[:, :limit_cols]
                        all_metrics[clasifier_name] = metrics_vect
                    except IOError:
                        pass
                    else:
                        print '{}/{}/{}_{}_{}_{}'. \
                            format(SITE_ROOT, xperiment_dir, type_data, class_type, method, clf)

    fscore_idx = {'binary': [5, 6], 'multi': [7, 8, 9]}
    for class_type in classes:
        base_clf_name = 'nongrams_{}_kfolds_Poly-2 Kernel'.format(class_type)
        base_clf = np.array(all_metrics[base_clf_name], dtype='f')[:, fscore_idx[class_type]]
        print base_clf_name
        '''for ith_col in range(base_clf.shape[1]):
            print '\tclass {} -- {}'.format(ith_col + 1, ','.join(base_clf[:, ith_col].ravel()))
        '''
        for clasifier_name in all_metrics.keys():
            if clasifier_name != base_clf_name and class_type in clasifier_name:
                current_clf = np.array(all_metrics[clasifier_name], dtype='f')[:, fscore_idx[class_type]]
                print '\n\t{}'.format(clasifier_name)
                for ith_col in range(current_clf.shape[1]):
                    stats_result = ttest_ind(base_clf[:, ith_col].ravel(), current_clf[:, ith_col].ravel())
                    msg_result = '\tclass {} -- test statistic: {} \tp-value of the t-test: {} ' \
                                 '\tdegrees of freedom used in the t-test: {}'. \
                        format(ith_col + 1, round(stats_result[0], 4), round(stats_result[1], 4),
                               round(stats_result[2], 4))
                    if stats_result[1] > 0.05:
                        msg_result += ' P-value greater than 0.05'
                    print msg_result
def boxplot_cs(total_df, pv=False):
    uniques = total_df['data'].unique()
    n = len(uniques)
    fig, axes = plt.subplots(n, 1, figsize=(15, 8 * n))
    for index, a in enumerate(axes.ravel()):
        # getting dataset subgraph
        x = uniques[index]
        data = total_df.query('data==@x')
        # getting color palette
        n_hues = len(data.type.unique())
        sns.set_palette('coolwarm', n_colors=n_hues)
        sns.boxplot(data=data, x='model', hue='type', y='cancer_score', ax=a)
        sns.swarmplot(data=data, x='model', hue='type', y='cancer_score', ax=a,
                      edgecolor='black', linewidth=0.5)
        a.set_title('Comparison of predicted patient cancer scores for data = {} \n'
                    'using DeepCAT re-trained model vs PyTorch (Richie) implementation'.format(x))

        if pv:
            top = data.cancer_score.max()
            a.set_ylim(data.cancer_score.min() - 0.01, top + 0.02)
            y1, y2, y3 = top + 0.005, top + 0.01, top + 0.015
            # annotate each model with a significance bar and its p-value
            n_models = data.model.unique()
            for i, mod in enumerate(n_models):
                pval = ttest_ind(
                    data.query('model==@mod&type=="control"')['cancer_score'],
                    data.query('model==@mod&type=="cancer"')['cancer_score'])
                a.plot([i - 0.2, i - 0.2, i + 0.2, i + 0.2], [y1, y2, y2, y1],
                       lw=1.5, c='k')
                a.text(i, y3, "p = {pv:.3e}".format(pv=pval[1]),
                       ha='center', va='center', color='k')
def two_means_hypothesis(values1: np.ndarray, values2: np.ndarray, pooled: bool = False, alternative: str = "two-sided") -> tuple: """Perform t test comparing two means Args: values1 (np.array): sample 1 values values2 (np.array): sample 2 values pooled (bool, optional): whether to calculate pooled std. Defaults to False. alternative (str, optional): two-sided/larger/smaller. Defaults to "two-sided". Returns: tuple: t statistic, p_value of the test """ usevar = 'pooled' if pooled else "unequal" (tstat, pval, df) = ttest_ind(values1, values2, usevar=usevar, alternative=alternative) return (tstat, pval)
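# Quick illustration of the helper above with deterministic inputs (the
# values are made up):
import numpy as np

v1 = np.array([2.1, 2.5, 2.3, 2.8, 2.6])
v2 = np.array([1.9, 2.0, 2.2, 1.8, 2.1])
tstat, pval = two_means_hypothesis(v1, v2, pooled=False, alternative="larger")
print(f"t = {tstat:.3f}, one-sided p = {pval:.4f}")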
def integrated_ttest(result_path, test_variables, group_variables): feature_list = test_variables.columns ttest_results = pd.DataFrame({'Feature Name': [],'t value': [], 'p value': []}) t_value_list = [] p_value_list = [] group0_var = test_variables[group_variables == 0] group1_var = test_variables[group_variables == 1] for feature_name in feature_list: t_value, p_value, df = st.ttest_ind(group0_var[feature_name], group1_var[feature_name], usevar='unequal') t_value_list.append(t_value) p_value_list.append(p_value) ttest_results['Feature Name'] = feature_list ttest_results['t value'] = t_value_list ttest_results['p value'] = p_value_list ttest_results.to_csv(path_or_buf=result_path + '/' + 'significance.csv')
def calculate_t_p_values(): res = [] for feat in AUDIO_FEATURES: set_1 = all_feat[feat] set_2 = all_feat_2[feat] ttest = ttest_ind(set_1, set_2) print(f'{feat} t-test') print(ttest) print('\n') res.append({ "audio_feature": feat, "t": ttest[0], "p": ttest[1], "degrees_of_freedom": ttest[2] }) with open(f'ttest/{pl_name}-{pl_name_2}.csv', 'w') as f: w = csv.DictWriter( f, fieldnames=["audio_feature", "t", "p", "degrees_of_freedom"]) w.writeheader() for item in res: w.writerow(item)
def t_test(data_lastDV, group1, group2):
    """
    T-test to predict each condition --> numerical outcome variable
    Last column is the outcome variable (DV)
    http://statsmodels.sourceforge.net/devel/stats.html
    Note: T-tests are for 1 categorical variable with 2 levels
    :param data_lastDV: data frame containing the independent and dependent variables
    :return: None
    """
    col_names = data_lastDV.columns.values.tolist()  # get the columns' names
    outcome = col_names.pop()  # remove the last item in the list

    fig = plt.figure()
    i = 1
    for cond in col_names:
        df = data_lastDV[[cond, outcome]].dropna()
        cat1 = df[df[cond] == group1][outcome]
        cat2 = df[df[cond] == group2][outcome]
        print("\n" + FORMAT_LINE)
        print("T-test: " + cond)
        print(FORMAT_LINE)
        print(ttest_ind(cat1, cat2))  # returns t-stat, p-value, and degrees of freedom
        print("(t-stat, p-value, df)")
        # one subplot per condition; the original hardcoded two columns,
        # which breaks when there are more than two conditions
        ax = fig.add_subplot(1, len(col_names), i)
        ax = df.boxplot(outcome, cond, ax=plt.gca())
        ax.set_xlabel(cond)
        ax.set_ylabel(outcome)
        i += 1

    # box plot
    user_input = input(">> Display boxplot of conditions? [y/n]: ")
    if is_yes(user_input):
        fig.tight_layout()
        plt.show()
def test_parametric_significance_test_on_raw_observations_mean(): """ Ensure parametric experiment (when working with the raw observations) is evaluated correctly when the experiment metric is a mean. """ group_1_observations = np.array([1, 2, 1, 2, 1, 2, 1, 2, 1]) group_2_observations = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]) alternative_hypothesis = 'two-sided' expected_test_statistic, expected_p_value, _ = weightstats.ttest_ind( x1=group_1_observations, x2=group_2_observations, alternative=alternative_hypothesis) actual_p_value, actual_test_statistic = evaluation.parametric_significance_test_on_raw_observations( group_1_observations=group_1_observations, group_2_observations=group_2_observations, alternative_hypothesis=alternative_hypothesis, measurement_type='mean') assert actual_test_statistic == expected_test_statistic assert actual_p_value == expected_p_value
def _stat_test(data_max, data_min, test, alpha): # calculate effect size if (max(data_max) <= 1 and min(data_max) >= 0 and max(data_min) <= 1 and min(data_min) >= 0): # if analyze proportions use Cohen's h: effect_size = _cohenh(data_max, data_min) else: # for other variables use Cohen's d: effect_size = _cohend(data_max, data_min) # calculate power power = TTestIndPower().power(effect_size=effect_size, nobs1=len(data_max), ratio=len(data_min) / len(data_max), alpha=alpha, alternative='larger') if test == 'ks_2samp': p_val = ks_2samp(data_max, data_min, alternative='less')[1] elif test == 'mannwhitneyu': p_val = mannwhitneyu(data_max, data_min, alternative='greater')[1] elif test == 'ttest': p_val = ttest_ind(data_max, data_min, alternative='larger')[1] return p_val, power
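# The _cohend and _cohenh helpers are assumed by the snippet above. A common
# definition of Cohen's d with a pooled standard deviation looks like this
# sketch (not necessarily the author's implementation); Cohen's h is the
# analogous arcsine-transformed difference for proportions.
import numpy as np

def cohend_sketch(a, b):
    na, nb = len(a), len(b)
    pooled_var = ((na - 1) * np.var(a, ddof=1) +
                  (nb - 1) * np.var(b, ddof=1)) / (na + nb - 2)
    return (np.mean(a) - np.mean(b)) / np.sqrt(pooled_var)

def cohenh_sketch(p1, p2):
    # p1, p2 are sample proportions in [0, 1]
    return 2 * np.arcsin(np.sqrt(p1)) - 2 * np.arcsin(np.sqrt(p2))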
dwell_time_B = pd.read_csv('Datasets/Dwell_Time/Dwell_Time_VersionB.csv')

## Visualize the data
## Calculate mean and standard deviation (sd) of dwell time on the web pages
mean_A = round(dwell_time_A['dwellTime'].mean(), 2)
sd_A = round(dwell_time_A['dwellTime'].std(), 2)
mean_B = round(dwell_time_B['dwellTime'].mean(), 2)
sd_B = round(dwell_time_B['dwellTime'].std(), 2)
mean_sd_AB = pd.DataFrame({'mean': [mean_A, mean_B], 'sd': [sd_A, sd_B]}, index=['A', 'B'])
print(mean_sd_AB)

## Plot the densities of dwell times A and B
dwell_times = pd.DataFrame({'A': dwell_time_A['dwellTime'], 'B': dwell_time_B['dwellTime']})
dwell_times.plot(kind='kde', color=['r', 'b'])
plt.show()

## For this question, we use a t-test. Pass the dwellTime columns directly so
## the returned statistics are scalars rather than per-column arrays.
(tstat, t_p_value, t_df) = ttest_ind(x1=dwell_time_A['dwellTime'],
                                     x2=dwell_time_B['dwellTime'],
                                     alternative='two-sided')
print("tstat: %f \t p_value: %e" % (tstat, t_p_value))

## Is the dwell time different between version A and B (with significance level 0.05)?
## What is the power of this conclusion? Use the tt_ind_solve_power function to find out.

###################################################################################
## EXERCISE:
## After you finish the above analysis, an engineer in your team notifies you that
## 3430 of the records in group B with unsuccessful conversion are fake data
## automatically filled in by computer. The number of visitors for version B is thus
## 298234/2-3430, but there were still 8604 successful conversions for version B.
## Does this revelation change your conclusion from section 1?
###################################################################################
def test_pvalue(testdata): result = pvalue(testdata, control_label='A') expected_nonprop = ttest_ind(testdata['kpi1']['A'], testdata['kpi1']['B'])[1] expected_prop = ztest(testdata['kpi2']['A'], testdata['kpi2']['B'])[1] assert result['B']['kpi1'] == expected_nonprop assert result['B']['kpi2'] == expected_prop
def fn(control, test): if _is_proportion(control, test): return ztest(control, test, alternative='two-sided')[1] else: return ttest_ind(control, test, alternative='two-sided')[1]
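# `_is_proportion` is assumed by the dispatcher above; a plausible sketch is
# "every observation is 0 or 1", in which case a z-test on proportions is the
# natural choice and a t-test is used otherwise.
import numpy as np

def _is_proportion_sketch(*samples):
    return all(np.isin(np.asarray(s), (0, 1)).all() for s in samples)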
def report_on(trial):
    """
    Calculate headline stats for TRIAL once
    """
    tr = TrialAnalysis.objects.get_or_create(trial=trial)[0]
    if trial.report_set.count() < 2:
        return
    nobs1 = int(trial.report_set.count() / 2)
    if trial.offline:
        reports = trial.report_set.all()
    else:
        reports = trial.report_set.exclude(date__isnull=True)
    points = [t.get_value() for t in reports]
    pointsa = [t.get_value() for t in reports.filter(group__name=Group.GROUP_A)]
    pointsb = [t.get_value() for t in reports.filter(group__name=Group.GROUP_B)]
    sd = np.std(points)
    mean = np.mean(points)
    nobsa = len(pointsa)
    nobsb = len(pointsb)
    meana = np.mean(pointsa)
    meanb = np.mean(pointsb)
    stderrmeana = scistats.sem(pointsa)
    stderrmeanb = scistats.sem(pointsb)
    small = tt_ind_solve_power(effect_size=0.1, alpha=0.05, nobs1=nobs1, power=None)
    med = tt_ind_solve_power(effect_size=0.2, alpha=0.05, nobs1=nobs1, power=None)
    large = tt_ind_solve_power(effect_size=0.5, alpha=0.05, nobs1=nobs1, power=None)
    if trial.variable_set.get().style == Variable.BINARY:
        obs = np.array([[len([p for p in pointsa if p == True]),
                         len([p for p in pointsa if p == False])],
                        [len([p for p in pointsb if p == True]),
                         len([p for p in pointsb if p == False])]])
        try:
            chi2, pval, dof, expected = scistats.chi2_contingency(obs)
        except ValueError:
            exc = traceback.format_exc()

            class Message(letter.Letter):
                Postie = POSTIE
                From = '*****@*****.**'
                To = '*****@*****.**'
                Subject = 'Chi2 failure instance'
                Body = "Couldn't run chi2 for {0}\n\n{1}".format(str(obs), exc)

            try:
                Message.send()
            except Exception:
                print exc
            pval = None
    else:
        tstat, pval, df = ttest_ind(pointsa, pointsb)

    tr.power_small = small
    tr.power_med = med
    tr.power_large = large
    tr.sd = sd
    tr.mean = mean
    tr.nobsa = nobsa
    tr.nobsb = nobsb
    tr.meana = meana
    tr.meanb = meanb
    tr.stderrmeana = stderrmeana
    tr.stderrmeanb = stderrmeanb
    tr.pval = pval
    tr.save()
    return
def showstatsresults(prefx='', charts_path='recursos/charts/'):
    '''elite_clfs = ('nongrams_multi_kfolds_Poly-2 Kernel', 'nongrams_binary_kfolds_Poly-2 Kernel',
                     'nongrams_binary_ros_AdaBoost', 'nongrams_binary_ros_GradientBoosting',
                     'ngrams_binary_ros_AdaBoost', 'ngrams_multi_ros_AdaBoost',
                     'nongrams_multi_ros_AdaBoost', 'nongrams_multi_ros_GradientBoosting')
    '''
    elite_clfs = ('nongrams_multi_kfolds_Poly-2 Kernel', 'nongrams_binary_kfolds_Poly-2 Kernel',
                  'ngrams_binary_eensemble_Poly-2 Kernel', 'ngrams_binary_eensemble_GradientBoosting',
                  'nongrams_binary_eensemble_Poly-2 Kernel', 'ngrams_multi_eensemble_Poly-2 Kernel',
                  'ngrams_multi_eensemble_GradientBoosting', 'nongrams_multi_eensemble_GradientBoosting')
    all_metrics = getelitedata(elite_clfs, 'recursos/resultados/experiment_tfidf')

    metrics_candidate = {}
    for clf_name in elite_clfs[2:]:
        if 'binary' in clf_name:
            col_names = ['Accuracy', 'P-class 1', 'P-class 2', 'R-class 1', 'R-class 2',
                         'F1-class 1', 'F1-class 2']
        else:
            col_names = ['Accuracy', 'P-class 1', 'P-class 2', 'P-class 3', 'R-class 1',
                         'R-class 2', 'R-class 3', 'F1-class 1', 'F1-class 2', 'F1-class 3']
        metrics_candidate[clf_name] = all_metrics[clf_name][col_names]
    metrics_main = {}
    for clf_name in elite_clfs[:2]:
        if 'binary' in clf_name:
            col_names = ['Accuracy', 'P-class 1', 'P-class 2', 'R-class 1', 'R-class 2',
                         'F1-class 1', 'F1-class 2']
        else:
            col_names = ['Accuracy', 'P-class 1', 'P-class 2', 'P-class 3', 'R-class 1',
                         'R-class 2', 'R-class 3', 'F1-class 1', 'F1-class 2', 'F1-class 3']
        metrics_main[clf_name] = all_metrics[clf_name][col_names]

    for m_clf_name in metrics_main.keys():
        clf_case = m_clf_name.split('_')[1]
        if 'binary' in m_clf_name:
            selected_col = {'Accuracy': 0, 'F1-class 1': 5, 'F1-class 2': 6}
            base_metric, confusion_m = 'F1-class 2', ['CM-1', 'CM-2', 'CM-3', 'CM-4']
        else:
            selected_col = {'Accuracy': 0, 'F1-class 1': 7, 'F1-class 2': 8, 'F1-class 3': 9}
            base_metric, confusion_m = 'F1-class 3', ['CM-1', 'CM-2', 'CM-3', 'CM-4', 'CM-5',
                                                      'CM-6', 'CM-7', 'CM-8', 'CM-9']
        m_clf_nshort = makeshortname(m_clf_name)
        print '\n{}'.format(m_clf_name)
        elit_metrics = []
        '''query = metrics_main[m_clf_name][base_metric] == metrics_main[m_clf_name][base_metric].max()
        query_result = metrics_main[m_clf_name][query][confusion_m].values
        best_confussion_m = [round(val, 1) for val in np.nditer(query_result)]
        elit_metrics.append((m_clf_nshort, list(metrics_main[m_clf_name].mean(0).values),
                             best_confussion_m))
        '''
        filtered_data = {'1_{}'.format(m_clf_nshort): metrics_main[m_clf_name][selected_col.keys()]}
        cont = 2
        for c_clf_name in metrics_candidate.keys():
            if clf_case in c_clf_name:
                print '\n\t{}'.format(c_clf_name)
                for selct_metric in selected_col.keys():
                    stats_result = ttest_ind(metrics_main[m_clf_name][selct_metric].values,
                                             metrics_candidate[c_clf_name][selct_metric].values)
                    msg_result = '\t{} -- test statistic: {} \tp-value of the t-test: {} ' \
                                 '\tdegrees of freedom used in the t-test: {}'. \
                        format(selct_metric, stats_result[0], stats_result[1], stats_result[2])
                    if stats_result[1] > 0.05:
                        msg_result += ' P-value greater than 0.05'
                    print '\t{}'.format(msg_result)
                '''query = metrics_candidate[c_clf_name][base_metric] == metrics_candidate[c_clf_name][base_metric].max()
                query_result = metrics_candidate[c_clf_name][query][confusion_m].values
                best_confussion_m = [round(val, 1) for val in np.nditer(query_result)]
                '''
                filtered_data['{}_{}'.format(cont, makeshortname(c_clf_name))] = metrics_candidate[c_clf_name]
                '''elit_metrics.append((makeshortname(c_clf_name),
                                        list(metrics_candidate[c_clf_name].mean(0).values),
                                        best_confussion_m))
                '''
                cont += 1
        # guardar_csv(elit_metrics, 'recursos/resultados/experiment_tfidf/elite_{}_metrics.csv'.format(clf_case))
        for selct_metric in selected_col.keys():
            data_labels, data_toplot = [], []
            plt.subplot()
            for clf_name in filtered_data.keys():
                data_toplot.append(filtered_data[clf_name][selct_metric])
                if clf_name[2:][0] == 'O':
                    clf_name = 'N{}'.format(clf_name[3:])
                else:
                    clf_name = 'L{}'.format(clf_name[3:])
                data_labels.append(clf_name)
            plt.boxplot(data_toplot)
            if 'Accuracy' in selct_metric:
                selct_metric = 'Exactitud'
            else:
                selct_metric = 'F-score_Clase_{}'.format(selct_metric[-1])
            plt.xticks(np.arange(0, len(data_toplot)) + 1, data_labels)
            # NOTE: the original format string had six placeholders but only
            # five arguments (an IndexError at runtime); the otherwise unused
            # `prefx` parameter is assumed to fill the gap.
            [plt.savefig('{}/{}{}_{}_{}.{}'.format(charts_path, prefx, clf_case, selct_metric,
                                                   clf_case, img_format))
             for img_format in ('eps', 'jpg')]
            plt.close()