Example #1
import json
import logging

import numpy
from scipy.stats import ttest_rel

logger = logging.getLogger(__name__)  # module-level logger assumed by the original snippet


def calc_ttest(id1, id2):

    #reading sim files
    logger.info('reading '+id1+' ...')
    fr=open('../../result/wordsim/simcos/'+id1+'.txt')
    wordsim1=json.load(fr)
    words=wordsim1.keys()
    fr.close()

    logger.info('reading '+id2+' ...')
    fr=open('../../result/wordsim/simcos/'+id2+'.txt')
    wordsim2=json.load(fr)
    fr.close()
    keywordsim2={}
    for w in words:
        keywordsim2[w]={}
        for item in wordsim2[w]:
            keywordsim2[w][item[0]]=item[1]
    
    #ttest all
    logger.info('ttest all of '+id1+'_'+id2)
    fw_all=open('../../result/wordsim/ttest/all/'+id1+'_'+id2+'.txt','w')
    for i, w in enumerate(words):
        logger.debug('ttest all of '+id1+'_'+id2+' of '+w+' ('+str(i)+'/'+str(len(words))+')')
        
        values1=[x[1] for x in wordsim1[w]]
        values2=[keywordsim2[w].get(x[0], 0) for x in wordsim1[w]]

        #pdb.set_trace()
        m=sum(abs(numpy.array(values1)-numpy.array(values2)))/len(values1)
 
        ttest=ttest_rel(values1, values2)

        fw_all.write(w+',ttest'+','+str(ttest[0])+','+str(ttest[1])+','+str(m)+'\n')
    fw_all.close()
    
        
    #ttest slice
    logger.info('ttest slice of '+id1+'_'+id2)
    fw_slice=open('../../result/wordsim/ttest/slice/'+id1+'_'+id2+'.txt','w')
    steps=20
    for i, w in enumerate(words):
        logger.info('ttest slice of '+id1+'_'+id2+' of '+w)
        for step in range(0,steps):
            top=1-(step*(2.0/steps))
            bottom=1-((step+1)*(2.0/steps))
            keys_subset=[x[0] for x in wordsim1[w] if x[1] <= top and x[1] > bottom]
            
            logger.debug('ttest slice of '+id1+'_'+id2+' of '+w+' ('+str(i)+'/'+str(len(words))+') len: '+str(len(keys_subset))+' slice: '+str(bottom)+'-'+str(top))
            if len(keys_subset)>0:
                values1_subset=[x[1] for x in wordsim1[w] if x[1] <= top and x[1] > bottom]
                values2_subset=[keywordsim2[w].get(x, 0) for x in keys_subset]
                m=sum(abs(numpy.array(values1_subset)-numpy.array(values2_subset)))/len(values1_subset)
                ttest=ttest_rel(values1_subset, values2_subset)

                fw_slice.write(w+','+str(top)+','+str(bottom)+','+str(len(keys_subset))+',ttest,'+str(ttest[0])+','+str(ttest[1])+','+str(m)+'\n')
            else:
                fw_slice.write(w+','+str(top)+','+str(bottom)+','+str(len(keys_subset))+'-'+'\n')
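
A minimal invocation sketch (not from the original project); the ids below are hypothetical names of similarity dumps under ../../result/wordsim/simcos/:

calc_ttest('model_2015', 'model_2016')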
Example #2
from scipy import stats


def loocvFileCompare(csvpath, kvals):
    """Calls loocvFile() for an FFT and a CH run, then tests whether the AUC difference is significant."""
    aucs = [loocvFile(csvpath % 'fft', kvals),
            loocvFile(csvpath % 'ch', kvals)]
    (t, prob) = stats.ttest_rel(aucs[0], aucs[1])
    print("---------------------------")
    print("Paired T-test: t=%g, p=%g" % (t, prob))
Example #3
from scipy.stats import ttest_rel, ttest_ind
from tqdm import tqdm

# topic_scores() is a per-topic score extractor from the original project.


def _ttest(orig_score, rep_score, rpd=True, pbar=False):
    """Yield per-measure p-values comparing original and reproduced/replicated runs.

    @param orig_score: The original scores.
    @param rep_score: The reproduced/replicated scores.
    @param rpd: Boolean indicating if the evaluated runs are reproduced
        (paired test) rather than replicated (unpaired test).
    @param pbar: Boolean indicating if a progress bar should be printed.
    @return: Generator of (measure, p-value) tuples.
    """
    if rpd:  # paired two-tailed t-test
        topic_scores_orig = topic_scores(orig_score)
        topic_scores_rep = topic_scores(rep_score)

        generator = tqdm(
            topic_scores_orig.items()) if pbar else topic_scores_orig.items()

        for measure, scores in generator:
            yield measure, ttest_rel(scores,
                                     topic_scores_rep.get(measure)).pvalue

    else:  # else unpaired two-tailed t-test
        topic_scores_orig = topic_scores(orig_score)
        topic_scores_rep = topic_scores(rep_score)

        generator = tqdm(
            topic_scores_orig.items()) if pbar else topic_scores_orig.items()

        for measure, scores in generator:
            yield measure, ttest_ind(scores,
                                     topic_scores_rep.get(measure)).pvalue
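
A usage sketch; orig_run and rep_run stand for whatever score structures topic_scores() accepts in the original project:

for measure, p in _ttest(orig_run, rep_run, rpd=True, pbar=False):
    print(measure, p)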
Example #4
import sys
from typing import Dict, List

from scipy import stats

# load_qrels_flat, load_ranked_list_grouped, TrecRankedListEntry, get_metric_fn,
# get_score_per_query and average are imports from the original project.


def main():
    judgment_path = sys.argv[1]
    metric = sys.argv[2]
    ranked_list_path1 = sys.argv[3]
    ranked_list_path2 = sys.argv[4]
    # print
    qrels = load_qrels_flat(judgment_path)

    ranked_list_1: Dict[str, List[TrecRankedListEntry]] = load_ranked_list_grouped(ranked_list_path1)
    ranked_list_2: Dict[str, List[TrecRankedListEntry]] = load_ranked_list_grouped(ranked_list_path2)

    metric_fn = get_metric_fn(metric)

    score_d1 = get_score_per_query(qrels, metric_fn, ranked_list_1)
    score_d2 = get_score_per_query(qrels, metric_fn, ranked_list_2)

    pairs = []
    for key in score_d1:
        try:
            e = (score_d1[key], score_d2[key])
            pairs.append(e)
        except KeyError:
            pass

    if len(pairs) < len(score_d1) or len(pairs) < len(score_d2):
        print("{} matched from {} and {} scores".format(len(pairs), len(score_d1), len(score_d2)))

    l1, l2 = zip(*pairs)
    d, p_value = stats.ttest_rel(l1, l2)
    print("baseline:", average(l1))
    print("treatment:", average(l2))
    print(d, p_value)
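
A hypothetical command line for this script (the script and run file names are made up):

python run_ttest.py qrels.txt map run_baseline.txt run_treatment.txt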
Example #5
from scipy import stats


def processFoldResult(baselineResult, result):
    # both arguments are dicts carrying 'auc-mean' (float) and 'auc-fold' (numpy array of per-fold AUCs)
    baselineScore = baselineResult['auc-mean']
    baselineFold = baselineResult['auc-fold']
    foldAuc = baselineFold - result['auc-fold']
    tstat = stats.ttest_rel(baselineFold, result['auc-fold'])
    return (result['auc-mean'], result['auc-fold'].mean(),
            result['auc-fold'].std(), baselineScore - result['auc-mean'],
            foldAuc.mean(), foldAuc.std(), tstat[1])
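
A sketch with made-up fold scores showing the expected input shape:

import numpy as np
baseline = {'auc-mean': 0.810, 'auc-fold': np.array([0.80, 0.82, 0.81, 0.79, 0.83])}
candidate = {'auc-mean': 0.840, 'auc-fold': np.array([0.83, 0.85, 0.84, 0.82, 0.86])}
print(processFoldResult(baseline, candidate))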
Example #6
from scipy import stats


def check_hypothesis(all_voca, cdf_cont, cdf_ncont, clueweb_cdf, clueweb_ctf,
                     clueweb_df, clueweb_tf, ctf_cont, ctf_ncont, df_cont,
                     df_ncont, tf_cont, tf_ncont, unigrams):
    hypo1 = []
    hypo1_1 = []
    hypo2_1 = []
    hypo2_2 = []
    not_observed_in_relevant_docs = set()
    for t in unigrams:
        if t not in all_voca:
            not_observed_in_relevant_docs.add(t)
            continue

        # Hypothesis 1 : P(t|controversy,R) > P(t| !controversy,R)
        # Hypothesis 2 : P(t|R) > P(t|BG)

        p1 = tf_cont[t] / ctf_cont
        p2 = tf_ncont[t] / ctf_ncont
        hypo1.append((t, (p1, p2)))
        p1 = df_cont[t] / cdf_cont
        p2 = df_ncont[t] / cdf_ncont
        hypo1_1.append((t, (p1, p2)))

        p1 = (tf_cont[t] + tf_ncont[t]) / (ctf_cont + ctf_ncont)
        if t not in clueweb_df:
            print("warning {} not in clueweb voca".format(t))
            continue

        p2 = clueweb_tf[t] / clueweb_ctf
        hypo2_1.append((t, (p1, p2)))

        p1 = (df_cont[t] + df_ncont[t]) / (cdf_cont + cdf_ncont)
        p2 = clueweb_df[t] / clueweb_cdf
        hypo2_2.append((t, (p1, p2)))
    todo = [
        (hypo1, "Hypothesis 1 : P(t|controversy,R) > P(t| !controversy,R)"),
        (hypo1_1,
         "Hypothesis 1 : P(t|controversy,R) > P(t| !controversy,R) by binary model"
         ),
        (hypo2_1, "Hypothesis 2 : P(t|R) > P(t|BG)"),
        (hypo2_2, "Hypothesis 2 : P(t|R) > P(t|BG) by binary model"),
    ]

    print("not_observed_in_relevant_docs : {} ".format(
        not_observed_in_relevant_docs))
    for hypo, desc in todo:
        print(desc)
        terms, pairs = zip(*hypo)
        p1_list, p2_list = zip(*pairs)
        t, p = stats.ttest_rel(p1_list, p2_list)
        print(t, p)
        for term, pair in hypo:
            p1, p2 = pair
            print(
                term, "tf_cont:{} tf_ncont:{} df_cont:{}".format(
                    tf_cont[term], tf_ncont[term], df_cont[term]),
                "{0:.4f} {1:.4f}".format(p1, p2))
Example #7
def roi_pair_ttest():
    """
    compare rsfc difference between ROIs
    scheme: hemi-separately network-wise
    """
    import numpy as np
    import pickle as pkl
    import pandas as pd
    from scipy.stats import ttest_rel
    from cxy_hcp_ffa.lib.predefine import net2label_cole
    from commontool.stats import EffectSize

    # inputs
    hemis = ('lh', 'rh')
    roi_pair = ('pFus-face', 'mFus-face')
    data_file = pjoin(work_dir, 'rsfc_individual2Cole_{}.pkl')
    compare_name = f"{roi_pair[0].split('-')[0]}_vs_" \
                   f"{roi_pair[1].split('-')[0]}"

    # outputs
    out_file = pjoin(work_dir,
                     f"rsfc_individual2Cole_{compare_name}_ttest_paired.csv")

    # start
    trg_names = list(net2label_cole.keys())
    trg_labels = list(net2label_cole.values())
    out_data = {'network': trg_names}
    es = EffectSize()
    for hemi in hemis:
        data = pkl.load(open(data_file.format(hemi), 'rb'))
        assert data['trg_label'] == trg_labels

        out_data[f'CohenD_{hemi}'] = []
        out_data[f't_{hemi}'] = []
        out_data[f'P_{hemi}'] = []
        for trg_idx, trg_name in enumerate(trg_names):
            sample1 = data[roi_pair[0]][:, trg_idx]
            sample2 = data[roi_pair[1]][:, trg_idx]
            nan_vec1 = np.isnan(sample1)
            nan_vec2 = np.isnan(sample2)
            nan_vec = np.logical_or(nan_vec1, nan_vec2)
            print('#NaN in sample1 or sample2:', np.sum(nan_vec))
            sample1 = sample1[~nan_vec]
            sample2 = sample2[~nan_vec]
            d = es.cohen_d(sample1, sample2)
            t, p = ttest_rel(sample1, sample2)
            out_data[f'CohenD_{hemi}'].append(d)
            out_data[f't_{hemi}'].append(t)
            out_data[f'P_{hemi}'].append(p)

    # save out
    out_data = pd.DataFrame(out_data)
    out_data.to_csv(out_file, index=False)
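
The pairwise NaN-masking idiom above generalizes; a standalone sketch with made-up data:

import numpy as np
from scipy.stats import ttest_rel

a = np.array([0.11, np.nan, 0.35, 0.42, 0.28])
b = np.array([0.19, 0.12, np.nan, 0.51, 0.33])
keep = ~(np.isnan(a) | np.isnan(b))  # drop pairs where either side is NaN
print(ttest_rel(a[keep], b[keep]))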
Example #8
import sys

from scipy import stats

# get_score_per_query and average are helpers from the original project.


def main():
    score_path1 = sys.argv[1]
    score_path2 = sys.argv[2]
    # print
    l1 = get_score_per_query(score_path1)
    l2 = get_score_per_query(score_path2)

    assert len(l1) == len(l2)

    d, p_value = stats.ttest_rel(l1, l2)
    print("baseline:", average(l1))
    print("treatment:", average(l2))
    print(d, p_value)
Example #9
from scipy import stats


def t_test(arr):
    """ Moving-window paired t-test function.
        To get two columns into the function, one must be set as the index.

        Example:
            input_table = input_table.set_index(treatment_column, drop=False)

            input_table['p_value'] = input_table['controls_mean'].rolling(
                window=size, center=True).apply(t_test, raw=False)
    """

    tstat, pvalue = stats.ttest_rel(arr.index, arr)
    return pvalue
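
A self-contained version of the docstring example with synthetic data:

import pandas as pd

input_table = pd.DataFrame({'treatment': [1.0, 1.2, 0.9, 1.1, 1.3, 1.0, 1.2],
                            'controls_mean': [0.8, 1.1, 1.0, 0.9, 1.2, 1.1, 1.0]})
input_table = input_table.set_index('treatment', drop=False)
input_table['p_value'] = input_table['controls_mean'].rolling(
    window=5, center=True).apply(t_test, raw=False)
print(input_table)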
Example #10
    def highlight_min(s):
        best_measure_idx = s[:-1].idxmax()

        best_mask = [
            stats.ttest_rel(
                experiments[s.name][best_measure_idx]
                ['best_model_test_ll'].flatten(), experiments[s.name][name]
                ['best_model_test_ll'].flatten()).pvalue > 0.05
            if name in experiments[s.name] else False
            for name, val in s.items()
        ]
        best_mask = np.logical_or(best_mask, s.keys() == best_measure_idx)

        return ['font-weight: bold' if val else '' for val in best_mask]
Example #11
from scipy import stats

# evaluate() is a helper from the original project.


def ttest(victim_run, allTheOther_runs, qrels, metric):
    """
    Computes a t-test between victim_run and every run contained in allTheOther_runs,
    using the relevance judgements contained in qrels to compute the specified metric.
    Returns a dictionary d[otherRunName] = p-value.
    The test used is a two-tailed Student's t-test on two related samples.
    """
    victimAvg, victimDetails = evaluate(victim_run, qrels, metric, True)
    # to read the scores always in the same order
    keyList = list(victimDetails.keys())
    victimScores = [victimDetails[k] for k in keyList]
    result = {}
    for othertrun in allTheOther_runs:
        otherAvg, otherDetails = evaluate(othertrun, qrels, metric, True)
        otherScores = [otherDetails[k] for k in keyList]
        _, p = stats.ttest_rel(victimScores, otherScores)
        result[othertrun.name] = p
    return result
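
A hypothetical call, assuming run objects and qrels in whatever form evaluate() expects in the original project:

pvals = ttest(my_run, [run_a, run_b], qrels, 'map')
for name, p in pvals.items():
    print(name, p)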
Example #12
import numpy as np
from scipy import stats


def export_to_latext(experiments, df, col_names, row_names, file):
    with open(file, 'w') as file:
        file.write('\\begin{tabular}{l%s}\n' % "".join(
            (['r'] * len(df.columns))))
        file.write('\\toprule\n')
        file.write('{}')
        for col in df.columns.values:
            file.write(' & \\makecell{%s}' % col_names[col])
        file.write('\\\\\n')

        file.write('%s ' % df.index.name.replace('_', ' '))
        file.write("".join(['&    '] * len(df.columns)))
        file.write("\\\\\n")

        for i in range(len(df)):
            row = df.iloc[i]

            best_measure_idx = row[:-1].idxmax()
            best_mask = [
                stats.ttest_rel(
                    experiments[row.name][best_measure_idx]
                    ['best_model_test_ll'].flatten(), experiments[row.name]
                    [name]['best_model_test_ll'].flatten()).pvalue > 0.05
                if name in experiments[row.name] else False
                for name, val in row.items()
            ]
            best_mask = np.logical_or(best_mask,
                                      row.keys() == best_measure_idx)

            file.write('\\makecell[l]{%s}' % row_names[row.name])
            for col_i, col in enumerate(df.columns.values):
                if np.isnan(row[col]):
                    val = '&  '
                else:
                    val = '%.3f ' % row[col]
                    if best_mask[col_i]:
                        val = "\\textbf{%s}" % val
                    val = "& %s" % val

                file.write(val)
            file.write('\\\\\n')

        file.write('\\bottomrule\n')
        file.write('\\end{tabular}\n')
Example #13
def compare_gdist():
    import numpy as np
    import pandas as pd
    from scipy.stats import ttest_rel

    items = ('pFus-mFus', )
    data_file = pjoin(work_dir, 'gdist_peak.csv')

    df = pd.read_csv(data_file)
    for item in items:
        col1 = 'lh_' + item
        col2 = 'rh_' + item
        data1 = np.array(df[col1])
        data2 = np.array(df[col2])
        nan_vec1 = np.isnan(data1)
        nan_vec2 = np.isnan(data2)
        not_nan_vec = ~np.logical_or(nan_vec1, nan_vec2)
        data1 = data1[not_nan_vec]
        data2 = data2[not_nan_vec]
        print(f'#{item}: {len(data1)}')
        print(f'{col1} vs {col2}:', ttest_rel(data1, data2))
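
A synthetic input sketch matching the gdist_peak.csv layout the code expects (values made up):

import pandas as pd
pd.DataFrame({'lh_pFus-mFus': [21.3, 18.7, None, 25.1],
              'rh_pFus-mFus': [19.9, 17.2, 22.4, 24.0]}).to_csv('gdist_peak.csv', index=False)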
Example #14
from functools import partial

from scipy import stats

# load_tfrecord, get_correctness_arr, get_existing_predictions, flatten and
# lmap are helpers from the original project.


def main(prefix1, prefix2):
    topic = "abortion"
    tfrecord_path = "./data/ukp_tfrecord/dev_" + topic
    tfrecord = list(load_tfrecord(tfrecord_path))

    get_correctness_arr_fn = partial(get_correctness_arr, tfrecord)

    prediction_list_1 = list(get_existing_predictions(prefix1, topic))
    prediction_list_2 = list(get_existing_predictions(prefix2, topic))

    num_runs = min(len(prediction_list_1), len(prediction_list_2))
    prediction_list_1 = prediction_list_1[:num_runs]
    prediction_list_2 = prediction_list_2[:num_runs]

    c1 = flatten(lmap(get_correctness_arr_fn, prediction_list_1))
    c2 = flatten(lmap(get_correctness_arr_fn, prediction_list_2))

    print(len(c1))
    print(len(c2))

    _, p_value = stats.ttest_rel(c1, c2)
    print(p_value)
Example #15
import sys

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats


def student_t_test_rel(approaches, accuracy_values, save_path):
    # calculate the two-sided paired Student's t-test from scipy,
    # comparing all approaches with each other
    # returns NaN when an approach is compared with itself, because the standard
    # deviation of the pairwise differences appears in the divisor
    student_t_test_rel_frame = pd.DataFrame()
    for i in range(len(approaches)):
        for j in range(i, len(approaches), 1):
            # iterate through approaches
            approach_i = approaches[i]
            approach_j = approaches[j]
            values_i = accuracy_values.loc[:, approach_i]
            values_j = accuracy_values.loc[:, approach_j]
            t_statistic, two_tailed_p_test = stats.ttest_rel(values_i, values_j)
            student_t_test_rel_frame.at[approach_i, approach_j] = two_tailed_p_test

    # plot the p-value matrix once, after all pairs have been filled in
    save_path.mkdir(parents=True, exist_ok=True)
    fig = plt.figure(figsize=(4, 2))
    ax = fig.subplots()
    ax = sns.heatmap(student_t_test_rel_frame, ax=ax, annot=True, fmt="0.3f", cmap="autumn", vmin=0, vmax=0.05)
    plt.xticks(rotation=45)
    fig.canvas.start_event_loop(sys.float_info.min)
    path = save_path / 'students-test_scipy_rel.png'
    fig.savefig(path, bbox_inches='tight', dpi=100)
    plt.close(fig)
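
A sketch invocation with synthetic per-dataset accuracies (approach names and numbers made up):

from pathlib import Path
import pandas as pd

accs = pd.DataFrame({'svm': [0.81, 0.79, 0.84, 0.80],
                     'rf': [0.83, 0.82, 0.86, 0.81],
                     'mlp': [0.78, 0.80, 0.83, 0.79]})
student_t_test_rel(['svm', 'rf', 'mlp'], accs, Path('plots'))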
Example #16
import math

import pandas as pd
from scipy import stats

#equations from
#https://www.statisticshowto.datasciencecentral.com/probability-and-statistics/t-test/

path = 'ResamplesT.txt'
data = pd.read_csv(path, sep="\t")
data_top = data.head()
print(data_top)

hive = data['HIVE-COTE']
boss = data['BOSS']

#T-test paired
results = stats.ttest_rel(hive, boss)
print(results)

###"Manual" step-wise procedure

#Getting squared and summed differences
#ΣD² (sum of squared differences), ΣD, and (ΣD)²
Σ2 = sum(((hive) - (boss))**2)
Σ = sum(hive - boss)
Σ2_2 = Σ**2
#Number of samples
n = len(hive)

print("sumDiff")
print(Σ)
print("sumSquared")
        elif sit == 1:
            coordinate_x_phase2.append(x_pos)
            coordinate_y_phase2.append(y_pos)
        elif sit == 2:
            coordinate_x_phase3.append(x_pos)
            coordinate_y_phase3.append(y_pos)
    #arena data input
    data2 = open("%d.cfg" % (number), 'r')
    diameter, center_x, center_y, radius, lstripex, lstripey, rstripex, rstripey = [
        int(l.split('=')[1]) for l in data2 if len(l.split('=')) > 1
    ]

    #------------------Calculate over all fixation index-----------------
    fixation_index_phase1.append(
        distribution_method(coordinate_x_phase1, coordinate_y_phase1, center_x,
                            center_y))
    fixation_index_phase2.append(
        distribution_method(coordinate_x_phase2, coordinate_y_phase2, center_x,
                            center_y))
    fixation_index_phase3.append(
        distribution_method(coordinate_x_phase3, coordinate_y_phase3, center_x,
                            center_y))

#------------------Student T test-----------------
t_static1, p_value1 = stats.ttest_rel(fixation_index_phase1,
                                      fixation_index_phase2)
t_static2, p_value2 = stats.ttest_rel(fixation_index_phase1,
                                      fixation_index_phase3)
print "p-value of phase 1 & 2 is %s" % p_value1
print "p-value of phase 1 & 3 is %s" % p_value2