Example #1
    def test_result_attributes(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes, ma=True)
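For orientation, a minimal standalone sketch of the behaviour this test pins down: mstats.ttest_ind returns a named result whose fields are reachable both by index and by attribute. The sample data here is invented.

import numpy as np
from scipy.stats import mstats

rng = np.random.default_rng(0)        # made-up sample data
a = rng.standard_normal(20)
b = rng.standard_normal(20) + 1.0
res = mstats.ttest_ind(a, b)
print(res.statistic, res.pvalue)      # same values as res[0], res[1]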
Example #2
    def check_significance(gt_dist, seg_dist):
        # assumes: from scipy.stats import normaltest, ttest_ind, mannwhitneyu

        # p-values of the normality tests on each sample
        normal_gt = normaltest(gt_dist)[1]
        normal_seg = normaltest(seg_dist)[1]

        # if both samples look normally distributed use the t-test,
        # otherwise fall back to the non-parametric Mann-Whitney U test
        if normal_gt > 0.05 and normal_seg > 0.05:
            pvalue = ttest_ind(gt_dist, seg_dist)[1]
        else:
            pvalue = mannwhitneyu(gt_dist, seg_dist)[1]

        # map the p-value to a significance level (boundary values included)
        if pvalue > 0.05:
            return 0
        elif pvalue > 0.01:
            return 1
        elif pvalue > 0.001:
            return 2
        else:
            return 3
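A possible call site for the helper above, treated here as a free function and fed synthetic distributions (the names gt and seg are illustrative):

import numpy as np

rng = np.random.default_rng(42)
gt = rng.normal(0.0, 1.0, 100)       # synthetic "ground truth" measurements
seg = rng.normal(0.5, 1.0, 100)      # synthetic "segmentation" measurements
print(check_significance(gt, seg))   # expected 2 or 3 for a shift this large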
Example #3
    def test_vs_nonmasked(self):
        np.random.seed(1234567)
        outcome = np.random.randn(20, 4) + [0, 0, 1, 2]

        # 1-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1])
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1])
        assert_allclose(res1, res2)

        # axis=None and 2-D inputs
        res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None)
        assert_allclose(res1, res2)
        res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0)
        assert_allclose(res1, res2)

        # Check default is axis=0
        res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:])
        assert_allclose(res2, res3)
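What the masked module buys you, in isolation: masked entries are simply dropped, where the plain stats version would propagate NaN. Toy data; the exact NaN behaviour of stats.ttest_ind depends on the SciPy version and its nan_policy.

import numpy as np
from scipy import stats
from scipy.stats import mstats

x = np.ma.masked_invalid([1.1, 2.0, np.nan, 1.7, 2.3])   # NaN becomes masked
y = np.ma.masked_invalid([2.9, 3.1, 3.4, np.nan, 2.8])
print(mstats.ttest_ind(x, y))            # computed on unmasked values only
print(stats.ttest_ind(x.data, y.data))   # plain version sees the raw NaNs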
Example #4
    def test_empty(self):
        res1 = mstats.ttest_ind([], [])
        assert_(np.all(np.isnan(res1)))
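The asserted behaviour as a standalone snippet (recent SciPy releases may additionally warn about the empty inputs):

import numpy as np
from scipy.stats import mstats

res = mstats.ttest_ind([], [])
print(np.isnan(res.statistic), np.isnan(res.pvalue))   # True True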
Example #5
            tissueMat = np.loadtxt(ablnFile).reshape(80, 80)
            patchMat = tissueMat[patchPos:patchEnd, patchPos:patchEnd]
            patchConcentration = np.sum(patchMat) / (patchWidth * patchWidth)
            tissueConcentration = np.sum(tissueMat) / (80 * 80)
            patchConces[i] = patchConcentration
            tissueConces[i] = tissueConcentration

        with open(testLogFile, 'w') as testLog:
            testLog.write('patch ablated/area,\ttissue ablated/area\n')
            theString = '{},\t{}\n'
            for i in range(len(patchConces)):
                testLog.write(theString.format(patchConces[i], tissueConces[i]))

        # Finding the test statistics
        with open(testStatFile, 'w') as statLog:
            tval, pval = ttest_ind(patchConces, tissueConces)
            statLog.write('Testing Ha: tissue ablation concentration < Patch concentration\n')
            statLog.write('Raw tval: {}\tRaw pval: {}\n'.format(tval, pval))
            print('Raw tval: {}\tRaw pval: {}\n'.format(tval, pval))
            # halve the two-sided p-value for the one-tailed test; this is only
            # valid when the t statistic has the hypothesized sign
            actualPval = pval / 2
            print('Ha: tissue ablation concentration < Patch concentration tval:{}\tpval:{}'.format(tval, actualPval))
            statLog.write('Raw_tval: {}\t1-tail_pval: {}'.format(tval, actualPval))
        print('\tTesting & logging complete')

    if boundaryTest:
        # Test boundary area connection importance
        print('\nTesting if boundary connection matters')
        ## First create 10 tissues never seen before
        testDir = os.path.join(tempDir, 'BoundaryConnTest')
        if not os.path.exists(testDir):
            os.makedirs(testDir)
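The one-tailed conversion used above, factored into a small helper sketch; the sign check is the part the snippet leaves implicit, since halving the two-sided p-value is only valid when the t statistic falls on the hypothesized side:

from scipy.stats.mstats import ttest_ind

def one_tailed_less(sample_a, sample_b):
    """p-value for Ha: mean(sample_a) < mean(sample_b)."""
    tval, pval = ttest_ind(sample_a, sample_b)
    # on the wrong side, the one-tailed p-value is the complement
    return pval / 2 if tval < 0 else 1 - pval / 2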
Example #6
def groupmeans(data, groups, numbers, cutoff=0.01, quantile=0.95, minsize=None):
    """
    Yields the significant differences in average between every pair of
    groups and numbers.

    Parameters
    ----------
    data : blaze data object
    groups : non-empty iterable containing category column names in data
    numbers : non-empty iterable containing numeric column names in data
    cutoff : ignore anything with prob > cutoff.
        cutoff=None ignores significance checks, speeding it up a LOT.
    quantile : number that represents target improvement. Defaults to .95.
        The ``diff`` returned is the % impact of everyone moving to the 95th
        percentile
    minsize : each group should contain at least minsize values.
        If minsize=None, automatically set the minimum size to
        1% of the dataset, or 10, whichever is larger.
    """

    if minsize is None:
        # materialize nrows as an int; the bare expression cannot be compared
        minsize = max(bz.into(int, data.nrows) // 100, 10)

    # compute each numeric column's mean as a plain float
    means = {col: bz.into(float, data[col].mean()) for col in numbers}
    results = []

    for group in groups:
        agg = {number: bz.mean(data[number]) for number in numbers}
        agg["#"] = bz.count(data)
        ave = bz.by(data[group], **agg).sort("#", ascending=False)
        ave = bz.into(pd.DataFrame, ave)
        ave.index = ave[group]
        sizes = ave["#"]

        # Each group should contain at least minsize values
        biggies = sizes[sizes >= minsize].index
        # ... and at least 2 groups overall, to compare.
        if len(biggies) < 2:
            continue
        for number in numbers:
            if number == group:
                continue
            sorted_cats = ave[number][biggies].dropna().sort_values()
            if len(sorted_cats) < 2:
                continue
            lo = bz.into(list, data[number][data[group] == sorted_cats.index[0]])
            hi = bz.into(list, data[number][data[group] == sorted_cats.index[-1]])
            _, prob = ttest_ind(
                np.ma.masked_array(lo, np.isnan(lo)),
                np.ma.masked_array(hi, np.isnan(hi))
            )
            if cutoff is not None and prob > cutoff:
                continue
            results.append(
                {
                    "group": group,
                    "number": number,
                    "prob": prob,
                    "gain": sorted_cats.iloc[-1] / means[number] - 1,
                    "biggies": ave.loc[biggies][number],
                    "means": ave[[number, "#"]].sort_values(by=number),
                }
            )

    results = pd.DataFrame(results)
    if len(results) > 0:
        results = results.set_index(["group", "number"])
    return results
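The NaN-handling idiom buried in the middle of groupmeans, shown on its own: wrapping each sample in a masked array lets mstats.ttest_ind ignore the missing values. Data invented.

import numpy as np
from scipy.stats.mstats import ttest_ind

lo = [1.0, np.nan, 1.4, 0.9]
hi = [2.1, 2.5, np.nan, 2.2]
_, prob = ttest_ind(np.ma.masked_array(lo, np.isnan(lo)),
                    np.ma.masked_array(hi, np.isnan(hi)))
print(prob)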
Example #7
                 p=1,
                 y_dist=0.02,
                 distance=0.1)

sb.boxplot(data=all_data, color="skyblue")  #.set(ylabel="Dice coefficient")

plt.ylabel("Intersection over Union", size=18)
plt.yticks(fontsize=12)
plt.xticks(fontsize=12)

print(normaltest(all_data["Gaussian"])[1])
print(normaltest(all_data["Hessian"])[1])
print(normaltest(all_data["Laplacian"])[1])
print(normaltest(all_data["Ilastik"])[1])
print(normaltest(all_data["MitoSegNet"])[1])

print("\n")
"""
print(mannwhitneyu(all_data["Gaussian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Hessian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Laplacian"], all_data["MitoSegNet"])[1])
print(mannwhitneyu(all_data["Ilastik"], all_data["MitoSegNet"])[1])
"""

print(ttest_ind(all_data["Gaussian"], all_data["MitoSegNet"])[1])
print(ttest_ind(all_data["Hessian"], all_data["MitoSegNet"])[1])
print(ttest_ind(all_data["Laplacian"], all_data["MitoSegNet"])[1])
print(ttest_ind(all_data["Ilastik"], all_data["MitoSegNet"])[1])

plt.show()
Example #8
def groupmeans(data, groups, numbers, cutoff=.01, quantile=.95, minsize=None,
               weight=None):
    '''
    Yields the significant differences in average between every pair of
    groups and numbers.

    :arg DataFrame data: pandas.DataFrame to analyze
    :arg list groups: category column names to group data by
    :arg list numbers: numeric column names in to summarize data by
    :arg float cutoff: ignore anything with prob > cutoff.
        cutoff=None ignores significance checks, speeding it up a LOT.
    :arg float quantile: number that represents target improvement. Defaults to .95.
        The ``diff`` returned is the % impact of everyone moving to the 95th
        percentile
    :arg int minsize: each group should contain at least minsize values.
        If minsize=None, automatically set the minimum size to
        1% of the dataset, or 10, whichever is larger.
    :arg weight: optional numeric column name used to weight the averages
    '''
    import numpy as np
    from scipy.stats.mstats import ttest_ind
    if minsize is None:
        minsize = max(len(data.index) // 100, 10)

    if weight is None:
        means = data[numbers].mean()
    else:
        means = weighted_avg(data, numbers, weight)
    results = []
    for group in groups:
        grouped = data.groupby(group, sort=False)
        if weight is None:
            ave = grouped[numbers].mean()
        else:
            ave = grouped.apply(lambda v: weighted_avg(v, numbers, weight))
        ave['#'] = sizes = grouped.size()
        # Each group should contain at least minsize values
        biggies = sizes[sizes >= minsize].index
        # ... and at least 2 groups overall, to compare.
        if len(biggies) < 2:
            continue
        for number in numbers:
            if number == group:
                continue
            sorted_cats = ave[number][biggies].dropna().sort_values()
            if len(sorted_cats) < 2:
                continue
            lo = data[number][grouped.groups[sorted_cats.index[0]]].values
            hi = data[number][grouped.groups[sorted_cats.index[-1]]].values
            _, prob = ttest_ind(
                np.ma.masked_array(lo, np.isnan(lo)),
                np.ma.masked_array(hi, np.isnan(hi))
            )
            if cutoff is not None and prob > cutoff:
                continue
            results.append({
                'group': group,
                'number': number,
                'prob': prob,
                'gain': sorted_cats.iloc[-1] / means[number] - 1,
                'biggies': ave.loc[biggies][number].to_dict(),
                'means': ave[[number, '#']].sort_values(number).to_dict(),
            })

    results = pd.DataFrame(results)
    if len(results) > 0:
        results = results.set_index(['group', 'number'])
    return results.reset_index()  # Flatten multi-index.
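A hypothetical call, assuming an ordinary pandas DataFrame; the column names are invented, and weight is left as None so the weighted_avg helper is never reached:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'city': ['A'] * 50 + ['B'] * 50,
    'sales': np.r_[np.random.randn(50) + 10, np.random.randn(50) + 12],
})
print(groupmeans(df, groups=['city'], numbers=['sales']))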
Example #9
plt.legend(loc="best")

plt.show()



# %%
# recode genotype value 2 as 1; .loc avoids chained-assignment warnings
df.loc[df['rs1'] == 2, 'rs1'] = 1
df.loc[df['rs2'] == 2, 'rs2'] = 1

# %%
df1 = df[df['bloodsugar'].isin([0])]
df2 = df[df['bloodsugar'].isin([1])]
from scipy.stats.mstats import ttest_ind
for col in ['GDM', 'rs1', 'rs2']:   # avoid shadowing the builtin str
    stat, p = ttest_ind(df1[col], df2[col])
    print(col, '   ', p)


df['bloodsugar'].value_counts()

# %%
df['combine'] = 10
'''
change='GDM'
control='rs'
flag=1

if flag==0:
    df = df[df[control].isin([0])]
else:
Example #10
def groupmeans(data, groups, numbers,
               cutoff=.01,
               quantile=.95,
               min_size=None):
    '''
    Yields the significant differences in average between every pair of
    groups and numbers.

    Parameters
    ----------
    data : blaze data object
    groups : non-empty iterable containing category column names in data
    numbers : non-empty iterable containing numeric column names in data
    cutoff : ignore anything with prob > cutoff.
        cutoff=None ignores significance checks, speeding it up a LOT.
    quantile : number that represents target improvement. Defaults to .95.
        The ``diff`` returned is the % impact of everyone moving to the 95th
        percentile
    min_size : each group should contain at least min_size values.
        If min_size=None, automatically set the minimum size to
        1% of the dataset, or 10, whichever is larger.
    '''

    if min_size is None:
        # compute nrows, bz.compute(data.nrows) doesn't work for sqlite
        min_size = max(bz.into(int, data.nrows) // 100, 10)

    # compute mean of each number column
    means = {col: bz.into(float, data[col].mean()) for col in numbers}
    # pre-create aggregation expressions (mean, count)
    agg = {number: bz.mean(data[number]) for number in numbers}
    for group in groups:
        agg['#'] = data[group].count()
        ave = bz.by(data[group], **agg).sort('#', ascending=False)
        ave = bz.into(pd.DataFrame, ave)
        ave.index = ave[group]
        sizes = ave['#']
        # Each group should contain at least min_size values
        biggies = sizes[sizes >= min_size].index
        # ... and at least 2 groups overall, to compare.
        if len(biggies) < 2:
            continue
        for number in numbers:
            if number == group:
                continue
            sorted_cats = ave[number][biggies].dropna().sort_values()
            if len(sorted_cats) < 2:
                continue
            sohi = sorted_cats.index[-1]
            solo = sorted_cats.index[0]

            # If sorted_cats.index items are of numpy type, then
            # convert them to native type, skip conversion for unicode, str
            # See https://github.com/blaze/blaze/issues/1461
            if isinstance(solo, np.generic):
                solo, sohi = solo.item(), sohi.item()

            lo = bz.into(list, data[number][data[group] == solo])
            hi = bz.into(list, data[number][data[group] == sohi])
            _, prob = ttest_ind(
                np.ma.masked_array(lo, np.isnan(lo)),
                np.ma.masked_array(hi, np.isnan(hi))
            )
            # All results will be returned by default
            # Up to the user to ignore or show insignificant results
            # Uncomment below two lines to return only significant results
            # if prob > cutoff:
            #    continue

            yield ({
                'group': group,
                'number': number,
                'prob': float(prob),
                'gain': sorted_cats.iloc[-1] / means[number] - 1,
                'biggies': ave.loc[biggies][number].to_dict(),
                'means': ave[[number, '#']].sort_values(by=number).reset_index().to_dict(
                    orient='records'),
            })
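The numpy-to-native conversion above matters whenever a groupby label is fed back into a blaze/SQL filter (see the linked blaze issue); in isolation:

import numpy as np

key = np.int64(7)                 # e.g. a label from a pandas index
if isinstance(key, np.generic):
    key = key.item()              # native Python int, safe to compare in blaze
print(type(key))                  # <class 'int'>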
Example #11
def groupmeans(data, groups, numbers,
               cutoff=.01,
               quantile=.95,
               min_size=None):
    '''
    Yields the significant differences in average between every pair of
    groups and numbers.

    Parameters
    ----------
    data : blaze data object
    groups : non-empty iterable containing category column names in data
    numbers : non-empty iterable containing numeric column names in data
    cutoff : ignore anything with prob > cutoff.
        cutoff=None ignores significance checks, speeding it up a LOT.
    quantile : number that represents target improvement. Defaults to .95.
        The ``diff`` returned is the % impact of everyone moving to the 95th
        percentile
    min_size : each group should contain at least min_size values.
        If min_size=None, automatically set the minimum size to
        1% of the dataset, or 10, whichever is larger.
    '''

    if min_size is None:
        # compute nrows, bz.compute(data.nrows) doesn't work for sqlite
        min_size = max(bz.into(int, data.nrows) // 100, 10)

    # compute mean of each number column
    means = {col: bz.into(float, data[col].mean()) for col in numbers}
    # pre-create aggregation expressions (mean, count)
    agg = {number: bz.mean(data[number]) for number in numbers}
    for group in groups:
        agg['#'] = data[group].count()
        ave = bz.by(data[group], **agg).sort('#', ascending=False)
        ave = bz.into(pd.DataFrame, ave)
        ave.index = ave[group]
        sizes = ave['#']
        # Each group should contain at least min_size values
        biggies = sizes[sizes >= min_size].index
        # ... and at least 2 groups overall, to compare.
        if len(biggies) < 2:
            continue
        for number in numbers:
            if number == group:
                continue
            sorted_cats = ave[number][biggies].dropna().sort_values()
            if len(sorted_cats) < 2:
                continue
            sohi = sorted_cats.index[-1]
            solo = sorted_cats.index[0]

            # If sorted_cats.index items are of numpy type, then
            # convert them to native type, skip conversion for unicode, str
            # See https://github.com/blaze/blaze/issues/1461
            if isinstance(solo, np.generic):
                solo, sohi = solo.item(), sohi.item()

            lo = bz.into(list, data[number][data[group] == solo])
            hi = bz.into(list, data[number][data[group] == sohi])

            _, prob = ttest_ind(
                np.ma.masked_array(lo, np.isnan(lo)),
                np.ma.masked_array(hi, np.isnan(hi))
            )
            if prob > cutoff:
                continue

            yield ({
                'group': group,
                'number': number,
                'prob': float(prob),
                'gain': sorted_cats.iloc[-1] / means[number] - 1,
                'biggies': ave.loc[biggies][number].to_dict(),
                'means': ave[[number, '#']].sort_values(by=number).reset_index().to_dict(
                    orient='records'),
            })
Example #12
def ttests(path_to_dict, name_dict='\\TOTresults'):
    import numpy as np
    import fnmatch
    from dictmanager import load_obj

    #NOTES ON THE NOTATION USED BELOW
    #d = list of distances from reference point
    #fl = list of FL
    #PAs = list of PA sup
    #PAi = list of PA inf
    #mt = list of MT
    #_s : simple images
    #_p : panoramic images
    #_m : manual
    #_a : automated
    #_filtered: matched fascicles only


    participants=['01_Kevin', '02_rafaelopes', '03_charlesbarrand', '04_guilhem',\
        '05_leandre', '06_thomasmartine', '10_victor',\
        '11_youssouf', '12_sufyan', '16_julien', '34_nicolas']

    '************************************************************************'
    '*****************************INITIALIZATION*****************************'

    d_s_m = [[] for par in range(len(participants))]
    mt_s_m = [[] for par in range(len(participants))]
    d_s_m_filtered = [[] for par in range(len(participants))]
    fl_s_m_filtered = [[] for par in range(len(participants))]
    PAs_s_m_filtered = [[] for par in range(len(participants))]
    PAi_s_m_filtered = [[] for par in range(len(participants))]

    d_s_a = [[] for par in range(len(participants))]
    mt_s_a = [[] for par in range(len(participants))]
    d_s_a_filtered = [[] for par in range(len(participants))]
    fl_s_a_filtered = [[] for par in range(len(participants))]
    PAs_s_a_filtered = [[] for par in range(len(participants))]
    PAi_s_a_filtered = [[] for par in range(len(participants))]

    d_p_m = [[] for par in range(len(participants))]
    mt_p_m = [[] for par in range(len(participants))]
    d_p_m_filtered = [[] for par in range(len(participants))]
    fl_p_m_filtered = [[] for par in range(len(participants))]
    PAs_p_m_filtered = [[] for par in range(len(participants))]
    PAi_p_m_filtered = [[] for par in range(len(participants))]

    d_p_a = [[] for par in range(len(participants))]
    mt_p_a = [[] for par in range(len(participants))]
    d_p_a_filtered = [[] for par in range(len(participants))]
    fl_p_a_filtered = [[] for par in range(len(participants))]
    PAs_p_a_filtered = [[] for par in range(len(participants))]
    PAi_p_a_filtered = [[] for par in range(len(participants))]

    #stats on the number of fascicles detected
    nb_fasc_tot_s = 0
    nb_fasc_in_s = 0
    nb_fasc_filt_s = 0
    nb_images_s = 0
    nb_fasc_tot_p = 0
    nb_fasc_in_p = 0
    nb_fasc_filt_p = 0
    nb_images_p = 0

    '************************************************************************'
    '*****************************DATA RETRIEVAL*****************************'

    dictio = load_obj(name_dict, path_to_dict)
    l2 = ['fasc*', 'fsc_*']

    for par in range(len(participants)):

        participant = participants[par]
        fam_folders = [str(d) for d in dictio[participant].keys()]

        s_manuFasc = []
        s_autoFasc = []
        p_manuFasc = []
        p_autoFasc = []

        for fam in fam_folders:

            ###################################################################
            # simple images
            dictioS = dictio[participant][fam]['BF']['simple']
            images = [str(im) for im in dictioS.keys()]
            for i in images:
                # if par == 9 and fam =='fam_2' and i=='img_2':
                #     print(par, fam, i)
                # else:
                nb_images_s = nb_images_s + 1

                ###############################################################
                # SIMPLE - manual
                dictioM = dictioS[i]['architecture manual']
                fascicles = [
                    str(fa) for fa in dictioM if any(
                        fnmatch.fnmatch(fa, p) for p in l2)
                ]
                for f in fascicles:
                    dictioF = dictioM[f]
                    idf = fam + '/' + i + '/' + f
                    if len(dictioF.keys()) > 1:
                        s_manuFasc.append(
                            (idf,
                             dictioF['dist from (0,0) of RGB image, in mm']))
                        d_s_m[par].append(
                            dictioF['dist from (0,0) of RGB image, in mm'])
                ###############################################################
                # SIMPLE - automatic
                if ('architecture auto' in dictioS[i]):
                    dictioA = dictioS[i]['architecture auto']
                    midRow = np.mean(dictioA['crop']['lines'])
                    midCol = np.mean(dictioA['crop']['columns'])
                    if dictioA and ('MT' in dictioA):
                        fascicles = [
                            fa for fa in dictioA if any(
                                fnmatch.fnmatch(fa, p) for p in l2)
                        ]
                        nb_fasc_tot_s = nb_fasc_tot_s + len(fascicles)
                        for f in fascicles:
                            dictioF = dictioA[f]
                            idf = fam + '/' + i + '/' + f
                            if len(dictioF.keys()) > 1:
                                #keep the fascicles that are in the lower half of the image,
                                #to compare with manual data - often taken in that region
                                PAi = dictioF['PAinf']['intersection with apo']
                                PAs = dictioF['PAsup']['intersection with apo']
                                fasc_row = (PAs[0] - PAi[0]) / (
                                    PAs[1] - PAi[1]) * (midCol -
                                                        PAs[1]) + PAs[0]

                                if fasc_row <= midRow:
                                    s_autoFasc.append((idf, dictioF[
                                        'dist from (0,0) of RGB image, in mm']
                                                       ))
                                    d_s_a[par].append(dictioF[
                                        'dist from (0,0) of RGB image, in mm'])
                                    nb_fasc_in_s = nb_fasc_in_s + 1

                        if ('MT for labelled points' in dictioM['MT']):
                            for ind0 in range(
                                    len(dictioM['MT']
                                        ['MT for labelled points'])):
                                elem = dictioM['MT']['MT for labelled points'][
                                    ind0]
                                if elem != 'error':
                                    mt_s_m[par].append(elem)  #MT in mm

                            for ind0 in range(
                                    len(dictioA['MT']
                                        ['MT for labelled points'])):
                                elem = dictioA['MT']['MT for labelled points'][
                                    ind0]
                                if elem != 'error':
                                    mt_s_a[par].append(elem)

            ###################################################################
            # panoramic images
            dictioP = dictio[participant][fam]['BF']['panoramic']
            images = [str(im) for im in dictioP.keys()]
            for i in images:
                nb_images_p = nb_images_p + 1

                ###############################################################
                # PANORAMIC - manual
                dictioM = dictioP[i]['architecture manual']
                fascicles = [
                    fa for fa in dictioM if any(
                        fnmatch.fnmatch(fa, p) for p in l2)
                ]
                for f in fascicles:
                    dictioF = dictioM[f]
                    idf = fam + '/' + i + '/' + f
                    if len(dictioF.keys()) > 1:
                        p_manuFasc.append(
                            (idf, dictioF['dist from insertion in mm']))
                        d_p_m[par].append(dictioF['dist from insertion in mm'])

                ###############################################################
                # PANORAMIC - automatic
                if ('architecture auto' in dictioP[i]):
                    dictioA = dictioP[i]['architecture auto']
                    if dictioA and ('MT' in dictioA):
                        fascicles = [
                            fa for fa in dictioA if any(
                                fnmatch.fnmatch(fa, p) for p in l2)
                        ]
                        nb_fasc_tot_p = nb_fasc_tot_p + len(fascicles)
                        for f in fascicles:
                            dictioF = dictioA[f]
                            idf = fam + '/' + i + '/' + f
                            #only keep fascicles that are entirely within the cropped image,
                            #to compare with manually identified fascicles
                            if len(dictioF.keys()) > 1 and dictioF['FL'][
                                    'in/out of the image'] == 'in image':
                                nb_fasc_in_p = nb_fasc_in_p + 1
                                p_autoFasc.append(
                                    (idf,
                                     dictioF['dist from insertion in mm']))
                                d_p_a[par].append(
                                    dictioF['dist from insertion in mm'])

                        if ('MT for labelled points' in dictioM['MT']):
                            for ind0 in range(
                                    len(dictioM['MT']
                                        ['MT for labelled points'])):
                                elem = dictioM['MT']['MT for labelled points'][
                                    ind0]
                                if elem != 'error':
                                    mt_p_m[par].append(elem)  #MT in mm

                            for ind0 in range(
                                    len(dictioA['MT']
                                        ['MT for labelled points'])):
                                elem = dictioA['MT']['MT for labelled points'][
                                    ind0]
                                if elem != 'error':
                                    mt_p_a[par].append(elem)

        '************************************************************************'
        '********************MATCHING AUTO & MANUAL FASCICLES*******************'

        listePair_manuF_s = []
        for n in range(len(s_manuFasc)):
            mf = s_manuFasc[n]
            subtr = [(tup, abs(tup[1] - mf[1])) for tup in s_autoFasc]
            subtr.sort(key=lambda x: x[1])
            closest = subtr[0]
            listePair_manuF_s.append(
                (mf[0], closest[0][0], closest[1])
            )  #tuple = (manual fasc ID, auto fasc ID, distance between the two)
        listePair_manuF_s.sort(key=lambda x: x[1])
        uniqueMatching = []
        counterL = 0
        while counterL < len(listePair_manuF_s):
            currentAutoFasc = listePair_manuF_s[counterL][1]
            correspondingAutoFasc = [(listePair_manuF_s[counterL][0],
                                      listePair_manuF_s[counterL][2])]
            rank = counterL + 1
            while rank < len(listePair_manuF_s) and listePair_manuF_s[rank][
                    1] == currentAutoFasc:
                correspondingAutoFasc.append(
                    (listePair_manuF_s[rank][0], listePair_manuF_s[rank][2]))
                rank = rank + 1
            correspondingAutoFasc.sort(key=lambda x: x[1])
            uniqueMatching.append(
                (correspondingAutoFasc[0][0], currentAutoFasc,
                 correspondingAutoFasc[0][1]))
            counterL = rank
        for element in uniqueMatching:
            pathA = element[1].split('/')
            pathM = element[0].split('/')
            nb_fasc_filt_s = nb_fasc_filt_s + 1
            d_s_m_filtered[par].append(dictio[participant][
                pathM[0]]['BF']['simple'][pathM[1]]['architecture manual'][
                    pathM[2]]['dist from (0,0) of RGB image, in mm'])
            fl_s_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['simple'][pathM[1]]
                ['architecture manual'][pathM[2]]['FL']['length in mm'])
            PAs_s_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['simple'][pathM[1]]
                ['architecture manual'][pathM[2]]['PAsup']['value in degree'])
            PAi_s_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['simple'][pathM[1]]
                ['architecture manual'][pathM[2]]['PAinf']['value in degree'])
            d_s_a_filtered[par].append(dictio[participant][
                pathA[0]]['BF']['simple'][pathA[1]]['architecture auto'][
                    pathA[2]]['dist from (0,0) of RGB image, in mm'])
            fl_s_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['simple'][pathA[1]]
                ['architecture auto'][pathA[2]]['FL']['length in mm'])
            PAs_s_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['simple'][pathA[1]]
                ['architecture auto'][pathA[2]]['PAsup']['value in degree'])
            PAi_s_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['simple'][pathA[1]]
                ['architecture auto'][pathA[2]]['PAinf']['value in degree'])

        listePair_manuF_p = []
        for n in range(len(p_manuFasc)):
            mf = p_manuFasc[n]
            subtr = [(tup, abs(tup[1] - mf[1])) for tup in p_autoFasc]
            subtr.sort(key=lambda x: x[1])
            closest = subtr[0]
            listePair_manuF_p.append(
                (mf[0], closest[0][0], closest[1])
            )  #tuple = (manual fasc ID, auto fasc ID, distance between the two)
        listePair_manuF_p.sort(key=lambda x: x[1])
        uniqueMatching = []
        counterL = 0
        while counterL < len(listePair_manuF_p):
            currentAutoFasc = listePair_manuF_p[counterL][1]
            correspondingAutoFasc = [(listePair_manuF_p[counterL][0],
                                      listePair_manuF_p[counterL][2])]
            rank = counterL + 1
            while rank < len(listePair_manuF_p) and listePair_manuF_p[rank][
                    1] == currentAutoFasc:
                correspondingAutoFasc.append(
                    (listePair_manuF_p[rank][0], listePair_manuF_p[rank][2]))
                rank = rank + 1
            correspondingAutoFasc.sort(key=lambda x: x[1])
            uniqueMatching.append(
                (correspondingAutoFasc[0][0], currentAutoFasc,
                 correspondingAutoFasc[0][1]))
            counterL = rank
        for element in uniqueMatching:
            pathA = element[1].split('/')
            pathM = element[0].split('/')
            nb_fasc_filt_p = nb_fasc_filt_p + 1
            d_p_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['panoramic'][pathM[1]]
                ['architecture manual'][pathM[2]]['dist from insertion in mm'])
            fl_p_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['panoramic'][pathM[1]]
                ['architecture manual'][pathM[2]]['FL']['length in mm'])
            PAs_p_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['panoramic'][pathM[1]]
                ['architecture manual'][pathM[2]]['PAsup']['value in degree'])
            PAi_p_m_filtered[par].append(
                dictio[participant][pathM[0]]['BF']['panoramic'][pathM[1]]
                ['architecture manual'][pathM[2]]['PAinf']['value in degree'])
            d_p_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['panoramic'][pathA[1]]
                ['architecture auto'][pathA[2]]['dist from insertion in mm'])
            fl_p_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['panoramic'][pathA[1]]
                ['architecture auto'][pathA[2]]['FL']['length in mm'])
            PAs_p_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['panoramic'][pathA[1]]
                ['architecture auto'][pathA[2]]['PAsup']['value in degree'])
            PAi_p_a_filtered[par].append(
                dictio[participant][pathA[0]]['BF']['panoramic'][pathA[1]]
                ['architecture auto'][pathA[2]]['PAinf']['value in degree'])

    #t_tests
    print('paired samples t-tests results: ')
    from scipy.stats.mstats import ttest_rel
    #NOTE: the '..._filtered' arrays cannot be used directly because of their
    #nested structure; they are flattened to 1-D lists first
    t, p = ttest_rel(
        [item for sublist in PAs_s_m_filtered for item in sublist],
        [item for sublist in PAs_s_a_filtered for item in sublist],
        axis=None)
    print('PAS s', p)
    t2, p2 = ttest_rel(
        [item for sublist in PAs_p_m_filtered for item in sublist],
        [item for sublist in PAs_p_a_filtered for item in sublist],
        axis=None)
    print('PAS p', p2)
    t3, p3 = ttest_rel(
        [item for sublist in PAi_s_m_filtered for item in sublist],
        [item for sublist in PAi_s_a_filtered for item in sublist],
        axis=None)
    print('PAI s', p3)
    t4, p4 = ttest_rel(
        [item for sublist in PAi_p_m_filtered for item in sublist],
        [item for sublist in PAi_p_a_filtered for item in sublist],
        axis=None)
    print('PAI p', p4)
    t5, p5 = ttest_rel(
        [item for sublist in fl_s_m_filtered for item in sublist],
        [item for sublist in fl_s_a_filtered for item in sublist],
        axis=None)
    print('FL s', p5)
    t6, p6 = ttest_rel(
        [item for sublist in fl_p_m_filtered for item in sublist],
        [item for sublist in fl_p_a_filtered for item in sublist],
        axis=None)
    print('FL p', p6)
    t7, p7 = ttest_rel([item for sublist in mt_s_m for item in sublist],
                       [item for sublist in mt_s_a for item in sublist],
                       axis=None)
    print('mt s', p7)
    t7_2, p7_2 = ttest_rel([np.mean(sublist) for sublist in mt_s_m],
                           [np.mean(sublist) for sublist in mt_s_a],
                           axis=None)
    print('mt s for means', p7_2)
    t8, p8 = ttest_rel([item for sublist in mt_p_m for item in sublist],
                       [item for sublist in mt_p_a for item in sublist],
                       axis=None)
    print('mt p', p8)
    t8_2, p8_2 = ttest_rel([np.mean(sublist) for sublist in mt_p_m],
                           [np.mean(sublist) for sublist in mt_p_a],
                           axis=None)
    print('mt p for means', p8_2)

    print('independent samples t-tests results: ')
    from scipy.stats.mstats import ttest_ind
    #NOTE: the '..._filtered' arrays cannot be used directly because of their
    #nested structure; they are flattened to 1-D lists first
    t, p = ttest_ind(
        [item for sublist in PAs_s_m_filtered for item in sublist],
        [item for sublist in PAs_s_a_filtered for item in sublist],
        axis=None)
    print('PAS s', p)
    t2, p2 = ttest_ind(
        [item for sublist in PAs_p_m_filtered for item in sublist],
        [item for sublist in PAs_p_a_filtered for item in sublist],
        axis=None)
    print('PAS p', p2)
    t3, p3 = ttest_ind(
        [item for sublist in PAi_s_m_filtered for item in sublist],
        [item for sublist in PAi_s_a_filtered for item in sublist],
        axis=None)
    print('PAI s', p3)
    t4, p4 = ttest_ind(
        [item for sublist in PAi_p_m_filtered for item in sublist],
        [item for sublist in PAi_p_a_filtered for item in sublist],
        axis=None)
    print('PAI p', p4)
    t5, p5 = ttest_ind(
        [item for sublist in fl_s_m_filtered for item in sublist],
        [item for sublist in fl_s_a_filtered for item in sublist],
        axis=None)
    print('FL s', p5)
    t6, p6 = ttest_ind(
        [item for sublist in fl_p_m_filtered for item in sublist],
        [item for sublist in fl_p_a_filtered for item in sublist],
        axis=None)
    print('FL p', p6)
    t7, p7 = ttest_ind([item for sublist in mt_s_m for item in sublist],
                       [item for sublist in mt_s_a for item in sublist],
                       axis=None)
    print('mt s', p7)
    t7_2, p7_2 = ttest_ind([np.mean(sublist) for sublist in mt_s_m],
                           [np.mean(sublist) for sublist in mt_s_a],
                           axis=None)
    print('mt s for means', p7_2)
    t8, p8 = ttest_ind([item for sublist in mt_p_m for item in sublist],
                       [item for sublist in mt_p_a for item in sublist],
                       axis=None)
    print('mt p', p8)
    t8_2, p8_2 = ttest_ind([np.mean(sublist) for sublist in mt_p_m],
                           [np.mean(sublist) for sublist in mt_p_a],
                           axis=None)
    print('mt p for means', p8_2)

    #size effects
    s1 = sizeEffect(PAs_s_m_filtered, PAs_s_a_filtered)
    s2 = sizeEffect(PAs_p_m_filtered, PAs_p_a_filtered)
    s3 = sizeEffect(PAi_s_m_filtered, PAi_s_a_filtered)
    s4 = sizeEffect(PAi_p_m_filtered, PAi_p_a_filtered)
    s5 = sizeEffect(fl_s_m_filtered, fl_s_a_filtered)
    s6 = sizeEffect(fl_p_m_filtered, fl_p_a_filtered)
    s7 = sizeEffect(mt_s_m, mt_s_a)
    s8 = sizeEffect(mt_p_m, mt_p_a)
    print('Size effects: ')
    print('PAS s', s1)
    print('PAS p', s2)
    print('PAi s', s3)
    print('PAi p', s4)
    print('fl s', s5)
    print('fl p', s6)
    print('mt s', s7)
    print('mt p', s8)

    mt_s_a_filt = [[] for par in range(len(participants))]
    mt_s_m_filt = [[] for par in range(len(participants))]
    for p in range(len(mt_s_a)):
        for val in range(len(mt_s_a[p])):
            if p == 9:
                if mt_s_a[p][val] > mt_s_m[p][val] + 2.37 or mt_s_a[p][
                        val] < mt_s_m[p][val] - 2.08:
                    print('outlier value: participant ', p, ', value index ',
                          val)
                else:
                    mt_s_a_filt[p].append(mt_s_a[p][val])
                    mt_s_m_filt[p].append(mt_s_m[p][val])
            else:
                mt_s_a_filt[p].append(mt_s_a[p][val])
                mt_s_m_filt[p].append(mt_s_m[p][val])

    print('after removing the values outside the LoA: ')
    t7, p7 = ttest_rel([item for sublist in mt_s_m_filt for item in sublist],
                       [item for sublist in mt_s_a_filt for item in sublist],
                       axis=None)
    print('mt s', p7)
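A sketch of a helper that would collapse the flatten-then-test pattern repeated throughout the function above (not part of the original code):

from scipy.stats.mstats import ttest_rel

def flat_ttest_rel(nested_a, nested_b):
    """Paired t-test over per-participant nested lists, flattened to 1-D."""
    flat_a = [item for sublist in nested_a for item in sublist]
    flat_b = [item for sublist in nested_b for item in sublist]
    return ttest_rel(flat_a, flat_b, axis=None)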
Example #13
# Compute AUC, f1-score, and accuracy
from sklearn.metrics import roc_auc_score
auc_log = roc_auc_score(y, log_y_scores)
auc_svm = roc_auc_score(y, svm_y_scores)
auc_knn = roc_auc_score(y, knn_y_scores)
# Compute accuracy and f1-score from 5-fold cross-validated predictions
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_predict
for clf in [knn_clf, svm_clf, log_clf]:
    y_pred = cross_val_predict(clf, X, y, cv=5)
    accuracy = accuracy_score(y, y_pred)
    f1 = f1_score(y, y_pred)
    print(accuracy, f1)
print('AUC (kNN, SVM, logistic):', auc_knn, auc_svm, auc_log)

# %%
before_test = pd.concat([y_tr, X_select], axis=1)

from scipy.stats.mstats import ttest_ind
for col in list(before_test.columns):
    if col == '产后血糖异常(有=1,无=0)':
        continue
    n1 = before_test[col][before_test['产后血糖异常(有=1,无=0)'].isin([0])]
    n2 = before_test[col][before_test['产后血糖异常(有=1,无=0)'].isin([1])]
    stat, p = ttest_ind(n1, n2)
    print(col, p)

before_test['产后血糖异常(有=1,无=0)'].value_counts()
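The same per-column screening pattern with invented English column names, as a self-contained sketch (not the author's data):

import numpy as np
import pandas as pd
from scipy.stats.mstats import ttest_ind

df = pd.DataFrame({'label': np.repeat([0, 1], 30),
                   'bmi': np.r_[np.random.randn(30) + 22,
                                np.random.randn(30) + 24]})
for col in ['bmi']:
    g0 = df[col][df['label'] == 0]
    g1 = df[col][df['label'] == 1]
    print(col, ttest_ind(g0, g1)[1])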