Exemple #1
0
    def _evaluate_composite_score(y_true, y_pred, score_func):
        """
        Evaluate composite scores based on the contingency table like
        precision, specificity or sensitivity by using jackknife re-sampling.

        :param y_true: ground-truth labels; must expose ``shape`` and accept
            indexing with an integer index array
        :param y_pred: predicted labels, indexed the same way as ``y_true``
        :param score_func: callable invoked as ``score_func(y_true, y_pred)``
            that returns the score
        :return: score and standard error; the score is computed on the
            complete sample, the standard error from the leave-one-out
            (jackknife) scores

        References:
        Efron and Stein, (1981), "The jackknife estimate of variance."
        """
        def _compute_jackknife_stderr(x):
            n = x.shape[0]
            # Equivalent to
            # np.sqrt(((n - 1) / n) * np.sum((x - x.mean()) ** 2))
            # because np.std divides by n by default.
            return np.sqrt(n - 1) * np.std(x)

        composite_score = score_func(y_true, y_pred)

        # jackknifing to obtain std err estimate
        index = np.arange(y_true.shape[0])
        # The resampled positions are cast to integers so that each row can
        # be used as an index array. The builtin ``int`` is used because the
        # ``np.int`` alias no longer exists in current NumPy releases.
        jack_idx = jackknife_resampling(index).astype(int)
        jack_scores = np.array(
            [score_func(y_true[idx], y_pred[idx]) for idx in jack_idx])
        jack_stderr = _compute_jackknife_stderr(jack_scores)
        return composite_score, jack_stderr
def jack_knife(p, q):
    """Return the smallest Wasserstein distance over paired jackknife resamples.

    Both inputs are jackknife-resampled; the i-th resample of ``p`` is
    compared with the i-th resample of ``q`` and the minimum of those
    distances is returned.

    :param p: first sample, handed to ``jackknife_resampling``
    :param q: second sample, handed to ``jackknife_resampling``
    :return: minimum of the row-wise ``wasserstein_distance`` values
    """
    p_rows = jackknife_resampling(p)
    q_rows = jackknife_resampling(q)
    # The number of comparisons is dictated by the resamples of ``p``.
    n_rows, _ = p_rows.shape
    return min(
        wasserstein_distance(p_rows[row, :], q_rows[row, :])
        for row in range(n_rows))


# TODO: add a few more of these, don't be lazy ...
Exemple #3
0
def jackknife(data, n):
    """Spread of the scaled variance ``var / n**2`` across jackknife resamples.

    The scaled variance is evaluated on every jackknife resample of ``data``
    and on ``data`` itself; the result is the square root of the summed
    squared differences between each resample value and the full-sample
    value.

    :param data: sample handed to ``jackknife_resampling``
    :param n: scaling factor; every variance is divided by ``n * n``
    :return: square root of the sum of squared deviations
    """
    scale = n * n
    resampled_values = [
        np.var(resample) / scale for resample in jackknife_resampling(data)
    ]
    full_value = np.var(data) / scale
    squared_deviations = [
        (value - full_value) * (value - full_value)
        for value in resampled_values
    ]
    return np.sqrt(np.sum(squared_deviations))
Exemple #4
0
def jackknife(data, N, kb, T, method):
    """Spread of ``method(variance, N, kb, T)`` across jackknife resamples.

    ``method`` is applied to the variance of every jackknife resample of
    ``data`` and to the variance of ``data`` itself; the result is the square
    root of the summed squared differences between each resample value and
    the full-sample value.

    :param data: sample handed to ``jackknife_resampling``
    :param N: forwarded unchanged to ``method``
    :param kb: forwarded unchanged to ``method``
    :param T: forwarded unchanged to ``method``
    :param method: callable invoked as ``method(variance_value, N, kb, T)``
    :return: square root of the sum of squared deviations
    """
    resampled_values = [
        method(variance(resample), N, kb, T)
        for resample in jackknife_resampling(data)
    ]
    full_value = method(variance(data), N, kb, T)
    squared_deviations = [
        (value - full_value) * (value - full_value)
        for value in resampled_values
    ]
    return np.sqrt(np.sum(squared_deviations))
Exemple #5
0
from astropy.stats import jackknife_stats
from statistics import harmonic_mean
import math

# Alternative data sets kept for experimentation:
# data = np.array([1.01, 0.99, 0.78, 1.12, 1.20, 0.86, 0.65, 0.56, 0.87, 0.63, 0.70, 1.24, 1.40])
# data = np.array([1,2,3,4,5,6,7,8,9,0])
data = np.array([
    50.69, 52.95, 52.47, 51.85, 51.55, 51.49,
    52.08, 52.19, 51.49, 52.34, 51.95, 51.73,
    51.93, 51.3, 51.7, 51.28, 52.0, 52.07,
    52.31, 52.28, 51.42, 52.26, 51.64, 51.74,
    50.34, 51.11, 52.55, 51.5, 53.22, 51.4,
])

# Harmonic mean of the complete sample.
harmonica = harmonic_mean(data)

# Jackknife resamples of the data; the shape is printed as a sanity check.
resamples = jackknife_resampling(data)
print(resamples.shape)

# Harmonic mean of every jackknife resample.
hrList = [harmonic_mean(resample) for resample in resamples]

# Arithmetic mean of the per-resample harmonic means.
hmean = np.mean(hrList)
print(" Arithmetic mean of harmonic means ", hmean)
Exemple #6
0
def sensitivity_jacknife_fullP(root):
    """Fit bout frequency against pitch per condition and plot jackknifed fits.

    Every entry of ``root`` whose name does not start with ``.`` is treated
    as a condition folder. The first character of the folder name is used as
    the age (``dpf``) and everything from the fifth character on as the
    condition label (e.g. ``7dd_ctrl``). Each sub-folder found while walking
    a condition folder is treated as one experiment and must contain
    ``IEI_data.h5`` with the key ``prop_bout_IEI2``.

    For every condition the function
      * pools the experiments and derives ``y_boutFreq = 1 / propBoutIEI``,
      * computes binned averages with ``distribution_binned_average``,
      * fits the pooled data with ``parabola_fit1``,
      * repeats the fit on every leave-one-experiment-out subset.

    It then prints the fit over all conditions, draws a 4-row figure
    (fitted curves, sensitivity, x intersect, y intersect; one column per
    age) and prints a paired t-test (two conditions) or Tukey HSD summary
    (more than two conditions) of the jackknifed sensitivities per age.

    Relies on names defined outside this function: ``day_night_split2``,
    ``distribution_binned_average``, ``parabola_fit1``, ``defaultPlotting``,
    ``BIN_WIDTH`` and ``X_RANGE_FULL``.

    :param root: path of the directory holding the condition folders
    :return: None; results are printed and shown with ``plt.show()``
    """
    # get the name of all folders under root
    all_conditions = []
    folder_paths = []
    for folder in os.listdir(root):
        if folder[0] != '.':
            folder_paths.append(root + '/' + folder)
            all_conditions.append(folder)

    # initialize results dataframe
    # all ori pitch including all conditions, for validation
    all_cond_angles = pd.DataFrame()
    # binned mean of pitch for plotting as "raw data"
    binned_angles = pd.DataFrame()
    # coef results calculated with all original pitch data
    coef_ori = pd.DataFrame()
    # fitted y using all original pitch data
    fitted_y_ori = pd.DataFrame()
    # coef results calculated with jackknifed pitch data
    jackknifed_coef = pd.DataFrame()
    # fitted y using jackknifed pitch data
    jackknifed_y = pd.DataFrame()

    # for each folder (condition)
    for condition_idx, folder in enumerate(folder_paths):
        # enter each condition folder (e.g. 7dd_ctrl)
        for subpath, subdir_list, subfile_list in os.walk(folder):
            # if folder is not empty
            if subdir_list:
                all_day_angles = pd.DataFrame()
                # loop through each sub-folder (experiment) under each condition
                for expNum, exp in enumerate(subdir_list):
                    # for each sub-folder, get the path
                    exp_path = os.path.join(subpath, exp)
                    df = pd.read_hdf(f"{exp_path}/IEI_data.h5",
                                     key='prop_bout_IEI2')
                    body_angles = df.loc[:, [
                        'propBoutIEI', 'propBoutIEI_pitch', 'propBoutIEItime'
                    ]]
                    # tag every row with its experiment number and the first
                    # six characters of the experiment folder name
                    day_angles = day_night_split2(
                        body_angles, 'propBoutIEItime').assign(expNum=expNum,
                                                               date=exp[0:6])
                    day_angles.dropna(inplace=True)
                    all_day_angles = pd.concat([
                        all_day_angles, day_angles[[
                            'propBoutIEI', 'propBoutIEI_pitch', 'expNum',
                            'date'
                        ]]
                    ],
                                               ignore_index=True)
                    # enter next folder under this condition

                all_day_angles = all_day_angles.assign(
                    y_boutFreq=1 / all_day_angles['propBoutIEI'])

                # get binned mean of angles for plotting "raw" data
                binned_angles = pd.concat([
                    binned_angles,
                    distribution_binned_average(all_day_angles, BIN_WIDTH,
                                                all_conditions[condition_idx])
                ],
                                          ignore_index=True)

                # fit angles condition by condition and concatenate results
                coef, fitted_y = parabola_fit1(all_day_angles, X_RANGE_FULL)
                coef_ori = pd.concat([
                    coef_ori,
                    coef.assign(dpf=all_conditions[condition_idx][0],
                                condition=all_conditions[condition_idx][4:])
                ])
                fitted_y_ori = pd.concat([
                    fitted_y_ori,
                    fitted_y.assign(
                        dpf=all_conditions[condition_idx][0],
                        condition=all_conditions[condition_idx][4:])
                ])

                # jackknife for the index
                # expNum still holds the last experiment number, so the
                # resampling runs over 0..expNum.
                jackknife_idx = jackknife_resampling(
                    np.array(list(range(expNum + 1))))
                # NOTE(review): the enumerate counter is used as the number
                # of the left-out experiment, i.e. row i is presumed to omit
                # experiment i - confirm against jackknife_resampling.
                for excluded_exp, idx_group in enumerate(jackknife_idx):
                    coef, fitted_y = parabola_fit1(
                        all_day_angles.loc[all_day_angles['expNum'].isin(
                            idx_group)], X_RANGE_FULL)
                    jackknifed_coef = pd.concat([
                        jackknifed_coef,
                        coef.assign(
                            dpf=all_conditions[condition_idx][0],
                            condition=all_conditions[condition_idx][4:],
                            excluded_exp=all_day_angles.loc[
                                all_day_angles['expNum'] == excluded_exp,
                                'date'].iloc[0])
                    ])
                    jackknifed_y = pd.concat([
                        jackknifed_y,
                        fitted_y.assign(
                            dpf=all_conditions[condition_idx][0],
                            condition=all_conditions[condition_idx][4:],
                            excluded_exp=all_day_angles.loc[
                                all_day_angles['expNum'] == excluded_exp,
                                'date'].iloc[0])
                    ])

                # get all angles at all conditions, for validation. not needed for plotting
                all_cond_angles = pd.concat([
                    all_cond_angles,
                    all_day_angles.assign(
                        condition=all_conditions[condition_idx])
                ],
                                            ignore_index=True)
                # enter next condition

    coef_all_cond, fitted_y_all_cond = parabola_fit1(all_cond_angles,
                                                     X_RANGE_FULL)

    # Columns are renamed by position, so the order produced by
    # parabola_fit1 followed by the assigned columns must match these lists.
    jackknifed_coef.columns = [
        'sensitivity', 'x_inter', 'y_inter', 'dpf', 'condition',
        'jackknife_excluded_sample'
    ]
    jackknifed_coef.sort_values(by=['condition', 'dpf'],
                                inplace=True,
                                ignore_index=True)
    jackknifed_coef['sensitivity'] = jackknifed_coef[
        'sensitivity'] * 1000  # unit: mHz/deg**2

    jackknifed_y.columns = [
        'y', 'x', 'dpf', 'condition', 'jackknife_excluded_sample'
    ]
    jackknifed_y.sort_values(by=['condition', 'dpf'],
                             inplace=True,
                             ignore_index=True)
    binned_angles.sort_values(by=['condition', 'dpf'],
                              inplace=True,
                              ignore_index=True)

    coef_ori.columns = [
        'sensitivity', 'x_inter', 'y_inter', 'dpf', 'condition'
    ]
    coef_ori.sort_values(by=['condition', 'dpf'],
                         inplace=True,
                         ignore_index=True)

    fitted_y_ori.columns = ['y', 'x', 'dpf', 'condition']
    fitted_y_ori.sort_values(by=['condition', 'dpf'],
                             inplace=True,
                             ignore_index=True)

    # %%
    print("Fitted coefs using ALL data (for reference):")
    print(coef_all_cond)

    # plot fitted parabola and sensitivity
    defaultPlotting()

    # Separate data by age.
    age_condition = set(jackknifed_y['dpf'].values)
    age_cond_num = len(age_condition)

    # initialize a multi-plot, feel free to change the plot size
    f, axes = plt.subplots(nrows=4,
                           ncols=age_cond_num,
                           figsize=(2.5 * (age_cond_num), 12),
                           sharey='row')
    # flatten so that row r, column i is reachable as axes[i + r * age_cond_num]
    axes = axes.flatten()
    # setup color scheme for dot plots: one grey entry per row of the first
    # condition group. ``.iloc[0]`` selects by position explicitly; a plain
    # ``[0]`` on this label-indexed Series relies on deprecated behavior.
    flatui = ["#D0D0D0"] * (
        jackknifed_coef.groupby('condition').size().iloc[0])
    defaultPlotting()

    # loop through the ages (dpf): fitted parabola in the first row,
    # sensitivity in the second, x and y intersects in the third and fourth.
    for i, age in enumerate(age_condition):
        fitted = jackknifed_y.loc[jackknifed_y['dpf'] == age]
        # dots are plotted with binned average pitches
        binned = binned_angles.loc[binned_angles['dpf'] == age]
        g = sns.lineplot(x='x',
                         y='y',
                         hue='condition',
                         data=fitted,
                         ci="sd",
                         ax=axes[i])
        g = sns.scatterplot(x='propBoutIEI_pitch',
                            y='y_boutFreq',
                            hue='condition',
                            s=20,
                            data=binned,
                            alpha=0.3,
                            ax=axes[i],
                            linewidth=0)
        g.set_xticks(np.arange(-90, 135, 45))  # adjust ticks
        # Only the lower limit is pinned. A third positional argument would
        # be taken as ``emit``, which recent Matplotlib no longer accepts
        # positionally; the default already matches any truthy value.
        g.set_ylim(0, None)

        # SENSITIVITY
        coef_plt = jackknifed_coef.loc[jackknifed_coef['dpf'] == age]
        # plot jackknifed paired data
        p = sns.pointplot(
            x='condition',
            y='sensitivity',
            hue='jackknife_excluded_sample',
            data=coef_plt,
            palette=sns.color_palette(flatui),
            scale=0.5,
            ax=axes[i + age_cond_num],
            #   order=['Sibs','Tau','Lesion'],
        )
        # plot mean data
        p = sns.pointplot(
            x='condition',
            y='sensitivity',
            hue='condition',
            data=coef_plt,
            linewidth=0,
            alpha=0.9,
            ci=None,
            markers='d',
            ax=axes[i + age_cond_num],
            #   order=['Sibs','Tau','Lesion'],
        )
        p.legend_.remove()
        # p.set_yticks(np.arange(0.1,0.52,0.04))

        # sns.despine(trim=True)

        # p values for sensitivity
        condition_s = set(coef_plt['condition'].values)
        condition_s = list(condition_s)

        # Paired T Test for 2 conditions
        if len(condition_s) == 2:
            # Separate data by condition; sorting by the excluded sample
            # lines the two series up before the paired test.
            coef_cond1 = coef_plt.loc[coef_plt['condition'] ==
                                      condition_s[0]].sort_values(
                                          by='jackknife_excluded_sample')
            coef_cond2 = coef_plt.loc[coef_plt['condition'] ==
                                      condition_s[1]].sort_values(
                                          by='jackknife_excluded_sample')
            ttest_res, ttest_p = ttest_rel(coef_cond1['sensitivity'],
                                           coef_cond2['sensitivity'])
            print(
                f'* Age {age} Sensitivity: {condition_s[0]} v.s. {condition_s[1]} paired t-test p-value = {ttest_p}'
            )
        elif len(condition_s) > 2:
            # multiple comparison for more than 2 conditions
            multi_comp = MultiComparison(
                coef_plt['sensitivity'],
                coef_plt['dpf'] + coef_plt['condition'])
            print(f'* Age {age} Sensitivity:')
            print(multi_comp.tukeyhsd().summary())
        else:
            pass

        # X INTERSECT
        sns.swarmplot(x='condition',
                      y='x_inter',
                      data=coef_plt,
                      ax=axes[i + age_cond_num * 2])
        # Y INTERSECT
        sns.swarmplot(x='condition',
                      y='y_inter',
                      data=coef_plt,
                      ax=axes[i + age_cond_num * 3])

    plt.show()
Exemple #7
0
            # fit angles condition by condition and concatenate results
            coef, fitted_y = parabola_fit1(all_day_angles, X_RANGE_FULL)
            coef_ori = pd.concat([
                coef_ori,
                coef.assign(dpf=all_conditions[condition_idx][0],
                            condition=all_conditions[condition_idx][4:])
            ])
            fitted_y_ori = pd.concat([
                fitted_y_ori,
                fitted_y.assign(dpf=all_conditions[condition_idx][0],
                                condition=all_conditions[condition_idx][4:])
            ])

            # jackknife for the index
            jackknife_idx = jackknife_resampling(
                np.array(list(range(expNum + 1))))
            for excluded_exp, idx_group in enumerate(jackknife_idx):
                coef, fitted_y = parabola_fit1(
                    all_day_angles.loc[all_day_angles['expNum'].isin(
                        idx_group)], X_RANGE_FULL)
                jackknifed_coef = pd.concat([
                    jackknifed_coef,
                    coef.assign(dpf=all_conditions[condition_idx][0],
                                condition=all_conditions[condition_idx][4:],
                                excluded_exp=all_day_angles.loc[
                                    all_day_angles['expNum'] == excluded_exp,
                                    'date'].iloc[0])
                ])
                jackknifed_y = pd.concat([
                    jackknifed_y,
                    fitted_y.assign(
Exemple #8
0
def bout_speed_aligned_jacknife(root):
    """Plot jackknifed swim-speed distributions for every condition.

    Every entry of ``root`` whose name does not start with ``.`` is treated
    as a condition folder; the first character of its name is used as the
    age and everything from the fifth character on as the condition label
    (e.g. ``7dd_ctrl``). Each sub-folder found while walking a condition
    folder is treated as one experiment and must contain ``bout_data.h5``
    with the key ``prop_bout_aligned``.

    For every leave-one-experiment-out subset a density histogram of
    ``propBoutAligned_speed`` is computed; the histograms of all conditions
    are drawn as one line plot with a standard-deviation band.

    NOTE(review): ``bins`` is not defined inside this function and is read
    from an enclosing scope - confirm it is defined before this is called.

    :param root: path of the directory holding the condition folders
    :return: None; the figure is shown with ``plt.show()``
    """
    all_conditions = []
    folder_paths = []
    # get the name of all folders under root
    for folder in os.listdir(root):
        if folder[0] != '.':
            folder_paths.append(root + '/' + folder)
            all_conditions.append(folder)

    jack_y_all = pd.DataFrame()
    # only referenced by the commented-out std computation further down
    ang_std_all = pd.DataFrame()
    # go through each condition folders under the root
    for condition_idx, folder in enumerate(folder_paths):
        # enter each condition folder (e.g. 7dd_ctrl)
        for subpath, subdir_list, subfile_list in os.walk(folder):
            # if folder is not empty
            if subdir_list:
                all_speed = pd.DataFrame()
                # only referenced by the commented-out std computation below
                ang_std = []
                # loop through each sub-folder (experiment) under each condition
                for expNum, exp in enumerate(subdir_list):
                    # for each sub-folder, get the path
                    exp_path = os.path.join(subpath, exp)
                    df = pd.read_hdf(f"{exp_path}/bout_data.h5",
                                     key='prop_bout_aligned')
                    # get swim speed; the single column is renamed and
                    # transposed so each experiment becomes one row
                    swim_speed = df.loc[:, ['propBoutAligned_speed']].rename(
                        columns={
                            'propBoutAligned_speed': f'exp{expNum}'
                        }).transpose()
                    all_speed = pd.concat([all_speed, swim_speed])
                # jackknife for the index
                # expNum still holds the last experiment number, so the
                # resampling runs over 0..expNum
                jackknife_idx = jackknife_resampling(
                    np.array(list(range(expNum + 1))))
                # get the distribution of every jackknifed sample:
                # each index group selects experiment rows by position and
                # their values are flattened into one histogram
                jack_y = pd.concat([
                    pd.DataFrame(
                        np.histogram(
                            all_speed.iloc[idx_group].to_numpy().flatten(),
                            bins=bins,
                            density=True)) for idx_group in jackknife_idx
                ],
                                   axis=1).transpose()
                # combine conditions, tagging rows with age and condition
                jack_y_all = pd.concat([
                    jack_y_all,
                    jack_y.assign(age=all_conditions[condition_idx][0],
                                  condition=all_conditions[condition_idx][4:])
                ],
                                       axis=0,
                                       ignore_index=True)
                # # get the std of every jackknifed sample
                # for idx_group in jackknife_idx:
                #     ang_std.append(np.nanstd(all_speed.iloc[idx_group].to_numpy().flatten()))
                # ang_std = pd.DataFrame(ang_std)
                # ang_std_all = pd.concat([ang_std_all, ang_std.assign(age=all_conditions[condition_idx][0], condition=all_conditions[condition_idx][4:])], axis=0, ignore_index=True)

    # columns are renamed by position: the two histogram outputs followed by
    # the assigned age and condition columns
    jack_y_all.columns = ['Probability', 'swim_speed', 'dpf', 'condition']
    jack_y_all.sort_values(by=['dpf', 'condition'], inplace=True)

    # %%
    defaultPlotting()
    # one line per condition, line style per age, band showing the standard
    # deviation across jackknifed samples
    g = sns.lineplot(x='swim_speed',
                     y='Probability',
                     hue='condition',
                     style='dpf',
                     data=jack_y_all,
                     ci='sd',
                     err_style='band')

    plt.show()
def IEI_pitch_mean_jacknife(root):
    """Plot jackknifed posture distributions and their standard deviations.

    Every entry of ``root`` whose name does not start with ``.`` is treated
    as a condition folder; the first character of its name is used as the
    age (``dpf``) and everything from the fifth character on as the
    condition label (e.g. ``7dd_ctrl``). Each sub-folder found while walking
    a condition folder is treated as one experiment and must contain
    ``IEI_data.h5`` with the key ``prop_bout_IEI2``.

    For every leave-one-experiment-out subset the function computes a
    density histogram of ``propBoutIEI_pitch`` (5-degree bins from -90 to
    90) and the NaN-ignoring standard deviation of the pooled pitch values.
    It draws a 2-row figure (distribution on top, standard deviation below;
    one column per age) and prints, per age, a paired t-test (two
    conditions) or a Tukey HSD summary (more than two conditions) of the
    jackknifed standard deviations.

    Relies on ``defaultPlotting`` defined outside this function.

    :param root: path of the directory holding the condition folders
    :return: None; results are printed and shown with ``plt.show()``
    """
    all_conditions = []
    folder_paths = []
    # get the name of all folders under root
    for folder in os.listdir(root):
        if folder[0] != '.':
            folder_paths.append(root+'/'+folder)
            all_conditions.append(folder)

    # histogram bin edges: -90 to 90 in steps of 5
    bins = list(range(-90,95,5))

    jack_y_all = pd.DataFrame()
    ang_std_all = pd.DataFrame()
    # go through each condition folders under the root
    for condition_idx, folder in enumerate(folder_paths):
        # enter each condition folder (e.g. 7dd_ctrl)
        for subpath, subdir_list, subfile_list in os.walk(folder):
            # if folder is not empty
            if subdir_list:
                all_angles = pd.DataFrame()
                exp_date_match = pd.DataFrame()
                ang_std = []
                # loop through each sub-folder (experiment) under each condition
                for expNum, exp in enumerate(subdir_list):
                    # for each sub-folder, get the path
                    exp_path = os.path.join(subpath, exp)
                    df = pd.read_hdf(f"{exp_path}/IEI_data.h5", key='prop_bout_IEI2')
                    # get pitch; the single column is renamed and transposed
                    # so each experiment becomes one row
                    body_angles = df.loc[:,['propBoutIEI_pitch']].rename(columns={'propBoutIEI_pitch':f'exp{expNum}'}).transpose()
                    all_angles = pd.concat([all_angles, body_angles])
                    # remember the first six characters of the experiment
                    # folder name for each experiment number
                    exp_date_match = pd.concat([exp_date_match, pd.DataFrame( data= {'expNum':expNum,'date':[exp[0:6]]} )],ignore_index=True)

                # jackknife for the index
                # expNum still holds the last experiment number, so the
                # resampling runs over 0..expNum
                jackknife_idx = jackknife_resampling(np.array(list(range(expNum+1))))
                # get the distribution of every jackknifed sample for the current condition
                jack_y = pd.concat([pd.DataFrame(
                    np.histogram(all_angles.iloc[idx_group].to_numpy().flatten(), bins=bins, density=True)
                ) for idx_group in jackknife_idx], axis=1).transpose()

                # combine conditions
                jack_y_all = pd.concat([jack_y_all, jack_y.assign(age=all_conditions[condition_idx][0],
                                                                condition=all_conditions[condition_idx][4:])], axis=0, ignore_index=True)
                # get the std of every jackknifed sample
                for idx_group in jackknife_idx:
                    ang_std.append(np.nanstd(all_angles.iloc[idx_group].to_numpy().flatten()))
                # NOTE(review): row i is labelled with the date of experiment
                # i, i.e. jackknife row i is presumed to omit experiment i -
                # confirm against jackknife_resampling.
                ang_std = pd.DataFrame(ang_std).assign(excluded_exp=exp_date_match['date'])
                ang_std_all = pd.concat([ang_std_all, ang_std.assign(age=all_conditions[condition_idx][0], condition=all_conditions[condition_idx][4:])], axis=0, ignore_index=True)

    # columns are renamed by position
    jack_y_all.columns = ['Probability','Posture (deg)','dpf','condition']
    jack_y_all.sort_values(by=['condition'],inplace=True)
    ang_std_all.columns = ['std(posture)','excluded_exp','dpf','condition']
    ang_std_all.sort_values(by=['condition'],inplace=True)

    # %%
    # Plot posture distribution and its standard deviation

    defaultPlotting()

    # Separate data by age.
    age_condition = set(jack_y_all['dpf'].values)
    age_cond_num = len(age_condition)

    # initialize a multi-plot, feel free to change the plot size
    f, axes = plt.subplots(nrows=2, ncols=age_cond_num, figsize=(2.5*(age_cond_num), 10), sharey='row')
    # flatten so that row r, column i is reachable as axes[i + r * age_cond_num]
    axes = axes.flatten()
    # setup color scheme for dot plots: one grey entry per row of the first
    # condition group. ``.iloc[0]`` selects by position explicitly; a plain
    # ``[0]`` on this label-indexed Series relies on deprecated behavior.
    flatui = ["#D0D0D0"] * (ang_std_all.groupby('condition').size().iloc[0])
    defaultPlotting()

    # loop through the ages (dpf): posture distribution in the first row,
    # standard deviation of posture in the second.
    for i, age in enumerate(age_condition):
        fitted = jack_y_all.loc[jack_y_all['dpf']==age]
        g = sns.lineplot(x='Posture (deg)',y='Probability', hue='condition', style='dpf', data=fitted, ci='sd', err_style='band', ax=axes[i])
        # g.set_yticks(np.arange(x,y,step))  # adjust y ticks
        g.set_xticks(np.arange(-90,135,45))  # adjust x ticks

        # plot std
        std_plt = ang_std_all.loc[ang_std_all['dpf']==age]
        # plot jackknifed paired data
        p = sns.pointplot(x='condition', y='std(posture)', hue='excluded_exp',data=std_plt,
                        palette=sns.color_palette(flatui), scale=0.5,
                        ax=axes[i+age_cond_num],
                    #   order=['Sibs','Tau','Lesion'],
        )
        # plot mean data
        p = sns.pointplot(x='condition', y='std(posture)',hue='condition',data=std_plt,
                        linewidth=0,
                        alpha=0.9,
                        ci=None,
                        markers='d',
                        ax=axes[i+age_cond_num],
                        #   order=['Sibs','Tau','Lesion'],
        )
        p.legend_.remove()
        # p.set_yticks(np.arange(0.1,0.52,0.04))
        sns.despine(trim=True)

        condition_s = set(std_plt['condition'].values)
        condition_s = list(condition_s)

        if len(condition_s) == 2:
            # Paired T Test for 2 conditions
            # Separate data by condition; sorting by the excluded experiment
            # lines the two series up before the paired test.
            std_cond1 = std_plt.loc[std_plt['condition']==condition_s[0]].sort_values(by='excluded_exp')
            std_cond2 = std_plt.loc[std_plt['condition']==condition_s[1]].sort_values(by='excluded_exp')
            ttest_res, ttest_p = ttest_rel(std_cond1['std(posture)'],std_cond2['std(posture)'])
            print(f'* Age {age}: {condition_s[0]} v.s. {condition_s[1]} paired t-test p-value = {ttest_p}')
        elif len(condition_s) > 2:
            # multiple comparison for more than 2 conditions, restricted to
            # the rows of the current age so the summary matches the
            # heading printed above it
            print(f'* Age {age}:' )
            multi_comp = MultiComparison(std_plt['std(posture)'], std_plt['dpf']+std_plt['condition'])
            print(multi_comp.tukeyhsd().summary())
        else:
            pass

    plt.show()