Example #1
def loaddata(data1, data2, parameter, name1, name2, rsmpls=None):
    temp = pd.concat([data1[parameter], data2[parameter]], axis=1, sort=True)
    temp.columns = [name1, name2]
    if rsmpls is not None:
        bootstrap = dabest.load(temp, idx=(name1, name2), resamples=rsmpls)
    else:
        bootstrap = dabest.load(temp, idx=(name1, name2))
    return bootstrap
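
A minimal usage sketch for the helper above, on hypothetical data (the column name 'velocity' and the group labels are placeholders):

import numpy as np
import pandas as pd
import dabest

rng = np.random.default_rng(0)
wt = pd.DataFrame({'velocity': rng.normal(1.0, 0.2, 40)})
mut = pd.DataFrame({'velocity': rng.normal(1.3, 0.2, 40)})

# Concatenates the two series column-wise and bootstraps the contrast.
bootstrap = loaddata(wt, mut, 'velocity', 'wild-type', 'mutant', rsmpls=5000)
print(bootstrap.mean_diff)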
Example #2
def esci_indep_cohens_d(data1, data2, n_boot=5000, has_preds=False):
    '''Compute Cohen's d effect size and its bootstrap 95% confidence interval
    (using the bias-corrected and accelerated bootstrap).

    Parameters
    ----------
    data1 : np.ndarray
        One dimensional array of values for the "high" group (for example
        diagnosed participants).
    data2 : np.ndarray
        One dimensional array of values for the "low" group (for example
        healthy controls).
    n_boot : int
        Number of bootstraps to use.
    has_preds : bool
        Whether an array of predictors is provided in the data. If so, the
        first column of data1 and data2 contains the data for the separate
        groups and the following columns are the predictors used in the
        regression, with the predictor of interest (group membership) being
        the last one and the rest treated as confounds.

    Returns
    -------
    stats : dict
        Dictionary of results.
        * ``stats['es']`` contains effect size.
        * ``stats['ci']`` contains 95% confidence interval for the effect size.
        * ``stats['bootstraps']`` contains bootstrap effect size values.
    '''
    if not has_preds:
        assert data2 is not None
        import dabest
        df = utils.psd_to_df(data1, data2)
        dbst_set = dabest.load(df,
                               idx=("controls", "diagnosed"),
                               x="group",
                               y="FAA",
                               resamples=n_boot)
        results = dbst_set.cohens_d.results
        cohen_d = results.difference.values[0]
        cohen_d_ci = (results.bca_low.values[0], results.bca_high.values[0])
        bootstraps = results.bootstraps[0]
    else:
        from borsar.stats import compute_regression_t
        import scikits.bootstrap as boot

        def regression_Cohens_d(data1, data2):
            data = np.concatenate([data1, data2], axis=0)
            preds = data[:, 1:]
            tvals = compute_regression_t(data[:, [0]], preds)
            return d_from_t_categorical(tvals[-1, 0], preds)

        cohen_d = regression_Cohens_d(data1, data2)
        cohen_d_ci, bootstraps = boot.ci((data1, data2),
                                         regression_Cohens_d,
                                         multi='independent',
                                         n_samples=n_boot,
                                         return_dist=True)
    stats = dict(es=cohen_d, ci=cohen_d_ci, bootstraps=bootstraps)
    return stats
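
For reference, a self-contained sketch of the has_preds=False branch above, using dabest on wide-format data; the column names and the synthetic values are placeholders:

import numpy as np
import pandas as pd
import dabest

rng = np.random.default_rng(42)
df = pd.DataFrame({'controls': rng.normal(0.0, 1.0, 50),
                   'diagnosed': rng.normal(0.5, 1.0, 50)})
dbst_set = dabest.load(df, idx=("controls", "diagnosed"), resamples=5000)
results = dbst_set.cohens_d.results
print(results.difference.values[0])                           # effect size
print(results.bca_low.values[0], results.bca_high.values[0])  # 95% BCa CI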
Example #3
    def plotContrasts(self,
                      y,
                      colorBy,
                      compareBy,
                      groupBy='Temperature',
                      plot_kwargs=dict()):
        resultsDf = self.resultsDf
        resultsDf[
            'newPlotColumn'] = resultsDf[groupBy] + '  ' + resultsDf[compareBy]
        groups = np.unique(resultsDf[groupBy])
        comparisons = np.unique(resultsDf[compareBy])[::-1]
        if len(groups) == 1:
            # Single group: a flat tuple of '<group>  <comparison>' labels.
            listIdx = tuple(groups[0] + '  ' + comparisons)
        else:
            # Several groups: one tuple of labels per group. (The original
            # re-wrapped listIdx on each iteration, nesting it one level
            # deeper per extra group.)
            listIdx = tuple(tuple(g + '  ' + comparisons) for g in groups)

        print(listIdx)
        customPalette = locoPlotters.espressoCreatePalette(resultsDf[colorBy])
        setFont('Source Sans Bold', 10)
        dabestContrastData = dabest.load(
            resultsDf,
            x='newPlotColumn',  # the default for this test config is to group flies by genotype
            y=y,
            idx=listIdx,
            paired=False)

        fig = dabestContrastData.mean_diff.plot(color_col=colorBy,
                                                custom_palette=customPalette,
                                                **plot_kwargs)
        if len(np.unique(resultsDf[groupBy])) == 1:
            flatListIdxC = [item.split('  ')[1] for item in listIdx]
            flatListIdxG = [item.split('  ')[0] for item in listIdx]
        else:
            flatListIdxC = [item.split('  ')[1] for t in listIdx for item in t]
            flatListIdxG = [item.split('  ')[0] for t in listIdx for item in t]
        fig.axes[0].set_xticklabels(flatListIdxC, rotation=45, ha="right")
        ylim = fig.axes[0].get_ylim()
        for i in range(0, len(np.unique(resultsDf[groupBy]))):
            # fig.axes[0].text(0.5, ylim[1], flatListIdxG[0],  ha="center")
            fig.axes[0].text(0.5 + 2 * i,
                             ylim[1] * 1.1,
                             flatListIdxG[2 * i],
                             ha="center")
        locoUtilities.espressoSaveFig(fig, y + '_contrast',
                                      self.metaDataDf.Date[0],
                                      self.outputFolder)
        return fig
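
A hypothetical call site for plotContrasts; 'experiment' stands in for whatever espresso locomotion object carries resultsDf, metaDataDf and outputFolder:

fig = experiment.plotContrasts(y='averageSpeed',
                               colorBy='Genotype',
                               compareBy='Status',
                               groupBy='Temperature')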
Example #4
    def _parallel_region_dabest(self, Region, Freq):
        """
		Compute Regionwise differences.
		"""
        print(f'DABEST on region {Region}, Frequency: {Freq}')
        df_pivot = self.GBC_df.pivot(index=['Subject', 'Group'],
                                     columns='Frequency',
                                     values=Region).reset_index()
        # Bootstrap test with DABEST
        analysis = dabest.load(df_pivot,
                               idx=("Control", "FEP"),
                               x='Group',
                               y=Freq,
                               ci=90)
        results = analysis.mean_diff.results
        # Levene Test
        _, pval = scipy.stats.levene(
            df_pivot.loc[df_pivot['Group'] == 'Control', Freq],
            df_pivot.loc[df_pivot['Group'] == 'FEP', Freq])
        results['levene-p-value'] = pval
        # Insert Region Name in result df
        results.insert(loc=0, column='Region', value=Region)
        return results
Example #5
    def _parallel_net_dabest(self, Measure, Freq):
        """
		Apply Dabest on Graph Measure, is called in dabest_net_measures.
		"""
        print(f'DABEST on Graph Measure {Measure}, Frequency: {Freq}')
        df_pivot = self.Net_df.pivot(index=['Subject', 'Group'],
                                     columns='Frequency',
                                     values=Measure).reset_index()
        # Bootstrap test with DABEST
        analysis = dabest.load(df_pivot,
                               idx=("Control", "FEP"),
                               x='Group',
                               y=Freq,
                               ci=90)
        results = analysis.mean_diff.results
        # Levene Test
        _, pval = scipy.stats.levene(
            df_pivot.loc[df_pivot['Group'] == 'Control', Freq],
            df_pivot.loc[df_pivot['Group'] == 'FEP', Freq])
        results['levene-p-value'] = pval
        # Insert Measure name in result df
        results.insert(loc=0, column='Measure', value=Measure)
        return results
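
Examples #4 and #5 are written to be mapped over regions/measures and frequency bands. A sketch of a driver that fans _parallel_region_dabest out with joblib; the method name dabest_regions and its arguments are assumptions:

    def dabest_regions(self, regions, frequencies):
        """Hypothetical driver: one result frame per (region, frequency)."""
        from joblib import Parallel, delayed
        import pandas as pd
        res_list = Parallel(n_jobs=-1)(
            delayed(self._parallel_region_dabest)(region, freq)
            for region in regions for freq in frequencies)
        return pd.concat(res_list)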
Example #6
    def dabest_avg_GBC(self):
        """
		Function to calculate effect size and t/p value for average GBC. 
		"""
        df_long = pd.read_pickle(
            self.find(suffix='GBC', filetype='.pkl', Freq=self.Frequencies))
        df_wide = pd.pivot_table(df_long,
                                 index=['Group', 'Subject'],
                                 columns='Frequency',
                                 values='Avg. GBC').reset_index()
        res_list = []
        for Freq in self.FrequencyBands.keys():
            analysis = dabest.load(df_wide,
                                   idx=("Control", "FEP"),
                                   x='Group',
                                   y=Freq,
                                   ci=90)
            results = analysis.mean_diff.results
            results.insert(loc=0, column='Frequency', value=Freq)
            res_list.append(results)
        result_df = pd.concat(res_list)

        # Save Pickle
        FileName = self.createFileName(suffix='Mean-GBC-DABEST',
                                       filetype='.pkl',
                                       Freq=self.Frequencies)
        FilePath = self.createFilePath(self.EdgeStatsDir, 'GBC', 'Stats',
                                       FileName)
        result_df.to_pickle(FilePath)

        # Save CSV
        FileName = self.createFileName(suffix='Mean-GBC-DABEST',
                                       filetype='.csv',
                                       Freq=self.Frequencies)
        FilePath = self.createFilePath(self.EdgeStatsDir, 'GBC', 'Stats',
                                       FileName)
        result_df.to_csv(FilePath)
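
When post-processing result_df, the columns used elsewhere on this page (difference, bca_low, bca_high) hold the effect size and its BCa interval; a quick inspection sketch (further columns vary with the dabest version):

print(result_df[['Frequency', 'difference', 'bca_low', 'bca_high']].head())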
Example #7
import csv

import dabest
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def plot_massplot(MASS, cluster2MASS):
    with open(str(MASS) + '_mass.csv', mode='w') as mass_file:
        mass_writer = csv.writer(mass_file, delimiter=',', quotechar='"',
                                 quoting=csv.QUOTE_MINIMAL)
        mass_writer.writerow(list(cluster2MASS.keys()))
        max_length = 0
        for cluster in cluster2MASS:
            if len(cluster2MASS[cluster]) > max_length:
                max_length = len(cluster2MASS[cluster])
        for i in range(max_length):
            tmp = np.full(len(cluster2MASS), np.nan)
            for cluster in cluster2MASS:
                if len(cluster2MASS[cluster]) > i:
                    tmp[cluster] = cluster2MASS[cluster][i]
            mass_writer.writerow(tmp)

    # Read back the per-cluster mass table that was just written to CSV.
    mass = pd.read_csv(str(MASS)+'_mass.csv')
    mass = mass.rename(columns={"0": "Cluster 0", "1": "Cluster 1", "2": "Cluster 2", "3": "Cluster 3", "4": "Cluster 4", "5": "Cluster 5"})
    #mass['Cluster 0'] = 0
    # Load the above data into `dabest`.
    shared_control = dabest.load(mass, idx=("Cluster 0", "Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4", "Cluster 5"))
    # Produce a Cumming estimation plot.
    shared_control.mean_diff.plot()
    plt.show()
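
A shorter pandas-only route to the same ragged table (a sketch, assuming cluster2MASS maps integer cluster ids to sequences): a dict of Series passed to the DataFrame constructor pads unequal lengths with NaN, which is what the CSV round-trip above achieves.

import pandas as pd

mass = pd.DataFrame({f'Cluster {k}': pd.Series(list(v))
                     for k, v in cluster2MASS.items()})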
Example #8
import matplotlib
%matplotlib inline
import numpy as np
import locoDataMunger
import locoUtilities
import pandas as pd
import locoPlotters
import espresso as esp
import EspressoLocomotion
import dabest
#%%

dataFolder = '/Users/sangyuxu/xy1/'
TrhLxR50Gal = EspressoLocomotion.EspressoLocomotion(dataFolder, 0, 120)


#%%
# resultsDf = TrhCsCh.metaDataDf


groupBy = 'Temperature'
compareBy = 'Status'
colorBy = 'Genotype'
dabestContrastData = dabest.load(TrhLxR50Gal.resultsDf,
                                 x=compareBy,  # the default for this test config is to group flies by genotype
                                 y='averageSpeed',
                                 idx=np.unique(TrhLxR50Gal.metaDataDf.Status),
                                 paired=False)

fig = dabestContrastData.mean_diff.plot(color_col=colorBy)
Example #9
print(scipy.stats.levene(EBUSCO_plot_data, Enon_BUSCO_plot_data))

print("2")
print(np.var(CBUSCO_plot_data))
print(np.var(Cnon_BUSCO_plot_data))
print(scipy.stats.levene(CBUSCO_plot_data, Cnon_BUSCO_plot_data))

print("3")
print(np.var(BBUSCO_plot_data))
print(np.var(Bnon_BUSCO_plot_data))
print(scipy.stats.levene(BBUSCO_plot_data, Bnon_BUSCO_plot_data))

#dabest
dict_data = {
    "BUSCO (2)": pd.Series(CBUSCO_plot_data),
    "non-BUSCO (2)": pd.Series(Cnon_BUSCO_plot_data),
    "BUSCO (1)": pd.Series(EBUSCO_plot_data),
    "non-BUSCO (1)": pd.Series(Enon_BUSCO_plot_data),
    "BUSCO (3)": pd.Series(BBUSCO_plot_data),
    "non-BUSCO (3)": pd.Series(Bnon_BUSCO_plot_data)
}
df = pd.DataFrame(dict_data)

multi = dabest.load(df,
                    idx=(("BUSCO (2)", "non-BUSCO (2)"),
                         ("BUSCO (1)", "non-BUSCO (1)"),
                         ("BUSCO (3)", "non-BUSCO (3)")))

multi.mean_diff.statistical_tests.to_csv(
    "/FULL/PATH/TO/OUTPUT/DIRECTORY/dabest_BUSCO_comparison.csv")
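
With a tuple of tuples in idx, dabest runs one two-group comparison per inner pair, and statistical_tests carries one row per comparison; a quick check:

tests = multi.mean_diff.statistical_tests
print(tests.head())  # one row per (control, test) pair in idx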
Example #10
# ax.set_xlabel('Time')
# ax.set_ylabel('Linear Corr.')
# ax.set_title('Correlation: recall vs. ISC change')
# ax.xaxis.set_major_formatter(FormatStrFormatter('%d'))
# ax.legend()
# sns.despine()
# f.tight_layout()
#
# xticklabels = [f'RM-{cond}' for cond in has_memory_conds]
# f, ax = plt.subplots(1, 1, figsize=(6, 4))
# sns.violinplot(data=[r_mu_tisc[cond] for cond in has_memory_conds])
# ax.axhline(0, color='grey', linestyle='--')
# ax.set_xticks(range(len(xticklabels)))
# ax.set_xticklabels(xticklabels)
# ax.set_xlabel('Condition')
# ax.set_ylabel('Linear Correlation')
# ax.set_title('Correlation: recall vs. ISC change')
# sns.despine()
# f.tight_layout()
#

data_dict = {}
for cond in list(r_mu_sisc.keys()):
    data_dict[f'RM-{cond}'] = np.mean(r_val_tisc[cond], axis=-1)

df = make_df(data_dict)
db = dabest.load(data=df, x="Condition", y="Value", idx=list(data_dict.keys()))
db.mean_diff.plot(swarm_label='Linear correlation',
                  fig_size=(7, 5),
                  custom_palette=c_pal)
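
make_df is project-specific; a hypothetical stand-in that yields the long-format frame dabest.load(..., x='Condition', y='Value') expects:

import pandas as pd

def make_df(data_dict):
    """One (Condition, Value) row per entry of each array in data_dict."""
    records = [(cond, val)
               for cond, vals in data_dict.items()
               for val in vals]
    return pd.DataFrame(records, columns=['Condition', 'Value'])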
Example #11
            del dk[ptest][cond]['er'][i_ms]
            for lca_pid, lca_pname in lca_pnames.items():
                del lca_param[ptest][lca_pid][cond]['mu'][i_ms]
                del lca_param[ptest][lca_pid][cond]['er'][i_ms]
        del ma_lca[ptest][i_ms]
'''process the data: extract differences between the two penalty conds'''

# compute RT
rt = {ptest: None for ptest in penaltys_test}
time_vector = np.reshape(np.arange(n_param) + 1, (n_param, 1))
for ptest in penaltys_test:
    ig_p2_ = np.array(lca_param[ptest][0]['DM']['mu'])[:, n_param:].T
    ig_p2_norm = ig_p2_ / np.sum(ig_p2_, axis=0)
    rt[ptest] = np.reshape(np.dot(ig_p2_norm.T, time_vector), (-1, ))
'''slope graph'''
data_dict = {'low': rt[0], 'high': rt[4]}
df = pd.DataFrame(data_dict)
df['ids'] = np.arange(n_subjs)
df.head()

# Load the data into dabest
dabest_data = dabest.load(data=df,
                          idx=list(data_dict.keys()),
                          paired=True,
                          id_col='ids')
dabest_data.mean_diff.plot(swarm_label='Recall time',
                           fig_size=(8, 5),
                           swarm_ylim=[0, 6])
print(dabest_data.mean_diff)
dabest_data.mean_diff.statistical_tests
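
A self-contained paired-design sketch on synthetic data, mirroring the load call above; paired=True with id_col links each subject's two measurements:

import numpy as np
import pandas as pd
import dabest

n = 20
rng = np.random.default_rng(1)
df = pd.DataFrame({'low': rng.normal(3.0, 0.5, n)})
df['high'] = df['low'] + rng.normal(0.4, 0.3, n)
df['ids'] = np.arange(n)

paired_set = dabest.load(df, idx=('low', 'high'), paired=True, id_col='ids')
print(paired_set.mean_diff)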
Example #12
# resultsDf = TrhCsCh.metaDataDf
allSpeedData.plotBoundedSpeedLines(colorBy = 'Sex', col = 'Status', rp = '600s')


#%%
allSpeedData.plotMeanHeatMaps(row = 'Status', col = 'Temperature')
#%%
groupBy = 'Status'
compareBy = 'Temperature'
colorBy = 'Genotype'
results = allSpeedData.resultsDf.loc[allSpeedData.resultsDf['Status'] == 'Offspring']
dabestContrastData = dabest.load(results,
                                 x=compareBy,  # the default for this test config is to group flies by genotype
                                 y='TB Preference',
                                 idx=np.unique(allSpeedData.metaDataDf.Temperature),
                                 paired=False)

fig = dabestContrastData.mean_diff.plot(color_col=colorBy)

#%%

# put in the parameters you don't want to type over and over again when using the plot function
groupby = 'Sex'
compareby = 'Status'
colorby = 'Genotype'
startHour = 0  # hours
endHour = 2  # hours
figAspectRatio = (8, 5)
idx_list = []
for dtype_ in data_types:
    idx_tuple_ = ('%s.%s' % (dtype_, exc_lines[0]),
                  '%s.%s' % (dtype_, exc_lines[1]))
    idx_list.append(idx_tuple_)

# %% Cummings plot

palette_mod = {
    comp_type: exc_palette[comp_type.split('.')[-1]]
    for comp_type in comp_types
}
sns.set(font_scale=1.2)
gene_comp_figname = 'figures/diff_gene_expression_exc.svg'
gene_df = dabest.load(exc_expression_melted,
                      idx=idx_list,
                      x="Cre_gene",
                      y='cpm')
f = gene_df.cliffs_delta.plot(
    custom_palette=palette_mod,
    group_summaries='median_quartiles',
    swarm_desat=.9,
    #                 swarm_ylim=(1e-5,1e-3),
    swarmplot_kwargs={'size': 2.5})

rawdata_axes = f.axes[0]
rawdata_axes = man_utils.annotate_sig_level(data_types,
                                            exc_lines,
                                            'Cre_line',
                                            gene_sig_grouped,
                                            'Comp_type',
                                            exc_expression_melted,
Example #14
print ("Correlation matrix\n")
print (sheet.corr(method='pearson'))
print ("----------------------------")

fig, (ax1, ax2) = plt.subplots(2, 1)
for col in sheet.columns:
    ax1.plot(sheet[col].values)
sheet.boxplot(ax=ax2)

"""
From Seaborn
"""
sns.pairplot(sheet)

"""
From dabest
"""
print ("----------------------------")
two_groups_unpaired = dabest.load(sheet, idx=(sheet.columns[0], sheet.columns[1]), resamples=5000)
two_groups_unpaired.mean_diff.plot()
two_groups_unpaired.hedges_g.plot()
stat=two_groups_unpaired.mean_diff.statistical_tests
print ('Further Statistics from the first 2 columns')
print (stat.transpose())


#two_groups_unpaired.mean_diff
#two_groups_unpaired.mean_diff.results
#two_groups_unpaired.mean_diff.statistical_tests
#two_groups_unpaired.hedges_g.results
Example #15
comp_types = sag_features_all['Cre_type'].unique().tolist()
data_types = sag_features_all.type.unique().tolist()
idx_list = []
for dtype_ in data_types:
    idx_tuple_ = ('%s.%s' % (dtype_, exc_lines[0]),
                  '%s.%s' % (dtype_, exc_lines[1]))
    idx_list.append(idx_tuple_)

sns.set(font_scale=1)
palette_mod = {
    comp_type: palette[comp_type.split('.')[-1]]
    for comp_type in comp_types
}

analysis_of_long_df = dabest.load(sag_features_all,
                                  idx=idx_list,
                                  x="Cre_type",
                                  y=select_sag_feature)

f = analysis_of_long_df.cliffs_delta.plot(custom_palette=palette_mod,
                                          group_summaries='median_quartiles',
                                          swarm_desat=.9)
rawdata_axes = f.axes[0]
rawdata_axes = man_utils.annotate_sig_level(sig_vars,
                                            exc_lines,
                                            'Cre_line',
                                            ephys_sig_group,
                                            'Comp_type',
                                            sag_features_all,
                                            'type',
                                            select_sag_feature,
                                            rawdata_axes,
Example #16
# The opening of this snippet was truncated in the source; the palette
# mapping below is reconstructed from the surviving lines.
palette_features = {
    type_: subclass_colors[type_.split('.')[-1]]
    if type_.split('.')[-1] != 'L5 CF' else subclass_colors['L5 PT']
    for type_ in feature_data.feat_ttype.unique()
}

sns.set(font_scale=1.2)
fig, ax = plt.subplots(figsize=(15, 8))
idx_feat = []
for dtype_ in feature_select:
    idx_tuple_ = ('%s.%s' % (dtype_, exc_subclasses[0]),
                  '%s.%s' % (dtype_, exc_subclasses[1]))
    idx_feat.append(idx_tuple_)

feature_data_select = feature_data.loc[
    feature_data.features.isin(feature_select), ]
analysis_df_feat = dabest.load(feature_data_select,
                               idx=idx_feat,
                               x='feat_ttype',
                               y='value')

f = analysis_df_feat.cliffs_delta.plot(ax=ax,
                                       custom_palette=palette_features,
                                       group_summaries='median_quartiles',
                                       swarm_desat=.9)
ax = man_utils.annotate_sig_level(feature_select, exc_subclasses, 'ttype',
                                  feat_sig_grouped, 'Comp_type',
                                  feature_data_select, 'features', 'value', ax)

rawdata_axes = f.axes[0]
raw_xticklabels = rawdata_axes.get_xticklabels()
labels = []
for label in raw_xticklabels:
    txt = label.get_text()

    channel_select = [
        channel_ for channel_ in cond_types if channel in channel_
    ]
    idx_channel = []
    for dtype_ in channel_select:
        idx_tuple_ = ('%s.%s' % (dtype_, exc_subclasses[0]),
                      '%s.%s' % (dtype_, exc_subclasses[1]))
        idx_channel.append(idx_tuple_)

    param_data_select = param_data.loc[
        param_data.conductance.isin(channel_select), ]

    analysis_df_channel = dabest.load(param_data_select,
                                      idx=idx_channel,
                                      x='param_ttype',
                                      y='value')

    analysis_df_channel.cliffs_delta.plot(ax=ax[0],
                                          custom_palette=palette_channel,
                                          group_summaries='median_quartiles',
                                          swarm_desat=.9)

    ax[0] = man_utils.annotate_sig_level(channel_select, exc_subclasses,
                                         'ttype', cond_sig_grouped,
                                         'Comp_type', param_data_select,
                                         'conductance', 'value', ax[0])

    #    ax[0].set_title(r'-log(p-val) = %.2f' % -np.log10(cond_p_val))

    genes = [gene for gene in genes if gene in gene_types]