def loaddata(data1, data2, parameter, name1, name2, rsmpls=None):
    # Pull the chosen parameter out of both datasets and place the two
    # groups side by side as named columns.
    temp = pd.concat([data1[parameter], data2[parameter]], axis=1, sort=True)
    temp.columns = [name1, name2]
    # Use dabest's default resample count unless one was requested.
    if rsmpls is not None:
        bootstrap = dabest.load(temp, idx=(name1, name2), resamples=rsmpls)
    else:
        bootstrap = dabest.load(temp, idx=(name1, name2))
    return bootstrap
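# A minimal usage sketch for loaddata, assuming pandas and dabest are
# imported at module level. The 'latency' column and the group labels
# below are invented for illustration.
import numpy as np
import pandas as pd
import dabest

rng = np.random.default_rng(0)
ctrl = pd.DataFrame({'latency': rng.normal(10.0, 2.0, 40)})
test = pd.DataFrame({'latency': rng.normal(12.0, 2.0, 40)})
# Build the two-group dabest object, then print the mean difference with
# its bootstrap 95% confidence interval.
bootstrap = loaddata(ctrl, test, 'latency', 'control', 'treatment',
                     rsmpls=5000)
print(bootstrap.mean_diff)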
def esci_indep_cohens_d(data1, data2, n_boot=5000, has_preds=False):
    '''Compute Cohen's d effect size and its bootstrap 95% confidence
    interval (using the bias-corrected accelerated bootstrap).

    Parameters
    ----------
    data1 : np.ndarray
        One-dimensional array of values for the "high" group (for example
        diagnosed participants).
    data2 : np.ndarray
        One-dimensional array of values for the "low" group (for example
        healthy controls).
    n_boot : int
        Number of bootstraps to use.
    has_preds : bool
        Whether an array of predictors is provided in the data. If so, the
        first column of data1 and data2 contains data for the separate
        groups and the following columns are the predictors used in the
        regression, with the predictor of interest (group membership)
        being the last one and the rest treated as confounds.

    Returns
    -------
    stats : dict
        Dictionary of results.

        * ``stats['es']`` contains the effect size.
        * ``stats['ci']`` contains the 95% confidence interval for the
          effect size.
        * ``stats['bootstraps']`` contains bootstrap effect size values.
    '''
    if not has_preds:
        assert data2 is not None
        import dabest
        df = utils.psd_to_df(data1, data2)
        dbst_set = dabest.load(df, idx=("controls", "diagnosed"),
                               x="group", y="FAA", resamples=n_boot)
        results = dbst_set.cohens_d.results
        cohen_d = results.difference.values[0]
        cohen_d_ci = (results.bca_low.values[0], results.bca_high.values[0])
        bootstraps = results.bootstraps[0]
    else:
        from borsar.stats import compute_regression_t
        import scikits.bootstrap as boot

        def regression_Cohens_d(data1, data2):
            data = np.concatenate([data1, data2], axis=0)
            preds = data[:, 1:]
            tvals = compute_regression_t(data[:, [0]], preds)
            return d_from_t_categorical(tvals[-1, 0], preds)

        cohen_d = regression_Cohens_d(data1, data2)
        cohen_d_ci, bootstraps = boot.ci(
            (data1, data2), regression_Cohens_d, multi='independent',
            n_samples=n_boot, return_dist=True)

    stats = dict(es=cohen_d, ci=cohen_d_ci, bootstraps=bootstraps)
    return stats
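# A sketch of the t-to-d conversion that d_from_t_categorical presumably
# performs; the helper itself is defined elsewhere in this project, so the
# function below is only an illustration. For a two-level categorical
# predictor, Cohen's d relates to the regression t value through the group
# sizes: d = t * sqrt(1/n1 + 1/n2).
import numpy as np

def _d_from_t_sketch(t, preds):
    # preds: design matrix; last column codes group membership (two levels)
    group = preds[:, -1]
    levels = np.unique(group)
    n1 = np.sum(group == levels[0])
    n2 = np.sum(group == levels[1])
    return t * np.sqrt(1.0 / n1 + 1.0 / n2)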
def plotContrasts(self, y, colorBy, compareBy, groupBy='Temperature',
                  plot_kwargs=dict()):
    resultsDf = self.resultsDf
    resultsDf['newPlotColumn'] = (resultsDf[groupBy] + ' ' +
                                  resultsDf[compareBy])
    # Build the dabest idx: one tuple of '<group> <compare>' labels per
    # group level, with the compare levels in reverse order. (The previous
    # construction nested these tuples incorrectly beyond two group levels.)
    groups = np.unique(resultsDf[groupBy])
    compares = np.unique(resultsDf[compareBy])[::-1]
    if len(groups) == 1:
        listIdx = tuple(groups[0] + ' ' + compares)
    else:
        listIdx = tuple(tuple(g + ' ' + compares) for g in groups)
    print(listIdx)
    customPalette = locoPlotters.espressoCreatePalette(resultsDf[colorBy])
    setFont('Source Sans Bold', 10)
    dabestContrastData = dabest.load(
        resultsDf,
        x='newPlotColumn',  # the default for this test config is to group flies by genotype
        y=y,
        idx=listIdx,
        paired=False)
    fig = dabestContrastData.mean_diff.plot(color_col=colorBy,
                                            custom_palette=customPalette,
                                            **plot_kwargs)
    # Split the combined labels back into compare (C) and group (G) parts.
    if len(groups) == 1:
        flatListIdxC = [item.split(' ')[1] for item in listIdx]
        flatListIdxG = [item.split(' ')[0] for item in listIdx]
    else:
        flatListIdxC = [item.split(' ')[1] for t in listIdx for item in t]
        flatListIdxG = [item.split(' ')[0] for t in listIdx for item in t]
    fig.axes[0].set_xticklabels(flatListIdxC, rotation=45, ha="right")
    ylim = fig.axes[0].get_ylim()
    # Label each pair of columns with its group name (assumes two compared
    # levels per group).
    for i in range(0, len(groups)):
        # fig.axes[0].text(0.5, ylim[1], flatListIdxG[0], ha="center")
        fig.axes[0].text(0.5 + 2 * i, ylim[1] * 1.1, flatListIdxG[2 * i],
                         ha="center")
    locoUtilities.espressoSaveFig(fig, y + '_contrast',
                                  self.metaDataDf.Date[0], self.outputFolder)
    return fig
def _parallel_region_dabest(self, Region, Freq):
    """Compute regionwise group differences."""
    print(f'DABEST on region {Region}, Frequency: {Freq}')
    df_pivot = self.GBC_df.pivot(index=['Subject', 'Group'],
                                 columns='Frequency',
                                 values=Region).reset_index()
    # Bootstrap test with DABEST
    analysis = dabest.load(df_pivot, idx=("Control", "FEP"),
                           x='Group', y=Freq, ci=90)
    results = analysis.mean_diff.results
    # Levene test for equality of variances between the two groups
    # (previously this compared the Control group against itself).
    _, pval = scipy.stats.levene(
        df_pivot.loc[df_pivot['Group'] == 'Control', Freq],
        df_pivot.loc[df_pivot['Group'] == 'FEP', Freq])
    results['levene-p-value'] = pval
    # Insert region name into the result df
    results.insert(loc=0, column='Region', value=Region)
    return results
def _parallel_net_dabest(self, Measure, Freq):
    """Apply DABEST to a graph measure; called in dabest_net_measures."""
    print(f'DABEST on Graph Measure {Measure}, Frequency: {Freq}')
    df_pivot = self.Net_df.pivot(index=['Subject', 'Group'],
                                 columns='Frequency',
                                 values=Measure).reset_index()
    # Bootstrap test with DABEST
    analysis = dabest.load(df_pivot, idx=("Control", "FEP"),
                           x='Group', y=Freq, ci=90)
    results = analysis.mean_diff.results
    # Levene test for equality of variances between the two groups
    _, pval = scipy.stats.levene(
        df_pivot.loc[df_pivot['Group'] == 'Control', Freq],
        df_pivot.loc[df_pivot['Group'] == 'FEP', Freq])
    results['levene-p-value'] = pval
    # Insert measure name into the result df
    results.insert(loc=0, column='Measure', value=Measure)
    return results
def dabest_avg_GBC(self):
    """Function to calculate effect size and t/p value for average GBC."""
    df_long = pd.read_pickle(
        self.find(suffix='GBC', filetype='.pkl', Freq=self.Frequencies))
    df_wide = pd.pivot_table(df_long, index=['Group', 'Subject'],
                             columns='Frequency',
                             values='Avg. GBC').reset_index()
    res_list = []
    for Freq in self.FrequencyBands.keys():
        analysis = dabest.load(df_wide, idx=("Control", "FEP"),
                               x='Group', y=Freq, ci=90)
        results = analysis.mean_diff.results
        results.insert(loc=0, column='Frequency', value=Freq)
        res_list.append(results)
    result_df = pd.concat(res_list)
    # Save Pickle
    FileName = self.createFileName(suffix='Mean-GBC-DABEST',
                                   filetype='.pkl', Freq=self.Frequencies)
    FilePath = self.createFilePath(self.EdgeStatsDir, 'GBC', 'Stats',
                                   FileName)
    result_df.to_pickle(FilePath)
    # Save CSV
    FileName = self.createFileName(suffix='Mean-GBC-DABEST',
                                   filetype='.csv', Freq=self.Frequencies)
    FilePath = self.createFilePath(self.EdgeStatsDir, 'GBC', 'Stats',
                                   FileName)
    result_df.to_csv(FilePath)
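# For reference, a minimal sketch of pulling single numbers out of a
# dabest results DataFrame like the ones collected above; the column
# names (difference, bca_low, bca_high) follow the dabest API.
def summarize_effect(results):
    # Return the mean difference and its bootstrap confidence interval
    # from the first comparison in the results table.
    es = results['difference'].values[0]
    ci = (results['bca_low'].values[0], results['bca_high'].values[0])
    return es, ci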
def plot_massplot(MASS, cluster2MASS):
    # Write one column of mass values per cluster, padding ragged columns
    # with NaN so every CSV row has the same width.
    with open(str(MASS) + '_mass.csv', mode='w') as mass_file:
        mass_writer = csv.writer(mass_file, delimiter=',', quotechar='"',
                                 quoting=csv.QUOTE_MINIMAL)
        mass_writer.writerow(list(cluster2MASS.keys()))
        max_length = 0
        for cluster in cluster2MASS:
            if len(cluster2MASS[cluster]) > max_length:
                max_length = len(cluster2MASS[cluster])
        for i in range(max_length):
            tmp = np.full(len(list(cluster2MASS.keys())), np.nan)
            for cluster in cluster2MASS:
                if len(cluster2MASS[cluster]) > i:
                    tmp[cluster] = cluster2MASS[cluster][i]
            mass_writer.writerow(tmp)

    # Read the per-cluster mass table back in and give the columns
    # human-readable names.
    mass = pd.read_csv(str(MASS) + '_mass.csv')
    mass = mass.rename(columns={"0": "Cluster 0", "1": "Cluster 1",
                                "2": "Cluster 2", "3": "Cluster 3",
                                "4": "Cluster 4", "5": "Cluster 5"})
    # mass['Cluster 0'] = 0

    # Load the data into dabest with Cluster 0 as the shared control.
    shared_control = dabest.load(mass,
                                 idx=("Cluster 0", "Cluster 1", "Cluster 2",
                                      "Cluster 3", "Cluster 4", "Cluster 5"))

    # Produce a Cumming estimation plot.
    shared_control.mean_diff.plot()
    plt.show()
import matplotlib
%matplotlib inline
import numpy as np
import locoDataMunger
import locoUtilities
import pandas as pd
import locoPlotters
import espresso as esp
import EspressoLocomotion  # needed for the EspressoLocomotion class below
import dabest

#%%
dataFolder = '/Users/sangyuxu/xy1/'
TrhLxR50Gal = EspressoLocomotion.EspressoLocomotion(dataFolder, 0, 120)

#%%
# resultsDf = TrhCsCh.metaDataDf
groupBy = 'Temperature'
compareBy = 'Status'
colorBy = 'Genotype'
dabestContrastData = dabest.load(
    TrhLxR50Gal.resultsDf,
    x=compareBy,  # the default for this test config is to group flies by genotype
    y='averageSpeed',
    idx=np.unique(TrhLxR50Gal.metaDataDf.Status),
    paired=False)
fig = dabestContrastData.mean_diff.plot(color_col=colorBy)
print(scipy.stats.levene(EBUSCO_plot_data, Enon_BUSCO_plot_data))
print("2")
print(np.var(CBUSCO_plot_data))
print(np.var(Cnon_BUSCO_plot_data))
print(scipy.stats.levene(CBUSCO_plot_data, Cnon_BUSCO_plot_data))
print("3")
print(np.var(BBUSCO_plot_data))
print(np.var(Bnon_BUSCO_plot_data))
print(scipy.stats.levene(BBUSCO_plot_data, Bnon_BUSCO_plot_data))

# dabest
dict_data = {
    "BUSCO (2)": pd.Series(CBUSCO_plot_data),
    "non-BUSCO (2)": pd.Series(Cnon_BUSCO_plot_data),
    "BUSCO (1)": pd.Series(EBUSCO_plot_data),
    "non-BUSCO (1)": pd.Series(Enon_BUSCO_plot_data),
    "BUSCO (3)": pd.Series(BBUSCO_plot_data),
    "non-BUSCO (3)": pd.Series(Bnon_BUSCO_plot_data)
}
df = pd.DataFrame(dict_data)
multi = dabest.load(df, idx=(("BUSCO (2)", "non-BUSCO (2)"),
                             ("BUSCO (1)", "non-BUSCO (1)"),
                             ("BUSCO (3)", "non-BUSCO (3)")))
multi.mean_diff.statistical_tests.to_csv(
    "/FULL/PATH/TO/OUTPUT/DIRECTORY/dabest_BUSCO_comparison.csv")
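# The nested idx above asks dabest for three independent two-group
# comparisons sharing one figure. A stripped-down sketch of the same
# structure with invented data:
import numpy as np
import pandas as pd
import dabest

rng = np.random.default_rng(1)
toy = pd.DataFrame({'A1': rng.normal(0.0, 1.0, 30),
                    'A2': rng.normal(0.5, 1.0, 30),
                    'B1': rng.normal(0.0, 1.0, 30),
                    'B2': rng.normal(0.2, 1.0, 30)})
multi_toy = dabest.load(toy, idx=(("A1", "A2"), ("B1", "B2")))
print(multi_toy.mean_diff.statistical_tests)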
# ax.set_xlabel('Time')
# ax.set_ylabel('Linear Corr.')
# ax.set_title('Correlation: recall vs. ISC change')
# ax.xaxis.set_major_formatter(FormatStrFormatter('%d'))
# ax.legend()
# sns.despine()
# f.tight_layout()
#
# xticklabels = [f'RM-{cond}' for cond in has_memory_conds]
# f, ax = plt.subplots(1, 1, figsize=(6, 4))
# sns.violinplot(data=[r_mu_tisc[cond] for cond in has_memory_conds])
# ax.axhline(0, color='grey', linestyle='--')
# ax.set_xticks(range(len(xticklabels)))
# ax.set_xticklabels(xticklabels)
# ax.set_xlabel('Condition')
# ax.set_ylabel('Linear Correlation')
# ax.set_title('Correlation: recall vs. ISC change')
# sns.despine()
# f.tight_layout()

data_dict = {}
for cond in list(r_mu_sisc.keys()):
    data_dict[f'RM-{cond}'] = np.mean(r_val_tisc[cond], axis=-1)
df = make_df(data_dict)
db = dabest.load(data=df, x="Condition", y="Value",
                 idx=list(data_dict.keys()))
db.mean_diff.plot(swarm_label='Linear correlation', fig_size=(7, 5),
                  custom_palette=c_pal)
del dk[ptest][cond]['er'][i_ms]
for lca_pid, lca_pname in lca_pnames.items():
    del lca_param[ptest][lca_pid][cond]['mu'][i_ms]
    del lca_param[ptest][lca_pid][cond]['er'][i_ms]
del ma_lca[ptest][i_ms]

'''process the data: extract differences between the two penalty conds'''
# compute RT
rt = {ptest: None for ptest in penaltys_test}
time_vector = np.reshape(np.arange(n_param) + 1, (n_param, 1))
for ptest in penaltys_test:
    ig_p2_ = np.array(lca_param[ptest][0]['DM']['mu'])[:, n_param:].T
    ig_p2_norm = ig_p2_ / np.sum(ig_p2_, axis=0)
    rt[ptest] = np.reshape(np.dot(ig_p2_norm.T, time_vector), (-1,))

'''slope graph'''
data_dict = {'low': rt[0], 'high': rt[4]}
df = pd.DataFrame(data_dict)
df['ids'] = np.arange(n_subjs)
df.head()

# Load the data into dabest
dabest_data = dabest.load(data=df, idx=list(data_dict.keys()),
                          paired=True, id_col='ids')
dabest_data.mean_diff.plot(swarm_label='Recall time', fig_size=(8, 5),
                           swarm_ylim=[0, 6])
print(dabest_data.mean_diff)
dabest_data.mean_diff.statistical_tests
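# The paired=True / id_col combination above produces a slopegraph in
# which each subject's low- and high-penalty recall times are connected.
# A minimal sketch of the same paired call with invented data, matching
# the older dabest API used here (newer releases take a string such as
# paired='baseline' instead of True):
import numpy as np
import pandas as pd
import dabest

rng = np.random.default_rng(2)
toy = pd.DataFrame({'low': rng.normal(3.0, 1.0, 15),
                    'high': rng.normal(4.0, 1.0, 15)})
toy['ids'] = np.arange(len(toy))
paired_toy = dabest.load(data=toy, idx=('low', 'high'),
                         paired=True, id_col='ids')
print(paired_toy.mean_diff)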
# resultsDf = TrhCsCh.metaDataDf
allSpeedData.plotBoundedSpeedLines(colorBy='Sex', col='Status', rp='600s')

#%%
allSpeedData.plotMeanHeatMaps(row='Status', col='Temperature')

#%%
groupBy = 'Status'
compareBy = 'Temperature'
colorBy = 'Genotype'
results = allSpeedData.resultsDf.loc[
    allSpeedData.resultsDf['Status'] == 'Offspring']
allSpeedData
dabestContrastData = dabest.load(
    results,
    x=compareBy,  # the default for this test config is to group flies by genotype
    y='TB Preference',
    idx=np.unique(allSpeedData.metaDataDf.Temperature),
    paired=False)
fig = dabestContrastData.mean_diff.plot(color_col=colorBy)

#%%
# put in the parameters you don't want to type over and over again when
# using the plot function
groupby = 'Sex'
compareby = 'Status'
colorby = 'Genotype'
startHour = 0  # hours
endHour = 2  # hours
figAspectRatio = (8, 5)
idx_list = []
for dtype_ in data_types:
    idx_tuple_ = ('%s.%s' % (dtype_, exc_lines[0]),
                  '%s.%s' % (dtype_, exc_lines[1]))
    idx_list.append(idx_tuple_)

# %% Cumming plot
palette_mod = {
    comp_type: exc_palette[comp_type.split('.')[-1]]
    for comp_type in comp_types
}
sns.set(font_scale=1.2)
gene_comp_figname = 'figures/diff_gene_expression_exc.svg'
gene_df = dabest.load(exc_expression_melted, idx=idx_list,
                      x="Cre_gene", y='cpm')
f = gene_df.cliffs_delta.plot(
    custom_palette=palette_mod,
    group_summaries='median_quartiles',
    swarm_desat=.9,
    # swarm_ylim=(1e-5, 1e-3),
    swarmplot_kwargs={'size': 2.5})
rawdata_axes = f.axes[0]
rawdata_axes = man_utils.annotate_sig_level(data_types, exc_lines, 'Cre_line',
                                            gene_sig_grouped, 'Comp_type',
                                            exc_expression_melted,
print ("Correlation matrix\n") print (sheet.corr(method='pearson')) print ("----------------------------") fig, (ax1, ax2) = plt.subplots(2, 1) for col in sheet.columns: ax1.plot(sheet[col].values) ax2 = sheet.boxplot() """ From Seaborn """ sns.pairplot(sheet) """ From dabest """ print ("----------------------------") two_groups_unpaired = dabest.load(sheet, idx=(sheet.columns[0], sheet.columns[1]), resamples=5000) two_groups_unpaired.mean_diff.plot() two_groups_unpaired.hedges_g.plot() stat=two_groups_unpaired.mean_diff.statistical_tests print ('Further Statistics from the first 2 columns') print (stat.transpose()) #two_groups_unpaired.mean_diff #two_groups_unpaired.mean_diff.results #two_groups_unpaired.mean_diff.statistical_tests #two_groups_unpaired.hedges_g.results
comp_types = sag_features_all['Cre_type'].unique().tolist()
data_types = sag_features_all.type.unique().tolist()
idx_list = []
for dtype_ in data_types:
    idx_tuple_ = ('%s.%s' % (dtype_, exc_lines[0]),
                  '%s.%s' % (dtype_, exc_lines[1]))
    idx_list.append(idx_tuple_)

sns.set(font_scale=1)
palette_mod = {
    comp_type: palette[comp_type.split('.')[-1]]
    for comp_type in comp_types
}
analysis_of_long_df = dabest.load(sag_features_all, idx=idx_list,
                                  x="Cre_type", y=select_sag_feature)
f = analysis_of_long_df.cliffs_delta.plot(custom_palette=palette_mod,
                                          group_summaries='median_quartiles',
                                          swarm_desat=.9)
rawdata_axes = f.axes[0]
rawdata_axes = man_utils.annotate_sig_level(sig_vars, exc_lines, 'Cre_line',
                                            ephys_sig_group, 'Comp_type',
                                            sag_features_all, 'type',
                                            select_sag_feature, rawdata_axes,
    if type_.split('.')[-1] != 'L5 CF' else subclass_colors['L5 PT']
    for type_ in feature_data.feat_ttype.unique()
}
sns.set(font_scale=1.2)
fig, ax = plt.subplots(figsize=(15, 8))
idx_feat = []
for dtype_ in feature_select:
    idx_tuple_ = ('%s.%s' % (dtype_, exc_subclasses[0]),
                  '%s.%s' % (dtype_, exc_subclasses[1]))
    idx_feat.append(idx_tuple_)
feature_data_select = feature_data.loc[
    feature_data.features.isin(feature_select), ]
analysis_df_feat = dabest.load(feature_data_select, idx=idx_feat,
                               x='feat_ttype', y='value')
f = analysis_df_feat.cliffs_delta.plot(ax=ax,
                                       custom_palette=palette_features,
                                       group_summaries='median_quartiles',
                                       swarm_desat=.9)
ax = man_utils.annotate_sig_level(feature_select, exc_subclasses, 'ttype',
                                  feat_sig_grouped, 'Comp_type',
                                  feature_data_select, 'features', 'value',
                                  ax)
rawdata_axes = f.axes[0]
raw_xticklabels = rawdata_axes.get_xticklabels()
labels = []
for label in raw_xticklabels:
    txt = label.get_text()
           dpi=100)
channel_select = [
    channel_ for channel_ in cond_types if channel in channel_
]
idx_channel = []
for dtype_ in channel_select:
    idx_tuple_ = ('%s.%s' % (dtype_, exc_subclasses[0]),
                  '%s.%s' % (dtype_, exc_subclasses[1]))
    idx_channel.append(idx_tuple_)
param_data_select = param_data.loc[
    param_data.conductance.isin(channel_select), ]
analysis_df_channel = dabest.load(param_data_select, idx=idx_channel,
                                  x='param_ttype', y='value')
analysis_df_channel.cliffs_delta.plot(ax=ax[0],
                                      custom_palette=palette_channel,
                                      group_summaries='median_quartiles',
                                      swarm_desat=.9)
ax[0] = man_utils.annotate_sig_level(channel_select, exc_subclasses, 'ttype',
                                     cond_sig_grouped, 'Comp_type',
                                     param_data_select, 'conductance',
                                     'value', ax[0])
# ax[0].set_title(r'-log(p-val) = %.2f' % -np.log10(cond_p_val))
genes = [gene for gene in genes if gene in gene_types]