Ejemplo n.º 1
0
    def perform_kappa_anova(self):
        """Run the Group x Comparison mixed ANOVA on Cohen's Kappa plus post-hoc tests.

        Reads ``self.df_kappa_long`` and refers to its "Kappa", "Comparison",
        "Group" and "ID" columns by name. The ANOVA table is stored in
        ``self.kappa_aov`` and printed; the Bonferroni-adjusted pairwise
        t-tests (Hedges' g effect size) are stored in ``self.kappa_posthoc``
        without being printed.
        """
        print("\nPerforming Group x Comparison mixed ANOVA on Cohen's Kappa values.")

        # The ANOVA and the post-hoc tests share one design, declared once here.
        design = {
            "dv": "Kappa",
            "within": "Comparison",
            "between": "Group",
            "subject": "ID",
            "data": self.df_kappa_long,
        }

        # Group x Comparison mixed ANOVA
        anova_table = pg.mixed_anova(correction=True, **design)
        self.kappa_aov = anova_table
        pg.print_table(anova_table)

        # Post-hoc pairwise comparisons
        self.kappa_posthoc = pg.pairwise_ttests(padjust="bonf",
                                                effsize="hedges",
                                                parametric=True,
                                                **design)
Ejemplo n.º 2
0
def twoMixANOVA(adaptation, var):
    """Run a 2-way mixed ANOVA for every light intensity of one adaptation series.

    Group is the between-subject factor: male wildtype (mWT), female wildtype
    (fWT), male knockout (mKO), female knockout (fKO), male heterozygous (mHT),
    female heterozygous (fHT).
    Timepoint is the within-subject factor: the different time points at which
    the ERG was recorded for the same animal (TP1, TP2, TP3).

    Relies on the module-level ``depvar`` DataFrame and the ``savestatsto``
    path prefix.

    Parameters
    ----------
    adaptation : str
        Adaptation condition in which the ERG was recorded: dark-adapted
        ('DA'), light-adapted ('LA') or mesopic-adapted ('MA').
    var : str
        Dependent variable to analyse ('a_amp', 'b_amp', 'a_time', 'b_time').

    Returns
    -------
    pandas.DataFrame
        The per-intensity ANOVA tables stacked in group order (empty when no
        row matches ``adaptation``). The same table is also written to
        ``savestatsto + adaptation + '__' + var + '.xlsx'``.
    """
    df_adaptation = depvar.loc[depvar['Adaptation'] == adaptation]

    # Gather the tables and concatenate once: DataFrame.append was removed in
    # pandas 2.0 and re-copied the accumulated frame on every iteration.
    tables = []
    for _, light_df in df_adaptation.groupby('Light_intensity'):
        aov = pg.mixed_anova(
            data=light_df,
            dv=var,
            between='Group',
            within='Timepoint',
            subject='Animal',
            # original author's note: true/false depends on whether the
            # design is balanced or not
            correction=False,
        )
        tables.append(aov)
        pg.print_table(aov)

    # pd.concat refuses an empty list; fall back to an empty table instead.
    results = pd.concat(tables) if tables else pd.DataFrame()

    results.to_excel(savestatsto + adaptation + '_' + '_' + var + '.xlsx')
    return results
Ejemplo n.º 3
0
def tukey_pairwise_ph(tidy_df,
                      hour_col: str = "Hour",
                      dep_var: str = "Value",
                      protocol_col: str = "Protocol"):
    """Run Tukey's pairwise post-hoc test between protocols, once per hour.

    Each unique value of ``hour_col`` is printed, its rows are tested with
    ``pg.pairwise_tukey`` and the resulting table is printed.

    :param tidy_df: long-format DataFrame containing the three columns below
    :param hour_col: column whose unique values define the subsets tested
    :param dep_var: dependent-variable column, passed to pingouin as ``dv``
    :param protocol_col: grouping column, passed to pingouin as ``between``
    :return: the per-hour tables stacked by ``pd.concat``, with the hour value
        as the outer index level
    """
    ph_dict = {}
    for hour in tidy_df[hour_col].unique():
        print(hour)
        # A boolean mask instead of a string-built ``query``: the old
        # expression quoted every hour as a string, so it matched nothing for
        # numeric hours and broke on quotes or non-identifier column names.
        hour_df = tidy_df[tidy_df[hour_col] == hour]
        ph = pg.pairwise_tukey(dv=dep_var, between=protocol_col, data=hour_df)
        pg.print_table(ph)
        ph_dict[hour] = ph

    return pd.concat(ph_dict)
    def comparison_by_group_anova(self, dependent_var):
        """Performs a Group x Comparison mixed ANOVA on the dependent variable that is passed in.
           Performs pairwise T-test comparisons for post-hoc analysis.
           Plots group means using Seaborn package (point plot of GROUP on the
           x axis, one hue per COMPARISON level, titled via matplotlib).

        :argument
        -dependent_var: name of column in self.df to use as dependent variable

        :returns
        -tuple (aov, posthoc): the pingouin mixed-ANOVA table and the pairwise
         t-test table, both computed from self.df
        """

        # The trailing space is required: adjacent literals are joined verbatim,
        # and without it the message read "...ANOVA fordependent variable".
        print("\nPerforming Group x Comparison mixed ANOVA for "
              "dependent variable {}.".format(dependent_var.capitalize()))

        aov = pg.mixed_anova(dv=dependent_var, within="COMPARISON", between="GROUP", subject="ID", data=self.df)

        # The table is printed in two parts.
        # NOTE(review): column index 8 falls between the two slices and is never
        # printed - confirm whether that omission is intentional.
        pg.print_table(aov.iloc[:, 0:8])
        print()
        pg.print_table(aov.iloc[:, 9:])

        sns.pointplot(data=self.df, x='GROUP', y=dependent_var, hue='COMPARISON',
                      dodge=False, markers='o', capsize=.1, errwidth=1, palette='Set1')
        plt.title("Group x Comparison Mixed ANOVA: {}".format(dependent_var.capitalize()))

        posthoc = pg.pairwise_ttests(dv=dependent_var, within="COMPARISON", between='GROUP',
                                     subject='ID', data=self.df)
        pg.print_table(posthoc)

        return aov, posthoc
'Pca100_ridge','Pca300_ridge','Pca300_cca30']
# Build the label columns of the long-format table.
# Each subject label is repeated pnts4subj times, the labels are concatenated,
# and the resulting list is repeated once per model.
subjects_ = [np.tile(el,pnts4subj) for el in subjects]
subjects_= np.concatenate(subjects_)
subjects_=subjects_.tolist()*len(models)
# Each model label (and each model-type label) is repeated
# pnts4subj*len(subjects) times, then the repeats are concatenated.
models_=[np.tile(el, pnts4subj*len(subjects)) for el in models]
models_=np.concatenate(models_)
model_types_=[np.tile(el,pnts4subj*len(subjects)) for el in model_types]
model_types_=np.concatenate(model_types_)
# `data` is rebound here: the previous `data` object becomes the 'data' column
# of the dict that replaces it.
data= {'model':models_, 'model_type':model_types_, 'subject':subjects_,'data':data}
df = pd.DataFrame.from_dict(data)
df.to_csv('/data/akitaitsev/decoding_model_bids/decoding_data/statistics/df_long_cor_spectr.csv',\
    index=False)

# violin plot of 'data' per model, split by model type (SM vs STM)
# NOTE(review): `kind` is a catplot-style argument; confirm that the installed
# seaborn's violinplot accepts it.
fig, ax = plt.subplots(figsize=(16,9))
sea.violinplot(ax=ax, x='model',y='data', hue='model_type', kind='violin',inner='quartile',hue_order=['SM','STM'],data=df)
plt.show()
fig.savefig('/data/akitaitsev/decoding_model_bids/decoding_data/violinplots_spectr.png', dpi=300)

### statistical analysis
# Mixed ANOVA: model_type as between factor, model as within factor
aov=pg.mixed_anova(dv='data', between='model_type',within='model',subject='subject', data=df)
aov.to_csv('/data/akitaitsev/decoding_model_bids/decoding_data/statistics/anova_rep_measures_spectr.csv')
print(aov)

pg.print_table(aov)
# Additive OLS model (main effects of model, model_type and subject, no
# interaction terms) summarised with a type-2 ANOVA table
model=ols('data~C(model)+C(model_type)+C(subject)', data=df).fit()
anova=sm.stats.anova_lm(model, typ=2)
anova.to_csv('/data/akitaitsev/decoding_model_bids/decoding_data/statistics/anova_3way_spect.csv')
print(anova)

Ejemplo n.º 6
0
	def analyse(self, parameter_list={"all"}, between_factor_list=["Subject_type"], within_factor_list=["Stimuli_type"], statistical_test="Mixed_anova", file_creation=True, ttest_type=1):
		"""This function carries out the required statistical analysis.

		The analysis is carried out on the specified indicators/parameters using the data extracted from all the subjects that were mentioned in the json file. There are 5 different tests that can be run, namely - Mixed ANOVA, Repeated Measures ANOVA, T Test, Welch T Test and Simple ANOVA (both 1 and 2 way)

		Parameters
		----------
		parameter_list: set (optional)
			Set of the different indicators/parameters (Pupil_size, Blink_rate) on which statistical analysis is to be performed, by default it will be "all" so that all the parameter are considered.
		between_factor_list: list(str) (optional)
			List of between group factors, by default it will only contain "Subject_type".
			If any additional parameter (eg: Gender) needs to be considered, then the list will be: between_factor_list = ["Subject_type", "Gender"].
			DO NOT FORGET TO INCLUDE "Subject_type", if you wish to consider "Subject_type" as a between group factor.
			Eg: between_factor_list = ["factor_x"] will no longer consider "Subject_type" as a factor.
			Please go through the README FILE to understand how the JSON FILE is to be written for between group factors to be considered.
		within_factor_list: list(str) (optional)
			List of within group factors, by default it will only contain "Stimuli_type"
			If any additional parameter, needs to be considered, then the list will be: within_factor_list = ["Stimuli_type", "factor_X"].
			DO NOT FORGET TO INCLUDE "Stimuli_type", if you wish to consider "Stimuli_type" as a within group factor.
			Eg: within_factor_list = ["factor_x"] will no longer consider "Stimuli_type" as a factor.
			Please go through the README FILE to understand how the JSON FILE is to be written for within group factors to be considered.
		statistical_test: str {"Mixed_anova","RM_anova","ttest","welch_ttest","anova","None"} (optional)
			Name of the statistical test that has to be performed.
				NOTE:

				- ttest: There are 3 options for ttest, and your choice of factors must comply with one of those options, for more information, please see description of `ttest_type` variable given below.
				- welch_ttest: There are 2 options for Welch Ttest, and your choice of factors must comply with one of those options, for more information, please see description of `ttest_type` variable given below.
				- Mixed_anova: Only 1 between group factor and 1 within group factor can be considered at any point of time. If more are given, an error message is printed and the first factor of each list is used.
				- anova: Any number of between group factors can be considered for analysis
				- RM_anova: Upto 2 within group factors can be considered at any point of time
		file_creation: bool (optional)
			Indicates whether csv files should be created: one with the statistical results and, per indicator, one with the data frame fed to the test.
				NOTE:
				The name of the results csv file will be the name of the statistical test that has been chosen.
				A directory called "Results" will be created within the Directory whose path is mentioned in the json file and the csv files will be stored within "Results" directory (the per-indicator data frames go to "Results/Data").
				If any previous file by the same name exists, it will be overwritten.
				When False, nothing is written to disk.
		ttest_type: int {1,2,3} (optional)
			Indicates what type of parameters will be considered for the ttest and Welch Ttest
				NOTE:
				For ttest-

				- 1: Upto 2 between group factors will be considered for ttest
				- 2: 1 within group factor will be considered for ttest
				- 3: 1 within group and 1 between group factor will be considered for ttest

				For Welch ttest-

				- 1: Will consider the first factor in 'between_factor_list'
				- 2: Will consider the first factor in 'within_factor_list'

				Any other value prints a message and skips the test for that indicator.

		Examples
		--------

		For calculating Mixed ANOVA, on all the parameters, considering Subject_type and Stimuli_type as between and within group factors respectively

		>>> analyse(self, parameter_list={"all"}, between_factor_list=["Subject_type"], within_factor_list=["Stimuli_type"], statistical_test="Mixed_anova", file_creation = True)
		OR
		>>> analyse(self) (as all of the options are present by default)

		For calculating 2-way ANOVA, for "blink_rate" and "avg_blink_duration", considering Subject_type and Gender as the between group factors while NOT creating any csv file

		>>> analyse(self, parameter_list={"blink_rate", "avg_blink_duration"}, between_factor_list=["Subject_type", "Gender"], statistical_test="anova", file_creation = False)

		"""
		# NOTE: the mutable defaults in the signature are part of the public
		# interface and are only ever read here, never mutated.

		with open(self.json_file, "r") as json_f:
			json_data = json.load(json_f)

		csvFile = None
		writer = None
		directory_path = None
		if file_creation:
			directory_path = json_data["Path"] + "/Results"
			if not os.path.isdir(directory_path):
				os.mkdir(directory_path)

			if not os.path.isdir(directory_path + '/Data/'):
				os.mkdir(directory_path + '/Data/')

			if statistical_test is not None:
				file_path = directory_path + "/" + statistical_test + ".csv"
				csvFile = open(file_path, 'w')
				writer = csv.writer(csvFile)

		meta_not_to_be_considered = ["pupil_size", "pupil_size_downsample"]

		# try/finally so that the results file is closed even when one of the
		# statistical routines raises.
		try:
			for sen in self.sensors:
				for meta in Sensor.meta_cols[sen]:
					if meta in meta_not_to_be_considered:
						continue

					if ('all' not in parameter_list) and (meta not in parameter_list):
						continue

					print("\n\n")
					print("\t\t\t\tAnalysis for ",meta)

					data = self._build_analysis_frame(meta, between_factor_list, within_factor_list, json_data)

					# The Results/Data directory only exists when file_creation is
					# set; writing unconditionally raised a NameError otherwise.
					if file_creation:
						data.to_csv(directory_path + '/Data/' + meta + "_data.csv")

					#Depending on the parameter, choose the statistical test to be done
					if statistical_test == "Mixed_anova":

						# Only a message is printed; the analysis still proceeds
						# with the first factor of each list.
						if len(within_factor_list)>1:
							print("Error: Too many within group factors,\nMixed ANOVA can only accept 1 within group factor\n")
						elif len(between_factor_list)>1:
							print("Error: Too many between group factors,\nMixed ANOVA can only accept 1 between group factor\n")

						print(meta, ":\tMixed ANOVA")
						aov = pg.mixed_anova(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
						pg.print_table(aov)

						if file_creation:
							values_list = ["Mixed Anova: ", meta]
							self.fileWriting(writer, csvFile, aov, values_list)

						posthocs = pg.pairwise_ttests(dv=meta, within=within_factor_list[0], between=between_factor_list[0], subject='subject', data=data)
						pg.print_table(posthocs)

						if file_creation:
							values_list = ["Post Hoc Analysis"]
							self.fileWriting(writer, csvFile, posthocs, values_list)

					elif statistical_test == "RM_anova":

						if len(within_factor_list)>2 or len(within_factor_list)<1:
							print("Error: Too many or too few within group factors,\nRepeated Measures ANOVA can only accept 1 or 2 within group factors\n")

						print(meta, ":\tRM ANOVA")
						aov = pg.rm_anova(dv=meta, within= within_factor_list, subject = 'subject', data=data)
						pg.print_table(aov)

						if file_creation:
							values_list = ["Repeated Measures Anova: ", meta]
							self.fileWriting(writer, csvFile, aov, values_list)

					elif statistical_test == "anova":

						print(meta, ":\tANOVA")

						# "meta ~ C(f1)*C(f2)*..." fits main effects plus interactions
						print("Including interaction effect")
						model_equation = meta + " ~ C(" + ")*C(".join(between_factor_list) + ")"
						print(model_equation)
						model = ols(model_equation, data).fit()
						res = sm.stats.anova_lm(model, typ= 2)
						print(res)

						if file_creation:
							values_list = ["Anova including interaction effect: ", meta]
							self.fileWriting(writer, csvFile, res, values_list)

						# "meta ~ C(f1)+C(f2)+..." fits main effects only
						print("\nExcluding interaction effect")
						model_equation = meta + " ~ C(" + ")+C(".join(between_factor_list) + ")"
						print(model_equation)
						model = ols(model_equation, data).fit()
						res = sm.stats.anova_lm(model, typ= 2)
						print(res)

						if file_creation:
							values_list = ["Anova excluding interaction effect: ", meta]
							self.fileWriting(writer, csvFile, res, values_list)

					elif statistical_test == "ttest":

						print(meta, ":\tt test")

						# Stays None for an unsupported ttest_type, so nothing stale
						# (or unbound) is printed or written below.
						aov = None
						if ttest_type==1:
							aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, subject='subject', data=data)
						elif ttest_type==2:
							aov = pg.pairwise_ttests(dv=meta, within=within_factor_list, subject='subject', data=data)
						elif ttest_type==3:
							aov = pg.pairwise_ttests(dv=meta, between=between_factor_list, within=within_factor_list, subject='subject', data=data)
						else:
							print("The value given to ttest_type is not acceptable, it must be either 1 or 2 or 3")

						if aov is not None:
							pg.print_table(aov)

							if file_creation:
								values_list = ["Pairwise ttest: ", meta]
								self.fileWriting(writer, csvFile, aov, values_list)

					elif statistical_test == "welch_ttest":

						print(meta, ":\tWelch t test")

						# Stays None for an unsupported ttest_type (test is skipped)
						welch_factor = None
						if ttest_type==1:
							welch_factor = between_factor_list[0]
						elif ttest_type==2:
							welch_factor = within_factor_list[0]
						else:
							print("The value given to ttest_type for welch test is not acceptable, it must be either 1 or 2")

						if welch_factor is not None:
							normality,aov = self.welch_ttest(dv=meta, factor=welch_factor, subject='subject', data=data)
							pg.print_table(normality)
							pg.print_table(aov)

							if file_creation:
								values_list = ["Welch Pairwise ttest: ", meta]
								self.fileWriting(writer, csvFile, normality, values_list)
								self.fileWriting(writer, csvFile, aov, values_list)
		finally:
			if csvFile is not None:
				csvFile.close()

	def _build_analysis_frame(self, meta, between_factor_list, within_factor_list, json_data):
		"""Assemble the data frame fed to the statistical routines for one indicator.

		Columns, in order: the indicator `meta`, every between group factor, every within group factor, "subject" and "stimuli_name". One row is added per value found in `self.meta_matrix_dict[1][meta]` for each subject / stimulus type pair.
		A factor that cannot be looked up in `json_data` is reported on stdout and stored as NaN so that the row keeps the same width as the frame.
		"""
		# Indicators that go through summationArrayCalculation / return_index to
		# resolve the stimulus of each value, and whose zero values are skipped.
		event_metas = ("sacc_duration", "sacc_vel", "sacc_amplitude", "ms_duration", "ms_vel", "ms_amplitude")
		is_event_meta = meta in event_metas

		#Defining the list of columns required for the statistical analysis
		column_list = [meta]
		column_list.extend(between_factor_list)
		column_list.extend(within_factor_list)
		column_list.append("subject")
		column_list.append("stimuli_name")

		data = pd.DataFrame(columns=column_list)

		#For each subject
		for sub_index, sub in enumerate(self.subjects):
			#For each stimulus type
			for stimuli_index, stimuli_type in enumerate(sub.aggregate_meta):

				if is_event_meta:
					summation_array = self.summationArrayCalculation(meta, sub_index, stimuli_index)

				value_array = self.meta_matrix_dict[1][meta][sub_index,stimuli_index]

				# Number of zero entries skipped so far; subtracted from the
				# position before it is handed to return_index.
				index_extra = 0

				for value_index, value in enumerate(value_array):

					if is_event_meta:
						if value == 0:
							index_extra += 1
							continue

						proper_index = self.return_index(value_index-index_extra, summation_array)
						stimulus_name = self.stimuli[stimuli_type][proper_index]
					else:
						stimulus_name = self.stimuli[stimuli_type][value_index]

					row = [value]

					#Add the between group factors (need to be defined in the json file)
					for param in between_factor_list:

						if param == "Subject_type":
							row.append(sub.subj_type)
							continue

						try:
							row.append(json_data["Subjects"][sub.subj_type][sub.name][param])
						except (KeyError, IndexError, TypeError):
							print("Between subject paramter: ", param, " not defined in the json file")
							# Placeholder keeps the row as wide as the frame
							row.append(np.nan)

					#Add the within group factors (need to be defined in the json file)
					for param in within_factor_list:

						if param == "Stimuli_type":
							row.append(stimuli_type)
							continue

						try:
							# Use the stimulus resolved above: re-deriving it from the
							# raw value_index is wrong for the event indicators,
							# whose position is remapped through return_index.
							row.append(json_data["Stimuli"][stimuli_type][stimulus_name][param])
						except (KeyError, IndexError, TypeError):
							print("Within stimuli parameter: ", param, " not defined in the json file")
							# Placeholder keeps the row as wide as the frame
							row.append(np.nan)

					row.append(sub.name)
					row.append(stimulus_name)

					if np.isnan(value):
						print("The data being read for analysis contains null value: ", row)

					#Append the row to the pandas dataframe
					data.loc[len(data)] = row

		return data
Ejemplo n.º 7
0
    # perform rm anova for each stage type
    ph_part_dict = {}
    for key, df in zip(totals_dict.keys(), totals_dict.values()):
        print(key)

        # tidy data
        long_df = df.stack().reset_index()
        long_df.columns = stat_colnames
        part_df = long_df.query("%s == '%s'" % (time, part))

        # do anova
        part_rm = pg.rm_anova(dv=dep_var,
                              within=day,
                              subject=anim,
                              data=part_df)
        pg.print_table(part_rm)

        # do posthoc
        ph = pg.pairwise_tukey(dv=dep_var, between=day, data=part_df)
        pg.print_table(ph)
        ph_part_dict[key] = ph

        stage_test_dir = part_dir / key
        anova_file = stage_test_dir / "01_anova.csv"
        ph_file = stage_test_dir / "02_posthoc.csv"

        part_rm.to_csv(anova_file)
        ph.to_csv(ph_file)

    ph_part_df = pd.concat(ph_part_dict)
    ph_total_dict[part] = ph_part_df
Ejemplo n.º 8
0
    def perform_activity_anova(self, activity_intensity, data_type="percent"):
        """Plot and test a Group x Model mixed ANOVA for one activity intensity.

        Draws a point plot of the group means (one hue per Model), runs the
        mixed ANOVA, prints a short significance summary (alpha = 0.05 on the
        uncorrected p-values) and runs parametric and non-parametric
        Bonferroni-adjusted pairwise post-hoc tests.

        :argument
        -activity_intensity: name of the activity column to analyse; when
         data_type is "percent" a "%" is appended to form the column name
        -data_type: "percent" (uses self.df_percent) or "minutes"
         (uses self.df_mins)

        :returns
        -None; results are stored in self.aov, self.posthoc_para and
         self.posthoc_nonpara

        :raises
        -ValueError: if data_type is neither "percent" nor "minutes"
        """

        if data_type == "percent":
            df = self.df_percent
            activity_intensity = activity_intensity + "%"
        elif data_type == "minutes":
            df = self.df_mins
        else:
            # Previously `df` was left unbound here and the method failed later
            # with an unrelated-looking UnboundLocalError.
            raise ValueError(
                "data_type must be 'percent' or 'minutes', got {!r}".format(data_type))

        # PLOTTING ---------------------------------------------------------------------------------------------------
        # Point plot of group means on the current axes: one line per Model
        plt.title("Group x Model Mixed ANOVA: {} Activity".format(
            activity_intensity))

        sns.pointplot(data=df,
                      x="Group",
                      y=activity_intensity,
                      hue="Model",
                      ci=95,
                      dodge=False,
                      markers='o',
                      capsize=.1,
                      errwidth=1,
                      palette='Set1')
        plt.ylabel("{}".format(data_type.capitalize()))

        # STATISTICAL ANALYSIS ---------------------------------------------------------------------------------------
        print("\nPerforming Group x Model mixed ANOVA on {} activity.".format(
            activity_intensity))

        # Group x Model mixed ANOVA
        self.aov = pg.mixed_anova(dv=activity_intensity,
                                  within="Model",
                                  between="Group",
                                  subject="ID",
                                  data=df,
                                  correction='auto')
        pg.print_table(self.aov)

        def p_uncorrected(source):
            """Return the uncorrected p-value of the given ANOVA source row."""
            return self.aov.loc[self.aov["Source"] == source]["p-unc"].values[0]

        group_p = p_uncorrected("Group")
        model_p = p_uncorrected("Model")
        interaction_p = p_uncorrected("Interaction")

        print("ANOVA quick summary:")
        if model_p <= 0.05:
            print("-Main effect of Model (p = {})".format(round(model_p, 3)))
        else:
            print("-No main effect of Model")
        if group_p <= 0.05:
            print("-Main effect of Group (p = {})".format(round(group_p, 3)))
        else:
            print("-No main effect of Group")
        if interaction_p <= 0.05:
            print("-Signficiant Group x Model interaction (p = {})".format(
                round(interaction_p, 3)))
        else:
            print("-No Group x Model interaction")

        # Both post-hoc variants share the same design; only `parametric` differs
        posthoc_kwargs = dict(dv=activity_intensity,
                              subject='ID',
                              within="Model",
                              between='Group',
                              data=df,
                              padjust="bonf",
                              effsize="hedges")
        posthoc_para = pg.pairwise_ttests(parametric=True, **posthoc_kwargs)
        posthoc_nonpara = pg.pairwise_ttests(parametric=False, **posthoc_kwargs)

        self.posthoc_para = posthoc_para
        self.posthoc_nonpara = posthoc_nonpara

        pg.print_table(posthoc_para)
Ejemplo n.º 9
0
# Log-transform the values, drop index level 2 and reshape to long format
# (stack moves the column labels into the rows; reset_index turns every index
# level into a regular column).
stats_spec_df = np.log10(nrem_mean)
stats_spec_df.index = stats_spec_df.index.droplevel(2)
stats_spec_df = stats_spec_df.stack().reset_index()

# Columns are addressed by position.
# NOTE(review): the animal / day / frequency / power order is assumed from the
# variable names - confirm against the layout of nrem_mean.
anim_col = stats_spec_df.columns[0]
day_col = stats_spec_df.columns[1]
freq_col = stats_spec_df.columns[2]
power_col = stats_spec_df.columns[3]

# Mixed ANOVA with day as within factor and frequency as between factor
# (the result is named spec_rm but comes from pg.mixed_anova)
spec_rm = pg.mixed_anova(dv=power_col,
                         within=day_col,
                         between=freq_col,
                         subject=anim_col,
                         data=stats_spec_df)
pg.print_table(spec_rm)

spec_name = save_test_dir / "01_spec_anova.csv"
spec_rm.to_csv(spec_name)

# Q2 Does the number of episodes change between days?
# Repeated two-way ANOVA of Count ~ Time*day | anim

count_stats_df = long_frag.copy()

# anim_col and day_col are rebound here to the columns of the new frame
anim_col = count_stats_df.columns[0]
time_col = count_stats_df.columns[1]
day_col = count_stats_df.columns[2]
count_col = count_stats_df.columns[3]
mean_col = count_stats_df.columns[4]
Ejemplo n.º 10
0
        os.mkdir(marker_test_dir)

    count_dir = marker_test_dir / "01_count"
    mean_dir = marker_test_dir / "02_mean"
    hist_dir = marker_test_dir / "03_hist"
    for dir in [count_dir, mean_dir, hist_dir]:
        if not os.path.exists(dir):
            os.mkdir(dir)

    curr_count = count_data_dict[curr_label]
    curr_mean = mean_data_dict[curr_label]
    curr_hist = hist_data_dict[curr_label]

    count = count_cols[-1]
    count_anova = pg.anova(dv=count, between=condition_col, data=curr_count)
    pg.print_table(count_anova)
    count_ph = pg.pairwise_tukey(dv=count,
                                 between=condition_col,
                                 data=curr_count)
    pg.print_table(count_ph)
    count_anova.to_csv(count_dir / anova_str)
    count_ph.to_csv(count_dir / ph_str)
    count_stats_dict[curr_label] = count_ph

    mean = mean_cols[-1]
    mean_anova = pg.anova(dv=mean, between=condition_col, data=curr_mean)
    pg.print_table(mean_anova)
    mean_ph = pg.pairwise_tukey(dv=mean, between=condition_col, data=curr_mean)
    pg.print_table(mean_ph)
    mean_anova.to_csv(mean_dir / anova_str)
    mean_ph.to_csv(mean_dir / ph_str)
Ejemplo n.º 11
0
import researchpy as rp
import statsmodels.api as sm
from statsmodels.formula.api import ols
import numpy as np
import pingouin as pg
import seaborn as sns
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# NOTE(review): `pd` and `plt` are used below but are not among the imports
# visible above - confirm they are imported elsewhere in the file.
df = pd.read_csv("Matrix.csv", index_col=None )

# Replace the "Average" column by its log(1 + x) transform, stored as "media_log"
logX = np.log1p(df["Average"])
df = df.assign(media_log=logX.values)
df.drop(["Average"], axis= 1, inplace= True)
# Bare expression: has no effect when the file runs as a script
df["Generator"]
factores=["Generator"]
plt.figure(figsize=(8, 6))
# For every factor: descriptive summary, one-way ANOVA, box plot and Tukey HSD
for i in factores:
    print(rp.summary_cont(df['media_log'].groupby(df[i])))
    ANV=pg.anova (dv='media_log', between=i, data=df, detailed=True)
    pg.print_table (ANV)

    ax=sns.boxplot(x=df["media_log"], y=df[i], data=df, palette="Set1")
    tukey = pairwise_tukeyhsd(endog = df["media_log"],     # Data
                          groups= df[i],   # Groups
                          alpha=0.05)          # Significance level
    # Saved before plot_simultaneous is called, i.e. right after the box plot
    plt.savefig('fig1.jpeg', bbox_inches='tight')
    tukey.plot_simultaneous(xlabel='Time', ylabel=i)    # Plot group confidence intervals
    # NOTE(review): x=49.57 and the y range are hard-coded; their meaning is
    # not visible here - confirm.
    plt.vlines(x=49.57,ymin=-0.5,ymax=4.5, color="red")
    plt.savefig('fig2.jpeg', bbox_inches='tight')
    print(tukey.summary())
    plt.show()
# Collect the awarded-publication and FWCI values of every (year, type_cat) pair.
# Grouping by the bare column name keeps `year` a scalar: a one-element list key
# yields 1-tuples on pandas >= 2.0, which would turn the labels into "(2019,)_1".
for year, df in metrics.groupby('Year'):
    for level, data in df.groupby('type_cat'):
        pubs_list[f'{year}_{level}'] = list(data['pubs_awarded'])
        fwci_list[f'{year}_{level}'] = list(data['fwci_awarded'])
# Generate separate dataframes; each list is wrapped in a Series so that
# columns of unequal length are padded with NaN
pubs = pd.DataFrame({k: pd.Series(v) for k, v in pubs_list.items()})
fwci = pd.DataFrame({k: pd.Series(v) for k, v in fwci_list.items()})

FileHandling.df_to_excel(data_frames=[pubs, fwci], sheetnames=['pubs', 'fwci'], output_path=f'{output_folder}metrics_per_year.xlsx')

# Collect cols for each level (matched as a substring of the column label)
levels = ['_1', '_2', '_3']

for level in levels:

    cols = [col for col in pubs.columns.tolist() if level in col]
    test_df = pubs[cols].melt(value_name='publications', var_name='Group')

    # One-way ANOVA (single between factor: Group)
    # NOTE(review): `export_filename` (and `tail` below) are kept as written;
    # confirm the installed pingouin version still accepts them.
    aov = pg.anova(data=test_df, dv='publications', between='Group',
                   export_filename=f'{output_folder}anova_pubs{level}.csv')
    pg.print_table(aov)

    # Bonferroni-corrected post hocs, no effect size computed
    posthoc = pg.pairwise_ttests(data=test_df, dv='publications', between='Group', within=None, parametric=True, alpha=.05, tail='two-sided', padjust='bonf', effsize='none', return_desc=False, export_filename=f'{output_folder}bonf_pubs{level}.csv')

    # Pretty printing of table
    pg.print_table(posthoc, floatfmt='.3f')

Ejemplo n.º 13
0
import pandas as pd
import pingouin as pg

## Load data
datafile = "tmp.csv"
df = pd.read_csv(datafile)

## Compute a two-way repeated-measures ANOVA
## (within factors: presentation_condition and source_condition)
paovm = pg.rm_anova(data=df,
                    dv='fraction_correct',
                    within=['presentation_condition', 'source_condition'],
                    subject='subject',
                    correction='auto',
                    detailed=True,
                    export_filename='tmpStts')
# NOTE(review): pg.sphericity is given the ANOVA result table here rather than
# the raw data frame - confirm this is the intended input.
print("=== Pingouin ANOVA === sphericity: ", pg.sphericity(paovm))
pg.print_table(paovm)
Ejemplo n.º 14
0
         color="blue",
         label="AM",
         linestyle="none")
# Label, save and show the current figure
plt.xlabel("sujet")
plt.ylabel("différence de cadence de modulation (%)")
plt.xticks(subject)
plt.legend(loc=0)
plt.savefig(os.path.join(path_fig, "seuils_discrimination.png"))
plt.show()

#%% ANOVA of the adaptive thresholds
import pingouin as pg

data_adapt = pd.read_csv("seuils_adaptatifs.txt", index_col=0)
data_discr = pd.read_csv("seuils_discrimination.txt", index_col=0)

# Split the adaptive thresholds by modulation type (AM / FM)
adapt_am = data_adapt[data_adapt.modulation_type == "AM"]
adapt_fm = data_adapt[data_adapt.modulation_type == "FM"]

# One-way ANOVA of the threshold ("seuil") with subject as the between factor
aov_adapt_am = pg.anova(data=adapt_am, dv="seuil", between="subject")
aov_adapt_fm = pg.anova(data=adapt_fm, dv="seuil", between="subject")

pg.print_table(aov_adapt_am)
pg.print_table(aov_adapt_fm)

#%% t-test of the discrimination thresholds

# Paired, one-sided t-test between am_discr and fm_discr
# NOTE(review): am_discr / fm_discr are not defined in this excerpt - confirm
# they are built earlier in the file.
discr_t_test = pg.ttest(x=am_discr, y=fm_discr, paired=True, tail="one-sided")
pg.print_table(discr_t_test)

#discr_t_test.to_excel("t_test_seuils_discrimination.xlsx")
Ejemplo n.º 15
0
# Directory that collects the per-marker test outputs.
# NOTE(review): assumes save_test_dir is a pathlib.Path (it is combined with "/").
marker_test_dir = save_test_dir / "01_markers"
# exist_ok=True replaces the separate exists()/mkdir() pair, which could fail
# if the directory appeared between the check and the creation.
marker_test_dir.mkdir(exist_ok=True)

# Post hoc table of every marker, keyed by marker label.
marker_ph_dict = {}
for marker_label, marker_df in marker_dict.items():

    print(marker_label)
    # ANOVA of dep_var across the levels of condition_col
    curr_anova_marker = pg.anova(
        dv=dep_var,
        between=condition_col,
        data=marker_df
    )
    pg.print_table(curr_anova_marker)

    # Tukey pairwise comparisons between the same condition levels
    curr_ph_marker = pg.pairwise_tukey(
        dv=dep_var,
        between=condition_col,
        data=marker_df
    )
    pg.print_table(curr_ph_marker)
    marker_ph_dict[marker_label] = curr_ph_marker

    # Save both tables in a sub-directory named after the marker
    label_test_dir = marker_test_dir / marker_label
    label_test_dir.mkdir(exist_ok=True)
    curr_anova_marker.to_csv(label_test_dir / anova_str)
    curr_ph_marker.to_csv(label_test_dir / ph_str)
    crosstab, res = researchpy.crosstab(dfExpTrail['hasAvoidPoint'],
                                        dfExpTrail['decisionSteps'],
                                        test="chi-square")

    print(crosstab)

    # Compute the two-way mixed-design ANOVA
    calAnova = 0
    if calAnova:
        import pingouin as pg
        aov = pg.mixed_anova(dv='ShowCommitmentPercent',
                             within='decisionSteps',
                             between='participantsType',
                             subject='name',
                             data=statDF)
        pg.print_table(aov)

        posthocs = pg.pairwise_ttests(dv='ShowCommitmentPercent',
                                      within='decisionSteps',
                                      between='participantsType',
                                      subject='name',
                                      data=statDF,
                                      within_first=0)
        pg.print_table(posthocs)

    VIZ = 0
    if VIZ:
        import seaborn as sns
        ax = sns.barplot(x="decisionSteps",
                         y="ShowCommitmentPercent",
                         hue="participantsType",
    def group_by_intensity_anova(self, model_comparison, data_type="percent", use_normed=False):
        """Run a Group x Intensity mixed ANOVA for one model comparison.

        Picks the rows that belong to ``model_comparison`` out of the source
        dataframe, melts the four intensity columns into long format, plots the
        values by group and by intensity with seaborn, then runs a pingouin
        mixed ANOVA (within=INTENSITY, between=GROUP, subject=ID) followed by
        pairwise t-tests.

        :argument
        -model_comparison: comparison to analyse; one of "Wrist-Ankle", "Wrist-HR",
                           "Wrist-HRAcc", "Ankle-HR", "Ankle-HRAcc", "HR-HRAcc"
        -data_type: 'minutes' or 'percent'; selects which intensity columns are used
        -use_normed: if True the data is read from self.norm_df, otherwise from self.df

        :returns
        -(aov, posthoc): the tables returned by pg.mixed_anova and pg.pairwise_ttests

        :raises
        -ValueError: if model_comparison or data_type is not one of the values above
        """

        # DATA FORMATTING ---------------------------------------------------------------------------------------------
        source_df = self.norm_df if use_normed else self.df

        # Position of the requested comparison inside each block of six rows.
        # list.index raises ValueError for an unknown comparison name.
        comp_names = ["Wrist-Ankle", "Wrist-HR", "Wrist-HRAcc", "Ankle-HR", "Ankle-HRAcc", "HR-HRAcc"]
        row_int = comp_names.index(model_comparison)

        # Intensity columns for each supported data type
        intensity_cols = {
            "percent": ["SEDENTARY%", "LIGHT%", "MODERATE%", "VIGOROUS%"],
            "minutes": ["SEDENTARY", "LIGHT", "MODERATE", "VIGOROUS"],
        }
        if data_type not in intensity_cols:
            raise ValueError("data_type must be 'percent' or 'minutes', got {!r}".format(data_type))
        value_cols = intensity_cols[data_type]

        # Every 6th row starting at the comparison's offset (the offset used to be
        # fixed at 0, which always selected the first comparison).
        # NOTE(review): assumes the rows cycle through comp_names in this order - confirm.
        # .copy() so that adding the ID column never writes into self.df / self.norm_df.
        df = source_df.iloc[row_int::len(comp_names)][value_cols].copy()

        # Creates column in df of IDs (high-active participants first, then low-active)
        df["ID"] = self.high_active_ids + self.low_active_ids

        df_long = pd.melt(frame=df, id_vars="ID", var_name="INTENSITY", value_name="VALUE")

        # melt stacks one full copy of the participant list per intensity column,
        # so the per-participant group labels are repeated once per column.
        group_list = ["HIGH"] * len(self.high_active_ids) + ["LOW"] * len(self.low_active_ids)
        df_long["GROUP"] = group_list * len(value_cols)

        print(df_long)

        # DATA VISUALIZATION -----------------------------------------------------------------------------------------

        # Creates 1x2 subplots of group means
        plt.subplots(1, 2, figsize=(12, 7))
        plt.subplots_adjust(wspace=0.20)
        plt.suptitle("Group x Intensity Mixed ANOVA: {} "
                     "(normalized={})".format(model_comparison.capitalize(), use_normed))

        # Two activity level groups: one line for each intensity
        plt.subplot(1, 2, 1)
        sns.pointplot(data=df_long, x="GROUP", y="VALUE", hue="INTENSITY",
                      dodge=False, markers='o', capsize=.1, errwidth=1, palette='Set1')
        plt.ylabel("Difference ({})".format(data_type))
        plt.axhline(y=0, linestyle="dashed", color='black')

        # Four intensity groups: one line for each activity level group
        plt.subplot(1, 2, 2)
        sns.pointplot(data=df_long, x="INTENSITY", y="VALUE", hue="GROUP",
                      dodge=False, markers='o', capsize=.1, errwidth=1, palette='Set1')
        plt.ylabel("")
        plt.axhline(y=0, linestyle="dashed", color='black')

        # STATISTICAL ANALYSIS ---------------------------------------------------------------------------------------
        print("\nPerforming Group x Comparison mixed ANOVA using {} data "
              "for the {} model.".format(data_type, model_comparison))

        # Group x Intensity mixed ANOVA
        aov = pg.mixed_anova(dv="VALUE", within="INTENSITY", between="GROUP", subject="ID", data=df_long)
        # The table is printed in two parts.
        # NOTE(review): the slices 0:8 and 9: leave out the column at position 8 - confirm this is intended.
        pg.print_table(aov.iloc[:, 0:8])
        pg.print_table(aov.iloc[:, 9:])

        # Pairwise t-tests as post hoc analysis
        posthoc = pg.pairwise_ttests(dv="VALUE", within="INTENSITY", between='GROUP', subject='ID', data=df_long)
        pg.print_table(posthoc)

        return aov, posthoc
Ejemplo n.º 18
0
# Discard the rows that have no RMSE value, then store the cleaned table on disk.
df_stats = df_stats.dropna(subset=["RMSE"])
df_stats.to_pickle("stats_df_1.pkl")

# ANOVA of RMSE with two between factors.
aov = pg.anova(
    data=df_stats,
    dv="RMSE",
    between=["EMG_objective", "co_contraction_level"],
)

# Pairwise t-tests on the same two factors, Bonferroni-adjusted.
ptt = pg.pairwise_ttests(
    data=df_stats,
    dv="RMSE",
    between=["co_contraction_level", "EMG_objective"],
    padjust="bonf",
)

# Print both tables rounded to three decimals.
pg.print_table(aov.round(3))
pg.print_table(ptt.round(3))

# Figure of RMSE on force as a function of co-contraction level (Fig. 7)
import matplotlib

# Legend handle size, set through a single rcParams update.
matplotlib.rcParams.update({
    "legend.handlelength": 4,
    "legend.handleheight": 2.25,
})
seaborn.set_style("whitegrid")

# Five-colour "YlOrRd" palette whose last entry is replaced by a custom RGB triple.
cp = seaborn.color_palette("YlOrRd", 5)
cp[-1] = (0, 102 / 255, 153 / 255)

# Keep only the rows of the "force" component at the "high" weight level.
plotpd = RMSEtrack_pd[
    (RMSEtrack_pd["component"] == "force")
    & (RMSEtrack_pd["weight_level"] == "high")
]

ax = seaborn.boxplot(
    y=plotpd["RMSE"],
Ejemplo n.º 19
0
                             "03_analysis_outputs/05_figures/00_csvs/03_fig3")
# File names of the two tables written at the end of this section
anova_csv = "01_anova.csv"
ph_csv = "02_posthoc.csv"

# Reshape the table to long format: one row per stacked value, with the
# columns renamed to stat_colnames.
test_df = hourly_sleep_prop
long_df = test_df.stack().reset_index()
long_df.columns = stat_colnames

# Output directory; created here so that the to_csv calls below cannot fail
# on a missing folder.
# NOTE(review): assumes save_test_dir is a pathlib.Path (it is combined with "/").
hourly_test_dir = save_test_dir / "hour_prop"
hourly_test_dir.mkdir(parents=True, exist_ok=True)

# prop 2 way rm
test_rm = pg.rm_anova2(dv=dep_var,
                       within=[day_col, hour_col],
                       subject=anim,
                       data=long_df)
pg.print_table(test_rm)

# prop post hoc: one Tukey test between the levels of day_col for each hour
ph_dict = {}
for hour in hours:
    print(hour)
    # Boolean mask instead of a %-formatted query string: works for any column
    # name and for non-string hour values.
    hour_df = long_df[long_df[hour_col] == hour]
    ph = pg.pairwise_tukey(dv=dep_var, between=day_col, data=hour_df)
    pg.print_table(ph)
    ph_dict[hour] = ph
# Concatenating a dict makes the hour the outer index level of the result
hourly_ph_df = pd.concat(ph_dict)

hr_anova_file = hourly_test_dir / anova_csv
hr_ps_file = hourly_test_dir / ph_csv
test_rm.to_csv(hr_anova_file)
hourly_ph_df.to_csv(hr_ps_file)