def predict_pathology(self, c_Grp):
    """
    Predict the pathology at every timepoint for one tuning constant and
    export the predictions to a csv file.
    ---
    Inputs:
        c_Grp --> value forwarded unchanged to fitfunctions.predict_Lout as
                  its third positional argument
    ---
    Outputs:
        Xt_Grp --> list with one fitfunctions.predict_Lout result per entry
                   of `timepoints`, in the same order
    ---
    Side effects:
        prints the file suffix and writes
        '../output/predicted_pathology<suffix>.csv' (one 'WPI<t>' column per
        timepoint plus a 'regions' column filled from self.ROInames)
    """
    # NOTE(review): `condition` and `timepoints` are neither parameters nor
    # locals of this method; they have to be resolvable from an outer scope
    # when the method runs -- confirm where they are defined.
    if self.use_expression_values:
        template, extra_fields = "_{}_{}_{}", ('_SNCA',)
    else:
        template, extra_fields = "_{}_{}", ()
    suffix = template.format(self.seed, condition, *extra_fields)

    Xo = fitfunctions.make_Xo(ROI=self.seed, ROInames=self.ROInames)
    print("suffix is ", suffix)

    # One prediction per timepoint, all of them with the same constant c_Grp.
    Xt_Grp = []
    for timepoint in timepoints:
        Xt_Grp.append(
            fitfunctions.predict_Lout(self.l_out, Xo, c_Grp, timepoint))

    # Transposing turns the list of per-timepoint predictions into one
    # column per timepoint.
    column_labels = ['WPI{}'.format(timepoint) for timepoint in timepoints]
    data_to_export = pd.DataFrame(np.transpose(Xt_Grp),
                                  columns=column_labels)
    data_to_export['regions'] = self.ROInames

    output_file = '../output/predicted_pathology{}.csv'.format(suffix)
    data_to_export.to_csv(output_file)
    return Xt_Grp
def c_fit_individual(ind_patho, L_out, tp, c_rng, seed, roi_names):
    """
    Fit the tuning constant c separately for every animal.

    For each timepoint and each animal, every candidate c of c_rng is tried:
    the prediction predict_Lout(L_out, Xo, c, t=time) is log10-transformed
    and correlated (Pearson) with the log10 of the quantified pathology of
    that animal. The c giving the highest r is kept together with that r.

    To use with a DataFrame whose columns are a two-level MultiIndex: first
    level = timepoint as a string ('1', '3', '6', ...), second level = animal
    number as a string ('1', '2', '3', ...). A single animal is then
    addressed as ind_patho.loc[:, ('1', '1')].
    ---
    Inputs:
        ind_patho --> DataFrame of quantified pathology per animal (columns
                      as described above). Within one timepoint the animals
                      are expected to be labelled '1'..'n' without gaps.
        L_out --> Laplacian matrix, array
        tp --> Timepoints, list
        c_rng --> Candidate values of the tuning constant c
        seed --> Seed region, forwarded to make_Xo
        roi_names --> ROInames, forwarded to make_Xo
    ---
    Outputs:
        c_fit_animal --> Pandas DataFrame. Rows = 2 {c_fit, r}, columns =
        the columns of ind_patho. An animal for which no candidate c gives
        r > 0 keeps the initial values c_fit = 0 and r = 0.
    """
    Xo = make_Xo(seed, roi_names)

    c_fit_animal = pd.DataFrame(np.zeros((2, len(ind_patho.columns))),
                                columns=ind_patho.columns,
                                index=["c_fit", "r"])
    for time in tp:
        # The prediction depends on (c, time) only, never on the animal, so
        # it is computed once per timepoint instead of once per animal.
        log_predictions = [
            (c, np.log10(predict_Lout(L_out, Xo, c, t=time))) for c in c_rng
        ]

        n_animals = len(ind_patho.loc[:, str(time)].columns)
        for mouse in range(1, n_animals + 1):
            log_path = np.log10(ind_patho[str(time)][str(mouse)])
            # Regions with 0 pathology give log10 = -inf and are left out of
            # the correlation.
            mask = log_path != -np.inf
            exp_val = log_path[mask]

            local_c = 0
            local_r = 0
            for c, log_prediction in log_predictions:
                r, _ = stats.pearsonr(exp_val, log_prediction[mask])
                # Strict comparison: the first c reaching the best r wins,
                # and a NaN r never replaces the current best.
                if r > local_r:
                    local_r = r
                    local_c = c

            c_fit_animal.loc["c_fit", (str(time), str(mouse))] = local_c
            c_fit_animal.loc["r", (str(time), str(mouse))] = local_r
    return c_fit_animal
def extract_c_and_r_iter(log_path, L_out, tp, seed, c_rng, roi_names):
    """
    Extract, for every timepoint, the Pearson r obtained with each candidate
    c of the iterative model.

    The iterative model chains the timepoints: the first timepoint t is
    predicted as expm(-L_out * c * t) . Xo; every later timepoint is
    predicted as expm(-L_out * c * t) . X_prev + X_prev, where X_prev is the
    prediction of the previous timepoint computed with that timepoint's best
    c (the c with the highest r). For tp = [1, 3, 6] this is exactly the
    chain Xo -> MPI 1 -> MPI 3 -> MPI 6.
    ---
    Inputs:
        log_path --> log10 of grp_mean. Grp_mean is the DataFrame with mean
                     pathology per group, timepoints and regions. Column
                     number k is compared with timepoint tp[k].
        L_out --> Laplacian matrix, array
        tp --> Timepoints, list, in chronological order
        seed --> Seed region, forwarded to fitfunctions.make_Xo
        c_rng --> Candidate values of the tuning constant c
        roi_names --> ROInames, forwarded to fitfunctions.make_Xo
    ---
    Outputs:
        extracted --> Pandas DataFrame with multi-indexed columns
        (MPI, Condition). For each timepoint, column "c" holds the candidate
        c values and column "r" the corresponding r values; one row per
        candidate c.
    ---
    Raises:
        IndexError if an r value of a timepoint is NaN (no maximum can be
        located), as in the previous implementation.
    """
    # Kept for backward compatibility only: this function has always
    # published the best c of MPI 1 and MPI 3 as module globals. Nothing in
    # the function itself reads them any more.
    global best_c_per_mpi1, best_c_per_mpi3

    Xo = fitfunctions.make_Xo(seed, roi_names)
    # Exclusion mask; we do not count the regions with 0 path
    mask = log_path != -np.inf

    col = pd.MultiIndex.from_tuples(
        [(str(time), output) for time in tp for output in ("c", "r")],
        names=["MPI", "Condition"])
    c_values = np.asarray(c_rng, dtype=float)
    # Float zeros: the frame receives float c and r values.
    extracted = pd.DataFrame(0.0,
                             index=list(range(len(c_values))),
                             columns=col)
    if c_values.size == 0:
        return extracted

    # Prediction of the previous timepoint at its best c; None until the
    # first timepoint has been fitted.
    previous = None
    for position, timepoint in enumerate(tp):
        region_mask = mask.iloc[:, position]
        exp_val = log_path.iloc[:, position][region_mask].values

        if previous is None:
            start, carry = Xo, 0
        else:
            start, carry = previous, previous

        r_values = np.empty(len(c_values))
        for c_pos, c_value in enumerate(c_values):
            Xt = np.dot(expm(-L_out * c_value * timepoint), start) + carry
            predict_val = np.log10(Xt[region_mask])
            r_values[c_pos], _ = pearsonr(exp_val, predict_val)

        # Whole columns are assigned directly on the frame; writing through
        # extracted[col].loc[i] is a chained assignment that pandas does not
        # guarantee to propagate back to the frame.
        label = str(timepoint)
        extracted[(label, "c")] = c_values
        extracted[(label, "r")] = r_values

        # First c reaching the highest r.
        best_pos = np.where(r_values == np.max(r_values))[0][0]
        best_c = c_values[best_pos]
        previous = np.dot(expm(-L_out * best_c * timepoint), start) + carry

        if timepoint == 1:
            best_c_per_mpi1 = best_c
        elif timepoint == 3:
            best_c_per_mpi3 = best_c
    return extracted
def predict_pathology_iter(self, timepoints):
    """
    Run the iterative model, compare it with the experimental data and
    export predictions, per-timepoint fits, plots and statistics.

    For every timepoint the best c (highest Pearson r) is taken from
    extract_c_and_r_iter. The first timepoint t is predicted as
    expm(-l_out * c * t) . Xo; each later one as
    expm(-l_out * c * t) . X_prev + X_prev, X_prev being the prediction of
    the previous timepoint. For timepoints = [1, 3, 6] this is the chain
    Xo -> MPI 1 -> MPI 3 -> MPI 6.
    ---
    Inputs:
        timepoints --> Timepoints, list, in chronological order. Entry
                       number k is compared with column k of self.grp_mean.
    ---
    Outputs:
        None. Everything is written below '../Iterative_Model/':
            iter_predicted_pathology<suffix>.csv --> predictions
            iter_model_output_MPI<t><suffix>.csv --> log10 data, log10
                prediction, linear regression and residual per region
            plots/iter_Predicted_VS_Path_MPI<t><suffix>.png/.pdf
            plots/Density_vs_residuals/Density_VS_residual<suffix>.png/.pdf
            stats<suffix>.csv --> Pearson r, p value and Bonferroni-adjusted
                p value per timepoint
    """
    # Initialization
    if self.use_expression_values:
        gene_exp = '_SNCA'
        suffix = "_{}{}".format(self.seed, gene_exp)
    else:
        suffix = "_{}".format(self.seed)

    # exist_ok avoids the platform-specific exception handling (WindowsError
    # is only defined on Windows) and the nested call also creates the plot
    # folders that savefig needs further down.
    os.makedirs('../Iterative_Model/plots/Density_vs_residuals/',
                exist_ok=True)

    Xo = fitfunctions.make_Xo(ROI=self.seed, ROInames=self.ROInames)
    print("suffix is ", suffix)
    c_r = extract_c_and_r_iter(log_path=np.log10(self.grp_mean),
                               L_out=self.l_out,
                               tp=timepoints,
                               seed=self.seed,
                               c_rng=self.c_rng,
                               roi_names=self.ROInames)

    Xt_Grp = []
    for i in timepoints:
        r_column = c_r[str(i), "r"]
        # First c reaching the highest r for this timepoint.
        idx = np.where(r_column == np.max(r_column))[0][0]
        best_c_per_mpi = c_r[str(i), "c"].iloc[idx]
        kernel = expm(-self.l_out * best_c_per_mpi * i)
        if not Xt_Grp:
            Xt = np.dot(kernel, Xo) + 0
        else:
            Xt = np.dot(kernel, Xt_Grp[-1]) + Xt_Grp[-1]
        Xt_Grp.append(Xt)

    data_to_export = pd.DataFrame(
        np.transpose(Xt_Grp),
        columns=['MPI{}'.format(i) for i in timepoints])
    data_to_export['regions'] = self.ROInames
    data_to_export.to_csv(
        '../Iterative_Model/iter_predicted_pathology{}.csv'.format(suffix))

    stats_df = []
    # Per-timepoint fit tables, kept in memory for the plots below.
    fitted = []
    print('---------------------------------------------------')
    print('------------------ITERATIVE MODEL------------------')
    print('---------------------------------------------------\n')
    for M, timepoint in enumerate(timepoints):
        Df = pd.DataFrame(
            {
                "experimental_data":
                np.log10(self.grp_mean.iloc[:, M]).values,
                "ndm_data": np.log10(Xt_Grp[M])
            },
            index=self.grp_mean.index)  # Runtime Warning
        # exclude regions with 0 pathology at each time point for purposes
        # of computing fit. NaN has to be tested with notna(): a comparison
        # `!= np.nan` is True for every value, NaN included.
        mask = ((Df["experimental_data"] != -np.inf)
                & (Df['ndm_data'] != -np.inf)
                & Df['ndm_data'].notna())
        # Explicit copy: columns are added to the filtered table below.
        Df = Df[mask].copy()

        pearson_r, pearson_p = pearsonr(Df["experimental_data"],
                                        Df["ndm_data"])
        cor = {
            # The timepoint itself, not its position in `timepoints`.
            "MPI": "%s" % timepoint,
            "Pearson r": pearson_r,
            "p_value": pearson_p
        }
        stats_df.append(cor)
        print('---------------------------------------------------')
        print("Month Post Injection %s" % timepoint)
        print("Number of Regions used: ", Df.shape[0])
        print("Pearson correlation coefficient", cor['Pearson r'])
        print('Pvalue (non corrected)', cor['p_value'])
        print('---------------------------------------------------\n')

        slope, intercept, _, _, _ = linregress(x=Df['ndm_data'],
                                               y=Df['experimental_data'])
        Df['linreg_data'] = slope * Df['ndm_data'] + intercept
        Df['residual'] = Df['experimental_data'] - Df['linreg_data']
        # Saving the data as csv
        Df.to_csv('../Iterative_Model/iter_model_output_MPI{}{}.csv'.format(
            timepoint, suffix))
        # Same layout as the csv read back from disk: regions as a regular
        # 'region' column, default integer index.
        fitted.append((timepoint, Df.rename_axis('region').reset_index()))

    # Saving the lollipop plots
    for time, mpi in fitted:
        plt.figure()
        # NOTE(review): the 0.04 offset presumably keeps the dotted line
        # from running into the scatter marker -- confirm.
        plt.vlines(mpi["ndm_data"],
                   mpi['linreg_data'],
                   mpi['linreg_data'] + mpi['residual'] - 0.04,
                   lw=0.8,
                   color='blue',
                   linestyles="dotted",
                   label="Residual")
        sns.regplot(x=mpi["ndm_data"],
                    y=mpi["experimental_data"],
                    data=mpi,
                    scatter_kws={
                        's': 40,
                        'facecolor': 'blue'
                    })
        plt.xlabel("Log(Predicted)")
        plt.ylabel("Log(Path)")
        plt.title(
            "Iterative Model - Month Post Injection {} - Conditions{}".format(
                time, suffix))
        plt.legend()
        plt.savefig(
            '../Iterative_Model/plots/iter_Predicted_VS_Path_MPI{}{}.png'.
            format(time, suffix),
            dpi=300)
        plt.savefig(
            '../Iterative_Model/plots/iter_Predicted_VS_Path_MPI{}{}.pdf'.
            format(time, suffix),
            dpi=300)
        plt.show()

    # Saving the density Vs Residual plots
    plt.figure()
    for time, mpi in fitted:
        sns.kdeplot(x='residual', data=mpi, label='{} MPI'.format(time))
    plt.title("Iterative Model - Density(residual) - Conditions{}".format(
        suffix))
    plt.legend(title='Timepoints')
    plt.savefig(
        '../Iterative_Model/plots/Density_vs_residuals/Density_VS_residual{}.png'
        .format(suffix),
        dpi=300)
    # Saved as .pdf as well, like the lollipop plots (the .png used to be
    # written twice).
    plt.savefig(
        '../Iterative_Model/plots/Density_vs_residuals/Density_VS_residual{}.pdf'
        .format(suffix),
        dpi=300)
    plt.show()

    stats_df = pd.DataFrame(stats_df)
    # Bonferroni method for correction of pvalues
    _, stats_df['adj_p_value'], _, _ = multipletests(stats_df['p_value'],
                                                     method="bonferroni")
    stats_df.to_csv('../Iterative_Model/stats{}.csv'.format(suffix))
Lap = "" L_out = np.loadtxt(Laplacian) # Fit time scaling parameter # c_rng = np.arange(0.01, 10, step = 0.1) # Step =0.1 for a total length of 100 c_rng = np.linspace(start=0.01, stop=10, num=100) log_path = np.log10(Grp_mean) c_Grp = c_fit( log_path, L_out, tp, 'R CPu', c_rng, ROInames) # Returns a best fit number. For the 'R Cpu' returns 1.6245 ############################################################# ### Test model at observed points for group (NTG or G20) ### ############################################################# Xo = make_Xo('R CPu', ROInames) # Where we seed our pathology vulnerability = pd.DataFrame( 0, columns=["MPI 1", "MPI 3", "MPI 6"], index=Grp_mean.index) # To double check but mask can be removed Xt_Grp = [predict_Lout(L_out, Xo, c_Grp, i) for i in tp] r_SCc = pd.DataFrame(columns=["MPI", "Pearson r"]) r_SCp = pd.DataFrame( columns=["MPI", "p_value"]) # Result df to store our correlation coefficients p_values_cor = list() masks = dict() os.chdir(os.path.join(basedir, opdir, "diffmodel")) for M in range(0, len(tp)): # M iterates according to the number of timepoint Df = pd.DataFrame( {