def predict_pathology(self, c_Grp):
    """
    Predict pathology at every timepoint with one constant c and export it.
    ---
    Inputs:
    c_Grp --> Time-scaling constant, handed unchanged to fitfunctions.predict_Lout
    ---
    Outputs:
    Xt_Grp --> List holding one fitfunctions.predict_Lout result per entry of
    `timepoints`
    ---
    Notes:
    `condition` and `timepoints` are not parameters of this method; they are
    looked up in the surrounding scope at call time (not visible here).
    The predictions are also written, one column per timepoint plus a
    'regions' column, to ../output/predicted_pathology<suffix>.csv.
    """
    # Pick the file-name template; the SNCA tag is only added when expression
    # values are in use.
    if self.use_expression_values:
        template, extra = "_{}_{}_{}", ('_SNCA',)
    else:
        template, extra = "_{}_{}", ()
    suffix = template.format(self.seed, condition, *extra)

    seed_vector = fitfunctions.make_Xo(ROI=self.seed, ROInames=self.ROInames)

    print("suffix is ", suffix)

    # One prediction per timepoint, all sharing the same constant c_Grp.
    Xt_Grp = []
    for tp_value in timepoints:
        Xt_Grp.append(
            fitfunctions.predict_Lout(self.l_out, seed_vector, c_Grp,
                                      tp_value))

    # Transposed so that each timepoint becomes a column.
    column_labels = ['WPI{}'.format(tp_value) for tp_value in timepoints]
    table = pd.DataFrame(np.transpose(Xt_Grp), columns=column_labels)
    table['regions'] = self.ROInames

    output_path = '../output/predicted_pathology{}.csv'.format(suffix)
    table.to_csv(output_path)  # condition added

    return Xt_Grp
def c_fit_individual(ind_patho, L_out, tp, c_rng, seed, roi_names):
    """
    Fit the tuning constant c separately for every animal at every timepoint.

    For each (timepoint, animal) column of ind_patho, the log10 of the quantified
    pathology is correlated (Pearson) with the log10 of the prediction obtained for
    each candidate c; the c giving the highest r is kept together with that r.
    To use with a multiindex DataFrame with first column index (1,3,6) (MPI), second
    column index (1,2,3,...) (Number of animals used), where a specific column is
    addressed this way: ind_grp.loc[:, ('1', '1')]
    ---
    Inputs:
    ind_patho --> DataFrame of individual pathology; two-level columns whose labels are
    str(timepoint) and the animal number as a string ('1', '2', ...)
    L_out --> Laplacian matrix, array (passed to predict_Lout)
    tp --> Timepoints, list
    c_rng --> Candidate values of the constant c that tunes the time scale
    seed --> Seed region, passed with roi_names to make_Xo
    roi_names --> ROInames
    ---
    Outputs:
    c_fit_animal --> Pandas DataFrame. Rows = 2 {c_fit, r}; columns = ind_patho.columns.
    A column keeps c_fit = 0 and r = 0 when no candidate c gives r > 0.
    """
    Xo = make_Xo(seed, roi_names)
    # Compute fit at each time point for range of time
    c_fit_animal = pd.DataFrame(np.zeros((2, len(ind_patho.columns))),
                                columns=ind_patho.columns,
                                index=["c_fit", "r"])
    for time in tp:
        animals = ind_patho.loc[:, str(time)]

        # The prediction depends only on (c, time), never on the animal, so
        # it is computed once per timepoint rather than once per animal.
        log_predictions = [(c, np.log10(predict_Lout(L_out, Xo, c, t=time)))
                           for c in c_rng]

        for mouse in range(1, len(animals.columns) + 1):
            log_path = np.log10(animals[str(mouse)])
            # Regions with 0 pathology give -inf after log10; leave them out.
            mask = log_path != -np.inf
            exp_val = log_path[mask]

            # Starting r at 0 means only strictly positive correlations can
            # be selected (unchanged from the original behaviour).
            local_c = 0
            local_r = 0
            for c, log_prediction in log_predictions:
                r, _ = stats.pearsonr(exp_val, log_prediction[mask])

                if r > local_r:
                    local_r = r
                    local_c = c

            c_fit_animal.loc["c_fit", (str(time), str(mouse))] = local_c
            c_fit_animal.loc["r", (str(time), str(mouse))] = local_r
    return c_fit_animal
Ejemplo n.º 3
0
def extract_c_and_r_iter(log_path, L_out, tp, seed, c_rng, roi_names):
    """
    Extract each c and r values corresponding while iterating on c.

    Iterative scheme: the prediction at timepoint 3 starts from the state reached
    at timepoint 1 with its best c, and the prediction at timepoint 6 starts from
    the state reached at timepoint 3 with its best c. Only timepoints equal to
    1, 3 and 6 are fitted; columns of any other timepoint stay at 0.
    ---
    Inputs:
    log_path --> log10 of grp_mean. Grp_mean is the Dataframe with mean pathology per group, timepoints and regions.
    Column k is compared with tp[k].
    L_out --> Laplacian matrix, array
    tp --> Timepoints, list
    seed --> Seed region, passed with roi_names to fitfunctions.make_Xo
    c_rng --> Candidate values of the constant c that tunes the time scale
    roi_names --> ROInames
    ---
    Outputs:
    extracted --> Pandas DataFrame with Multi-indexed columns. Contains for each timepoint c values and corresponding
    r values (one row per candidate c)
    ---
    Notes:
    The best c found for timepoints 1 and 3 is stored in the module-level names
    best_c_per_mpi1 / best_c_per_mpi3, as in the original implementation. Fitting
    timepoint 3 (or 6) therefore requires that timepoint 1 (and 3) was fitted
    earlier, in this call or a previous one.
    """
    # Kept as module globals so that code reading them elsewhere keeps working.
    global best_c_per_mpi1, best_c_per_mpi3
    Xo = fitfunctions.make_Xo(seed, roi_names)
    # Exclusion mask; we do not count the regions with 0 path
    mask = log_path != -np.inf

    multi = [(str(time), output) for time in tp for output in ("c", "r")]
    col = pd.MultiIndex.from_tuples(multi, names=["MPI", "Condition"])

    c_values = np.asarray(c_rng, dtype=float)
    # Float-initialised so c and r are stored without any dtype change.
    extracted = pd.DataFrame(0.0,
                             index=list(range(len(c_values))),
                             columns=col)

    for position, timepoint in enumerate(tp):
        # Starting state of this step; it does not depend on the candidate c,
        # so it is built once per timepoint instead of once per c.
        if timepoint == 1:
            start = Xo
            carried = 0
        elif timepoint == 3:
            start = np.dot(expm(-L_out * best_c_per_mpi1 * 1), Xo)
            carried = start
        elif timepoint == 6:
            state_1 = np.dot(expm(-L_out * best_c_per_mpi1 * 1), Xo)
            start = np.dot(expm(-L_out * best_c_per_mpi3 * 3),
                           state_1) + state_1
            carried = start
        else:
            continue

        region_mask = mask.iloc[:, position].values
        exp_val = log_path.iloc[:, position][region_mask].values

        r_values = np.empty(len(c_values))
        for c_pos, c_value in enumerate(c_values):
            Xt = np.dot(expm(-L_out * c_value * timepoint), start) + carried
            predict_val = np.log10(Xt[region_mask])
            r, _ = pearsonr(exp_val, predict_val)
            r_values[c_pos] = r

        # Assign whole columns on the frame itself. The previous chained form
        # (extracted[col].loc[i] = v) writes to an intermediate object and is
        # not guaranteed to reach `extracted` (never under Copy-on-Write).
        label = str(timepoint)
        extracted[(label, "c")] = c_values
        extracted[(label, "r")] = r_values

        if len(c_values) == 0:
            continue

        # First c reaching the highest r; NaN correlations are ignored, which
        # is what np.max on the pandas column did before.
        best_c = c_values[int(np.nanargmax(r_values))]
        if timepoint == 1:
            best_c_per_mpi1 = best_c
        elif timepoint == 3:
            best_c_per_mpi3 = best_c
    return extracted
Ejemplo n.º 4
0
def predict_pathology_iter(self, timepoints):
    """
    Run the iterative model, export its predictions, fit statistics and plots.

    Steps:
    1. Get the c / r table from extract_c_and_r_iter and, for each timepoint,
       take the c with the highest r.
    2. Predict pathology step by step: timepoint 1 starts from the seed vector,
       timepoint 3 from the timepoint-1 prediction, timepoint 6 from the
       timepoint-3 prediction.
    3. Per timepoint, correlate log10(experimental) with log10(predicted),
       fit a linear regression, and save the table with residuals as CSV.
    4. Save lollipop plots (png + pdf) and a residual density plot (png + pdf).
    5. Save Pearson r, p-values and Bonferroni-adjusted p-values as CSV.
    ---
    Inputs:
    timepoints --> Timepoints, list. Only the values 1, 3 and 6 are handled, and
    they must come in that order (3 reuses the first stored prediction, 6 the
    second one). Column k of self.grp_mean is compared with timepoints[k].
    ---
    Outputs:
    None. Every result is written under ../Iterative_Model/ and figures are shown
    with plt.show().
    """
    # Initialization
    if self.use_expression_values:
        gene_exp = '_SNCA'
        suffix = "_{}{}".format(self.seed, gene_exp)
    else:
        suffix = "_{}".format(self.seed)

    # Create the whole output tree in one go. exist_ok makes reruns harmless on
    # every platform (the former `except WindowsError` is undefined outside
    # Windows), and the plot folders used by savefig below now exist as well.
    os.makedirs('../Iterative_Model/plots/Density_vs_residuals',
                exist_ok=True)

    Xo = fitfunctions.make_Xo(ROI=self.seed, ROInames=self.ROInames)
    print("suffix is ", suffix)
    c_r = extract_c_and_r_iter(log_path=np.log10(self.grp_mean),
                               L_out=self.l_out,
                               tp=timepoints,
                               seed=self.seed,
                               c_rng=self.c_rng,
                               roi_names=self.ROInames)
    Xt_Grp = []
    for i in timepoints:
        idx = np.where(c_r[str(i), "r"] == np.max(c_r[str(i), "r"]))[0][0]
        best_c_per_mpi = c_r[str(i), "c"][idx]
        if i == 1:
            Xt = np.dot(expm(-self.l_out * best_c_per_mpi * 1), Xo) + 0
            Xt_Grp.append(Xt)
        elif i == 3:
            # Starts from the timepoint-1 prediction.
            Xt = np.dot(expm(-self.l_out * best_c_per_mpi * 3),
                        Xt_Grp[0]) + Xt_Grp[0]
            Xt_Grp.append(Xt)
        elif i == 6:
            # Starts from the timepoint-3 prediction.
            Xt = np.dot(expm(-self.l_out * best_c_per_mpi * 6),
                        Xt_Grp[1]) + Xt_Grp[1]
            Xt_Grp.append(Xt)

    data_to_export = pd.DataFrame(
        np.transpose(Xt_Grp), columns=['MPI{}'.format(i) for i in timepoints])
    data_to_export['regions'] = self.ROInames
    data_to_export.to_csv(
        '../Iterative_Model/iter_predicted_pathology{}.csv'.format(suffix))

    stats_df = []
    print('---------------------------------------------------')
    print('------------------ITERATIVE MODEL------------------')
    print('---------------------------------------------------\n')
    for M, timepoint in enumerate(timepoints):
        Df = pd.DataFrame(
            {
                "experimental_data": np.log10(self.grp_mean.iloc[:, M]).values,
                "ndm_data": np.log10(Xt_Grp[M])
            },
            index=self.grp_mean.index)  # Runtime Warning
        # exclude regions with 0 pathology at each time point for purposes of computing fit
        # NaN never compares equal to anything, so `!= np.nan` filtered nothing;
        # notna() performs the intended exclusion.
        mask = (Df["experimental_data"] != -np.inf) & (
            Df['ndm_data'] != -np.inf) & Df['ndm_data'].notna()

        # Independent copy: columns are added to it further down.
        Df = Df[mask].copy()

        fit = pearsonr(Df["experimental_data"], Df["ndm_data"])
        cor = {
            # The timepoint itself, not its position in the list.
            "MPI": "%s" % timepoint,
            "Pearson r": fit[0],
            "p_value": fit[1]
        }

        stats_df.append(cor)

        print('---------------------------------------------------')
        print("Month Post Injection %s" % timepoint)
        print("Number of Regions used: ", Df.shape[0])
        print("Pearson correlation coefficient", cor['Pearson r'])
        print('Pvalue (non corrected)', cor['p_value'])
        print('---------------------------------------------------\n')

        slope, intercept, r_value, p_value, std_err = linregress(
            x=Df['ndm_data'], y=Df['experimental_data'])
        Df['linreg_data'] = slope * Df['ndm_data'] + intercept
        Df['residual'] = Df['experimental_data'] - Df['linreg_data']
        # Saving the data as csv
        Df.to_csv('../Iterative_Model/iter_model_output_MPI{}{}.csv'.format(
            timepoint, suffix))

    # Saving the lollipop plots
    for time in timepoints:
        mpi = pd.read_csv(
            '../Iterative_Model/iter_model_output_MPI{}{}.csv'.format(
                time, suffix))
        mpi = mpi.rename(columns={'Unnamed: 0': 'region'})
        plt.figure()
        plt.vlines(mpi["ndm_data"],
                   mpi['linreg_data'],
                   mpi['linreg_data'] + mpi['residual'] - 0.04,
                   lw=0.8,
                   color='blue',
                   linestyles="dotted",
                   label="Residual")
        sns.regplot(x=mpi["ndm_data"],
                    y=mpi["experimental_data"],
                    data=mpi,
                    scatter_kws={
                        's': 40,
                        'facecolor': 'blue'
                    })
        plt.xlabel("Log(Predicted)")
        plt.ylabel("Log(Path)")
        plt.title(
            "Iterative Model - Month Post Injection {} - Conditions{}".format(
                time, suffix))
        plt.legend()

        plt.savefig(
            '../Iterative_Model/plots/iter_Predicted_VS_Path_MPI{}{}.png'.
            format(time, suffix),
            dpi=300)
        plt.savefig(
            '../Iterative_Model/plots/iter_Predicted_VS_Path_MPI{}{}.pdf'.
            format(time, suffix),
            dpi=300)

        plt.show()
    # Saving the density Vs Residual plots
    plt.figure()
    for time in timepoints:
        mpi = pd.read_csv(
            '../Iterative_Model/iter_model_output_MPI{}{}.csv'.format(
                time, suffix))
        mpi = mpi.rename(columns={'Unnamed: 0': 'region'})
        sns.kdeplot(x='residual', data=mpi, label='{} MPI'.format(time))
        plt.title("Iterative Model - Density(residual) - Conditions{}".format(
            suffix))
        plt.legend(title='Timepoints')

    plt.savefig(
        '../Iterative_Model/plots/Density_vs_residuals/Density_VS_residual{}.png'
        .format(suffix),
        dpi=300)
    # Second export is the pdf version, like the lollipop plots (the png was
    # previously written twice).
    plt.savefig(
        '../Iterative_Model/plots/Density_vs_residuals/Density_VS_residual{}.pdf'
        .format(suffix),
        dpi=300)
    plt.show()

    stats_df = pd.DataFrame(stats_df)
    # Bonferroni method for correction of pvalues
    _, stats_df['adj_p_value'], _, _ = multipletests(stats_df['p_value'],
                                                     method="bonferroni")

    stats_df.to_csv('../Iterative_Model/stats{}.csv'.format(suffix))
Ejemplo n.º 5
0
    Lap = ""
    L_out = np.loadtxt(Laplacian)

# Fit the time scaling parameter c.
# Candidate values: 100 evenly spaced points from 0.01 to 10 (both included).
# Earlier alternative kept for reference:
# c_rng = np.arange(0.01, 10, step = 0.1) # Step =0.1 for a total length of 100
c_rng = np.linspace(start=0.01, stop=10, num=100)

# log10 of the group means; entries equal to 0 become -inf.
log_path = np.log10(Grp_mean)
c_Grp = c_fit(
    log_path, L_out, tp, 'R CPu', c_rng,
    ROInames)  # Returns a best fit number. For the 'R Cpu' returns 1.6245

#############################################################
### Test model at observed points for group (NTG or G20)  ###
#############################################################
Xo = make_Xo('R CPu', ROInames)  # Where we seed our pathology
# Zero-filled table, one row per index entry of Grp_mean and one column per
# MPI label; presumably filled later in the script (not visible here).
vulnerability = pd.DataFrame(
    0, columns=["MPI 1", "MPI 3", "MPI 6"],
    index=Grp_mean.index)  # To double check but mask can be removed
# One prediction per timepoint in tp, all using the fitted constant c_Grp.
Xt_Grp = [predict_Lout(L_out, Xo, c_Grp, i) for i in tp]
# Empty result tables for the correlation coefficients and their p-values.
r_SCc = pd.DataFrame(columns=["MPI", "Pearson r"])
r_SCp = pd.DataFrame(
    columns=["MPI",
             "p_value"])  # Result df to store our correlation coefficients
p_values_cor = list()

masks = dict()
# From here on, relative paths resolve under <basedir>/<opdir>/diffmodel.
os.chdir(os.path.join(basedir, opdir, "diffmodel"))
for M in range(0, len(tp)):  # M iterates according to the number of timepoint
    Df = pd.DataFrame(
        {