def __get_funcs(database, D1, D2, verbose):
    """Empirically construct the conditional probabilities P(phi(t) | phi(inf)).

    Parameters
    ----------
    database : object providing ``query_emr_data``
        Source of the EMR patient records.
    D1 : string
        Name of the first disease.
    D2 : string
        Name of the second disease.
    verbose :
        Forwarded unchanged to the ``JointAgeOfOnset`` constructor.

    Returns
    -------
    funcs :
        The result of ``JointAgeOfOnset.get_funcs()``. According to the
        original documentation this is a 4 x 4 numpy array of scipy interp1d
        objects, where each funcs[i, j] is a 1-D interpolated function
        representing the P(phi(t) | phi(inf)) conditional probabilities.

        One index selects the phi(t) phenotype and the other the phi(inf)
        phenotype.
        NOTE(review): the original docstring labelled both of them "j";
        confirm against JointAgeOfOnset which of i / j is phi(t).

        Possible values of index i and index j are:
            0 : PHI0  = "affected by neither D1 or D2"
            1 : PHI1  = "affected by D1 but not D2"
            2 : PHI2  = "affected by D2 but not D1"
            3 : PHI12 = "affected by both D1 and D2"
    """
    # Records of every patient diagnosed with D1, with D2, or with both.
    patients_with_either = database.query_emr_data([D1, D2], OR_match=True)

    return JointAgeOfOnset(patients_with_either, verbose).get_funcs()
def setup_log_likelihood_func(self, database, D1, D2, tau1, tau2,
                              overlap_type, threshold_type,
                              prevalence_file=None, norm_prval_method=None):
    """Setup the log-likelihood function and its derivative.

    Parameters
    ----------
    database : EMRDatabase class object
        This class allows querying EMR data by disease, gender, and
        ethnicity criteria.
    D1 : string
        Name of the first disease.
    D2 : string
        Name of the second disease.
    tau1 : integer
        Model's parameter indicating the minimum number of deleterious
        mutations in S1 or S12.
    tau2 : integer
        Model's parameter indicating the minimum number of deleterious
        mutations in S2 or S12.
    overlap_type : ["independent", "cooperation", "competition"]
        Model's parameter describing the type of genetic overlap used in
        the model.
    threshold_type : ["sharp", "soft"]
        Model's parameter describing the type of genetic penetrance used
        in the model.
    prevalence_file : string, optional
        Location of the prevalence data file.
    norm_prval_method : [None, "rzhetsky", "max", "min", "avg", "wts_avg",
                         "sum"], optional
        Protocol to normalize the prevalence data.

    Notes
    -----
    Nothing is returned. The method stores ``tau1``, ``tau2``,
    ``overlap_type``, ``log_likelihood_func`` and ``log_likelihood_fprime``
    on ``self``. ``threshold_type`` is only passed on to
    ``GeneticPenetrance``; it is not stored on ``self``.

    Prevalence normalization is enabled only when both ``prevalence_file``
    and ``norm_prval_method`` are given; otherwise ``None`` is passed as the
    normalization function.
    """
    # Query patients data
    D1orD2_patients = database.query_emr_data([D1, D2], OR_match=True)
    noD1D2_patients = database.query_emr_data(["not " + D1, "not " + D2])

    # Conditional probabilities: P(phi(t) | phi(infty))
    joint_age_of_onset = JointAgeOfOnset(D1orD2_patients)
    joint_age_of_onset_funcs = joint_age_of_onset.get_funcs()

    # Patient counts as a distribution of the final age value.
    joint_final_age = JointFinalAge(D1orD2_patients, noD1D2_patients)
    final_age_array = joint_final_age.get_final_age_array()
    patient_counts = joint_final_age.get_patient_counts()

    # Setup the genetic_penetrance model to compute the age-integrated
    # phenotype probabilities: P(phi(infty) ; rho1, rho2, rho12)
    genetic_penetrance = GeneticPenetrance(tau1, tau2, overlap_type,
                                           threshold_type)

    # Create a function that will normalize the raw EMR data to match the
    # general population disease prevalence.
    # Identity comparison is the correct way to test against the None
    # singleton (the original used ``!= None``).
    if prevalence_file is not None and norm_prval_method is not None:
        prval_norm_func = create_prval_norm_func(
            D1, D2,
            database.get_disease2count(),
            database.get_tot_patient_count(),
            prevalence_file,
            norm_prval_method)
    else:
        prval_norm_func = None

    # Function to evaluate log-likelihood at given rho1, rho2 and rho12.
    log_likelihood_func = create_log_likelihood_func(
        genetic_penetrance, joint_age_of_onset_funcs,
        patient_counts, final_age_array, prval_norm_func)

    # Function to evaluate the derivative of the log-likelihood with
    # respect to rho1, rho2 and rho12.
    log_likelihood_fprime = create_log_likelihood_fprime(
        genetic_penetrance, joint_age_of_onset_funcs,
        patient_counts, final_age_array, prval_norm_func)

    self.tau1 = tau1
    self.tau2 = tau2
    self.overlap_type = overlap_type
    self.log_likelihood_func = log_likelihood_func
    self.log_likelihood_fprime = log_likelihood_fprime
def setup_log_likelihood_func(self, database, D1, D2, tau1, tau2,
                              overlap_type, threshold_type,
                              prevalence_file=None, norm_prval_method=None):
    """Setup the log-likelihood function and its derivative.

    Parameters
    ----------
    database : EMRDatabase class object
        This class allows querying EMR data by disease, gender, and
        ethnicity criteria.
    D1 : string
        Name of the first disease.
    D2 : string
        Name of the second disease.
    tau1 : integer
        Model's parameter indicating the minimum number of deleterious
        mutations in S1 or S12.
    tau2 : integer
        Model's parameter indicating the minimum number of deleterious
        mutations in S2 or S12.
    overlap_type : ["independent", "cooperation", "competition"]
        Model's parameter describing the type of genetic overlap used in
        the model.
    threshold_type : ["sharp", "soft"]
        Model's parameter describing the type of genetic penetrance used
        in the model.
    prevalence_file : string, optional
        Location of the prevalence data file.
    norm_prval_method : [None, "rzhetsky", "max", "min", "avg", "wts_avg",
                         "sum"], optional
        Protocol to normalize the prevalence data.

    Notes
    -----
    The method returns nothing. Its results are stored on ``self`` as
    ``tau1``, ``tau2``, ``overlap_type``, ``log_likelihood_func`` and
    ``log_likelihood_fprime``. ``threshold_type`` is consumed by
    ``GeneticPenetrance`` only and is not kept as an attribute.

    The prevalence normalization function is built only when both
    ``prevalence_file`` and ``norm_prval_method`` are supplied; in every
    other case ``None`` is handed to the likelihood factories.
    """
    # Query patients data
    D1orD2_patients = database.query_emr_data([D1, D2], OR_match=True)
    noD1D2_patients = database.query_emr_data(["not " + D1, "not " + D2])

    # Conditional probabilities: P(phi(t) | phi(infty))
    joint_age_of_onset = JointAgeOfOnset(D1orD2_patients)
    joint_age_of_onset_funcs = joint_age_of_onset.get_funcs()

    # Patient counts as a distribution of the final age value.
    joint_final_age = JointFinalAge(D1orD2_patients, noD1D2_patients)
    final_age_array = joint_final_age.get_final_age_array()
    patient_counts = joint_final_age.get_patient_counts()

    # Setup the genetic_penetrance model to compute the age-integrated
    # phenotype probabilities: P(phi(infty) ; rho1, rho2, rho12)
    genetic_penetrance = GeneticPenetrance(tau1, tau2, overlap_type,
                                           threshold_type)

    # Create a function that will normalize the raw EMR data to match the
    # general population disease prevalence. It stays None unless both
    # prevalence settings are provided. ``is not None`` replaces the
    # original ``!= None`` so the check tests identity with the singleton.
    prval_norm_func = None
    if prevalence_file is not None and norm_prval_method is not None:
        prval_norm_func = create_prval_norm_func(
            D1, D2,
            database.get_disease2count(),
            database.get_tot_patient_count(),
            prevalence_file,
            norm_prval_method)

    # Both factories below receive exactly the same inputs.
    # Function to evaluate log-likelihood at given rho1, rho2 and rho12.
    log_likelihood_func = create_log_likelihood_func(
        genetic_penetrance, joint_age_of_onset_funcs,
        patient_counts, final_age_array, prval_norm_func)

    # Function to evaluate the derivative of the log-likelihood with
    # respect to rho1, rho2 and rho12.
    log_likelihood_fprime = create_log_likelihood_fprime(
        genetic_penetrance, joint_age_of_onset_funcs,
        patient_counts, final_age_array, prval_norm_func)

    self.tau1 = tau1
    self.tau2 = tau2
    self.overlap_type = overlap_type
    self.log_likelihood_func = log_likelihood_func
    self.log_likelihood_fprime = log_likelihood_fprime