def __get_counts(database, D1, D2, verbose):
    """Tally patients by final age for the disease pair (D1, D2).

    Parameters
    ----------
    database : object providing ``query_emr_data``
        Source of the EMR patient records.
    D1 : string
        Name of the first disease.
    D2 : string
        Name of the second disease.
    verbose : passed through unchanged to ``JointFinalAge``.

    Returns
    -------
    tuple
        ``(patient_counts, final_age_array)`` as produced by
        ``JointFinalAge.get_patient_counts()`` and
        ``JointFinalAge.get_final_age_array()``.
    """
    # Cohort with D1, with D2, or with both.
    affected = database.query_emr_data([D1, D2], OR_match=True)

    # Cohort with neither D1 nor D2.
    negated_terms = ["not " + D1, "not " + D2]
    unaffected = database.query_emr_data(negated_terms)

    # Patient counts as a distribution over the final age value,
    # separately for each of the 4 disease phenotypes.
    tally = JointFinalAge(affected, unaffected, verbose)
    ages = tally.get_final_age_array()
    counts = tally.get_patient_counts()
    return (counts, ages)
def __get_counts(database, D1, D2, verbose):
    """Return patient counts distributed over the final age value.

    Two cohorts are pulled from ``database``: patients having D1, D2
    or both, and patients having neither.  Both cohorts, together with
    ``verbose``, are handed to ``JointFinalAge``, which supplies the
    final-age array and the per-phenotype patient counts.

    Returns
    -------
    tuple
        ``(patient_counts, final_age_array)``.
    """
    # Patients with at least one of the two diseases.
    with_either = database.query_emr_data([D1, D2], OR_match=True)

    # Patients without D1 and without D2.
    with_neither = database.query_emr_data(
        ["not " + disease for disease in (D1, D2)])

    # Counts are split by final age, separately for each of the
    # 4 disease phenotypes.
    final_age_model = JointFinalAge(with_either, with_neither, verbose)
    age_axis = final_age_model.get_final_age_array()
    counts_by_age = final_age_model.get_patient_counts()

    return (counts_by_age, age_axis)
def setup_log_likelihood_func(self, database, D1, D2, tau1, tau2,
                              overlap_type, threshold_type,
                              prevalence_file=None, norm_prval_method=None):
    """Setup the log_likelihood function.

    Builds the log-likelihood function and its derivative for the
    disease pair (D1, D2) and stores them, together with ``tau1``,
    ``tau2`` and ``overlap_type``, as attributes on ``self``.

    Parameters
    ----------
    database : EMRDatabase class object
        This class allows querying EMR data by diseases, gender, and
        ethnicity criteria.
    D1 : string
        Name of the first disease.
    D2 : string
        Name of the second disease.
    tau1 : integer
        Model's parameter indicating the minimum number of deleterious
        mutations in S1 or S12.
    tau2 : integer
        Model's parameter indicating the minimum number of deleterious
        mutations in S2 or S12.
    overlap_type : ["independent", "cooperation", "competition"]
        Model's parameter describing the type of genetic overlap used
        in the model.
    threshold_type : ["sharp", "soft"]
        Model's parameter describing the type of genetic penetrance
        used in the model.
    prevalence_file : string
        Location of the prevalence data file.
    norm_prval_method : [None, "rzhetsky", "max", "min", "avg",
                         "wts_avg", "sum"]
        Protocol to normalize the prevalence data.

    Notes
    -----
    The prevalence normalization function is only created when both
    ``prevalence_file`` and ``norm_prval_method`` are not None;
    otherwise None is passed on in its place.
    """
    # Query patients data
    D1orD2_patients = database.query_emr_data([D1, D2], OR_match=True)
    noD1D2_patients = database.query_emr_data(["not " + D1, "not " + D2])

    # Conditional probabilities: P(phi(t) | phi(infty))
    joint_age_of_onset = JointAgeOfOnset(D1orD2_patients)
    joint_age_of_onset_funcs = joint_age_of_onset.get_funcs()

    # Patient counts as a distribution of the final age value.
    joint_final_age = JointFinalAge(D1orD2_patients, noD1D2_patients)
    final_age_array = joint_final_age.get_final_age_array()
    patient_counts = joint_final_age.get_patient_counts()

    # Setup the genetic_penetrance model to compute the age-integrated
    # phenotype probabilities: P(phi(infty) ; rho1, rho2, rho12)
    genetic_penetrance = GeneticPenetrance(tau1, tau2, overlap_type,
                                           threshold_type)

    # Create a function that will normalize the raw EMR data to match the
    # general population disease prevalence.  None is a singleton, so it
    # is tested by identity rather than with != (PEP 8).
    if prevalence_file is not None and norm_prval_method is not None:
        prval_norm_func = create_prval_norm_func(
            D1, D2,
            database.get_disease2count(),
            database.get_tot_patient_count(),
            prevalence_file,
            norm_prval_method)
    else:
        prval_norm_func = None

    # Function to evaluate log-likelihood at given rho1, rho2 and rho12.
    log_likelihood_func = create_log_likelihood_func(
        genetic_penetrance,
        joint_age_of_onset_funcs,
        patient_counts,
        final_age_array,
        prval_norm_func)

    # Function to evaluate the derivative of the log-likelihood with
    # respect to rho1, rho2 and rho12.
    log_likelihood_fprime = create_log_likelihood_fprime(
        genetic_penetrance,
        joint_age_of_onset_funcs,
        patient_counts,
        final_age_array,
        prval_norm_func)

    self.tau1 = tau1
    self.tau2 = tau2
    self.overlap_type = overlap_type
    self.log_likelihood_func = log_likelihood_func
    self.log_likelihood_fprime = log_likelihood_fprime
def setup_log_likelihood_func(self, database, D1, D2, tau1, tau2,
                              overlap_type, threshold_type,
                              prevalence_file=None, norm_prval_method=None):
    """Setup the log_likelihood function.

    Builds the log-likelihood function and its derivative for the
    disease pair (D1, D2) and stores them, together with ``tau1``,
    ``tau2`` and ``overlap_type``, as attributes on ``self``.

    Parameters
    ----------
    database : EMRDatabase class object
        This class allows querying EMR data by diseases, gender, and
        ethnicity criteria.
    D1 : string
        Name of the first disease.
    D2 : string
        Name of the second disease.
    tau1 : integer
        Model's parameter indicating the minimum number of deleterious
        mutations in S1 or S12.
    tau2 : integer
        Model's parameter indicating the minimum number of deleterious
        mutations in S2 or S12.
    overlap_type : ["independent", "cooperation", "competition"]
        Model's parameter describing the type of genetic overlap used
        in the model.
    threshold_type : ["sharp", "soft"]
        Model's parameter describing the type of genetic penetrance
        used in the model.
    prevalence_file : string
        Location of the prevalence data file.
    norm_prval_method : [None, "rzhetsky", "max", "min", "avg",
                         "wts_avg", "sum"]
        Protocol to normalize the prevalence data.

    Notes
    -----
    The prevalence normalization function is only created when both
    ``prevalence_file`` and ``norm_prval_method`` are not None;
    otherwise None is passed on in its place.
    """
    # Query patients data
    D1orD2_patients = database.query_emr_data([D1, D2], OR_match=True)
    noD1D2_patients = database.query_emr_data(["not " + D1, "not " + D2])

    # Conditional probabilities: P(phi(t) | phi(infty))
    joint_age_of_onset = JointAgeOfOnset(D1orD2_patients)
    joint_age_of_onset_funcs = joint_age_of_onset.get_funcs()

    # Patient counts as a distribution of the final age value.
    joint_final_age = JointFinalAge(D1orD2_patients, noD1D2_patients)
    final_age_array = joint_final_age.get_final_age_array()
    patient_counts = joint_final_age.get_patient_counts()

    # Setup the genetic_penetrance model to compute the age-integrated
    # phenotype probabilities: P(phi(infty) ; rho1, rho2, rho12)
    genetic_penetrance = GeneticPenetrance(tau1, tau2, overlap_type,
                                           threshold_type)

    # Create a function that will normalize the raw EMR data to match the
    # general population disease prevalence.  None is a singleton, so it
    # is tested by identity rather than with != (PEP 8).
    if prevalence_file is not None and norm_prval_method is not None:
        prval_norm_func = create_prval_norm_func(
            D1, D2,
            database.get_disease2count(),
            database.get_tot_patient_count(),
            prevalence_file,
            norm_prval_method)
    else:
        prval_norm_func = None

    # Function to evaluate log-likelihood at given rho1, rho2 and rho12.
    log_likelihood_func = create_log_likelihood_func(
        genetic_penetrance,
        joint_age_of_onset_funcs,
        patient_counts,
        final_age_array,
        prval_norm_func)

    # Function to evaluate the derivative of the log-likelihood with
    # respect to rho1, rho2 and rho12.
    log_likelihood_fprime = create_log_likelihood_fprime(
        genetic_penetrance,
        joint_age_of_onset_funcs,
        patient_counts,
        final_age_array,
        prval_norm_func)

    self.tau1 = tau1
    self.tau2 = tau2
    self.overlap_type = overlap_type
    self.log_likelihood_func = log_likelihood_func
    self.log_likelihood_fprime = log_likelihood_fprime