def __get_counts(database, D1, D2, verbose):
    """Return patient counts together with the matching final-age array.

    Two cohorts are queried from ``database`` -- patients with D1, D2 or
    both, and patients without D1 and without D2 -- and handed to
    ``JointFinalAge`` along with ``verbose``.

    Returns
    -------
    tuple
        ``(patient_counts, final_age_array)`` as returned by
        ``JointFinalAge.get_patient_counts()`` and
        ``JointFinalAge.get_final_age_array()``.
    """
    # Cohort with at least one of the two diseases.
    affected = database.query_emr_data([D1, D2], OR_match=True)

    # Cohort without D1 and without D2.
    unaffected = database.query_emr_data(["not " + D1, "not " + D2])

    # Patient counts as a distribution of the final age value,
    # separately for each of the 4 disease phenotypes.
    age_distribution = JointFinalAge(affected, unaffected, verbose)
    ages = age_distribution.get_final_age_array()
    counts = age_distribution.get_patient_counts()

    return counts, ages
def __get_counts(database, D1, D2, verbose):
    """Collect patient counts distributed over the final age value.

    Returns a ``(patient_counts, final_age_array)`` pair obtained from a
    ``JointFinalAge`` object built from two EMR queries on ``database``.
    """
    diseases = [D1, D2]

    # Query 1: patients with D1, D2 or both.
    with_disease = database.query_emr_data(diseases, OR_match=True)

    # Query 2: patients without D1 and without D2.
    negated = ["not " + disease for disease in diseases]
    without_disease = database.query_emr_data(negated)

    # Counts of patients as a distribution of the final age value,
    # separately for each of the 4 disease phenotypes.
    final_age = JointFinalAge(with_disease, without_disease, verbose)

    age_values = final_age.get_final_age_array()
    counts_by_age = final_age.get_patient_counts()

    return (counts_by_age, age_values)
Esempio n. 3
0
    def setup_log_likelihood_func(self,
                                  database,
                                  D1,
                                  D2,
                                  tau1,
                                  tau2,
                                  overlap_type,
                                  threshold_type,
                                  prevalence_file=None,
                                  norm_prval_method=None):
        """Setup the log-likelihood function and its derivative.

        On return, ``self.tau1``, ``self.tau2``, ``self.overlap_type``,
        ``self.log_likelihood_func`` and ``self.log_likelihood_fprime``
        are set. ``threshold_type`` is only forwarded to
        ``GeneticPenetrance``; it is not stored on ``self``.

        Parameters
        ----------
        database : EMRDatabase class object
            This class allows querying of EMR data by diseases, gender,
            and ethnicity criteria.

        D1 : string
            Name of the first disease.

        D2 : string
            Name of the second disease.

        tau1 : integer
            Model's parameter indicating the minimum number of deleterious
            mutations in S1 or S12.

        tau2 : integer
            Model's parameter indicating the minimum number of deleterious
            mutations in S2 or S12.

        overlap_type : ["independent", "cooperation", "competition"]
            Model's parameter describing the type of genetic overlap used in
            the model.

        threshold_type : ["sharp", "soft"]
            Model's parameter describing the type of genetic penetrance used
            in the model.

        prevalence_file : string, optional
            Location of the prevalence data file.

        norm_prval_method : [None, "rzhetsky", "max", "min",
                             "avg", "wts_avg", "sum"]
            Protocol to normalize the prevalence data.

        Notes
        -----
        Prevalence normalization is applied only when both
        ``prevalence_file`` and ``norm_prval_method`` are given; if either
        one is None, no normalization function is used.
        """

        # Query patients data: patients with D1, D2 or both, and patients
        # without D1 and without D2.
        D1orD2_patients = database.query_emr_data([D1, D2], OR_match=True)
        noD1D2_patients = database.query_emr_data(["not " + D1, "not " + D2])

        # Conditional probabilities: P(phi(t) | phi(infty))
        joint_age_of_onset = JointAgeOfOnset(D1orD2_patients)
        joint_age_of_onset_funcs = joint_age_of_onset.get_funcs()

        # Patient counts as a distribution of the final age value.
        joint_final_age = JointFinalAge(D1orD2_patients, noD1D2_patients)
        final_age_array = joint_final_age.get_final_age_array()
        patient_counts = joint_final_age.get_patient_counts()

        # Setup the genetic_penetrance model to compute the age-integrated
        # phenotype probabilities: P(phi(infty) ; rho1, rho2, rho12)
        genetic_penetrance = GeneticPenetrance(tau1, tau2, overlap_type,
                                               threshold_type)

        # Create a function that will normalize the raw EMR data to match the
        # general population disease prevalence. Identity comparison with
        # None is used so a custom __ne__ on the arguments cannot change
        # the outcome.
        if prevalence_file is not None and norm_prval_method is not None:
            prval_norm_func = create_prval_norm_func(
                D1, D2, database.get_disease2count(),
                database.get_tot_patient_count(), prevalence_file,
                norm_prval_method)
        else:
            prval_norm_func = None

        # Function to evaluate log-likelihood at given rho1, rho2 and rho12.
        log_likelihood_func = create_log_likelihood_func(
            genetic_penetrance, joint_age_of_onset_funcs, patient_counts,
            final_age_array, prval_norm_func)

        # Function to evaluate the derivative of the log-likelihood with
        # respect to rho1, rho2 and rho12.
        log_likelihood_fprime = create_log_likelihood_fprime(
            genetic_penetrance, joint_age_of_onset_funcs, patient_counts,
            final_age_array, prval_norm_func)

        self.tau1 = tau1
        self.tau2 = tau2
        self.overlap_type = overlap_type
        self.log_likelihood_func = log_likelihood_func
        self.log_likelihood_fprime = log_likelihood_fprime
    def setup_log_likelihood_func(self,
                                  database,
                                  D1, D2,
                                  tau1, tau2,
                                  overlap_type,
                                  threshold_type,
                                  prevalence_file=None,
                                  norm_prval_method=None):
        """Setup the log_likelihood function.

        Builds the log-likelihood function and its derivative from the EMR
        data of the two diseases and stores them, together with ``tau1``,
        ``tau2`` and ``overlap_type``, as attributes of ``self``.

        Parameters
        ----------
        database : EMRDatabase class object
            This class allows querying of EMR data by diseases, gender,
            and ethnicity criteria.

        D1 : string
            Name of the first disease.

        D2 : string
            Name of the second disease.

        tau1 : integer
            Model's parameter indicating the minimum number of deleterious
            mutations in S1 or S12.

        tau2 : integer
            Model's parameter indicating the minimum number of deleterious
            mutations in S2 or S12.

        overlap_type : ["independent", "cooperation", "competition"]
            Model's parameter describing the type of genetic overlap used in
            the model.

        threshold_type : ["sharp", "soft"]
            Model's parameter describing the type of genetic penetrance used
            in the model.

        prevalence_file : string, optional
            Location of the prevalence data file. Only used when
            ``norm_prval_method`` is also given.

        norm_prval_method : [None, "rzhetsky", "max", "min",
                             "avg", "wts_avg", "sum"]
            Protocol to normalize the prevalence data. Only used when
            ``prevalence_file`` is also given.
        """

        # Query patients data
        D1orD2_patients = database.query_emr_data([D1, D2], OR_match=True)
        noD1D2_patients = database.query_emr_data(["not " + D1, "not " + D2])

        # Conditional probabilities: P(phi(t) | phi(infty))
        joint_age_of_onset = JointAgeOfOnset(D1orD2_patients)

        joint_age_of_onset_funcs = joint_age_of_onset.get_funcs()

        # Patient counts as a distribution of the final age value.
        joint_final_age = JointFinalAge(D1orD2_patients, noD1D2_patients)

        final_age_array = joint_final_age.get_final_age_array()
        patient_counts = joint_final_age.get_patient_counts()

        # Setup the genetic_penetrance model to compute the age-integrated
        # phenotype probabilities: P(phi(infty) ; rho1, rho2, rho12)
        genetic_penetrance = GeneticPenetrance(tau1,
                                               tau2,
                                               overlap_type,
                                               threshold_type)

        # Create a function that will normalize the raw EMR data to match the
        # general population disease prevalence. Both the data file and the
        # normalization protocol are required; "is not None" is the
        # identity test recommended for comparisons with None.
        if prevalence_file is not None and norm_prval_method is not None:

            prval_norm_func = (
                create_prval_norm_func(D1, D2,
                                       database.get_disease2count(),
                                       database.get_tot_patient_count(),
                                       prevalence_file,
                                       norm_prval_method))
        else:
            prval_norm_func = None

        # Function to evaluate log-likelihood at given rho1, rho2 and rho12.
        log_likelihood_func = (
            create_log_likelihood_func(genetic_penetrance,
                                       joint_age_of_onset_funcs,
                                       patient_counts,
                                       final_age_array,
                                       prval_norm_func))

        # Function to evaluate the derivative of the log-likelihood with
        # respect to rho1, rho2 and rho12.
        log_likelihood_fprime = (
            create_log_likelihood_fprime(genetic_penetrance,
                                         joint_age_of_onset_funcs,
                                         patient_counts,
                                         final_age_array,
                                         prval_norm_func))

        self.tau1 = tau1
        self.tau2 = tau2
        self.overlap_type = overlap_type
        self.log_likelihood_func = log_likelihood_func
        self.log_likelihood_fprime = log_likelihood_fprime