def main():
    """Fit one disease pair and display the log-likelihood contour plot.

    All settings are read from the module-level ``options`` object.  The
    function imports the EMR data, configures an ``OptimizeLogLikelihood``
    instance for ``options.disease1`` / ``options.disease2``, runs it once to
    obtain the optimized parameters, runs it ``options.num_paths`` more times
    with ``save_path=True`` to collect optimization paths, passes everything to
    ``__plot_contour`` and finally calls ``plt.show()``.
    """
    global options

    first_disease = options.disease1
    second_disease = options.disease2

    # Load the EMR records and the disease / code mapping files.
    emr_db = EMRDatabase()
    emr_db.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Configure the optimizer for the requested disease pair.
    optimizer = OptimizeLogLikelihood(options.verbose)
    optimizer.set_opt_method(options.opt_method)
    optimizer.set_use_random_seed(options.use_random_seed)
    optimizer.setup_log_likelihood_func(
        emr_db,
        first_disease,
        second_disease,
        options.tau1,
        options.tau2,
        options.overlap_type,
        options.threshold_type,
        options.prevalence_file,
        options.norm_prval_method,
    )

    # A plain run supplies the optimized parameters (second tuple element).
    _, best_params, _ = optimizer.run()

    # Each run with save_path=True contributes one path (third tuple element).
    recorded_paths = []
    for _ in range(options.num_paths):
        _, _, single_path = optimizer.run(save_path=True)
        recorded_paths.append(single_path)

    likelihood_func = optimizer.get_log_likelihood_func()

    # Bound to a local so the returned object stays referenced during show().
    contour_plot = __plot_contour(
        likelihood_func,
        best_params,
        recorded_paths,
        options.tau1,
        options.tau2,
        options.overlap_type,
        options.threshold_type,
        options.norm_prval_method,
        options.verbose,
    )

    plt.show()
Example #2
0
def main():
    """Optimize the log-likelihood of a disease pair and show its contour.

    Configuration comes from the module-level ``options`` object.  Steps:

    1. import the EMR data into an ``EMRDatabase``;
    2. set up an ``OptimizeLogLikelihood`` instance for the pair
       ``options.disease1`` / ``options.disease2``;
    3. run it once for the optimized parameters;
    4. run it ``options.num_paths`` times with ``save_path=True`` and keep the
       path returned by each run;
    5. draw the contour with ``__plot_contour`` and call ``plt.show()``.
    """
    global options

    disease_a, disease_b = options.disease1, options.disease2

    # Step 1: EMR data.
    database = EMRDatabase()
    database.import_data(options.emr_data_file,
                         options.diseases_file,
                         options.code2disease_file)

    # Step 2: optimizer configuration.
    fitter = OptimizeLogLikelihood(options.verbose)
    fitter.set_opt_method(options.opt_method)
    fitter.set_use_random_seed(options.use_random_seed)
    fitter.setup_log_likelihood_func(database,
                                     disease_a,
                                     disease_b,
                                     options.tau1,
                                     options.tau2,
                                     options.overlap_type,
                                     options.threshold_type,
                                     options.prevalence_file,
                                     options.norm_prval_method)

    # Step 3: only the middle element of the result tuple is needed here.
    _, fitted_param, _ = fitter.run()

    # Step 4: only the last element (the saved path) is kept from each run.
    paths = []
    for _ in range(options.num_paths):
        _, _, saved_path = fitter.run(save_path=True)
        paths.append(saved_path)

    # Step 5: plotting.  The result is kept in a local so it stays referenced
    # until plt.show() returns.
    ll_func = fitter.get_log_likelihood_func()
    figure = __plot_contour(ll_func,
                            fitted_param,
                            paths,
                            options.tau1,
                            options.tau2,
                            options.overlap_type,
                            options.threshold_type,
                            options.norm_prval_method,
                            options.verbose)

    plt.show()
Example #3
0
def main():
    """Compute likelihood-ratio p-values for disease pairs and save them.

    Settings come from the module-level ``options`` object.  When both
    ``options.disease1`` and ``options.disease2`` are given, only that pair is
    evaluated; otherwise every pair with ``D1 < D2`` drawn from the diseases
    kept by ``__filter_diseases`` is evaluated.

    For each pair the log-likelihood of the "independent" model and of the
    ``options.overlap_type`` model are computed.  The statistic
    ``2 * (overlap_LL - indep_LL)`` is then compared against a chi-square
    distribution with one degree of freedom.  One result line per pair is
    printed and appended to ``p_value.txt`` (relative path, so it is created
    in the current working directory and overwritten on every run).
    """
    global options

    # Import EMR data into database
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # Degrees of freedom of the chi-square distribution used for the test.
    dof = 1
    separator = "-" * 80

    # The context manager closes the file even if a fit raises part-way
    # through, so the lines written so far are not lost with the handle.
    with open("p_value.txt", "w") as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        # NOTE: the "indepedent_LL" spelling is kept as-is so the header of
        # the output file does not change for existing consumers.
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        # Evaluate the specified disease pair, or all disease pairs among the
        # diseases that pass __filter_diseases.
        if options.disease1 is not None and options.disease2 is not None:
            disease_pairs = [(options.disease1, options.disease2)]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            # Keep each unordered pair once and drop self-pairs.
            disease_pairs = [
                (first, second)
                for first in filtered_diseases
                for second in filtered_diseases
                if first < second
            ]

        for D1, D2 in disease_pairs:
            # Single-argument print(...) emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            print(separator)
            print("D1= %s, D2= %s, overlap_type= %s"
                  % (D1, D2, options.overlap_type))

            # Independent (no genetic overlap) model
            indep_log_likelihood = __compute_log_likelihood_wrapper(
                opt_log_likelihood,
                database,
                D1,
                D2,
                options.tau1,
                options.tau2,
                "independent",
                options.threshold_type,
                options.prevalence_file,
                options.norm_prval_method,
            )

            # Passed as the extra trailing argument of the overlap fit;
            # presumably a lower bound on an acceptable fit -- confirm against
            # __compute_log_likelihood_wrapper.
            min_log_likelihood = indep_log_likelihood - 1.0

            # Allow genetic overlap model
            overlap_log_likelihood = __compute_log_likelihood_wrapper(
                opt_log_likelihood,
                database,
                D1,
                D2,
                options.tau1,
                options.tau2,
                options.overlap_type,
                options.threshold_type,
                options.prevalence_file,
                options.norm_prval_method,
                min_log_likelihood,
            )

            log_likelihood_ratio = 2.0 * (
                overlap_log_likelihood - indep_log_likelihood
            )

            # p-value is the area at the right tail of the chi-square
            # distribution.  The survival function computes that tail
            # directly; "1.0 - cdf" rounds to exactly 0 once the tail drops
            # below double precision, hiding very small p-values.
            p_value = chi2.sf(log_likelihood_ratio, dof)

            print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, p_value= %.2E"
                  % (overlap_log_likelihood, indep_log_likelihood,
                     log_likelihood_ratio, p_value))
            print(separator)
            print("")

            f.write(
                "%s, %s, %s, %.3E, %.3E, %.3E, %.3E\n"
                % (D1, D2, options.overlap_type,
                   overlap_log_likelihood, indep_log_likelihood,
                   log_likelihood_ratio, p_value)
            )
Example #4
0
def main():
    """Write likelihood-ratio test results for disease pairs to p_value.txt.

    Reads its configuration from the module-level ``options`` object.  If
    ``options.disease1`` and ``options.disease2`` are both set, just that pair
    is tested.  Otherwise all pairs ``D1 < D2`` from the diseases returned by
    ``__filter_diseases`` are tested.

    Per pair, the "independent" model and the ``options.overlap_type`` model
    are fitted through ``__compute_log_likelihood_wrapper``.  The statistic
    ``2 * (overlap_LL - indep_LL)`` is converted into a p-value using the right
    tail of a chi-square distribution with one degree of freedom.  Results are
    printed and written, one line per pair, to ``p_value.txt`` (a relative
    path, opened in write mode so any earlier file is replaced).
    """
    global options

    # Import EMR data into database
    database = EMRDatabase()
    database.import_data(options.emr_data_file, options.diseases_file,
                         options.code2disease_file)

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # True when the caller named one specific pair; decided once and reused.
    single_pair = (options.disease1 is not None
                   and options.disease2 is not None)

    # Degrees of freedom of the chi-square distribution.
    dof = 1
    rule = "-" * 80

    # "with" guarantees the file is closed even when a fit raises.
    with open("p_value.txt", 'w') as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        # NOTE: header text (including the "indepedent_LL" spelling) is left
        # unchanged so the output format stays the same.
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        if single_pair:
            D1_list = [options.disease1]
            D2_list = [options.disease2]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases

        for D1 in D1_list:
            for D2 in D2_list:

                # In all-pairs mode visit each unordered pair once and skip
                # pairing a disease with itself.
                if not single_pair and D1 >= D2:
                    continue

                # One-argument print(...) produces identical output with the
                # Python 2 print statement and the Python 3 print function.
                print(rule)
                print("D1= %s, D2= %s, overlap_type= %s" %
                      (D1, D2, options.overlap_type))

                # Independent (no genetic overlap) model
                indep_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood, database, D1, D2, options.tau1,
                    options.tau2, "independent", options.threshold_type,
                    options.prevalence_file, options.norm_prval_method)

                # Extra trailing argument for the overlap fit; presumably a
                # minimum acceptable log-likelihood -- confirm against
                # __compute_log_likelihood_wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood, database, D1, D2, options.tau1,
                    options.tau2, options.overlap_type,
                    options.threshold_type, options.prevalence_file,
                    options.norm_prval_method, min_log_likelihood)

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood -
                                              indep_log_likelihood)

                # Right-tail area of the chi-square distribution.  sf() gives
                # the tail directly; "1.0 - cdf()" cancels to exactly 0 for
                # large statistics and would hide very small p-values.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                print("overlap_LL= %.2E, indep_LL= %.2E, "
                      "LLR= %.2E, p_value= %.2E" %
                      (overlap_log_likelihood, indep_log_likelihood,
                       log_likelihood_ratio, p_value))
                print(rule)
                print("")

                f.write("%s, %s, %s, %.3E, %.3E, %.3E, %.3E\n" %
                        (D1, D2, options.overlap_type,
                         overlap_log_likelihood, indep_log_likelihood,
                         log_likelihood_ratio, p_value))