def main():
    """Plot the log-likelihood contour for a single disease pair.

    All settings are read from the module-level ``options`` object.  The EMR
    data is loaded, the log-likelihood function for ``options.disease1`` and
    ``options.disease2`` is set up and optimized once, and then
    ``options.num_paths`` further optimization runs are made with path
    recording enabled.  The optimized parameters and the recorded paths are
    handed to ``__plot_contour`` and the resulting figure is shown.
    """
    first_disease = options.disease1
    second_disease = options.disease2

    # Load the EMR records together with the disease definitions.
    emr_db = EMRDatabase()
    emr_db.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Configure the optimizer for this disease pair.
    optimizer = OptimizeLogLikelihood(options.verbose)
    optimizer.set_opt_method(options.opt_method)
    optimizer.set_use_random_seed(options.use_random_seed)
    optimizer.setup_log_likelihood_func(
        emr_db,
        first_disease,
        second_disease,
        options.tau1,
        options.tau2,
        options.overlap_type,
        options.threshold_type,
        options.prevalence_file,
        options.norm_prval_method,
    )

    # The plain run comes first; only its second return value (the optimized
    # parameters) is needed.
    _, best_param, _ = optimizer.run()

    # Each additional run contributes one recorded optimization path (the
    # third return value).
    recorded_paths = []
    for _ in range(options.num_paths):
        _, _, single_path = optimizer.run(save_path=True)
        recorded_paths.append(single_path)

    likelihood_func = optimizer.get_log_likelihood_func()

    # The return value is not used below; it is only kept bound until the
    # plot has been shown.
    contour_plot = __plot_contour(
        likelihood_func,
        best_param,
        recorded_paths,
        options.tau1,
        options.tau2,
        options.overlap_type,
        options.threshold_type,
        options.norm_prval_method,
        options.verbose,
    )

    plt.show()
def main():
    """Compute likelihood-ratio p-values for disease pairs and save them.

    All settings are read from the module-level ``options`` object.  When
    both ``options.disease1`` and ``options.disease2`` are given, only that
    pair is evaluated.  Otherwise every pair ``D1 < D2`` drawn from the
    diseases kept by ``__filter_diseases`` is evaluated.

    For each pair the log-likelihood of the "independent" model and of the
    ``options.overlap_type`` model are computed.  The likelihood-ratio
    statistic ``2 * (overlap_LL - indep_LL)`` is converted into a p-value
    using the right tail of a chi-square distribution with one degree of
    freedom.  Each result is printed to stdout and appended as one line to
    ``p_value.txt`` in the current working directory.
    """
    # Import EMR data into database
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # Compute p-value for specified disease pair or for all disease pairs with
    # non-zero patient counts.
    if options.disease1 is not None and options.disease2 is not None:
        disease_pairs = [(options.disease1, options.disease2)]
    else:
        diseases = database.get_diseases()
        diseases.sort()
        filtered_diseases = __filter_diseases(diseases, database)
        # Keep each unordered pair once and skip pairing a disease with
        # itself.
        disease_pairs = [
            (first, second)
            for first in filtered_diseases
            for second in filtered_diseases
            if first < second
        ]

    # Degree of freedoms of the chi-square distribution.
    dof = 1

    # The context manager guarantees the results file is flushed and closed
    # even if a likelihood computation raises part-way through the run.
    with open("p_value.txt", "w") as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        for D1, D2 in disease_pairs:
            # Single-argument print calls produce the same text under
            # Python 2 and Python 3.
            print("-" * 80)
            print("D1= %s, D2= %s, overlap_type= %s"
                  % (D1, D2, options.overlap_type))

            # Independent (no genetic overlap) model
            indep_log_likelihood = __compute_log_likelihood_wrapper(
                opt_log_likelihood,
                database,
                D1,
                D2,
                options.tau1,
                options.tau2,
                "independent",
                options.threshold_type,
                options.prevalence_file,
                options.norm_prval_method,
            )

            # NOTE(review): presumably a lower bound the overlap fit has to
            # reach -- confirm against __compute_log_likelihood_wrapper.
            min_log_likelihood = indep_log_likelihood - 1.0

            # Allow genetic overlap model
            overlap_log_likelihood = __compute_log_likelihood_wrapper(
                opt_log_likelihood,
                database,
                D1,
                D2,
                options.tau1,
                options.tau2,
                options.overlap_type,
                options.threshold_type,
                options.prevalence_file,
                options.norm_prval_method,
                min_log_likelihood,
            )

            log_likelihood_ratio = 2.0 * (overlap_log_likelihood
                                          - indep_log_likelihood)

            # p-value is the area at the right tail of the chi-square
            # distribution.  The survival function is used instead of
            # 1 - cdf because the subtraction cancels to exactly 0 for
            # large ratios, hiding the magnitude of very small p-values.
            p_value = chi2.sf(log_likelihood_ratio, dof)

            print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, p_value= %.2E"
                  % (overlap_log_likelihood, indep_log_likelihood,
                     log_likelihood_ratio, p_value))
            print("-" * 80)
            print("")

            f.write("%s, %s, %s, %.3E, %.3E, %.3E, %.3E\n"
                    % (D1, D2, options.overlap_type,
                       overlap_log_likelihood, indep_log_likelihood,
                       log_likelihood_ratio, p_value))
def main():
    """Compute likelihood-ratio p-values for disease pairs and save them.

    All settings are read from the module-level ``options`` object.  When
    both ``options.disease1`` and ``options.disease2`` are given, only that
    pair is evaluated.  Otherwise every pair ``D1 < D2`` drawn from the
    diseases kept by ``__filter_diseases`` is evaluated.

    For each pair the log-likelihood of the "independent" model and of the
    ``options.overlap_type`` model are computed.  The likelihood-ratio
    statistic ``2 * (overlap_LL - indep_LL)`` is converted into a p-value
    using the right tail of a chi-square distribution with one degree of
    freedom.  Each result is printed to stdout and appended as one line to
    ``p_value.txt`` in the current working directory.
    """
    # Import EMR data into database
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # Compute p-value for specified disease pair or for all disease pairs with
    # non-zero patient counts.
    if options.disease1 is not None and options.disease2 is not None:
        disease_pairs = [(options.disease1, options.disease2)]
    else:
        diseases = database.get_diseases()
        diseases.sort()
        filtered_diseases = __filter_diseases(diseases, database)
        # Keep each unordered pair once and skip pairing a disease with
        # itself.
        disease_pairs = [
            (first, second)
            for first in filtered_diseases
            for second in filtered_diseases
            if first < second
        ]

    # Degree of freedoms of the chi-square distribution.
    dof = 1

    # The context manager guarantees the results file is flushed and closed
    # even if a likelihood computation raises part-way through the run.
    with open("p_value.txt", "w") as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        f.write("# D1, D2, overlap_type, ")
        f.write("overlap_LL, indepedent_LL, ")
        f.write("LLR, p_value\n")

        for D1, D2 in disease_pairs:
            # Single-argument print calls produce the same text under
            # Python 2 and Python 3.
            print("-" * 80)
            print("D1= %s, D2= %s, overlap_type= %s"
                  % (D1, D2, options.overlap_type))

            # Independent (no genetic overlap) model
            indep_log_likelihood = __compute_log_likelihood_wrapper(
                opt_log_likelihood,
                database,
                D1,
                D2,
                options.tau1,
                options.tau2,
                "independent",
                options.threshold_type,
                options.prevalence_file,
                options.norm_prval_method,
            )

            # NOTE(review): presumably a lower bound the overlap fit has to
            # reach -- confirm against __compute_log_likelihood_wrapper.
            min_log_likelihood = indep_log_likelihood - 1.0

            # Allow genetic overlap model
            overlap_log_likelihood = __compute_log_likelihood_wrapper(
                opt_log_likelihood,
                database,
                D1,
                D2,
                options.tau1,
                options.tau2,
                options.overlap_type,
                options.threshold_type,
                options.prevalence_file,
                options.norm_prval_method,
                min_log_likelihood,
            )

            log_likelihood_ratio = 2.0 * (overlap_log_likelihood
                                          - indep_log_likelihood)

            # p-value is the area at the right tail of the chi-square
            # distribution.  The survival function is used instead of
            # 1 - cdf because the subtraction cancels to exactly 0 for
            # large ratios, hiding the magnitude of very small p-values.
            p_value = chi2.sf(log_likelihood_ratio, dof)

            print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, p_value= %.2E"
                  % (overlap_log_likelihood, indep_log_likelihood,
                     log_likelihood_ratio, p_value))
            print("-" * 80)
            print("")

            f.write("%s, %s, %s, %.3E, %.3E, %.3E, %.3E\n"
                    % (D1, D2, options.overlap_type,
                       overlap_log_likelihood, indep_log_likelihood,
                       log_likelihood_ratio, p_value))