def main():
    """Optimize the log-likelihood for one disease pair and show a contour plot.

    Every setting is read from the module-level ``options`` object. The EMR
    data is imported, an ``OptimizeLogLikelihood`` instance is configured for
    the pair (``options.disease1``, ``options.disease2``) and run once to get
    the optimized parameters. It is then run ``options.num_paths`` more times
    with ``save_path=True`` to collect optimization paths. The log-likelihood
    function, the optimized parameters and the paths are handed to
    ``__plot_contour`` and the figure is displayed with ``plt.show()``.
    """
    global options

    first_disease = options.disease1
    second_disease = options.disease2

    # Load the EMR records into a fresh database object.
    emr = EMRDatabase()
    emr.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Configure the optimizer for this disease pair.
    optimizer = OptimizeLogLikelihood(options.verbose)
    optimizer.set_opt_method(options.opt_method)
    optimizer.set_use_random_seed(options.use_random_seed)
    optimizer.setup_log_likelihood_func(
        emr,
        first_disease,
        second_disease,
        options.tau1,
        options.tau2,
        options.overlap_type,
        options.threshold_type,
        options.prevalence_file,
        options.norm_prval_method,
    )

    # First run: only the second element of the result (the optimized
    # parameters) is needed.
    _, best_param, _ = optimizer.run()

    # Further runs: keep only the third element (the recorded path).
    paths = [
        optimizer.run(save_path=True)[2]
        for _ in range(options.num_paths)
    ]

    likelihood_func = optimizer.get_log_likelihood_func()
    # The return value is kept referenced until the figure has been shown.
    contour = __plot_contour(
        likelihood_func,
        best_param,
        paths,
        options.tau1,
        options.tau2,
        options.overlap_type,
        options.threshold_type,
        options.norm_prval_method,
        options.verbose,
    )
    plt.show()
def main():
    """Plot figures for one disease pair, or for every pair of filtered diseases.

    Settings come from the module-level ``options`` object. When both
    ``options.disease1`` and ``options.disease2`` are given, the figure for
    that pair is built and shown interactively. Otherwise a figure is built
    for every ordered pair of distinct diseases returned by
    ``__filter_diseases`` (per the original comment: diseases with non-zero
    patient counts) and each figure is closed right after it is built.
    """
    global options

    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    diseases = database.get_diseases()
    diseases.sort()

    # None is a singleton: test it by identity (PEP 8) so a custom
    # __eq__/__ne__ on the option value can never change the outcome.
    if options.disease1 is not None and options.disease2 is not None:
        funcs = __get_funcs(database, options.disease1, options.disease2,
                            options.verbose)
        # Local name deliberately differs from ``plt`` so it does not shadow
        # a module-level ``plt`` inside this function.
        figure_plt = __plot_figure(funcs, options.disease1, options.disease2,
                                   options.verbose)
        figure_plt.show()
        return

    # No explicit pair: walk every ordered pair of distinct filtered diseases.
    filtered_diseases = __filter_diseases(diseases, database)
    for D1 in filtered_diseases:
        for D2 in filtered_diseases:
            if D1 == D2:
                continue
            funcs = __get_funcs(database, D1, D2, options.verbose)
            figure_plt = __plot_figure(funcs, D1, D2, options.verbose)
            figure_plt.close()  # close current figure
def main():
    """Plot the figure for one disease, or for every filtered disease.

    Settings come from the module-level ``options`` object. When
    ``options.disease`` is given, the four values returned by ``__get_funcs``
    (named ``M_func``, ``F_func``, ``M_count``, ``F_count`` here) are plotted
    for that disease and the figure is shown interactively. Otherwise the
    same is done for every disease returned by ``__filter_diseases`` (per the
    original comment: diseases with non-zero patient counts) and each figure
    is closed right after it is built.
    """
    global options

    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    diseases = database.get_diseases()
    diseases.sort()

    # None is a singleton: test it by identity (PEP 8) rather than with !=.
    if options.disease is not None:
        M_func, F_func, M_count, F_count = __get_funcs(database,
                                                       options.disease)
        # Local name deliberately differs from ``plt`` so it does not shadow
        # a module-level ``plt`` inside this function.
        figure_plt = __plot_figure(M_func, F_func, M_count, F_count,
                                   options.disease, options.verbose)
        figure_plt.show()
        return

    # No explicit disease: build (and immediately close) one figure each.
    filtered_diseases = __filter_diseases(diseases, database)
    for D in filtered_diseases:
        M_func, F_func, M_count, F_count = __get_funcs(database, D)
        figure_plt = __plot_figure(M_func, F_func, M_count, F_count,
                                   D, options.verbose)
        figure_plt.close()
def main():
    """Compute likelihood-ratio p-values for disease pairs and save them.

    Settings come from the module-level ``options`` object. For each disease
    pair, the log-likelihood is computed twice through
    ``__compute_log_likelihood_wrapper``: once with the overlap type
    ``"independent"`` and once with ``options.overlap_type``. Twice their
    difference is the log-likelihood ratio (LLR); the p-value is the right
    tail of a chi-square distribution with one degree of freedom at the LLR.

    If both ``options.disease1`` and ``options.disease2`` are given, only
    that pair is processed. Otherwise every pair ``D1 < D2`` of the diseases
    returned by ``__filter_diseases`` is processed (per the original comment:
    diseases with non-zero patient counts).

    One line per pair is printed and written to ``p_value.txt`` in the
    current working directory, after a commented header.
    """
    global options

    # Import EMR data into database
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # None is a singleton: test it by identity (PEP 8). Evaluated once here
    # instead of on every loop iteration.
    single_pair = (options.disease1 is not None
                   and options.disease2 is not None)

    # Degrees of freedom of the chi-square distribution.
    dof = 1

    # The context manager guarantees the results file is flushed and closed
    # even when an optimization raises part-way through a long run.
    with open("p_value.txt", "w") as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        # NOTE(review): "indepedent_LL" is misspelled; left byte-identical in
        # case downstream readers match on this header.
        f.write("# D1, D2, overlap_type, "
                "overlap_LL, indepedent_LL, "
                "LLR, p_value\n")

        # Compute p-value for specified disease pair or for all disease
        # pairs with non-zero patient counts.
        if single_pair:
            D1_list = [options.disease1]
            D2_list = [options.disease2]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases

        for D1 in D1_list:
            for D2 in D2_list:
                # When scanning all diseases, visit each unordered pair once
                # and skip self-pairs.
                if not single_pair and D1 >= D2:
                    continue

                # Single-argument print(...) emits the same text under the
                # Python 2 print statement and the Python 3 print function.
                print("-" * 80)
                print("D1= %s, D2= %s, overlap_type= %s"
                      % (D1, D2, options.overlap_type))

                # Independent (no genetic overlap) model
                indep_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    "independent",
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                )

                # Passed as the extra last argument of the overlap fit.
                # NOTE(review): presumably a floor the overlap fit must
                # reach - confirm in __compute_log_likelihood_wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    options.overlap_type,
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                    min_log_likelihood,
                )

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood
                                              - indep_log_likelihood)

                # p-value is the area at the right tail of the chi-square
                # distribution. The survival function is used instead of
                # 1 - cdf because the subtraction rounds to exactly 0 once
                # cdf is within float precision of 1, losing small p-values.
                # NOTE(review): assumes `chi2` is scipy.stats.chi2 - confirm
                # against the file's imports.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                text = "%s, %s, %s, %.3E, %.3E, %.3E, %.3E" % (
                    D1,
                    D2,
                    options.overlap_type,
                    overlap_log_likelihood,
                    indep_log_likelihood,
                    log_likelihood_ratio,
                    p_value,
                )

                print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, "
                      "p_value= %.2E"
                      % (overlap_log_likelihood, indep_log_likelihood,
                         log_likelihood_ratio, p_value))
                print("-" * 80)
                print("")

                f.write(text + "\n")
def main():
    """Exercise ``EMRDatabase.query_emr_data`` and print the patient counts.

    Settings come from the module-level ``options`` object. Three sections
    are printed:

    1. Counts for a fixed set of queries on "Autism", "Epilepsy" and
       "Breast cancer (female)": single disease, AND, OR, "not"-prefixed
       exclusions, and gender filters.
    2. The count for each disease in a fixed list of six.
    3. The count for every unordered pair of those six diseases.

    When ``options.verbose`` is set, each count is accompanied by one line
    per matching patient.
    """
    global options

    def dump_patients(heading, patients):
        # Print a heading, one "patient id --> record" line per entry, and a
        # trailing blank line.
        print(heading)
        for patient_id in patients:
            print("patient %s --> %s" % (patient_id, patients[patient_id]))
        print("")

    database = EMRDatabase(options.verbose)
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )
    query = database.query_emr_data

    diseases = [
        "Alzheimer's disease",
        "Attention deficit",
        "Autism",
        "Breast cancer (female)",
        "Epilepsy",
        "Schizophrenia",
    ]

    first = "Autism"
    second = "Epilepsy"
    third = "Breast cancer (female)"

    # Queries are issued in the same order as before.
    first_hits = query(first)                               # have first
    second_hits = query(second)                             # have second
    both_hits = query([first, second])                      # have both
    either_hits = query([first, second], OR_match=True)     # have either
    first_only_hits = query([first, "not " + second])       # first, not second
    second_only_hits = query(["not " + first, second])      # second, not first
    neither_hits = query(["not " + first, "not " + second])  # have neither
    male_hits = query(third, gender_filters="M")            # male, third
    female_hits = query(third, gender_filters="F")          # female, third

    # Section 1: summary counts. Single-argument print(...) emits the same
    # text under the Python 2 print statement and the Python 3 function.
    print("Test EMR_database queries:")
    summary = [
        ("%-6d patients with %s", (len(first_hits), first)),
        ("%-6d patients with %s", (len(second_hits), second)),
        ("%-6d patients with %s and %s", (len(both_hits), first, second)),
        ("%-6d patients with %s or %s", (len(either_hits), first, second)),
        ("%-6d patients with %s and no %s",
         (len(first_only_hits), first, second)),
        ("%-6d patients with %s and no %s",
         (len(second_only_hits), second, first)),
        ("%-6d patients with no %s and no %s",
         (len(neither_hits), first, second)),
        ("%-6d male patients with %s", (len(male_hits), third)),
        ("%-6d female patients with %s", (len(female_hits), third)),
    ]
    for template, values in summary:
        print(template % values)

    if options.verbose:
        details = [
            ("Patients with %s:" % first, first_hits),
            ("Patients with %s:" % second, second_hits),
            ("Patients with %s and %s:" % (first, second), both_hits),
            ("Patients with %s or %s:" % (first, second), either_hits),
            ("Patients with %s and no %s:" % (second, first),
             second_only_hits),
            ("Patients with %s and no %s:" % (first, second),
             first_only_hits),
            ("Patients with no %s and no %s:" % (first, second),
             neither_hits),
            ("Male patients with %s:" % (third), male_hits),
            ("Female patients with %s:" % (third), female_hits),
        ]
        for heading, patients in details:
            dump_patients(heading, patients)

    # Section 2: occurrences of the 6 diseases.
    print("")
    print("Occurrences of the 6 diseases:")
    for disease in diseases:
        hits = query(disease)
        print("%-6d patients with %s" % (len(hits), disease))
        if options.verbose:
            dump_patients("Patients with %s:" % disease, hits)

    # Section 3: comorbidities between the 6 diseases. Slicing past the
    # current position visits each unordered pair once, in list order.
    print("")
    print("Comorbidities between the 6 diseases:")
    for position, disease_a in enumerate(diseases):
        for disease_b in diseases[position + 1:]:
            hits = query([disease_a, disease_b])
            print("%-6d patients with %s and %s"
                  % (len(hits), disease_a, disease_b))
            if options.verbose:
                dump_patients(
                    "Patients with both %s and %s:" % (disease_a, disease_b),
                    hits,
                )
def main():
    """Run a fixed set of ``query_emr_data`` calls and report patient counts.

    Settings come from the module-level ``options`` object. The report has
    three parts: counts for a fixed set of queries on "Autism", "Epilepsy"
    and "Breast cancer (female)" (single disease, AND, OR, "not"-prefixed
    exclusions, gender filters); the count for each of six listed diseases;
    and the count for every unordered pair of those six.

    When ``options.verbose`` is set, every count is followed by one line per
    matching patient.
    """
    global options

    def show_patients(title, records):
        # Title line, one "patient id --> record" line each, then a blank.
        print(title)
        for key in records:
            print("patient %s --> %s" % (key, records[key]))
        print("")

    database = EMRDatabase(options.verbose)
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )
    run_query = database.query_emr_data

    diseases = [
        "Alzheimer's disease",
        "Attention deficit",
        "Autism",
        "Breast cancer (female)",
        "Epilepsy",
        "Schizophrenia",
    ]

    dis_a = "Autism"
    dis_b = "Epilepsy"
    dis_c = "Breast cancer (female)"

    # Same queries, issued in the same order as before.
    with_a = run_query(dis_a)
    with_b = run_query(dis_b)
    with_a_and_b = run_query([dis_a, dis_b])
    with_a_or_b = run_query([dis_a, dis_b], OR_match=True)
    with_a_not_b = run_query([dis_a, "not " + dis_b])
    with_b_not_a = run_query(["not " + dis_a, dis_b])
    with_neither = run_query(["not " + dis_a, "not " + dis_b])
    males_with_c = run_query(dis_c, gender_filters="M")
    females_with_c = run_query(dis_c, gender_filters="F")

    # Part 1: summary counts. Single-argument print(...) emits the same text
    # under the Python 2 print statement and the Python 3 print function.
    print("Test EMR_database queries:")
    count_lines = [
        ("%-6d patients with %s", (len(with_a), dis_a)),
        ("%-6d patients with %s", (len(with_b), dis_b)),
        ("%-6d patients with %s and %s", (len(with_a_and_b), dis_a, dis_b)),
        ("%-6d patients with %s or %s", (len(with_a_or_b), dis_a, dis_b)),
        ("%-6d patients with %s and no %s",
         (len(with_a_not_b), dis_a, dis_b)),
        ("%-6d patients with %s and no %s",
         (len(with_b_not_a), dis_b, dis_a)),
        ("%-6d patients with no %s and no %s",
         (len(with_neither), dis_a, dis_b)),
        ("%-6d male patients with %s", (len(males_with_c), dis_c)),
        ("%-6d female patients with %s", (len(females_with_c), dis_c)),
    ]
    for fmt, args in count_lines:
        print(fmt % args)

    if options.verbose:
        listings = [
            ("Patients with %s:" % dis_a, with_a),
            ("Patients with %s:" % dis_b, with_b),
            ("Patients with %s and %s:" % (dis_a, dis_b), with_a_and_b),
            ("Patients with %s or %s:" % (dis_a, dis_b), with_a_or_b),
            ("Patients with %s and no %s:" % (dis_b, dis_a), with_b_not_a),
            ("Patients with %s and no %s:" % (dis_a, dis_b), with_a_not_b),
            ("Patients with no %s and no %s:" % (dis_a, dis_b),
             with_neither),
            ("Male patients with %s:" % (dis_c), males_with_c),
            ("Female patients with %s:" % (dis_c), females_with_c),
        ]
        for title, records in listings:
            show_patients(title, records)

    # Part 2: occurrences of the 6 diseases.
    print("")
    print("Occurrences of the 6 diseases:")
    for name in diseases:
        matches = run_query(name)
        print("%-6d patients with %s" % (len(matches), name))
        if options.verbose:
            show_patients("Patients with %s:" % name, matches)

    # Part 3: comorbidities between the 6 diseases. Only later list entries
    # are paired with the current one, so each unordered pair appears once.
    print("")
    print("Comorbidities between the 6 diseases:")
    for index, left in enumerate(diseases):
        for right in diseases[index + 1:]:
            matches = run_query([left, right])
            print("%-6d patients with %s and %s"
                  % (len(matches), left, right))
            if options.verbose:
                show_patients(
                    "Patients with both %s and %s:" % (left, right),
                    matches,
                )
def main():
    """Compute likelihood-ratio p-values for disease pairs and save them.

    Settings come from the module-level ``options`` object. For each disease
    pair, the log-likelihood is computed twice through
    ``__compute_log_likelihood_wrapper``: once with the overlap type
    ``"independent"`` and once with ``options.overlap_type``. Twice their
    difference is the log-likelihood ratio (LLR); the p-value is the right
    tail of a chi-square distribution with one degree of freedom at the LLR.

    If both ``options.disease1`` and ``options.disease2`` are given, only
    that pair is processed. Otherwise every pair ``D1 < D2`` of the diseases
    returned by ``__filter_diseases`` is processed (per the original comment:
    diseases with non-zero patient counts).

    One line per pair is printed and written to ``p_value.txt`` in the
    current working directory, after a commented header.
    """
    global options

    # Import EMR data into database
    database = EMRDatabase()
    database.import_data(
        options.emr_data_file,
        options.diseases_file,
        options.code2disease_file,
    )

    # Instantiate the OptimizeLogLikelihood class
    opt_log_likelihood = OptimizeLogLikelihood(options.verbose)
    opt_log_likelihood.set_use_random_seed(options.use_random_seed)

    # None is a singleton: test it by identity (PEP 8). Evaluated once here
    # instead of on every loop iteration.
    single_pair = (options.disease1 is not None
                   and options.disease2 is not None)

    # Degrees of freedom of the chi-square distribution.
    dof = 1

    # The context manager guarantees the results file is flushed and closed
    # even when an optimization raises part-way through a long run.
    with open("p_value.txt", 'w') as f:
        f.write("# emr_data_file = %s\n" % options.emr_data_file)
        f.write("# norm_prval_method = %s\n" % options.norm_prval_method)
        f.write("# threshold_type = %s\n" % options.threshold_type)
        f.write("\n")
        # NOTE(review): "indepedent_LL" is misspelled; left byte-identical in
        # case downstream readers match on this header.
        f.write("# D1, D2, overlap_type, "
                "overlap_LL, indepedent_LL, "
                "LLR, p_value\n")

        # Compute p-value for specified disease pair or for all disease
        # pairs with non-zero patient counts.
        if single_pair:
            D1_list = [options.disease1]
            D2_list = [options.disease2]
        else:
            diseases = database.get_diseases()
            diseases.sort()
            filtered_diseases = __filter_diseases(diseases, database)
            D1_list = filtered_diseases
            D2_list = filtered_diseases

        for D1 in D1_list:
            for D2 in D2_list:
                # When scanning all diseases, visit each unordered pair once
                # and skip self-pairs.
                if not single_pair and D1 >= D2:
                    continue

                # Single-argument print(...) emits the same text under the
                # Python 2 print statement and the Python 3 print function.
                print("-" * 80)
                print("D1= %s, D2= %s, overlap_type= %s"
                      % (D1, D2, options.overlap_type))

                # Independent (no genetic overlap) model
                indep_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    "independent",
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                )

                # Passed as the extra last argument of the overlap fit.
                # NOTE(review): presumably a floor the overlap fit must
                # reach - confirm in __compute_log_likelihood_wrapper.
                min_log_likelihood = indep_log_likelihood - 1.0

                # Allow genetic overlap model
                overlap_log_likelihood = __compute_log_likelihood_wrapper(
                    opt_log_likelihood,
                    database,
                    D1,
                    D2,
                    options.tau1,
                    options.tau2,
                    options.overlap_type,
                    options.threshold_type,
                    options.prevalence_file,
                    options.norm_prval_method,
                    min_log_likelihood,
                )

                log_likelihood_ratio = 2.0 * (overlap_log_likelihood
                                              - indep_log_likelihood)

                # p-value is the area at the right tail of the chi-square
                # distribution. The survival function is used instead of
                # 1 - cdf because the subtraction rounds to exactly 0 once
                # cdf is within float precision of 1, losing small p-values.
                # NOTE(review): assumes `chi2` is scipy.stats.chi2 - confirm
                # against the file's imports.
                p_value = chi2.sf(log_likelihood_ratio, dof)

                text = "%s, %s, %s, %.3E, %.3E, %.3E, %.3E" % (
                    D1,
                    D2,
                    options.overlap_type,
                    overlap_log_likelihood,
                    indep_log_likelihood,
                    log_likelihood_ratio,
                    p_value,
                )

                print("overlap_LL= %.2E, indep_LL= %.2E, LLR= %.2E, "
                      "p_value= %.2E"
                      % (overlap_log_likelihood, indep_log_likelihood,
                         log_likelihood_ratio, p_value))
                print("-" * 80)
                print("")

                f.write(text + '\n')