def map_ag_for_proposed_algo(antigen):
    """Return the distinct truncated alleles recorded for ``antigen``.

    Every allele stored under ``antigen`` in the module-level
    ``ag_to_allele_dict`` is passed through ``allele_truncate`` and the
    results are de-duplicated.

    Args:
        antigen: Key to look up in ``ag_to_allele_dict``.

    Returns:
        A list of unique truncated alleles. The list is built from a set,
        so its order is not defined.
    """
    unique_truncated = {
        allele_truncate(full_allele)
        for full_allele in ag_to_allele_dict[antigen]
    }
    return list(unique_truncated)
def allele_list_ags(allele_list, pop):
    """Rank the antigens implied by a list of alleles for one population.

    Each entry is cut at the first "+", stripped of trailing
    ``g``/``p``/``P``/``G``/space characters and truncated with
    ``hla.allele_truncate``. Its frequency in
    ``population_allele_frequencies[pop]`` (0.0 when absent) is added to the
    running total of the antigen found at index 0 of
    ``allele_to_ag_dict[allele]``. Totals are then normalised so they sum
    to 1 (left as-is when the grand total is zero).

    Args:
        allele_list: Iterable of allele strings.
        pop: Key into ``population_allele_frequencies``.

    Returns:
        A tuple ``(top_ag, bw46, sorted_af)`` where ``sorted_af`` is a list
        of ``(antigen, probability)`` pairs in descending probability and
        ``top_ag`` is the antigen of its first pair.

        NOTE(review): ``bw46`` is index 2 of the ``allele_to_ag_dict`` entry
        of the *last* allele processed, not necessarily of an allele that
        maps to ``top_ag`` -- confirm this is intended.

    Raises:
        IndexError: If ``allele_list`` is empty.
        KeyError: If a truncated allele is missing from ``allele_to_ag_dict``.
    """
    antigen_totals = {}
    for raw_allele in allele_list:
        allele = hla.allele_truncate(raw_allele.split("+")[0].rstrip("g p P G"))

        pop_freqs = population_allele_frequencies[pop]
        ag_freq = pop_freqs[allele] if allele in pop_freqs else 0.0

        antigen_entry = allele_to_ag_dict[allele]
        ag = antigen_entry[0]
        bw46 = antigen_entry[2]

        antigen_totals[ag] = antigen_totals.get(ag, 0.0) + float(ag_freq)

        # Per-allele trace line written to stdout.
        print(" ".join((allele, ag, str(ag_freq))))

    # Avoid dividing by zero when no allele carried any frequency.
    total = sum(antigen_totals.values()) or 1

    sorted_af = sorted(
        ((antigen, subtotal / total)
         for antigen, subtotal in antigen_totals.items()),
        key=operator.itemgetter(1),
        reverse=True,
    )
    return (sorted_af[0][0], bw46, sorted_af)
# NOTE(review): the next four statements read ``row_split``, which is not
# defined anywhere in the visible code. They look like the tail of a
# row-parsing loop whose header lies outside this excerpt -- confirm their
# nesting against the full file before merging.
ua_ag = row_split[0]
ua_ag_eqs = row_split[1:]
# Drop empty cells from the remaining columns.
ua_ag_eqs = list(filter(None, ua_ag_eqs))
UA_eq_dict[ua_ag] = ua_ag_eqs

UNOS_conversion_table_filename = "conversion_table.csv"

# Fill ag_to_allele_dict: antigen (second CSV column) -> list of distinct
# truncated alleles (first CSV column passed through hla.allele_truncate).
# The file is opened in a ``with`` block so the handle is closed even if a
# row fails to parse; the original left it open.
with open(UNOS_conversion_table_filename, 'r') as UNOS_conversion_table_file:
    for row in UNOS_conversion_table_file:
        expression_character = ""
        # Rows starting with "Allele" are skipped (presumably the header).
        if row.startswith("Allele"):
            continue
        columns = row.split(',')
        allele = columns[0]
        allele_4d = hla.allele_truncate(allele)
        antigen = columns[1]
        alleles_for_antigen = ag_to_allele_dict.setdefault(antigen, [])
        if allele_4d not in alleles_for_antigen:
            alleles_for_antigen.append(allele_4d)

final_dict = {}
def genotype_ags(genotype_list, pop):
    """Assign the most probable antigen pair to a list of genotypes.

    Each genotype is an ``"allele_1+allele_2"`` string. Both alleles are
    stripped of trailing ``g``/``p``/``P``/``G``/space characters and
    truncated with ``hla.allele_truncate``. The genotype frequency is the
    product of the two allele frequencies from
    ``population_allele_frequencies[pop]``, doubled when the two antigens
    differ, and is accumulated per ``"ag_1+ag_2"`` key. Totals are then
    normalised so they sum to 1 (left as-is when the grand total is zero).

    An allele missing from the population table contributes a frequency of
    0.0. Previously the value from an earlier genotype was silently reused
    in that case.

    Args:
        genotype_list: Iterable of ``"allele_1+allele_2"`` strings.
        pop: Key into ``population_allele_frequencies``.

    Returns:
        A tuple ``(ag_list, bw46_list, sorted_gf)``: ``sorted_gf`` is a list
        of ``(antigen_pair, probability)`` in descending probability and
        ``ag_list`` is the top pair joined with a comma.

        NOTE(review): ``bw46_list`` is built from index 2 of the
        ``allele_to_ag_dict`` entries of the *last* genotype processed, not
        necessarily of the top-ranked pair -- confirm this is intended.

    Raises:
        IndexError: If ``genotype_list`` is empty or a genotype has no "+".
        KeyError: If a truncated allele is missing from ``allele_to_ag_dict``.
    """
    geno_antigen_freq = {}
    for genotype in genotype_list:
        halves = genotype.split("+")
        allele_1 = hla.allele_truncate(halves[0].rstrip("g p P G"))
        allele_2 = hla.allele_truncate(halves[1].rstrip("g p P G"))

        ag_1 = allele_to_ag_dict[allele_1][0]
        bw46_1 = allele_to_ag_dict[allele_1][2]
        ag_2 = allele_to_ag_dict[allele_2][0]
        bw46_2 = allele_to_ag_dict[allele_2][2]

        # Look the frequencies up afresh for every genotype so a missing
        # allele counts as 0.0 instead of inheriting a stale value.
        pop_freqs = population_allele_frequencies[pop]
        ag_freq_1 = pop_freqs[allele_1] if allele_1 in pop_freqs else 0.0
        ag_freq_2 = pop_freqs[allele_2] if allele_2 in pop_freqs else 0.0

        if ag_1 == ag_2:
            gf = float(ag_freq_1) * float(ag_freq_2)
        else:
            # Two different antigens: the frequency product is doubled.
            gf = 2 * float(ag_freq_1) * float(ag_freq_2)

        geno_antigen = ag_1 + "+" + ag_2
        geno_antigen_freq[geno_antigen] = (
            geno_antigen_freq.get(geno_antigen, 0.0) + float(gf)
        )

    # Avoid dividing by zero when no genotype carried any frequency.
    TF = sum(geno_antigen_freq.values()) or 1
    for geno_antigen, subtotal in geno_antigen_freq.items():
        geno_antigen_freq[geno_antigen] = subtotal / TF
    print(geno_antigen_freq)

    sorted_gf = sorted(
        geno_antigen_freq.items(), key=operator.itemgetter(1), reverse=True
    )
    print(sorted_gf)

    top_ag_geno = sorted_gf[0][0]
    top_parts = top_ag_geno.split("+")
    ag_list = top_parts[0] + "," + top_parts[1]
    bw46_list = bw46_1 + "," + bw46_2
    return (ag_list, bw46_list, sorted_gf)
def genotype_ags(genotype_list, pop):
    """Assign the most common antigen pair to a genotype list from one locus.

    Each genotype is an ``"allele_1+allele_2"`` string. Both alleles are
    stripped of trailing ``g``/``p``/``P``/``G``/space characters and
    truncated with ``hla.allele_truncate``. The genotype frequency is the
    product of the two allele frequencies from
    ``population_allele_frequencies[pop]`` (0.0 for an allele that is not
    listed), doubled when the two antigens differ, and is accumulated per
    ``"ag_1+ag_2"`` key. Totals are then normalised so they sum to 1 (left
    as-is when the grand total is zero).

    Args:
        genotype_list: Iterable of ``"allele_1+allele_2"`` strings.
        pop: Key into ``population_allele_frequencies``.

    Returns:
        A tuple ``(ag_list, bw46_list, sorted_gf)``: ``sorted_gf`` is a list
        of ``(antigen_pair, probability)`` in descending probability and
        ``ag_list`` is the top pair joined with a comma.

        NOTE(review): ``bw46_list`` is built from index 2 of the
        ``allele_to_ag_dict`` entries of the *last* genotype processed, not
        necessarily of the top-ranked pair -- confirm this is intended.

    Raises:
        IndexError: If ``genotype_list`` is empty or a genotype has no "+".
        KeyError: If a truncated allele is missing from ``allele_to_ag_dict``.
    """
    pair_totals = {}
    for genotype in genotype_list:
        halves = genotype.split("+")
        allele_1 = hla.allele_truncate(halves[0].rstrip("g p P G"))
        allele_2 = hla.allele_truncate(halves[1].rstrip("g p P G"))

        entry_1 = allele_to_ag_dict[allele_1]
        ag_1 = entry_1[0]
        bw46_1 = entry_1[2]
        entry_2 = allele_to_ag_dict[allele_2]
        ag_2 = entry_2[0]
        bw46_2 = entry_2[2]

        pop_freqs = population_allele_frequencies[pop]
        freq_1 = float(pop_freqs[allele_1] if allele_1 in pop_freqs else 0.0)
        freq_2 = float(pop_freqs[allele_2] if allele_2 in pop_freqs else 0.0)

        # Two different antigens: the frequency product is doubled.
        gf = freq_1 * freq_2 if ag_1 == ag_2 else 2 * freq_1 * freq_2

        pair_key = "+".join((ag_1, ag_2))
        pair_totals[pair_key] = pair_totals.get(pair_key, 0.0) + float(gf)

    # Avoid dividing by zero when no genotype carried any frequency.
    total = sum(pair_totals.values()) or 1

    sorted_gf = sorted(
        ((pair_key, subtotal / total)
         for pair_key, subtotal in pair_totals.items()),
        key=operator.itemgetter(1),
        reverse=True,
    )

    top_pair = sorted_gf[0][0]
    ag_list = ",".join(top_pair.split("+")[:2])
    bw46_list = ",".join((bw46_1, bw46_2))
    return (ag_list, bw46_list, sorted_gf)