def main():
    """Load the cases listed in a TSV file into the database.

    The path of the input TSV is taken from the first command-line argument;
    without it a usage line is printed and the script exits.

    For every case returned by ``parse_in`` the two alleles are processed in
    turn (variants via ``store_variants``, allele via ``store_allele``), the
    publication is resolved via ``store_publication``, and the ``cases`` row
    identified by publication and patient id is handed to ``store_or_update``.
    """
    if len(argv) < 2:
        print(f"usage: {argv[0]} <input tsv>")
        exit()

    in_tsv = argv[1]
    cases = parse_in(in_tsv)

    db, cursor = abca4_connect()
    try:
        for case in cases:
            # NOTE(review): `value` is unpacked but never written to `cases`,
            # although the original to-do comment listed it among the stored
            # fields - confirm whether a column is missing from update_fields.
            [
                pubmed_id, ref, patient_id,
                c1, p1, c2, p2,
                value, onset, progression_string,
            ] = case
            print(case)

            # one allele id per (cdna, protein) pair: allele 1, then allele 2
            allele_ids = []
            for cdna_variants, protein_variants in [[c1, p1], [c2, p2]]:
                # store or retrieve variant ids
                variant_ids = store_variants(cursor, cdna_variants, protein_variants)
                # store or retrieve allele id
                allele_id = store_allele(cursor, variant_ids)
                if not allele_id:
                    panic(["no allele id for "] + variant_ids)
                allele_ids.append(allele_id)

            # store or retrieve publication id
            publication_id = store_publication(cursor, pubmed_id, ref)

            # fixed_fields identify the row; update_fields are (re)written
            fixed_fields = {
                'publication_id': publication_id,
                'patient_xref_id': patient_id,
            }
            update_fields = {
                'allele_id_1': allele_ids[0],
                'allele_id_2': allele_ids[1],
                'onset_age': onset,
                'acuity_type': 'decimal',
                'eye': 'better',
                'progression': progression_string,
            }
            store_or_update(cursor, "cases",
                            fixed_fields=fixed_fields,
                            update_fields=update_fields)
    finally:
        # release the connection even when a store step raises or exits
        cursor.close()
        db.close()
def store_variants(cursor, c, p, verbose=False):
    """Store the variants of one allele and return the list of their ids.

    Args:
        cursor: database cursor, passed through to the storing helpers.
        c: semicolon-separated cDNA variant descriptions; an entry equal to
            ``"np"`` is routed to ``store_variant_w_cdna_uknown``.
        p: semicolon-separated protein variant descriptions, paired with
            the entries of ``c`` by position.
        verbose: passed through to the storing helpers.

    Returns:
        The ``var_ids`` list that was handed to the storing helpers.

    The script exits when either list is blank, when the two lists differ
    in length, or when the number of collected ids does not match the
    number of variants. ``panic`` is called when a helper reports failure.
    """
    cdna_vars = [v.replace(" ", "") for v in c.split(";")]
    protein_vars = [v.replace(" ", "") for v in p.split(";")]

    # str.split never returns an empty list, so a length-zero test can not
    # detect missing input; check for blank content instead
    if not any(cdna_vars):
        print(f"cdna vars empty {c} {p}")
        exit()
    if not any(protein_vars):
        print(f"proteins vars empty {c} {p}")
        exit()
    if len(cdna_vars) != len(protein_vars):
        print(f"var lengths not equal {c} {p}")
        exit()

    var_ids = []
    for cdna_var, protein_var in zip(cdna_vars, protein_vars):
        # the helpers receive var_ids and return a success flag
        # (presumably they append the new id to var_ids - see the length check below)
        if cdna_var == "np":
            ok = store_variant_w_cdna_uknown(cursor, protein_var, var_ids, verbose)
        else:
            ok = store_variant_w_known_cdna(cursor, cdna_var, protein_var, var_ids, verbose)
        if not ok:
            panic([cdna_vars, protein_vars])

    if len(var_ids) != len(cdna_vars):
        print(f"missing variant id for {cdna_vars} {protein_vars} (?)")
        exit()

    return var_ids
def main():
    """Plot simulation results against reported progression for selected cases.

    Cases are selected whose ``progression`` matches ``%:%:%:%``, or matches
    ``%:%:%`` together with a positive ``onset_age``, and whose notes do not
    contain 'caveat'. For each case the variants of both alleles are looked
    up, and a parameter row is collected for each variant from
    ``parametrization_literature`` or, failing that, from ``parametrization``
    (only when its ``e`` value is at most 0.001). Cases with incomplete
    parameters or with more than one variant on an allele are skipped; the
    rest are printed and passed to ``plot_sim_results_vs_data``.
    """
    db, cursor = abca4_connect()
    try:
        # find cases with at least three progression points
        qry = (
            "select id, allele_id_1, allele_id_2, onset_age, progression, publication_id from cases where "
            "(progression like '%:%:%:%' or (progression like '%:%:%' and onset_age is not null and onset_age>0)) "
            "and (notes is null or notes not like '%caveat%')"
        )

        # find the variants corresponding to those cases
        for [case_id, allele_id_1, allele_id_2, onset_age, progression, publication_id] \
                in hard_landing_search(cursor, qry):
            params = {}
            variants = {}
            for ai in [allele_id_1, allele_id_2]:
                signature = hard_landing_search(
                    cursor, f"select variant_ids from alleles where id={ai}")[0][0]
                # drop the leading/trailing dashes, then split into variant ids
                variants[ai] = signature.strip("-").split("-")
                for v in variants[ai]:
                    ret = error_intolerant_search(
                        cursor, f"select * from parametrization_literature where variant_id={v}")
                    if ret:
                        if len(ret) > 1:
                            panic([f"multiple parametrization_literature for varid {v}"])
                        params[v] = ret[0]
                        continue

                    # no literature entry: is this null by any chance?
                    ret = error_intolerant_search(
                        cursor, f"select * from parametrization where variant_id={v}")
                    if not ret:
                        continue
                    if len(ret) > 1:
                        panic([f"multiple parametrization for varid {v}"])
                    [prm_id, var_id, e, t, notes] = ret[0]
                    if e > 0.001:
                        continue  # this is expressing
                    params[v] = [prm_id, var_id, e, t, 0]

            # keep only if all variants have experimental support
            # NOTE(review): when allele_id_1 == allele_id_2 `variants` holds a
            # single key, so the sum below counts its variants twice and the
            # case is always skipped - confirm that excluding homozygous
            # cases is intended.
            if len(params) != (len(variants[allele_id_1]) + len(variants[allele_id_2])):
                continue
            # i am still not ready to deal with multiple variants per allele
            if len(variants[allele_id_1]) > 1 or len(variants[allele_id_2]) > 1:
                continue

            print()
            print(case_id, "onset age:", onset_age)
            print(progression, publication_id)
            for ai in [allele_id_1, allele_id_2]:
                print("\t", ai, variants[ai])
                for v in variants[ai]:
                    print(f"\t\t variant {v} {params[v]}")

            varid1 = variants[allele_id_1][0]
            varid2 = variants[allele_id_2][0]
            # elements 2 and 3 of the parameter row (e and t for rows taken
            # from `parametrization`)
            params1 = params[varid1][2:4]
            params2 = params[varid2][2:4]
            print(varid1, params1)
            print(varid2, params2)

            age, va = unpack_progression(progression)
            if onset_age and onset_age > 0:
                # prepend a data point with value 1.0 one year before onset
                age = [max(onset_age - 1, 0)] + age
                va = [1.0] + va
            plot_sim_results_vs_data(age, va, varid1, varid2, params1, params2, 0.1)
    finally:
        # release the connection even when a query or the plotting step fails
        cursor.close()
        db.close()
def store_publication(cursor, pubmed_id, reference):
    """Store or look up a publication and return its id.

    A PubMed id counts as present when it is non-empty and is not the string
    "none" (case-insensitive). With a PubMed id the work is delegated to
    ``store_publication_by_pubmed_id``, otherwise to
    ``store_publication_by_reference``. ``panic`` is called when neither a
    PubMed id nor a reference is available.
    """
    has_pubmed_id = bool(pubmed_id) and pubmed_id.lower() != "none"

    # The original guard compared with `!= "none"`, which panicked for a real
    # PubMed id without a reference and let "none" without a reference through.
    if not has_pubmed_id and not reference:
        panic(["null entry for publication"])

    if has_pubmed_id:
        return store_publication_by_pubmed_id(cursor, pubmed_id, reference)
    return store_publication_by_reference(cursor, reference)
def store_publication_by_reference(cursor, reference):
    """Return the id of the publication with the given reference string.

    If no row in ``publications`` has this reference, one is inserted and
    the current maximum id of the table is returned. ``panic`` is called
    when more than one row matches.
    """
    # NOTE(review): `reference` is interpolated into the SQL text as-is, so a
    # reference containing a single quote will break both statements.
    matches = error_intolerant_search(
        cursor, f"select id from publications where reference='{reference}'")

    if not matches:
        insert_qry = f"insert into publications (reference) values ('{reference}')"
        # a truthy return from search_db ends the script
        if search_db(cursor, insert_qry, verbose=True):
            exit()
        newest = hard_landing_search(cursor, "select max(id) from publications")
        return newest[0][0]

    if len(matches) > 1:
        panic(["multiple returns for", reference])
        return None

    return matches[0][0]
def store_allele(cursor, variant_ids):
    """Return the id of the allele made up of the given variant ids.

    The allele is identified by a signature string: the sorted ids joined
    with dashes, with a leading and a trailing dash. If no row in ``alleles``
    has this signature, one is inserted and the current maximum id of the
    table is returned. ``panic`` is called when more than one row matches.
    """
    sorted_ids = sorted(variant_ids)
    variant_signature = "-" + "-".join(str(vid) for vid in sorted_ids) + "-"

    matches = error_intolerant_search(
        cursor, f"select id from alleles where variant_ids='{variant_signature}'")

    if not matches:
        insert_qry = f"insert into alleles (variant_ids) values ('{variant_signature}')"
        # a truthy return from search_db ends the script
        if search_db(cursor, insert_qry, verbose=True):
            exit()
        newest = hard_landing_search(cursor, "select max(id) from alleles")
        return newest[0][0]

    if len(matches) > 1:
        panic(["multiple returns for", variant_signature])
        return None

    return matches[0][0]
def store_publication(cursor, url, pmc, pubmed_id, ref):
    """Return the id of the publication identified by ``url``.

    The publication is looked up by its ``other_xref`` column. If it is not
    found, a row with ``ref`` and ``url`` is inserted, the current maximum id
    of the table is taken as its id, and the ``pubmed`` / ``pubmedcentral``
    columns are filled in when ``pubmed_id`` / ``pmc`` are given. ``panic``
    is called when more than one row matches ``url``.
    """
    # NOTE(review): url, ref and pmc are interpolated into the SQL text
    # as-is; a value containing a single quote will break the statement.
    ret = error_intolerant_search(
        cursor, f"select id from publications where other_xref='{url}'")

    if not ret:
        qry = f"insert into publications (reference, other_xref) values ('{ref}', '{url}')"
        # a truthy return from search_db ends the script
        if search_db(cursor, qry, verbose=True):
            exit()
        publication_id = hard_landing_search(
            cursor, "select max(id) from publications")[0][0]

        if pubmed_id:
            qry = f"update publications set pubmed={pubmed_id} where id={publication_id}"
            if search_db(cursor, qry, verbose=True):
                exit()
        if pmc:
            qry = f"update publications set pubmedcentral='{pmc}' where id={publication_id}"
            if search_db(cursor, qry, verbose=True):
                exit()
        return publication_id

    if len(ret) > 1:
        # report the key that was actually searched for (was: pubmed_id)
        panic(["multiple returns for", url])
        return None

    return ret[0][0]