Exemple #1
0
def main():
    """Read cases from a TSV file and store them in the database.

    The path of the input TSV is taken from the first command-line argument.
    Every case returned by parse_in() describes two alleles (as cDNA and
    protein variant strings), a publication and a patient id.  Variants,
    alleles and the publication are stored or retrieved first; their ids are
    then written to the "cases" table through store_or_update().

    The cursor and the connection are closed even if processing is cut short
    by an error or by an exit() in one of the helpers.
    """
    if len(argv) < 2:
        print(f"usage: {argv[0]} <input tsv>")
        # a missing argument is an error: signal it through the exit status
        exit(1)

    in_tsv = argv[1]
    cases = parse_in(in_tsv)

    db, cursor = abca4_connect()
    try:
        for case in cases:
            # NOTE(review): `value` is unpacked but never written to the
            # database, although the "store case" comment below lists it
            # among the stored fields -- confirm whether it belongs in
            # update_fields.
            [
                pubmed_id, ref, patient_id, c1, p1, c2, p2, value, onset,
                progression_string
            ] = case
            print(case)
            # for allele 1, allele 2
            allele_ids = []
            for [cdna_variants, protein_variants] in [[c1, p1], [c2, p2]]:
                # store or retrieve variant ids
                variant_ids = store_variants(cursor, cdna_variants,
                                             protein_variants)
                # store or retrieve allele id
                allele_id = store_allele(cursor, variant_ids)
                if not allele_id:
                    panic(["no allele id for "] + variant_ids)
                allele_ids.append(allele_id)

            # store or retrieve publication id
            publication_id = store_publication(cursor, pubmed_id, ref)
            # fixed_fields and update_fields are passed separately to
            # store_or_update()
            fixed_fields = {
                'publication_id': publication_id,
                'patient_xref_id': patient_id
            }
            update_fields = {
                'allele_id_1': allele_ids[0],
                'allele_id_2': allele_ids[1],
                'onset_age': onset,
                'acuity_type': 'decimal',
                'eye': 'better',
                'progression': progression_string
            }

            # store case: allele_id_1, allele_id_2, publication_id,
            # patient_id, onset, value, better, progression
            store_or_update(cursor,
                            "cases",
                            fixed_fields=fixed_fields,
                            update_fields=update_fields)
    finally:
        # release the database handles on every exit path
        try:
            cursor.close()
        finally:
            db.close()
Exemple #2
0
def store_variants(cursor, c, p, verbose=False):
    """Store the variants of one allele and return the list of their ids.

    c and p are semicolon-separated lists of cDNA and protein variant
    descriptions; they are paired up position by position and all spaces
    are removed from each description.  A cDNA entry equal to "np" is handed
    to store_variant_w_cdna_uknown() with its protein description only; any
    other entry goes to store_variant_w_known_cdna().  Both helpers receive
    the var_ids list (presumably they append the variant id to it -- the
    final length check relies on that).

    The program exits with status 1 if either list has no non-empty entry,
    if the two lists differ in length, or if fewer ids were collected than
    there are variants.  panic() is called when a helper reports failure.
    """
    cdna_vars = [v.replace(" ", "") for v in c.split(";")]
    protein_vars = [v.replace(" ", "") for v in p.split(";")]
    # str.split() never returns an empty list ("".split(";") == [""]), so
    # "nothing given" has to be detected as "no non-empty entry"
    if not any(cdna_vars):
        print(f"cdna vars empty  {c} {p}")
        exit(1)
    if not any(protein_vars):
        print(f"proteins vars empty  {c} {p}")
        exit(1)

    if len(cdna_vars) != len(protein_vars):
        print(f"var lengths not equal  {c} {p}")
        exit(1)

    var_ids = []
    for cdna_var, protein_var in zip(cdna_vars, protein_vars):
        if cdna_var == "np":
            ok = store_variant_w_cdna_uknown(cursor, protein_var, var_ids,
                                             verbose)
        else:
            ok = store_variant_w_known_cdna(cursor, cdna_var, protein_var,
                                            var_ids, verbose)
        if not ok:
            panic([cdna_vars, protein_vars])

    if len(var_ids) != len(cdna_vars):
        print(f"missing variant id for {cdna_vars} {protein_vars} (?)")
        exit(1)
    return var_ids
def main():
    """Plot simulation results against recorded progression for selected cases.

    Cases are selected from the `cases` table by the progression patterns in
    the query below, excluding those whose notes contain "caveat".  For each
    case the variant ids of both alleles are read from `alleles`, and a
    parameter row is looked up for every variant: first in
    `parametrization_literature`, and failing that in `parametrization`,
    whose row is used only if its third field is at most 0.001.

    A case is plotted only if every one of its variants has parameters and
    each allele consists of a single variant.  The cursor and the connection
    are closed on every exit path.
    """
    db, cursor = abca4_connect()
    try:
        # find cases with at least three progression points
        qry = "select id, allele_id_1, allele_id_2, onset_age, progression, publication_id from cases where  "
        qry += "(progression like '%:%:%:%' or (progression like '%:%:%' and onset_age is not null and onset_age>0)) "
        qry += "and (notes is null or notes not like '%caveat%')"

        # find the variants corresponding to those cases
        for [case_id, allele_id_1, allele_id_2, onset_age, progression, publication_id] in hard_landing_search(cursor, qry):
            params = {}
            variants = {}
            for ai in [allele_id_1, allele_id_2]:
                # variant_ids is stored as "-id-id-...-"; turn it into a list of id strings
                variants[ai] = hard_landing_search(cursor, f"select variant_ids from alleles where id={ai}")[0][0].strip("-").split("-")
                for v in variants[ai]:
                    ret = error_intolerant_search(cursor, f"select * from parametrization_literature where variant_id={v}")
                    if ret:
                        if len(ret) > 1: panic([f"multiple parametrization_literature for varid {v}"])
                        params[v] = ret[0]
                    else:
                        # is this null by any chance?
                        ret = error_intolerant_search(cursor, f"select * from parametrization where variant_id={v}")
                        if ret:
                            if len(ret) > 1: panic([f"multiple parametrization for varid {v}"])
                            [prm_id, var_id, e, t, notes] = ret[0]
                            if e > 0.001: continue  # this is expressing
                            params[v] = [prm_id, var_id, e, t, 0]

            # keep only if all variants have experimental support.
            # This is a membership test rather than a comparison of counts:
            # params is keyed by variant id, so when both alleles are the same
            # (or share a variant) it holds fewer entries than the two
            # variant lists together, and a count comparison would drop the case.
            if any(v not in params for v in variants[allele_id_1] + variants[allele_id_2]): continue

            # i am still not ready to deal with multiple variants per allele
            if len(variants[allele_id_1]) > 1 or len(variants[allele_id_2]) > 1: continue
            print()
            print(case_id, "onset age:", onset_age)
            print(progression, publication_id)
            for ai in [allele_id_1, allele_id_2]:
                print("\t", ai, variants[ai])
                for v in variants[ai]:
                    print(f"\t\t variant {v}    {params[v]}")
            varid1 = variants[allele_id_1][0]
            varid2 = variants[allele_id_2][0]
            # third and fourth field of the parameter row
            params1 = params[varid1][2:4]
            params2 = params[varid2][2:4]
            print(varid1, params1)
            print(varid2, params2)
            age, va = unpack_progression(progression)
            if onset_age and onset_age > 0:
                # prepend a point with value 1.0 one year before onset (not below age 0)
                age = [max(onset_age-1, 0)] + age
                va = [1.0] + va
            # plot_sim_results_vs_data(age, va, varid1, varid2, params1, params2, rpe_baseline)
            plot_sim_results_vs_data(age, va, varid1, varid2, params1, params2, 0.1)
    finally:
        # release the database handles on every exit path
        try:
            cursor.close()
        finally:
            db.close()
Exemple #4
0
def store_publication(cursor, pubmed_id, reference):
    """Store or retrieve a publication, by pubmed id if there is one.

    A pubmed id that is empty/None or the literal string "none" (in any
    capitalization) counts as missing.  With a usable pubmed id the work is
    delegated to store_publication_by_pubmed_id(); otherwise to
    store_publication_by_reference().  The return value is whatever the
    chosen helper returns.

    panic() is called when neither a usable pubmed id nor a reference is
    given.
    """
    has_pubmed_id = bool(pubmed_id) and pubmed_id.lower() != "none"

    # nothing at all to identify the publication by
    if not has_pubmed_id and not reference:
        panic(["null entry for publication"])

    if has_pubmed_id:
        return store_publication_by_pubmed_id(cursor, pubmed_id, reference)

    return store_publication_by_reference(cursor, reference)
Exemple #5
0
def store_publication_by_reference(cursor, reference):
    """Return the id of the publication with the given reference text.

    Looks the reference up in `publications`; if there is no such row, one
    is inserted and the largest id in the table is returned as its id.
    panic() is called if the reference matches more than one row.  The
    program exits if search_db() returns a true value for the insert.
    """
    # Double embedded single quotes so that a reference containing an
    # apostrophe cannot terminate the SQL string literal it is placed in.
    # NOTE(review): backslashes are left alone because their meaning inside
    # a string literal depends on the database and its SQL mode -- confirm.
    reference_sql = str(reference).replace("'", "''")

    publication_id = None
    ret = error_intolerant_search(
        cursor,
        f"select id from publications where reference='{reference_sql}'")
    if not ret:
        qry = f"insert into publications (reference) values ('{reference_sql}')"
        if search_db(cursor, qry, verbose=True): exit()
        # NOTE(review): max(id) is the new row only if ids grow with each
        # insert and nothing else inserts concurrently -- confirm.
        publication_id = hard_landing_search(
            cursor, "select max(id) from publications")[0][0]
    elif len(ret) > 1:
        panic(["multiple returns for", reference])
    else:
        publication_id = ret[0][0]
    return publication_id
Exemple #6
0
def store_allele(cursor, variant_ids):
    """Return the id of the allele made up of the given variant ids.

    The allele is identified by a signature string: the sorted variant ids
    joined by "-" with a leading and a trailing "-".  If no row in `alleles`
    carries that signature, one is inserted and the largest id in the table
    is returned.  panic() is called when the signature matches several
    rows; the program exits if search_db() returns a true value for the
    insert.
    """
    signature = "-{}-".format("-".join(map(str, sorted(variant_ids))))
    lookup = f"select id from alleles where variant_ids='{signature}'"
    rows = error_intolerant_search(cursor, lookup)

    if rows:
        if len(rows) > 1:
            panic(["multiple returns for", signature])
            # only reached if panic() returns
            return None
        return rows[0][0]

    # not seen before: create the row, then read back its id
    insert = f"insert into alleles (variant_ids) values ('{signature}')"
    if search_db(cursor, insert, verbose=True):
        exit()
    newest = hard_landing_search(cursor, "select max(id) from alleles")
    return newest[0][0]
Exemple #7
0
def store_publication(cursor, url, pmc, pubmed_id, ref):
    """Return the id of the publication identified by its url.

    The url is looked up in the `other_xref` column of `publications`.  If
    no row is found, one is inserted with the reference and the url, the
    largest id in the table is taken as its id, and the pubmed and
    pubmedcentral columns are filled in when pubmed_id / pmc are given.
    panic() is called if the url matches more than one row.  The program
    exits if search_db() returns a true value for any of the statements.
    """
    # Double embedded single quotes so that a text value containing an
    # apostrophe cannot terminate the SQL string literal it is placed in.
    url_sql = str(url).replace("'", "''")

    publication_id = None
    ret = error_intolerant_search(
        cursor, f"select id from publications where other_xref='{url_sql}'")
    if not ret:
        ref_sql = str(ref).replace("'", "''")
        qry = f"insert into publications (reference, other_xref) values ('{ref_sql}', '{url_sql}')"
        if search_db(cursor, qry, verbose=True): exit()
        publication_id = hard_landing_search(
            cursor, "select max(id) from publications")[0][0]
        if pubmed_id:
            # pubmed is written unquoted, i.e. as a numeric value
            qry = f"update publications set pubmed={pubmed_id} where id={publication_id}"
            if search_db(cursor, qry, verbose=True): exit()
        if pmc:
            pmc_sql = str(pmc).replace("'", "''")
            qry = f"update publications set pubmedcentral='{pmc_sql}' where id={publication_id}"
            if search_db(cursor, qry, verbose=True): exit()

    elif len(ret) > 1:
        # the lookup key was the url, so that is what gets reported
        panic(["multiple returns for", url])
    else:
        publication_id = ret[0][0]
    return publication_id