def id2rs_spdi(varid, build="38"):
    '''
    THIS METHOD IS NOT RELIABLE FOR INDELS AS SEVERAL DIFFERENT SPDI NOTATIONS CAN BE EQUIVALENT

    For a given variant ID (chr_pos_A1_A2), return a set of matching rs IDs
    Variant ID is converted to SPDI which is used in a variant_recoder query

    Input: variant ID, build (default: 38)
    Output: set of rs IDs
    '''
    S = set()
    if utils.isRS(varid):
        return {varid}
    if not utils.checkID(varid):
        LOGGER.error("Variant ID %s is malformed" % varid)
        return S
    # chr_pos_A1_A2 does not fix which allele is the reference,
    # so query both allele orientations and collect all hits
    for V in (utils.convertVariantID(varid),
              utils.convertVariantID(varid, reverse=True)):
        spdi = utils.var2spdi(V)
        r = query.restQuery(query.makeRSQueryURL(spdi, build=build), quiet=True)
        # r is a list of dicts keyed by the queried allele string
        if r is not None:
            LOGGER.debug("Got results for %s" % (str(V)))
            LOGGER.debug("\n%s" % json.dumps(r, indent=4, sort_keys=True))
            for x1 in r:
                for x2 in x1:
                    if "id" in x1[x2]:
                        S.update(x1[x2]["id"])
        else:
            LOGGER.debug("No results for %s" % (str(V)))
    return S
def addPhenotypesToRSList(rsIDs, build="38"):
    '''
    For a list of rs IDs, return a dict mapping each rs ID to a set of phenotype traits.

    Input: list of rs IDs (may contain "NA" entries, which are skipped), build (default: 38)
    Output: dict rsID --> set of trait strings; IDs with no phenotype data map to an empty set
    '''
    LOGGER.debug("Input rs list: %d variants" % len(rsIDs))
    R = dict()
    # exclude possible NAs first; POST queries are chunked to the server limit
    for L in utils.chunks(list(filter(lambda x: x != "NA", rsIDs)),
                          config.VARIATION_POST_MAX):
        r = query.restQuery(query.makeRSPhenotypeQueryURL(build=build),
                            data=utils.list2string(L),
                            qtype="post")
        if r is not None:
            LOGGER.debug(
                "\n=== phenotype query ====\n%s\n==========================\n"
                % json.dumps(r, indent=4, sort_keys=True))
            for v in r:
                # the response may contain synonyms; only keep the IDs we asked for
                if v not in rsIDs:
                    continue
                if "phenotypes" in r[v]:
                    # drop the placeholder "phenotype not specified" annotations
                    R[v] = {
                        x["trait"]
                        for x in r[v]["phenotypes"]
                        if not re.search(r"phenotype\s+not\s+specified", x["trait"])
                    }
                else:
                    R[v] = set()
    # any input ID without results (including "NA") maps to an empty set
    for v in set(rsIDs) - (set(R.keys()) - {"NA"}):
        R[v] = set()
    return R
def addConsequencesToIDList(varIDs, build="38", most_severe_only=False, gene_key="gene_id"):
    '''
    For a list of variant IDs (chr_pos_A1_A2), return a dict mapping each ID to
    a dict {gene: most severe consequence within that gene}.

    Input: list of variant IDs, build (default: 38),
           most_severe_only: if True, keep only the gene matching the overall
                             most severe consequence,
           gene_key: VEP field used to group transcript consequences
                     (default: "gene_id")
    Output: dict ID --> {gene: consequence}; IDs without results map to {"NA": "NA"}
    '''
    LOGGER.debug("Input ID list: %d variants" % len(varIDs))
    R = dict()
    # double check, make sure IDs have correct format;
    # each accepted ID contributes at most one VEP record, chunk size halved to stay safe
    for L in utils.chunks(list(filter(utils.checkID, varIDs)),
                          config.VEP_POST_MAX // 2):
        h = {"variants": []}
        for varid in L:
            # use whichever allele orientation matches the reference sequence
            V = utils.convertVariantID(varid)
            if utils.checkDEL(V, build=build):
                h["variants"].append(utils.variant2vep(V))
            else:
                V = utils.convertVariantID(varid, reverse=True)
                if utils.checkDEL(V, build=build):
                    h["variants"].append(utils.variant2vep(V))
        r = query.restQuery(query.makeVepListQueryURL(build=build),
                            data=json.dumps(h),
                            qtype="post")
        if r is not None:
            LOGGER.debug(
                "\n======= VEP query ========\n%s\n==========================\n"
                % json.dumps(r, indent=4, sort_keys=True))
            for x in r:
                rs = x["id"]
                mcsq = x.get("most_severe_consequence", "NA")
                H = dict()
                if "transcript_consequences" in x:
                    # group all consequence terms by gene, then reduce each
                    # gene's list to its single most severe term
                    for g in x["transcript_consequences"]:
                        H.setdefault(g[gene_key], []).extend(g["consequence_terms"])
                    for g in H:
                        H[g] = utils.getMostSevereConsequence(H[g])
                else:
                    H["NA"] = mcsq
                if most_severe_only is True:
                    if mcsq == "NA":
                        R[rs] = {"NA": "NA"}
                    else:
                        # pick a gene whose consequence equals the overall most severe one
                        g0 = "NA"
                        for g in H:
                            if H[g] == mcsq:
                                g0 = g
                        R[rs] = {g0: mcsq}
                else:
                    R[rs] = H
    s = set(varIDs) - (set(R.keys()) - {"NA"})
    LOGGER.debug("No consequences found for %d IDs" % len(s))
    for v in s:
        R[v] = {"NA": "NA"}
    return R
def rs2spdi(ID, build="38"):
    '''
    Return the list of SPDI strings reported for a given rs ID.

    Input: rs ID, build (default: 38)
    Output: list of unique SPDI strings (empty list if the query fails)
    '''
    spdi_list = []
    response = query.restQuery(query.makeRSQueryURL(ID, build=build))
    if response:
        LOGGER.debug("\n%s" % json.dumps(response, indent=4, sort_keys=True))
        for record in response:
            for key in record:
                # skip records without an SPDI annotation
                if "spdi" not in record[key]:
                    continue
                for s in record[key]["spdi"]:
                    # preserve first-seen order, no duplicates
                    if s not in spdi_list:
                        spdi_list.append(s)
    return spdi_list
def rsList2position(L, build="38", alleles=False):
    '''
    Input: list of rsID, build (default: 38), alleles=True/False (if we need alleles as well)
    Output: a dictionary rsID --> [{"chr":c,"pos":p,"ref":r,"alt":a}, ...], or None if query fails
    '''
    D = {}
    data = utils.list2string(L)
    url = query.makeRSListQueryURL(build=build)
    z = query.restQuery(url, qtype="post", data=data)
    if z:
        for x in z:
            inputID = x["input"]
            D[inputID] = []
            for spdi in x["spdi"]:
                h = query.parseSPDI(spdi, build=build, alleles=alleles)
                p = h["pos"]
                c = h["chr"]
                ref = h["ref"]
                alt = h["alt"]
                # BUGFIX: the duplicate-check result used to be stored in "z",
                # shadowing the response list being iterated; use a fresh name
                if alleles:
                    hit = next((m for m in D[inputID]
                                if m["chr"] == c and m["pos"] == p
                                and m["ref"] == ref and m["alt"] == alt), None)
                else:
                    hit = next((m for m in D[inputID]
                                if m["chr"] == c and m["pos"] == p), None)
                if not hit:
                    D[inputID].append({
                        "chr": c,
                        "pos": p,
                        "ref": ref,
                        "alt": alt
                    })
        # in case some input IDs are missing in the response
        # for ID in L:
        #     if not ID in D:
        #         D[ID]=[{"chr":None,"pos":None,"ref":None,"alt":None}]
    else:
        return None
    return D
def addConsequencesToRSList(rsIDs, build="38", most_severe_only=False, gene_key="gene_id"):
    '''
    For a list of rs IDs, return a dict mapping each rs ID to a dict
    {gene: most severe consequence within that gene}.

    Input: list of rs IDs (may contain "NA" entries, which are skipped),
           build (default: 38),
           most_severe_only: if True, keep only the gene matching the overall
                             most severe consequence,
           gene_key: VEP field used to group transcript consequences
                     (default: "gene_id")
    Output: dict rsID --> {gene: consequence}; IDs without results map to {"NA": "NA"}
    '''
    LOGGER.debug("Input rs list: %d variants" % len(rsIDs))
    R = dict()
    # exclude possible NAs from the input list first
    for L in utils.chunks(list(filter(lambda x: x != "NA", rsIDs)),
                          config.VEP_POST_MAX):
        r = query.restQuery(query.makeVepRSListQueryURL(build=build),
                            data=utils.list2string(L),
                            qtype="post")
        if r is not None:
            LOGGER.debug(
                "\n======= VEP query ========\n%s\n==========================\n"
                % json.dumps(r, indent=4, sort_keys=True))
            for x in r:
                rs = x["id"]
                mcsq = x.get("most_severe_consequence", "NA")
                H = dict()
                if "transcript_consequences" in x:
                    # group all consequence terms by gene, then reduce each
                    # gene's list to its single most severe term
                    for g in x["transcript_consequences"]:
                        H.setdefault(g[gene_key], []).extend(g["consequence_terms"])
                    for g in H:
                        H[g] = utils.getMostSevereConsequence(H[g])
                else:
                    H["NA"] = mcsq
                if most_severe_only is True:
                    if mcsq == "NA":
                        R[rs] = {"NA": "NA"}
                    else:
                        # pick a gene whose consequence equals the overall most severe one
                        g0 = "NA"
                        for g in H:
                            if H[g] == mcsq:
                                g0 = g
                        R[rs] = {g0: mcsq}
                else:
                    R[rs] = H
    s = set(rsIDs) - (set(R.keys()) - {"NA"})
    LOGGER.debug("No consequences found for %d rs IDs" % len(s))
    for v in s:
        R[v] = {"NA": "NA"}
    return R
def rs2position(ID, build="38", alleles=False):
    '''
    Given rsID, return a list of dictionaries with keys "chr", "pos"

    Input: rsID, build (default: 38), alleles=True/False (if we need alleles as well)
    Output: a list of dictionaries with keys "chr", "pos", "ref", "alt",
            or None if query fails
    '''
    L = []
    z = query.restQuery(query.makeRSQueryURL(ID, build=build))
    if z:
        # BUGFIX: debug output goes to the logger, not stdout (was a stray print)
        LOGGER.debug("\n%s" % json.dumps(z, indent=4, sort_keys=True))
        for x in z:
            for x1 in x:
                # guard against records without an SPDI annotation
                # (consistent with rs2spdi)
                if "spdi" not in x[x1]:
                    continue
                for spdi in x[x1]["spdi"]:
                    LOGGER.debug("SPDI: %s" % spdi)
                    h = query.parseSPDI(spdi, build=build, alleles=alleles)
                    p = h["pos"]
                    c = h["chr"]
                    ref = h["ref"]
                    alt = h["alt"]
                    LOGGER.debug("%s:%d:%s:%s" % (c, p, ref, alt))
                    # BUGFIX: the duplicate-check result used to be stored in "z",
                    # shadowing the response list being iterated; use a fresh name
                    if alleles:
                        hit = next((m for m in L
                                    if m["chr"] == c and m["pos"] == p
                                    and m["ref"] == ref and m["alt"] == alt), None)
                    else:
                        hit = next((m for m in L
                                    if m["chr"] == c and m["pos"] == p), None)
                    if not hit:
                        L.append({"chr": c, "pos": p, "ref": ref, "alt": alt})
    else:
        return None
    return L
def id2rs_mod2(varid, build="38"):
    '''
    For a given variant ID (chr_pos_A1_A2), return a set of matching rs IDs

    Input: variant ID, build (default: 38)
    Output: set of rs IDs
    '''
    S = set()
    if utils.isRS(varid):
        return {varid}
    if not utils.checkID(varid):
        LOGGER.error("Variant ID %s is malformed" % varid)
        return S
    V = utils.convertVariantID(varid)
    V1 = utils.convertVariantID(varid, reverse=True)
    window = max(len(V["del"]), len(V["ins"]))
    if utils.getVarType(V) == "SNP":
        r = query.restQuery(
            query.makeOverlapVarQueryURL(V["seq"], V["pos"], V["pos"], build=build))
        if not r:
            return S
        for v in r:
            # SNP match: both alleles present, forward strand, single-base span
            if V["del"] in v["alleles"] and V["ins"] in v["alleles"] \
               and v["strand"] == 1 and v["start"] == v["end"]:
                S.add(v["id"])
    else:
        # indel: pull all variants in a window around the position
        r = query.restQuery(
            query.makeOverlapVarQueryURL(V["seq"], V["pos"] - window,
                                         V["pos"] + window, build=build))
        if not r:
            return S
        LOGGER.debug("Got %d variants around %s:%d\n" % (len(r), V["seq"], V["pos"]))
        LOGGER.debug("\n%s" % json.dumps(r, indent=4, sort_keys=True))
        # only save indel IDs in L ("-" or multi-base allele marks an indel)
        L = []
        for v in r:
            if "alleles" in v and "id" in v:
                for a in v["alleles"]:
                    if a == "-" or len(a) > 1:
                        L.append(v["id"])
                        break
        if len(L) == 0:
            LOGGER.debug("No indels found")
            return S
        LOGGER.debug("%d indels found: %s" % (len(L), str(L)))
        # FIX(TODO): POST the candidate list in chunks so we never exceed
        # the allowed POST size
        for chunk in utils.chunks(L, config.VARIATION_POST_MAX):
            z1 = query.restQuery(query.makeRSListQueryURL(build=build),
                                 qtype="post",
                                 data=utils.list2string(chunk))
            # FIX: guard against a failed query before iterating the response
            if z1 is None:
                continue
            LOGGER.debug(
                "\n=======================\n%s\n==========================\n"
                % json.dumps(z1, indent=4, sort_keys=True))
            LOGGER.debug("---------- CHECK START ----------------\n")
            for v in z1:
                for x1 in v:
                    if "spdi" in v[x1] and "id" in v[x1]:
                        var = v[x1]["id"][0]
                        for spdi in v[x1]["spdi"]:
                            V2 = utils.convertSPDI(spdi, build=build)
                            LOGGER.debug("SPDI: %s; V2: %s" % (spdi, V2))
                            # accept the rs ID if either allele orientation
                            # is equivalent to the candidate's SPDI
                            if utils.equivalentVariants(V, V2, build=build) \
                               or utils.equivalentVariants(V1, V2, build=build):
                                S.add(var)
                                break
            LOGGER.debug("----------- CHECK END -----------------\n")
    return S
def id2rs_list(varIDs, build="38", skip_non_rs=False, keep_all=True):
    '''
    For a list of variant IDs (chr_pos_A1_A2), return a dict mapping each ID
    to a set of matching rs IDs. A fast bulk SPDI lookup is tried first;
    variants left unmapped fall back to the slow per-variant method.

    Input: list of variant IDs, build (default: 38),
           skip_non_rs: if True, drop non-rs IDs from the result sets
                        (sets that become empty map to {"NA"}),
           keep_all: if False, keep only one (arbitrary) rs ID per variant
    Output: dict variant ID --> set of rs IDs
    '''
    H = dict()  # SPDI --> input variant ID
    R = dict()  # result: variant ID --> set of rs IDs
    # TODO: check ID validity and if it's an rsID
    # trying fast method first
    LOGGER.debug("Input variant list: %d elements" % len(varIDs))
    c = 0
    # estimated total number of chunks (two SPDIs per input ID), for progress logging
    t = 2 * len(varIDs) // config.VARIATION_POST_MAX
    if t % 2:
        t = t + 1
    for L in utils.chunks(varIDs, config.VARIATION_POST_MAX // 2):
        L1 = list()
        for x in L:
            # TODO: checks
            # query both allele orientations for each input ID
            spdi = utils.var2spdi(utils.convertVariantID(x))
            H[spdi] = x
            L1.append(spdi)
            spdi = utils.var2spdi(utils.convertVariantID(x, reverse=True))
            H[spdi] = x
            L1.append(spdi)
        # FIX: bounded retry loop — the previous "while r is None" could hang
        # forever on a persistent server error; variants in a failed chunk are
        # picked up by the slow method below
        r = None
        attempts = 0
        while r is None and attempts < 10:
            r = query.restQuery(query.makeRSListQueryURL(build=build),
                                data=utils.list2string(L1),
                                qtype="post")
            if r is None:
                attempts += 1
                LOGGER.debug("Retrying")
        if r is None:
            LOGGER.error("POST query failed after %d attempts" % attempts)
            continue
        for x1 in r:
            for x2 in x1:
                if "id" in x1[x2]:
                    v = H[x1[x2]["input"]]
                    R.setdefault(v, set()).update(x1[x2]["id"])
        c += 1
        LOGGER.debug("Chunk %d (%d) done" % (c, t))
    LOGGER.debug("Found rsIDs for %d variants using fast method" % len(R.keys()))
    # slow method for unmapped
    unmapped = list(set(varIDs) - set(R.keys()))
    LOGGER.debug("Using slow method for %d variants" % len(unmapped))
    for v in unmapped:
        R[v] = id2rs_mod2(v, build)
    if skip_non_rs is True:
        LOGGER.debug("Filtering non rs IDs")
        for v in R:
            s = set(filter(utils.isRS, R[v]))
            R[v] = s if s else {"NA"}
    if keep_all is not True:
        LOGGER.debug("Keeping only one rs ID")
        c = 0
        for v in R:
            if len(R[v]) > 1:
                z = R[v].pop()
                R[v] = {z}
                c += 1
        LOGGER.debug("Truncated %d sets" % c)
    return R
help="varID", required=True) if len(sys.argv[1:]) == 0: parser.print_help() sys.exit(0) try: args = parser.parse_args() except: parser.print_help() sys.exit(0) if args.build != None: build = args.build rs = args.id logging.getLogger("variant").setLevel(logging.DEBUG) #--------------------------------------------------------------------------------------------------------------------------- data = query.restQuery(query.makeRsPhenotypeQuery2URL(rs, build)) L = list() if data: if "synonyms" in data: L = list(filter(lambda x: x != rs, data["synonyms"])) for x in L: print(x)
def id2rs_mod(varid, build="38"):
    '''
    For a given variant ID (chr_pos_A1_A2), return a set of matching rs IDs

    Input: variant ID, build (default: 38)
    Output: set of rs IDs
    '''
    S = set()
    if utils.isRS(varid):
        return {varid}
    if not utils.checkID(varid):
        LOGGER.error("Variant ID %s is malformed" % varid)
        return S
    V = utils.convertVariantID(varid)
    V1 = utils.convertVariantID(varid, reverse=True)
    # which allele orientation(s) match the reference sequence
    b = utils.checkDEL(V, build=build)
    b1 = utils.checkDEL(V1, build=build)
    window = max(len(V["del"]), len(V["ins"]))
    if utils.getVarType(V) == "SNP":
        r = query.restQuery(
            query.makeOverlapVarQueryURL(V["seq"], V["pos"], V["pos"], build=build))
        if not r:
            return S
        for v in r:
            # SNP match: both alleles present, forward strand, single-base span
            if V["del"] in v["alleles"] and V["ins"] in v["alleles"] \
               and v["strand"] == 1 and v["start"] == v["end"]:
                S.add(v["id"])
    else:
        # indel: pull all variants in a window around the position and
        # compare each of their SPDIs to both orientations of the input
        r = query.restQuery(
            query.makeOverlapVarQueryURL(V["seq"], V["pos"] - window,
                                         V["pos"] + window, build=build))
        if not r:
            return S
        LOGGER.debug("\n%s" % json.dumps(r, indent=4, sort_keys=True))
        LOGGER.debug("Got %d variants around %s:%d\n" % (len(r), V["seq"], V["pos"]))
        for v in r:
            LOGGER.debug("Current variant: %s" % v["id"])
            z = query.restQuery(query.makeRSQueryURL(v["id"], build=build))
            if not z:
                continue
            LOGGER.debug("\n%s" % json.dumps(z, indent=4, sort_keys=True))
            for x in z:
                for x1 in x:
                    spdis = x[x1]["spdi"]
                    var = x[x1]["id"][0]
                    for spdi in spdis:
                        LOGGER.debug("SPDI: %s" % spdi)
                        V2 = utils.convertSPDI(spdi, build=build)
                        LOGGER.debug("V2: %s" % V2)
                        if b and utils.equivalentVariants(V, V2, build=build):
                            S.add(var)
                            break
                        if b1 and utils.equivalentVariants(V1, V2, build=build):
                            S.add(var)
                            break
    return S
def id2rs(varid, build="38"):
    '''
    For a given variant ID (chr_pos_A1_A2), return a set of matching rs IDs

    Input: variant ID, build (default: 38)
    Output: set of rs IDs
    '''
    S = set()
    if varid.startswith("rs"):
        # BUGFIX: return a set, as documented and as every sibling
        # (id2rs_spdi, id2rs_mod, id2rs_mod2) does — not the bare string
        return {varid}
    m = re.search(r"^(\d+)_(\d+)_([ATGC]+)_([ATGC]+)", varid)
    if not m:
        LOGGER.error("%s is malformed" % varid)
        return S
    chrom = m.group(1)
    pos = int(m.group(2))
    a1 = m.group(3)
    a2 = m.group(4)
    if len(a1) == 1 and len(a2) == 1:
        # SNP
        r = query.restQuery(
            query.makeOverlapVarQueryURL(chrom, pos, pos, build=build))
        if not r:
            return S
        for v in r:
            if a1 in v["alleles"] and a2 in v["alleles"]:
                S.add(v["id"])
    else:
        # in case of indels, pull all variants around the variant's position
        window = max(len(a1), len(a2))
        r = query.restQuery(
            query.makeOverlapVarQueryURL(chrom, pos - window, pos + window,
                                         build=build))
        if not r:
            return S
        for v in r:
            z = query.restQuery(query.makeRSQueryURL(v["id"], build=build))
            if not z:
                continue
            for x in z:
                spdis = x["spdi"]
                var = x["id"][0]
                for spdi in spdis:
                    h = query.parseSPDI(spdi, alleles=True)
                    ref = h["ref"]
                    alt = h["alt"]
                    p = h["pos"]
                    c = h["chr"]
                    LOGGER.debug("%s : %s : %s %d %s %s" % (var, spdi, c, p, ref, alt))
                    # keep only indel SPDIs at the exact input position
                    if p != pos:
                        continue
                    if len(ref) == 1 and len(alt) == 1:
                        continue
                    # accept either allele orientation
                    if (ref == a1 and alt == a2) or (ref == a2 and alt == a1):
                        S.add(var)
                        break
    return S
def getVariantInfo(rs, build="38"):
    '''
    For a given variant ID, return a dictionary with variant information; keys are:
    "minor_allele"
    "MAF"
    "rsID"
    "class" : variant class
    "synonyms" : list of synonym IDs
    "consequence" : most severe consequence
    "mappings" : list of mapping dictionaries with keys: "chr", "pos", "ref", "alt",
                 "polyphen_score", "polyphen_prediction", "sift_score", "sift_prediction"
    "population_data" : list of dictionaries "population":{"allele":"frequency"}
                        (from phase 3 of 1KG)
    "phenotype_data" : list of dictionaries with keys "trait", "source", "risk_allele"
    "clinical_significance" : list of clinical significance terms
    "scores" : dictionary mapping "chr:pos" string to a dictionary with keys "gwava"
    '''
    res = dict()
    # in case provided ID is not an RS
    if not utils.isRS(rs):
        t = utils.splitID(rs)
        if t:
            # TODO: check if ref/alt mappings are correct: compare to reference sequence
            return {
                "minor_allele": None,
                "MAF": None,
                "rsID": None,
                "class": rs,
                "synonyms": [],
                "consequence": None,
                # both allele orientations, annotations unknown
                "mappings": [{
                    "chr": t["chr"],
                    "pos": t["pos"],
                    "ref": t["a1"],
                    "alt": t["a2"],
                    "polyphen_score": "NA",
                    "polyphen_prediction": "NA",
                    "sift_score": "NA",
                    "sift_prediction": "NA"
                }, {
                    "chr": t["chr"],
                    "pos": t["pos"],
                    "ref": t["a2"],
                    "alt": t["a1"],
                    "polyphen_score": "NA",
                    "polyphen_prediction": "NA",
                    "sift_score": "NA",
                    "sift_prediction": "NA"
                }],
                "population_data": None,
                "phenotype_data": None,
                "clinical_significance": None,
                "scores": None
            }
        else:
            return None
    #------------------- general information ---------------
    data = query.restQuery(query.makeRsPhenotypeQuery2URL(rs, build))
    if not data:
        return None
    res["minor_allele"] = data["minor_allele"]
    # only accept a numeric-looking MAF
    if re.search(r"[01]\.\d+", str(data["MAF"])):
        res["MAF"] = str(data["MAF"])
    else:
        res["MAF"] = "NA"
    res["rsID"] = rs
    res["class"] = data["var_class"]
    res["consequence"] = data["most_severe_consequence"]
    if "synonyms" in data:
        res["synonyms"] = list(filter(lambda x: x != rs, data["synonyms"]))
    else:
        res["synonyms"] = []
    #------------------- mappings ----------------------
    mappings = list()
    z = query.restQuery(query.makeRSQueryURL(rs, build=build))
    if z is None:
        return None
    for x in z:
        spdis = x["spdi"]
        for spdi in spdis:
            h = query.parseSPDI(spdi, alleles=True)
            mappings.append({
                "chr": h["chr"],
                "pos": h["pos"],
                "ref": h["ref"],
                "alt": h["alt"],
                "sift_score": "NA",
                "sift_prediction": "NA",
                "polyphen_score": "NA",
                "polyphen_prediction": "NA"
            })
    #------------------ population data ----------------
    population_data = list()
    # FIX: tolerate responses without a "populations" key
    for pop in data.get("populations", []):
        pop_name = pop["population"].split(":")
        # only keep 1000 Genomes phase 3 populations
        if pop_name[0] == "1000GENOMES" and pop_name[1] == "phase_3":
            name = pop_name[2]
            try:
                rec = next(x for x in population_data if name == x["population"])
                rec["frequency"][pop["allele"]] = pop["frequency"]
            except StopIteration:
                # FIX: catch only StopIteration (from next()), not everything
                population_data.append({
                    "population": name,
                    "frequency": {
                        pop["allele"]: pop["frequency"]
                    }
                })
    #------------------ phenotype data -------------------
    phenotype_data = list()
    # FIX: tolerate responses without a "phenotypes" key
    for p in data.get("phenotypes", []):
        trait = p["trait"] if "trait" in p else "NA"
        source = p["source"] if "source" in p else "NA"
        risk = p["risk_allele"] if "risk_allele" in p else "NA"
        if trait:
            phenotype_data.append({
                "trait": trait,
                "source": source,
                "risk_allele": risk
            })
    #------------------ clinical significance -------------------
    clinical_significance = list()
    if "clinical_significance" in data:
        for cs in data["clinical_significance"]:
            if cs != "other" and cs != "not provided":
                clinical_significance.append(cs)
    #---------------- chr:pos dependent scores -----------------
    scores = dict()
    for m in mappings:
        scores[m["chr"] + ":" + str(m["pos"])] = {"gwava": "NA"}
    #-----------------------------------------------------
    res["mappings"] = mappings
    res["population_data"] = population_data
    res["phenotype_data"] = phenotype_data
    res["clinical_significance"] = clinical_significance
    res["scores"] = scores
    return res
def getVariantsWithPhenotypes(chrom, pos, window=config.PHENO_WINDOW, build="38"):
    '''
    For a given genomic region, return dataframe containing variants with phenotype annotations

    Input: chromosome, position, window (default: config.PHENO_WINDOW), build (default: "38")
    Output: pandas dataframe with columns:
    "ID", "Consequence", "Location", "Phenotype", "Source", "Link"
    '''
    start = pos - window
    end = pos + window
    if start < 1:
        start = 1
    # FIX: one column schema for both the empty and the populated result
    # (the empty frame used to have "Distance" while rows carried "Link")
    columns = ["ID", "Consequence", "Location", "Phenotype", "Source", "Link"]
    empty_df = pd.DataFrame(columns=columns)
    if end - start > 5000000:
        LOGGER.error("Maximal region size allowed: 5Mbp")
        return empty_df
    LOGGER.debug("%s:%d; window: %d" % (chrom, pos, window))
    variants = query.restQuery(query.makePhenoOverlapQueryURL(chrom,
                                                             start,
                                                             end,
                                                             build=build),
                               qtype="get")
    # covers both a failed query and an empty result list
    if not variants:
        LOGGER.info(
            "No variants with phenotypes were found in the region %s:%d-%d"
            % (chrom, start, end))
        return empty_df
    rsIDs = [var["id"] for var in variants]
    LOGGER.info(
        "%d variant(s) with phenotypes were found in the region %s:%d-%d"
        % (len(rsIDs), chrom, start, end))
    i = 0
    df = pd.DataFrame(columns=columns)
    for L in utils.chunks(rsIDs, config.VARIATION_POST_MAX):
        r = query.restQuery(query.makeRSPhenotypeQueryURL(build=build),
                            data=utils.list2string(L),
                            qtype="post")
        if r:
            for rsID in r:
                for phenotype in r[rsID]["phenotypes"]:
                    # skip placeholder "phenotype not specified" annotations
                    if re.search(r"phenotype\s+not\s+specified", phenotype["trait"]):
                        continue
                    # only report mappings on the queried chromosome
                    x = next((m for m in r[rsID]["mappings"]
                              if m["seq_region_name"] == chrom), None)
                    if not x:
                        continue
                    # link back to the most specific source available
                    link = utils.makeLink(config.ENSEMBL_PHENO_URL % rsID, "ENSEMBL")
                    if phenotype["source"] == "ClinVar":
                        link = utils.makeLink(config.CLINVAR_URL + rsID, "ClinVar")
                    elif phenotype["source"] == "NHGRI-EBI GWAS catalog":
                        link = utils.makeLink(config.NHGRI_URL + rsID, "NHGRI-EBI")
                    df.loc[i] = [
                        rsID,
                        r[rsID]["most_severe_consequence"].replace("_", " "),
                        chrom + ":" + str(x["start"]),
                        phenotype["trait"],
                        phenotype["source"],
                        link
                    ]
                    i += 1
    return df
def f(ID): L = ID.split("_") L.insert(2, ID) return " ".join(L) + " . . ." #--------------------------------------------------------------------------------------------------------------------------- for L in utils.chunks([line.rstrip() for line in sys.stdin.readlines()], config.VEP_POST_MAX): string = "{\"variants\":[\"" + "\",\"".join(list(map(lambda x: f(x), L))) + "\"]}" LOGGER.debug("data: %s" % (string)) r = query.restQuery(query.makeVepListQueryURL(build=build), data=string, qtype="post") if r: print(json.dumps(r, indent=4, sort_keys=True)) for x in r: rsid = "NA" if "colocated_variants" in x: if "id" in x["colocated_variants"][0]: rsid = x["colocated_variants"][0]["id"] mcsq = x[ "most_severe_consequence"] if "most_severe_consequence" in x else "NA" H = {} if "transcript_consequences" in x: for g in x["transcript_consequences"]: gene_id = g["gene_id"] csq = g["consequence_terms"][0]
line_width=1, line_dash="dashed") label = Label(x=pos, y=e['logp'].max(), text=traits, angle=90, angle_units="deg", text_align="right", text_color="firebrick", text_font_size="11pt", text_font_style="italic") p.add_layout(label) overlapping_genes = query.restQuery( query.makeGeneOverlapQueryURL(str(e['#chr'][0]), e['ps'].min(), e['ps'].max(), build="38")) genes_df = pd.DataFrame(json.loads(json.dumps(overlapping_genes))) #print(json.dumps(overlapping_genes,indent=4,sort_keys=True)) #genes_df.to_csv(sys.stdout,sep="\t",index=False) # overlapping_GWASCAT_vars=query.restQuery(query.makeOverlapVarGWASCATQueryURL(str(e['#chr'][0]),e['ps'].min(),e['ps'].max(),build="38")) # cat=pd.DataFrame(json.loads(json.dumps(overlapping_GWASCAT_vars))) # print(cat) #print("") #cat.to_csv(sys.stdout,sep="\t",index=False) # TODO: max POST size # pheno_vars=query.restQuery(query.makeRSPhenotypeQueryURL(build="38"),data=utils.list2string(cat["id"].tolist()),qtype="post") # for rsid in pheno_vars:
'%(levelname)s - %(name)s - %(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S') ch.setFormatter(formatter) LOGGER.addHandler(ch) logging.getLogger("varannot.variant").addHandler(ch) logging.getLogger("varannot.variant").setLevel(logging.DEBUG) logging.getLogger("varannot.query").addHandler(ch) logging.getLogger("varannot.query").setLevel(logging.DEBUG) #--------------------------------------------------------------------------------------------------------------------------- if rs1 == rs2: sys.exit(0) data1 = query.restQuery(query.makeRsPhenotypeQuery2URL(rs1, build)) data2 = query.restQuery(query.makeRsPhenotypeQuery2URL(rs2, build)) L1 = list() L2 = list() if data1: if "synonyms" in data1: L1 = list(filter(lambda x: x != rs1, data1["synonyms"])) if rs2 in L1: sys.exit(0) if data2: if "synonyms" in data2: L2 = list(filter(lambda x: x != rs2, data2["synonyms"]))