def __init__(self, args, ps, cache, staticDir, assembly):
    """Keep the constructor arguments on the instance and register handlers.

    Every argument is stored under its own name, a ``PGsearch`` helper is
    built from ``ps`` and ``assembly``, and ``self.actions`` maps the action
    names "geneTrack" and "trackhub" to the matching bound methods.
    """
    self.args, self.ps = args, ps
    self.cache, self.staticDir = cache, staticDir
    self.pgSearch = PGsearch(ps, assembly)
    self.assembly = assembly

    # Action name -> bound handler.
    handlers = {}
    handlers["geneTrack"] = self.geneTrack
    handlers["trackhub"] = self.trackhub
    self.actions = handlers
def __init__(self, cache, ps, assembly, gene, ct1, ct2):
    """Record the gene, the two cell types and the fixed tuning constants.

    A ``PGsearch`` helper is built from ``ps`` and ``assembly``. ``self.pos``
    starts out as ``None``.
    """
    self.cache, self.ps, self.assembly = cache, ps, assembly
    self.gene = gene
    self.ct1, self.ct2 = ct1, ct2
    self.pgSearch = PGsearch(ps, assembly)

    self.pos = None
    self.halfWindow = 2 * 250 * 1000  # 500000
    self.thres = 1.64
    self.radiusScale = 10
def __init__(self, args, ps, cache, staticDir, assembly):
    """Keep the constructor arguments and set up the lookup tables.

    Besides storing every argument under its own name, this builds a
    ``PGsearch`` helper from ``ps`` and ``assembly``, the set of accepted
    biosample type names, and the action table with the single entry
    "search".
    """
    self.args, self.ps = args, ps
    self.cache, self.staticDir = cache, staticDir
    self.assembly = assembly
    self.pgSearch = PGsearch(ps, assembly)

    # Biosample type names accepted by the service.
    self.allBiosampleTypes = {
        "cell line",
        "induced pluripotent stem cell line",
        "in vitro differentiated cells",
        "primary cell",
        "stem cell",
        "tissue",
    }

    # Action name -> bound handler.
    self.actions = dict([("search", self.search)])
def ucsc_trackhub_url_snp(self, j, uuid):
    """Build the UCSC hgTracks URL and the track hub URL for a SNP request.

    Reads ``j["assembly"]``, ``j["snp"]`` and ``j["host"]``. The request is
    registered through ``self.db.insertOrUpdate`` and the hub number it
    returns becomes part of the hub file name. Also overwrites
    ``self.assembly`` with the assembly taken from the request.

    Returns a dict with the keys "url" and "trackhubUrl".
    """
    assembly = j["assembly"]
    self.assembly = assembly
    # The helper is constructed as in the original code although this
    # method never reads it afterwards.
    pgSearch = PGsearch(self.pw, assembly)

    snp = j["snp"]
    c = Coord(snp["chrom"], snp["cre_start"], snp["cre_end"])
    hubNum = self.db.insertOrUpdate(assembly, snp["accession"], uuid, j)

    hubFile = "hub_" + str(hubNum) + ".txt"
    trackhubUrl = '/'.join([j["host"], "ucsc_trackhub", uuid, hubFile])

    highlight = (assembly + "." + c.chrom + "%3A" + str(snp["snp_start"])
                 + '-' + str(snp["snp_end"]))
    pieces = [
        "https://genome.ucsc.edu/cgi-bin/hgTracks?",
        "db=" + assembly,
        "&position=" + str(c),
        "&hubClear=" + trackhubUrl,
        "&highlight=" + highlight,
    ]
    # NOTE(review): both extra "g" parameters are treated as hg19-only;
    # confirm against the original (un-collapsed) indentation.
    if assembly == "hg19":
        pieces.append("&g=wgEncodeGencodeV19")
        pieces.append("&g=phastCons100way")

    return {"url": "".join(pieces), "trackhubUrl": trackhubUrl}
class GenomeBrowserWebService(object):
    """Dispatches genome-browser requests to per-action handler methods."""

    def __init__(self, args, ps, cache, staticDir, assembly):
        """Store the configuration and register the available actions."""
        self.args = args
        self.ps = ps
        self.cache = cache
        self.staticDir = staticDir
        self.pgSearch = PGsearch(ps, assembly)
        self.assembly = assembly
        # Action name -> bound handler; consulted by process().
        self.actions = {
            "geneTrack": self.geneTrack,
            "trackhub": self.trackhub,
        }

    def process(self, j, args, kwargs):
        """Run the handler named by ``args[0]``.

        The handler receives ``j`` and the remaining positional arguments
        (``args[1:]``); ``kwargs`` is accepted but not used. Any exception,
        including the ``KeyError`` raised for an unregistered action name,
        propagates to the caller unchanged.
        """
        # The former bare ``except: raise`` wrapper only re-raised, so the
        # direct call behaves identically.
        action = args[0]
        return self.actions[action](j, args[1:])

    def geneTrack(self, j, args):
        """Return ``pgSearch.geneTable`` for the requested region.

        The chromosome comes from ``checkChrom``; "coord_start" and
        "coord_end" are optional keys of ``j`` and default to ``None``.
        """
        chrom = checkChrom(self.assembly, j)
        return self.pgSearch.geneTable(j, chrom,
                                       j.get("coord_start"),
                                       j.get("coord_end"))

    def trackhub(self, j, args):
        """Placeholder handler: always returns an empty list."""
        return []
def _trackhub_url_info(self, j):
    """Work out the assembly, accession and resized coordinate of a request.

    When ``j`` carries "coord_start" the coordinate is built from the
    "coord_chrom" / "coord_start" / "coord_end" keys; otherwise it is
    obtained from a ``CRE`` created for ``j["accession"]``. In both cases
    the coordinate is resized with ``j["halfWindow"]``. Also overwrites
    ``self.assembly`` with the assembly taken from the request.

    Returns the tuple ``(assembly, accession, coord)``.
    """
    assembly = j["assembly"]
    self.assembly = assembly
    pgSearch = PGsearch(self.pw, assembly)

    if "coord_start" in j:
        coord = Coord(j["coord_chrom"], j["coord_start"], j["coord_end"])
    else:
        coord = CRE(pgSearch, j["accession"], self.cacheW[assembly]).coord()

    coord.resize(j["halfWindow"])
    return assembly, j["accession"], coord
def __init__(self, args, pw, cache, staticDir, assembly):
    """Keep the constructor arguments and build helpers and dispatch tables.

    Three database helpers (``PGsearch``, ``GlobalPG``, ``PGFantomCat``) are
    created from ``pw`` and ``assembly``. ``self.actions`` maps top-level
    action names to bound methods and ``self.reDetailActions`` does the same
    for the "re_detail" sub-actions.
    """
    self.args, self.pw = args, pw
    self.cache, self.staticDir = cache, staticDir
    self.assembly = assembly

    self.pgSearch = PGsearch(pw, assembly)
    self.pgGlobal = GlobalPG(pw, assembly)
    self.pgFantomCat = PGFantomCat(pw, assembly)

    # Top-level action name -> bound handler.
    actions = {}
    actions["cre_table"] = self.cre_table
    actions["cre_tf_dcc"] = self.cre_tf_dcc
    actions["cre_histone_dcc"] = self.cre_histone_dcc
    actions["re_detail"] = self.re_detail
    actions["bed_download"] = self.bed_download
    actions["json_download"] = self.json_download
    actions["global_object"] = self.global_object
    actions["global_fantomcat"] = self.global_fantomcat
    actions["global_liftover"] = self.global_liftover
    actions["rampage"] = self.rampage
    actions["gwas_json_download"] = self.gwas_json_download
    actions["home_inputData"] = self.home_inputData
    actions["ground_level_versions"] = self.ground_level
    self.actions = actions

    # "re_detail" sub-action name -> bound handler.
    detail = {}
    detail["topTissues"] = self._re_detail_topTissues
    detail["nearbyGenomic"] = self._re_detail_nearbyGenomic
    detail["fantom_cat"] = self.fantom_cat
    detail["ortholog"] = self._ortholog
    detail["tfIntersection"] = self._re_detail_tfIntersection
    detail["cistromeIntersection"] = self._re_detail_cistromeIntersection
    detail["rampage"] = self._re_detail_rampage
    detail["linkedGenes"] = self._re_detail_linkedGenes
    detail["miniPeaks"] = self._re_detail_miniPeaks
    detail["groundLevel"] = self._re_detail_groundlevel
    self.reDetailActions = detail
class DE:
    """Differential-expression data around one gene for two cell types.

    Combines the differential-expression rows returned by ``PGde`` with the
    nearby cREs returned by ``PGsearch`` into the dictionaries returned by
    ``nearbyDEs`` and ``diffCREs``.
    """

    def __init__(self, cache, ps, assembly, gene, ct1, ct2):
        """Store the inputs and the fixed tuning constants."""
        self.cache = cache
        self.ps = ps
        self.assembly = assembly
        self.gene = gene
        self.ct1 = ct1
        self.ct2 = ct2
        self.pgSearch = PGsearch(ps, assembly)

        # Both are filled in lazily by coord(); initialising ``names`` here
        # keeps the attribute defined even before the first lookup.
        self.pos = None
        self.names = None

        self.halfWindow = 250 * 1000 * 2
        self.thres = 1.64
        self.radiusScale = 10

    def coord(self):
        """Return the gene position, looking it up on first use.

        The lookup (``pgSearch.genePos``) also sets ``self.names``.

        Raises:
            Exception: if no position is found for ``self.gene``.
        """
        if not self.pos:
            self.pos, self.names = self.pgSearch.genePos(self.gene)
        if not self.pos:
            raise Exception("invalid pos for " + self.gene)
        return self.pos

    def _parseCE(self, typ, c):
        """Convert one cRE row into the dictionary used for display.

        ``c`` is indexed as (accession, start, stop, value for ct1, value
        for ct2); "value" is the ct2 value minus the ct1 value, rounded to
        three decimals.
        """
        start, stop = c[1], c[2]
        radius = float(stop - start) / 2
        return {
            "center": radius + start,
            "value": round(float(c[4] - c[3]), 3),
            "typ": typ,
            "width": 4,
            "accession": c[0],
            "start": start,
            "stop": stop,
            "len": stop - start,
        }

    def _nearbyBySignature(self, assay, zscoreCol, group, label, flags):
        """Shared implementation of the promoter / enhancer lookups.

        Returns ``[]`` when either cell type has no index for ``assay`` in
        ``cache.rankMethodToIDxToCellType``. Otherwise queries
        ``pgSearch.nearbyCREs`` once per entry of ``flags`` (passed through
        as that call's last argument), keeps rows whose ``cache.groups``
        entry contains ``group`` and whose value for either cell type
        exceeds ``self.thres``, and converts them with ``_parseCE``.
        """
        rmLookup = self.cache.rankMethodToIDxToCellType[assay]
        if self.ct1 not in rmLookup or self.ct2 not in rmLookup:
            return []

        cols = [
            self.assembly + "_cre_all.accession AS accession",
            "start",
            "stop",
            "%s[%s]" % (zscoreCol, rmLookup[self.ct1]),
            "%s[%s]" % (zscoreCol, rmLookup[self.ct2]),
        ]

        cres = []
        for flag in flags:
            cres.extend(self.pgSearch.nearbyCREs(self.coord(),
                                                 2 * self.halfWindow,
                                                 cols, flag))

        groups = self.cache.groups
        return [self._parseCE(label, c)
                for c in cres
                if group in groups[c[0]]
                and (c[3] > self.thres or c[4] > self.thres)]

    def _nearbyPromoters(self):
        """Return nearby "PLS" cREs passing the H3K4me3 threshold."""
        return self._nearbyBySignature("H3K4me3", "h3k4me3_zscores", "PLS",
                                       "promoter-like signature", (True,))

    def _nearbyEnhancers(self):
        """Return nearby "ELS" cREs passing the H3K27ac threshold."""
        return self._nearbyBySignature("H3K27ac", "h3k27ac_zscores", "ELS",
                                       "enhancer-like signature",
                                       (False, True))

    def diffCREs(self, xdomain):
        """Return promoter and enhancer entries lying fully inside ``xdomain``.

        ``xdomain`` is indexed as (start, stop). The result is
        ``{"data": [...]}``.
        """
        xstart, xstop = xdomain[0], xdomain[1]
        found = self._nearbyPromoters() + self._nearbyEnhancers()
        return {"data": [x for x in found
                         if x["start"] >= xstart and x["stop"] <= xstop]}

    def _genesInRegion(self, start, stop):
        """Return ``pgSearch.genesInRegion`` on the gene's chromosome."""
        pos = self.coord()
        return self.pgSearch.genesInRegion(pos.chrom, int(start), int(stop))

    def _DEsForDisplay(self, nearbyDEs):
        """Convert differential-expression rows into display dictionaries.

        Each row is indexed as (start, stop, fold change, gene id); the gene
        id is resolved through ``cache.lookupEnsembleGene``.
        """
        ret = []
        for d in nearbyDEs:
            genename, strand = self.cache.lookupEnsembleGene(d[3])
            ret.append({
                "fc": round(float(d[2]), 3),
                "gene": genename,
                "start": d[0],
                "stop": d[1],
                "strand": strand,
                "sstart": "{:,} ({})".format(d[0], strand),
            })
        return ret

    @staticmethod
    def _shortCellType(ct):
        """Strip the fixed prefixes/suffixes from a cell type name.

        For example "C57BL/6_limb_embryo_14.5_days" becomes "limb_14.5".
        """
        for piece in ("C57BL/6_", "embryo_", "_days", "postnatal_"):
            ct = ct.replace(piece, "")
        return ct

    def nearbyDEs(self):
        """Return the differential-expression data around the gene.

        When ``PGde.nearbyDEs`` yields nothing the result is
        ``{"data": None, "xdomain": None}``. Otherwise the x domain is
        centred on the middle of the returned rows and is at least
        ``self.halfWindow`` wide on each side.
        """
        ct1 = self._shortCellType(self.ct1)
        ct2 = self._shortCellType(self.ct2)

        cd = self.coord()
        pg = PGde(self.pgSearch.pw, self.assembly)
        nearbyDEs = pg.nearbyDEs(cd, self.halfWindow, ct1, ct2, 0.05)
        if not nearbyDEs:
            return {"data": None, "xdomain": None}

        # Centre on the middle of the DEs.
        cxdomain = [
            max(0, min(d[0] for d in nearbyDEs)),
            max(d[1] for d in nearbyDEs),
        ]
        span = cxdomain[1] - cxdomain[0]
        center = float(span) / 2 + cxdomain[0]
        halfWindow = max(self.halfWindow, span / 2.0)

        # Widen each side.
        xdomain = [max(0, center - halfWindow), center + halfWindow]

        genes = self._genesInRegion(min(xdomain[0], cxdomain[0]),
                                    max(xdomain[1], cxdomain[1]))
        ret = self._DEsForDisplay(nearbyDEs)
        foldChanges = [d["fc"] for d in ret]
        return {
            "names": self.names,
            "data": ret,
            "xdomain": xdomain,
            "genes": genes,
            "ymin": min(foldChanges),
            "ymax": max(foldChanges),
        }
class GeneExpWebService(object):
    """Dispatches gene-expression requests; the only action is "search"."""

    def __init__(self, args, ps, cache, staticDir, assembly):
        """Store the configuration and register the available actions."""
        self.args = args
        self.ps = ps
        self.cache = cache
        self.staticDir = staticDir
        self.assembly = assembly
        self.pgSearch = PGsearch(ps, assembly)
        # Biosample type names accepted by search().
        self.allBiosampleTypes = {
            "cell line",
            "induced pluripotent stem cell line",
            "in vitro differentiated cells",
            "primary cell",
            "stem cell",
            "tissue",
        }
        self.actions = {"search": self.search}

    def process(self, j, args, kwargs):
        """Run the handler named by ``args[0]`` with ``j`` and ``args[1:]``.

        ``kwargs`` is accepted but not used. Exceptions (including the
        ``KeyError`` for an unregistered action) propagate unchanged.
        """
        # The former bare ``except: raise`` wrapper only re-raised.
        action = args[0]
        return self.actions[action](j, args[1:])

    def search(self, j, args):
        """Compute expression bar data for ``j["gene"]``.

        Required keys of ``j``: "compartments_selected",
        "biosample_types_selected" and "gene".

        Returns:
            * an abort dictionary (``hasData`` False plus ``err``) when no
              or an unknown biosample type is selected, or when no
              compartment is selected;
            * ``{"assembly", "gene"}`` only, when ``pgSearch.geneInfo``
              finds nothing for the gene;
            * otherwise that dictionary extended with one entry per assay
              label ("total RNA-seq", "polyA RNA-seq", "all").
        """
        def abort(err):
            return {"hasData": False, "items": {}, "err": err}

        compartments = j["compartments_selected"]
        biosample_types_selected = j["biosample_types_selected"]
        # NOTE(review): the request's optional "assay_name" was previously
        # read into a local that the loop below immediately shadowed, so it
        # never influenced the result. The dead read is gone; confirm
        # whether filtering by it was intended.

        if not biosample_types_selected:
            return abort("no biosample type selected")
        if not set(biosample_types_selected).issubset(self.allBiosampleTypes):
            return abort("invalid biosample type")

        # TODO: check value of compartments
        if not compartments:
            return abort("no compartments")

        gene = j["gene"]
        # TODO: check for valid gene
        gi = self.pgSearch.geneInfo(gene)
        if not gi:
            return {"assembly": self.assembly, "gene": gene}

        name = gi.approved_symbol
        strand = gi.strand

        cge = GeneExpression(self.ps, self.cache, self.assembly)
        r = {"assembly": self.assembly, "gene": gene}
        for label in ("total RNA-seq", "polyA RNA-seq", "all"):
            # "all" means no assay filter.
            assay = None if label == "all" else label
            single = cge.computeHorBars(name, compartments,
                                        biosample_types_selected, assay)
            mean = cge.computeHorBarsMean(name, compartments,
                                          biosample_types_selected, assay)
            # Read after both computations, as before.
            itemsByRID = cge.itemsByRID
            r[label] = {
                "assembly": self.assembly,
                "gene": name,
                "strand": strand,
                "ensemblid_ver": gi.ensemblid_ver,
                "coords": {
                    "chrom": gi.chrom,
                    "start": gi.start,
                    "stop": gi.stop,
                },
                "single": single,
                "mean": mean,
                "itemsByRID": itemsByRID,
            }
        return r
class DataWebService():
    """Dispatches data requests (cRE tables, details, downloads, globals)."""

    def __init__(self, args, pw, cache, staticDir, assembly):
        """Store the configuration, build helpers and the dispatch tables."""
        self.args = args
        self.pw = pw
        self.cache = cache
        self.staticDir = staticDir
        self.assembly = assembly
        self.pgSearch = PGsearch(pw, assembly)
        self.pgGlobal = GlobalPG(pw, assembly)
        self.pgFantomCat = PGFantomCat(pw, assembly)

        # Top-level action name -> bound handler; consulted by process().
        self.actions = {
            "cre_table": self.cre_table,
            "cre_tf_dcc": self.cre_tf_dcc,
            "cre_histone_dcc": self.cre_histone_dcc,
            "re_detail": self.re_detail,
            "bed_download": self.bed_download,
            "json_download": self.json_download,
            "global_object": self.global_object,
            "global_fantomcat": self.global_fantomcat,
            "global_liftover": self.global_liftover,
            "rampage": self.rampage,
            "gwas_json_download": self.gwas_json_download,
            "home_inputData": self.home_inputData,
            "ground_level_versions": self.ground_level,
        }

        # "re_detail" sub-action name -> bound handler; see re_detail().
        self.reDetailActions = {
            "topTissues": self._re_detail_topTissues,
            "nearbyGenomic": self._re_detail_nearbyGenomic,
            "fantom_cat": self.fantom_cat,
            "ortholog": self._ortholog,
            "tfIntersection": self._re_detail_tfIntersection,
            "cistromeIntersection": self._re_detail_cistromeIntersection,
            "rampage": self._re_detail_rampage,
            "linkedGenes": self._re_detail_linkedGenes,
            "miniPeaks": self._re_detail_miniPeaks,
            "groundLevel": self._re_detail_groundlevel,
        }

    def process(self, j, args, kwargs):
        """Run the handler named by ``args[0]`` with ``j`` and ``args[1:]``.

        ``kwargs`` is accepted but not used. Exceptions (including the
        ``KeyError`` for an unregistered action) propagate unchanged.
        """
        # The former bare ``except: raise`` wrapper only re-raised.
        action = args[0]
        return self.actions[action](j, args[1:])

    def ground_level(self, j, args):
        """Group the rows of ``pgSearch.versions()``.

        Each row is indexed as (accession, biosample, assay, version). The
        result is nested as ``{version: {biosample: {assay: [accession]}}}``.
        """
        r = {}
        for row in self.pgSearch.versions():
            accession, biosample, assay, version = (row[0], row[1],
                                                    row[2], row[3])
            (r.setdefault(version, {})
              .setdefault(biosample, {})
              .setdefault(assay, [])
              .append(accession))
        return r

    def _ortholog(self, j, accession):
        """Return the ortholog data for ``accession``.

        For any assembly other than "mm10" the mm10 and hg19 orthologs of
        the accession are returned directly. For "mm10" the GRCh38 orthologs
        are returned together with their hg19 orthologs, de-duplicated by
        accession.
        """
        if j["assembly"] != "mm10":
            mm10 = Ortholog(self.pw, self.assembly, accession, "mm10")
            hg19 = Ortholog(self.pw, self.assembly, accession, "hg19")
            return {
                accession: {
                    "ortholog": mm10.as_dict(),
                    "hg19": hg19.as_dict(),
                }
            }

        hg38 = Ortholog(self.pw, "mm10", accession, "GRCh38").as_dict()
        hg19 = []
        hg19accs = set()
        for ortholog in hg38:
            lifted = Ortholog(self.pw, "GRCh38", ortholog["accession"],
                              "hg19").as_dict()
            for result in lifted:
                if result["accession"] not in hg19accs:
                    hg19accs.add(result["accession"])
                    hg19.append(result)
        return {accession: {"ortholog": hg38, "hg19": hg19}}

    def global_liftover(self, j, args):
        """Collect the saturation, liftOver and cistrome global objects."""
        retval = {
            "saturation": {
                self.assembly:
                    self.global_object({"name": "saturation"}, args),
                "GRCh38":
                    self.external_global_object({"name": "saturation"},
                                                args, "GRCh38"),
                "GRCh38_encode_cistrome":
                    self.external_global_object(
                        {"name": "saturation_encode_cistrome"}, args,
                        "GRCh38"),
            },
            "cistrome_encode": {},
        }
        for a in ["hg19", "GRCh38"]:
            for b in ["hg19", "GRCh38"]:
                retval["%s_%s" % (a, b)] = self.global_object(
                    {"name": "liftOver_%s_%s" % (a, b)}, args)
            # Depends only on ``a``, so it is fetched once per outer pass.
            retval["cistrome_encode_%s" % a] = self.global_object(
                {"name": "encode_cistrome_%s" % a}, args)
        return retval

    def global_fantomcat(self, j, args):
        """Return the "fantomcat" and "fantomcat_2kb" global objects."""
        return {
            "main": self.global_object({"name": "fantomcat"}, args),
            "fantomcat_2kb": self.global_object({"name": "fantomcat_2kb"},
                                                args),
        }

    def ctcf_distr(self, j, args):
        """Return CTCF density and TAD boundaries for ``j["chr"]``.

        Raises:
            Exception: if ``j["chr"]`` is not a key of the density object.
        """
        # NOTE(review): ``self.tads`` is not assigned in __init__ and this
        # method is not registered in ``self.actions`` - confirm it is
        # still reachable / where ``tads`` is expected to come from.
        result = self.global_object({"name": "ctcf_density_10000"}, args)
        if j["chr"] not in result:
            raise Exception("data_ws$DataWS::ctcf_distr: chr %s not valid" %
                            j["chr"])
        return {
            "data": {
                "results": result[j["chr"]],
                "tads": [[x[0] / 10000, x[1] / 10000]
                         for x in self.tads.get_chrom_btn(j["biosample"],
                                                          j["chr"])],
            }
        }

    def global_object(self, j, args):
        """Return ``pgGlobal.select`` for ``j["name"]``."""
        return self.pgGlobal.select(j["name"])

    def external_global_object(self, j, args, assembly):
        """Return ``pgGlobal.select_external`` for ``j["name"]``."""
        return self.pgGlobal.select_external(j["name"], assembly)

    def cre_table(self, j, args):
        """Return the cRE table for the request, with per-cRE nearby genes.

        Adds "genesallpc" to every entry of ``results["cres"]``, plus the
        "rfacets" and "cts" keys on the result itself.
        """
        chrom = checkChrom(self.assembly, j)
        results = self.pgSearch.creTable(j, chrom,
                                         j.get("coord_start"),
                                         j.get("coord_end"))
        for r in results["cres"]:
            accession = r["info"]["accession"]
            genesp, genesa = CRE(self.pgSearch, accession,
                                 self.cache).nearbyGenesPA()
            r["genesallpc"] = {
                "all": genesa,
                "pc": genesp,
                "accession": accession,
            }
        if "cellType" in j and j["cellType"]:
            results["rfacets"] = self.pgSearch.rfacets_active(j)
        else:
            results["rfacets"] = ["dnase", "promoter", "enhancer", "ctcf"]
        results["cts"] = self.pgSearch.haveSCT(j)
        return results

    def re_detail(self, j, args):
        """Run the detail sub-action ``args[0]`` for ``j["accession"]``.

        Raises:
            Exception: if the sub-action is not in ``reDetailActions``.
        """
        action = args[0]
        if action not in self.reDetailActions:
            raise Exception("unknown action")
        return self.reDetailActions[action](j, j["accession"])

    def tfenrichment(self, j, args):
        """Return ``tfEnrichment.findenrichment`` for the two tree nodes."""
        # NOTE(review): ``self.tfEnrichment`` is not assigned in __init__
        # and this method is not registered in ``self.actions`` - confirm.
        a = j["tree_nodes_compare"]
        tree_rank_method = j["tree_rank_method"]
        return self.tfEnrichment.findenrichment(tree_rank_method, a[0], a[1])

    def _re_detail_topTissues(self, j, accession):
        """Return ``{accession: cre.topTissues()}``."""
        cre = CRE(self.pgSearch, accession, self.cache)
        return {accession: cre.topTissues()}

    def fantom_cat(self, j, accession):
        """Return the FANTOM CAT intersections, enhancers and CAGE rows."""
        def annotated(key):
            # Adds "other_names": the gene name (unless it equals the gene
            # id) followed by the "|"-separated aliases, comma-joined.
            results = self.pgFantomCat.select_cre_intersections(accession,
                                                                key)
            for result in results:
                result["other_names"] = (
                    result["genename"]
                    if result["genename"] != result["geneid"] else "")
                if result["aliases"] != "":
                    if result["other_names"] != "":
                        result["other_names"] += ", "
                    result["other_names"] += ", ".join(
                        result["aliases"].split("|"))
            return results

        enhancers = [{
            "chr": a,
            "start": int(b),
            "stop": int(c),
            "score": float(d),
        } for a, b, c, d in self.pgFantomCat.select_enhancers(accession)]
        cage = [{
            "chr": a,
            "start": int(b),
            "stop": int(c),
            "strand": d,
            "score": float(e),
            "tssstart": int(f),
            "tssstop": int(g),
        } for a, b, c, d, e, f, g in self.pgFantomCat.select_cage(accession)]
        return {
            accession: {
                "fantom_cat": annotated("intersections"),
                "fantom_cat_twokb": annotated("twokb_intersections"),
                "enhancers": enhancers,
                "cage": cage,
            }
        }

    def _re_detail_nearbyGenomic(self, j, accession):
        """Return the genes, TADs, cREs, SNPs and VISTA ids near a cRE."""
        cre = CRE(self.pgSearch, accession, self.cache)
        # The result is not used here; the call is kept in case CRE relies
        # on it having run - TODO confirm.
        cre.coord()

        snps = cre.intersectingSnps(10000)  # 10 KB
        nearbyCREs = cre.distToNearbyCREs(1000000)  # 1 MB
        nearbyGenes = cre.nearbyGenes()
        genesInTad = cre.genesInTad()
        re_tads = cre.cresInTad()
        vista = cre.vista()

        return {
            accession: {
                "nearby_genes": nearbyGenes,
                "tads": genesInTad,
                "re_tads": re_tads,
                "nearby_res": nearbyCREs,
                "overlapping_snps": snps,
                "vistaids": vista,
            }
        }

    def _re_detail_tfIntersection(self, j, accession):
        """Return ``{accession: cre.peakIntersectCount()}``."""
        cre = CRE(self.pgSearch, accession, self.cache)
        return {accession: cre.peakIntersectCount()}

    def _re_detail_cistromeIntersection(self, j, accession):
        """Return the peak intersection counts for the "cistrome" set."""
        cre = CRE(self.pgSearch, accession, self.cache)
        return {accession: cre.peakIntersectCount(eset="cistrome")}

    def _re_detail_linkedGenes(self, j, accession):
        """Return ``{accession: {"linked_genes": cre.linkedGenes()}}``."""
        cre = CRE(self.pgSearch, accession, self.cache)
        return {accession: {"linked_genes": cre.linkedGenes()}}

    def _re_detail_rampage(self, j, accession):
        """Return the RAMPAGE data of the nearest gene from nearbyPcGenes().

        ``min`` raises ``ValueError`` when no nearby gene is returned.
        """
        cre = CRE(self.pgSearch, accession, self.cache)
        nearbyGenes = cre.nearbyPcGenes()
        nearest = min(nearbyGenes, key=lambda x: x["distance"])
        rampage = Rampage(self.assembly, self.pgSearch, self.cache)
        return {accession: rampage.getByGene(nearest)}

    def rampage(self, j, args):
        """Return the RAMPAGE data for the approved symbol ``j["gene"]``."""
        rampage = Rampage(self.assembly, self.pgSearch, self.cache)
        gene = j["gene"]
        return {gene: rampage.getByGeneApprovedSymbol(gene)}

    def bed_download(self, j, args):
        """Return ``CREdownload.bed`` for the request."""
        cd = CREdownload(self.pgSearch, Config.downloadDir)
        return cd.bed(j)

    def gwas_json_download(self, j, args):
        """Tag the request with a fresh UUID and return ``CREdownload.gwas``.

        Note that ``j`` is modified in place (key "uuid").
        """
        j["uuid"] = str(uuid.uuid4())
        cd = CREdownload(self.pgSearch, Config.downloadDir)
        return cd.gwas(j, j["uuid"])

    def json_download(self, j, args):
        """Return ``CREdownload.json`` for the request."""
        cd = CREdownload(self.pgSearch, Config.downloadDir)
        return cd.json(j)

    @staticmethod
    def _accessionAndTarget(j):
        """Return ``(accession, target)`` from ``j`` or raise if one is empty."""
        accession = j.get("accession")
        if not accession:
            raise Exception("invalid accession")
        target = j.get("target")
        if not target:
            raise Exception("invalid target")
        return accession, target

    def cre_tf_dcc(self, j, args):
        """Return ``{target: pgSearch.tfTargetExps(...)}``.

        Raises:
            Exception: if "accession" or "target" is missing or empty.
        """
        accession, target = self._accessionAndTarget(j)
        return {
            target: self.pgSearch.tfTargetExps(accession, target,
                                               eset=j.get("eset"))
        }

    def cre_histone_dcc(self, j, args):
        """Return ``{target: pgSearch.histoneTargetExps(...)}``.

        Raises:
            Exception: if "accession" or "target" is missing or empty.
        """
        accession, target = self._accessionAndTarget(j)
        return {
            target: self.pgSearch.histoneTargetExps(accession, target,
                                                    eset=j.get("eset"))
        }

    def _re_detail_miniPeaks(self, j, accession):
        """Return the mini-peak rows for the dnase/h3k27ac/h3k4me3 assays."""
        nbins = Config.minipeaks_nbins
        ver = Config.minipeaks_ver
        mp = MiniPeaks(self.assembly, self.pgSearch, self.cache, nbins, ver)
        rows, accessions = mp.getMinipeaksForAssays(
            ["dnase", "h3k27ac", "h3k4me3"], [accession])
        return {accession: {"rows": rows, "accessions": accessions}}

    def _re_detail_groundlevel(self, j, accession):
        """Query the remote peaks service for the cRE's coordinates.

        One HTTP GET is issued per data kind; the "results" -> "all" part
        of each JSON reply is returned under that kind's name.
        """
        cre = CRE(self.pgSearch, accession, self.cache)
        coord = cre.coord()

        # NOTE(review): the URL always names GRCh38, whatever
        # ``self.assembly`` is, and the request has no timeout - confirm
        # both are intended.
        def _dreq(url):
            return requests.get(
                url % (coord.chrom, coord.start,
                       coord.end)).json()["results"]["all"]

        return {
            accession: {
                k: _dreq("https://api.wenglab.org/peaksws/GRCh38/" + k +
                         "/search/%s/%d/%d")
                for k in ["tf", "histone", "dnase", "3dinteractions", "cdhs"]
            }
        }

    def home_inputData(self, j, args):
        """Return ``PGHome.inputData()``."""
        # Fixed: this used ``self.ps``, which __init__ never sets (the
        # handle is stored as ``self.pw``), so the call raised
        # AttributeError.
        home = PGHome(self.pw)
        return home.inputData()