コード例 #1
0
ファイル: gb_ws.py プロジェクト: wanliu2019/SCREEN
    def __init__(self, args, ps, cache, staticDir, assembly):
        """Store the service's collaborators and register its actions.

        Keeps ``args``, ``ps``, ``cache``, ``staticDir`` and ``assembly`` on
        the instance, builds a ``PGsearch`` from ``ps`` and ``assembly``, and
        maps the action names ``"geneTrack"`` and ``"trackhub"`` to the bound
        methods of the same name.
        """
        self.args, self.ps, self.cache = args, ps, cache
        self.staticDir = staticDir
        self.pgSearch = PGsearch(ps, assembly)
        self.assembly = assembly

        # Action name -> bound handler.
        self.actions = dict(geneTrack=self.geneTrack, trackhub=self.trackhub)
コード例 #2
0
ファイル: de.py プロジェクト: wanliu2019/SCREEN
    def __init__(self, cache, ps, assembly, gene, ct1, ct2):
        """Record the inputs and set the fixed numeric parameters.

        Stores ``cache``, ``ps``, ``assembly``, ``gene``, ``ct1`` and ``ct2``
        on the instance, builds a ``PGsearch`` from ``ps`` and ``assembly``,
        and starts with no resolved position (``self.pos`` is ``None``).
        """
        self.cache, self.ps, self.assembly = cache, ps, assembly
        self.gene, self.ct1, self.ct2 = gene, ct1, ct2

        self.pgSearch = PGsearch(ps, assembly)
        self.pos = None

        # Fixed numeric settings used by the rest of the class.
        self.halfWindow = 2 * 250 * 1000  # 500,000
        self.thres = 1.64
        self.radiusScale = 10
コード例 #3
0
    def __init__(self, args, ps, cache, staticDir, assembly):
        """Store the service's collaborators and register its actions.

        Keeps the constructor arguments on the instance, builds a ``PGsearch``
        from ``ps`` and ``assembly``, defines the set of accepted biosample
        type names, and maps the action name ``"search"`` to ``self.search``.
        """
        self.args, self.ps, self.cache = args, ps, cache
        self.staticDir, self.assembly = staticDir, assembly
        self.pgSearch = PGsearch(ps, assembly)

        # Every biosample type name this service accepts.
        self.allBiosampleTypes = {
            "cell line",
            "induced pluripotent stem cell line",
            "in vitro differentiated cells",
            "primary cell",
            "stem cell",
            "tissue",
        }

        # Action name -> bound handler.
        self.actions = dict(search=self.search)
コード例 #4
0
ファイル: trackhub_ws.py プロジェクト: wanliu2019/SCREEN
    def ucsc_trackhub_url_snp(self, j, uuid):
        """Build a UCSC hgTracks URL for a SNP together with its hub URL.

        Reads ``assembly``, ``snp`` and ``host`` from ``j``; ``j["snp"]`` must
        provide ``chrom``, ``cre_start``, ``cre_end``, ``snp_start``,
        ``snp_end`` and ``accession``.  As side effects this sets
        ``self.assembly`` and calls ``self.db.insertOrUpdate``, whose return
        value becomes the hub number in the hub file name.

        Returns a dict with the keys ``"url"`` and ``"trackhubUrl"``.
        """
        asm = j["assembly"]
        self.assembly = asm
        # NOTE(review): this instance is never used below; the construction is
        # kept in case it has side effects that are not visible here.
        pgSearch = PGsearch(self.pw, asm)

        snpInfo = j["snp"]
        region = Coord(snpInfo["chrom"], snpInfo["cre_start"],
                       snpInfo["cre_end"])

        hubNum = self.db.insertOrUpdate(asm, snpInfo["accession"], uuid, j)

        trackhubUrl = '/'.join(
            [j["host"], "ucsc_trackhub", uuid, "hub_" + str(hubNum) + ".txt"])

        # Query-string pieces in the order they appear in the final URL.
        pieces = [
            "https://genome.ucsc.edu/cgi-bin/hgTracks?",
            "db=" + asm,
            "&position=" + str(region),
            "&hubClear=" + trackhubUrl,
            "&highlight=" + asm + "." + region.chrom + "%3A" +
            str(snpInfo["snp_start"]) + '-' + str(snpInfo["snp_end"]),
        ]
        if asm == "hg19":
            # Two extra "g" parameters are appended for hg19 only.
            pieces.extend(["&g=wgEncodeGencodeV19", "&g=phastCons100way"])

        return {"url": "".join(pieces), "trackhubUrl": trackhubUrl}
コード例 #5
0
ファイル: gb_ws.py プロジェクト: wanliu2019/SCREEN
class GenomeBrowserWebService(object):
    """Dispatch genome-browser requests to named action handlers.

    ``process`` looks up the handler for the first path element in
    ``self.actions`` and calls it with the request payload and the remaining
    path elements.
    """

    def __init__(self, args, ps, cache, staticDir, assembly):
        """Store the collaborators and register the available actions.

        Builds a ``PGsearch`` from ``ps`` and ``assembly`` and maps the action
        names ``"geneTrack"`` and ``"trackhub"`` to the methods below.
        """
        self.args = args
        self.ps = ps
        self.cache = cache
        self.staticDir = staticDir
        self.pgSearch = PGsearch(ps, assembly)
        self.assembly = assembly

        self.actions = {"geneTrack": self.geneTrack, "trackhub": self.trackhub}

    def process(self, j, args, kwargs):
        """Run the handler named by ``args[0]``.

        Args:
            j: request payload handed to the handler unchanged.
            args: path elements; ``args[0]`` selects the handler and
                ``args[1:]`` is passed on to it.
            kwargs: accepted but not used by this method.

        Returns:
            Whatever the selected handler returns.

        Raises:
            IndexError: if ``args`` is empty.
            KeyError: if ``args[0]`` is not a registered action.
        """
        # Exceptions propagate unchanged; the former bare ``except: raise``
        # wrapper was a no-op and has been dropped.
        action = args[0]
        return self.actions[action](j, args[1:])

    def geneTrack(self, j, args):
        """Return ``pgSearch.geneTable`` for the request's chromosome/range.

        The chromosome comes from ``checkChrom(self.assembly, j)``; the range
        bounds are ``j["coord_start"]`` and ``j["coord_end"]`` and default to
        ``None`` when absent.
        """
        chrom = checkChrom(self.assembly, j)
        return self.pgSearch.geneTable(j, chrom, j.get("coord_start", None),
                                       j.get("coord_end", None))

    def trackhub(self, j, args):
        """Placeholder action: always returns an empty list."""
        return []
コード例 #6
0
ファイル: trackhub_ws.py プロジェクト: wanliu2019/SCREEN
    def _trackhub_url_info(self, j):
        """Resolve the assembly, accession and display window for a request.

        Sets ``self.assembly`` from ``j["assembly"]``.  When ``j`` has no
        ``"coord_start"`` key the coordinate is taken from the CRE named by
        ``j["accession"]``; otherwise it is built from ``coord_chrom``,
        ``coord_start`` and ``coord_end``.  Either way the coordinate is then
        resized with ``j["halfWindow"]``.

        Returns the tuple ``(assembly, accession, coord)``.
        """
        asm = j["assembly"]
        self.assembly = asm
        search = PGsearch(self.pw, asm)

        if "coord_start" in j:
            # Explicit coordinates supplied by the caller.
            window = Coord(j["coord_chrom"], j["coord_start"], j["coord_end"])
        else:
            # Fall back to the coordinate of the requested CRE.
            window = CRE(search, j["accession"], self.cacheW[asm]).coord()
        window.resize(j["halfWindow"])

        return asm, j["accession"], window
コード例 #7
0
    def __init__(self, args, pw, cache, staticDir, assembly):
        """Store the collaborators and register both dispatch tables.

        Builds ``PGsearch``, ``GlobalPG`` and ``PGFantomCat`` helpers from
        ``pw`` and ``assembly``.  ``self.actions`` maps top-level action names
        to bound methods; ``self.reDetailActions`` does the same for the
        detail sub-actions.
        """
        self.args, self.pw, self.cache = args, pw, cache
        self.staticDir, self.assembly = staticDir, assembly
        self.pgSearch = PGsearch(pw, assembly)
        self.pgGlobal = GlobalPG(pw, assembly)
        self.pgFantomCat = PGFantomCat(pw, assembly)

        # Top-level action name -> handler.
        self.actions = dict(
            cre_table=self.cre_table,
            cre_tf_dcc=self.cre_tf_dcc,
            cre_histone_dcc=self.cre_histone_dcc,
            re_detail=self.re_detail,
            bed_download=self.bed_download,
            json_download=self.json_download,
            global_object=self.global_object,
            global_fantomcat=self.global_fantomcat,
            global_liftover=self.global_liftover,
            rampage=self.rampage,
            gwas_json_download=self.gwas_json_download,
            home_inputData=self.home_inputData,
            ground_level_versions=self.ground_level,
        )

        # Detail sub-action name -> handler.
        self.reDetailActions = dict(
            topTissues=self._re_detail_topTissues,
            nearbyGenomic=self._re_detail_nearbyGenomic,
            fantom_cat=self.fantom_cat,
            ortholog=self._ortholog,
            tfIntersection=self._re_detail_tfIntersection,
            cistromeIntersection=self._re_detail_cistromeIntersection,
            rampage=self._re_detail_rampage,
            linkedGenes=self._re_detail_linkedGenes,
            miniPeaks=self._re_detail_miniPeaks,
            groundLevel=self._re_detail_groundlevel,
        )
コード例 #8
0
ファイル: de.py プロジェクト: wanliu2019/SCREEN
class DE:
    """Collect DE rows and nearby CREs around one gene for two cell types.

    ``nearbyDEs`` returns the rows produced by ``PGde.nearbyDEs`` together
    with a plotting domain; ``diffCREs`` returns nearby CREs in the "PLS" or
    "ELS" group whose z-score exceeds ``self.thres`` in either cell type.
    """

    def __init__(self, cache, ps, assembly, gene, ct1, ct2):
        """Store the inputs, build a ``PGsearch`` and set fixed parameters."""
        self.cache = cache
        self.ps = ps
        self.assembly = assembly
        self.gene = gene
        self.ct1 = ct1
        self.ct2 = ct2

        self.pgSearch = PGsearch(ps, assembly)
        # Both are filled in lazily by coord().
        self.pos = None
        self.names = None

        self.halfWindow = 250 * 1000 * 2
        self.thres = 1.64
        self.radiusScale = 10

    def coord(self):
        """Return the gene position, looking it up on first use.

        The lookup (``pgSearch.genePos``) also sets ``self.names``.

        Raises:
            Exception: if no position could be resolved for ``self.gene``.
        """
        if not self.pos:
            self.pos, self.names = self.pgSearch.genePos(self.gene)
        if not self.pos:
            # str() keeps the message usable even for a non-string gene.
            raise Exception("invalid pos for " + str(self.gene))
        return self.pos

    def _parseCE(self, typ, c):
        """Convert one CRE row into a display dict.

        ``c`` is read as ``(accession, start, stop, value1, value2)``; the
        reported ``"value"`` is ``value2 - value1`` rounded to 3 places and
        ``"center"`` is the midpoint of ``start`` and ``stop``.
        """
        radius = float(c[2] - c[1]) / 2
        return {
            "center": radius + c[1],
            "value": round(float(c[4] - c[3]), 3),
            "typ": typ,
            "width": 4,
            "accession": c[0],
            "start": c[1],
            "stop": c[2],
            "len": c[2] - c[1]
        }

    def _nearbyBySignature(self, rankMethod, zscoreColumn, nearbyFlags, group,
                           label):
        """Shared implementation of the promoter and enhancer lookups.

        Args:
            rankMethod: key into ``cache.rankMethodToIDxToCellType``.
            zscoreColumn: name of the z-score array column to index.
            nearbyFlags: values passed, one call each, as the fourth
                positional argument of ``pgSearch.nearbyCREs`` (its meaning
                is not visible in this file); results are concatenated in
                order.
            group: substring/member that must be in ``cache.groups[accession]``.
            label: ``"typ"`` value for the produced dicts.

        Returns:
            A list of ``_parseCE`` dicts, or ``[]`` when either cell type is
            missing from the rank-method lookup.
        """
        rmLookup = self.cache.rankMethodToIDxToCellType[rankMethod]
        if self.ct1 not in rmLookup or self.ct2 not in rmLookup:
            return []

        cols = [
            self.assembly + "_cre_all.accession AS accession", "start", "stop",
            "%s[%s]" % (zscoreColumn, rmLookup[self.ct1]),
            "%s[%s]" % (zscoreColumn, rmLookup[self.ct2])
        ]

        cres = []
        for flag in nearbyFlags:
            cres.extend(
                self.pgSearch.nearbyCREs(self.coord(), 2 * self.halfWindow,
                                         cols, flag))

        groups = self.cache.groups
        return [
            self._parseCE(label, c) for c in cres
            if group in groups[c[0]] and (c[3] > self.thres
                                          or c[4] > self.thres)
        ]

    def _nearbyPromoters(self):
        """Nearby "PLS" CREs passing the H3K4me3 z-score threshold."""
        return self._nearbyBySignature("H3K4me3", "h3k4me3_zscores", [True],
                                       "PLS", "promoter-like signature")

    def _nearbyEnhancers(self):
        """Nearby "ELS" CREs passing the H3K27ac z-score threshold."""
        return self._nearbyBySignature("H3K27ac", "h3k27ac_zscores",
                                       [False, True], "ELS",
                                       "enhancer-like signature")

    def diffCREs(self, xdomain):
        """Return promoter and enhancer entries lying fully inside ``xdomain``.

        ``xdomain`` is read as ``(start, stop)``; the result is
        ``{"data": [...]}`` with promoter entries before enhancer entries.
        """
        xstart = xdomain[0]
        xstop = xdomain[1]
        ret = self._nearbyPromoters() + self._nearbyEnhancers()
        ret = [x for x in ret if x["start"] >= xstart and x["stop"] <= xstop]
        return {"data": ret}

    def _genesInRegion(self, start, stop):
        """Genes on the gene's chromosome between ``start`` and ``stop``."""
        pos = self.coord()
        return self.pgSearch.genesInRegion(pos.chrom, int(start), int(stop))

    def _DEsForDisplay(self, nearbyDEs):
        """Convert DE rows into display dicts.

        Each row is read as ``(start, stop, fold_change, ensembl_id)``; the
        gene name and strand come from ``cache.lookupEnsembleGene``.
        """
        ret = []
        for d in nearbyDEs:
            genename, strand = self.cache.lookupEnsembleGene(d[3])
            ret.append({
                "fc": round(float(d[2]), 3),
                "gene": genename,
                "start": d[0],
                "stop": d[1],
                "strand": strand,
                "sstart": "{:,} ({})".format(d[0], strand)
            })
        return ret

    @staticmethod
    def _shortCellType(ct):
        """Strip the fixed prefixes/suffixes from a cell-type name.

        For example ``"C57BL/6_limb_embryo_14.5_days"`` becomes
        ``"limb_14.5"``.
        """
        for fragment in ("C57BL/6_", "embryo_", "_days", "postnatal_"):
            ct = ct.replace(fragment, "")
        return ct

    def nearbyDEs(self):
        """Return DE rows near the gene plus the domain used to plot them.

        Returns ``{"data": None, "xdomain": None}`` when ``PGde.nearbyDEs``
        yields nothing; otherwise a dict with ``names``, ``data``,
        ``xdomain``, ``genes``, ``ymin`` and ``ymax``.
        """
        ct1 = self._shortCellType(self.ct1)
        ct2 = self._shortCellType(self.ct2)

        cd = self.coord()

        pg = PGde(self.pgSearch.pw, self.assembly)
        nearbyDEs = pg.nearbyDEs(cd, self.halfWindow, ct1, ct2, 0.05)

        if not nearbyDEs:
            return {"data": None, "xdomain": None}

        # center on middle of DEs
        cxdomain = [
            max(0, min(d[0] for d in nearbyDEs)),
            max(d[1] for d in nearbyDEs)
        ]
        span = cxdomain[1] - cxdomain[0]
        center = float(span) / 2 + cxdomain[0]
        halfWindow = max(self.halfWindow, span / 2.0)

        # widen each side
        xdomain = [max(0, center - halfWindow), center + halfWindow]

        genes = self._genesInRegion(min(xdomain[0], cxdomain[0]),
                                    max(xdomain[1], cxdomain[1]))

        ret = self._DEsForDisplay(nearbyDEs)
        foldChanges = [d["fc"] for d in ret]

        return {
            "names": self.names,
            "data": ret,
            "xdomain": xdomain,
            "genes": genes,
            "ymin": min(foldChanges),
            "ymax": max(foldChanges)
        }
コード例 #9
0
class GeneExpWebService(object):
    """Serve gene-expression search requests.

    ``process`` dispatches on the first path element through
    ``self.actions``; the only registered action is ``"search"``.
    """

    def __init__(self, args, ps, cache, staticDir, assembly):
        """Store the collaborators, build a ``PGsearch``, register actions."""
        self.args = args
        self.ps = ps
        self.cache = cache
        self.staticDir = staticDir
        self.assembly = assembly
        self.pgSearch = PGsearch(ps, assembly)

        # Every biosample type name accepted by search().
        self.allBiosampleTypes = {
            "cell line", "induced pluripotent stem cell line",
            "in vitro differentiated cells", "primary cell", "stem cell",
            "tissue"
        }

        self.actions = {"search": self.search}

    def process(self, j, args, kwargs):
        """Run the handler named by ``args[0]`` with ``j`` and ``args[1:]``.

        ``kwargs`` is accepted but not used.

        Raises:
            IndexError: if ``args`` is empty.
            KeyError: if ``args[0]`` is not a registered action.
        """
        # Exceptions propagate unchanged; the former bare ``except: raise``
        # wrapper was a no-op and has been dropped.
        action = args[0]
        return self.actions[action](j, args[1:])

    def search(self, j, args):
        """Compute expression summaries for one gene across all assays.

        Reads ``compartments_selected``, ``biosample_types_selected`` and
        ``gene`` from ``j``.  Any ``assay_name`` in ``j`` is not consulted:
        results are always produced for "total RNA-seq", "polyA RNA-seq" and
        "all".

        Returns:
            * an error dict ``{"hasData": False, "items": {}, "err": ...}``
              when no/invalid biosample types or no compartments are given;
            * ``{"assembly": ..., "gene": ...}`` alone when
              ``pgSearch.geneInfo`` finds nothing for the gene;
            * otherwise that dict extended with one entry per assay name.

        Raises:
            KeyError: if a required key is missing from ``j``.
        """

        def abort(err):
            return {"hasData": False, "items": {}, "err": err}

        compartments = j["compartments_selected"]
        biosample_types_selected = j["biosample_types_selected"]

        if not biosample_types_selected:
            return abort("no biosample type selected")
        if not set(biosample_types_selected).issubset(self.allBiosampleTypes):
            return abort("invalid biosample type")

        # TODO: check value of compartments
        if not compartments:
            return abort("no compartments")

        gene = j["gene"]  # TODO: check for valid gene
        gi = self.pgSearch.geneInfo(gene)
        if not gi:
            return {"assembly": self.assembly, "gene": gene}

        name = gi.approved_symbol
        strand = gi.strand

        cge = GeneExpression(self.ps, self.cache, self.assembly)
        r = {"assembly": self.assembly, "gene": gene}
        for assay_name in ["total RNA-seq", "polyA RNA-seq", "all"]:
            # "all" means no assay restriction is passed to the computations.
            assay_filter = None if assay_name == "all" else assay_name
            single = cge.computeHorBars(name, compartments,
                                        biosample_types_selected, assay_filter)
            mean = cge.computeHorBarsMean(name, compartments,
                                          biosample_types_selected,
                                          assay_filter)
            # Read after both computations, as the original code did.
            itemsByRID = cge.itemsByRID
            r[assay_name] = {
                "assembly": self.assembly,
                "gene": name,
                "strand": strand,
                "ensemblid_ver": gi.ensemblid_ver,
                "coords": {
                    "chrom": gi.chrom,
                    "start": gi.start,
                    "stop": gi.stop
                },
                "single": single,
                "mean": mean,
                "itemsByRID": itemsByRID
            }
        return r
コード例 #10
0
class DataWebService():
    """Handlers for CRE tables, CRE details, downloads and global objects.

    ``process`` dispatches on the first path element through
    ``self.actions``; ``re_detail`` dispatches a second level through
    ``self.reDetailActions``.
    """

    def __init__(self, args, pw, cache, staticDir, assembly):
        """Store the collaborators, build the helpers, register the actions.

        Builds ``PGsearch``, ``GlobalPG`` and ``PGFantomCat`` helpers from
        ``pw`` and ``assembly``.
        """
        self.args = args
        self.pw = pw
        self.cache = cache
        self.staticDir = staticDir
        self.assembly = assembly
        self.pgSearch = PGsearch(pw, assembly)
        self.pgGlobal = GlobalPG(pw, assembly)
        self.pgFantomCat = PGFantomCat(pw, assembly)

        # Top-level action name -> handler(j, args).
        self.actions = {
            "cre_table": self.cre_table,
            "cre_tf_dcc": self.cre_tf_dcc,
            "cre_histone_dcc": self.cre_histone_dcc,
            "re_detail": self.re_detail,
            "bed_download": self.bed_download,
            "json_download": self.json_download,
            "global_object": self.global_object,
            "global_fantomcat": self.global_fantomcat,
            "global_liftover": self.global_liftover,
            "rampage": self.rampage,
            "gwas_json_download": self.gwas_json_download,
            "home_inputData": self.home_inputData,
            "ground_level_versions": self.ground_level
        }

        # Detail sub-action name -> handler(j, accession).
        self.reDetailActions = {
            "topTissues": self._re_detail_topTissues,
            "nearbyGenomic": self._re_detail_nearbyGenomic,
            "fantom_cat": self.fantom_cat,
            "ortholog": self._ortholog,
            "tfIntersection": self._re_detail_tfIntersection,
            "cistromeIntersection": self._re_detail_cistromeIntersection,
            "rampage": self._re_detail_rampage,
            "linkedGenes": self._re_detail_linkedGenes,
            "miniPeaks": self._re_detail_miniPeaks,
            "groundLevel": self._re_detail_groundlevel
        }

    def process(self, j, args, kwargs):
        """Run the handler named by ``args[0]`` with ``j`` and ``args[1:]``.

        ``kwargs`` is accepted but not used.

        Raises:
            IndexError: if ``args`` is empty.
            KeyError: if ``args[0]`` is not a registered action.
        """
        # Exceptions propagate unchanged; the former bare ``except: raise``
        # wrapper was a no-op and has been dropped.
        action = args[0]
        return self.actions[action](j, args[1:])

    def ground_level(self, j, args):
        """Group the rows of ``pgSearch.versions()`` into a nested dict.

        Each row is read as ``(accession, biosample, assay, version)``.

        Returns:
            ``{version: {biosample: {assay: [accession, ...]}}}`` with
            accessions in row order.
        """
        r = {}
        for row in self.pgSearch.versions():
            accession, biosample, assay, version = (row[0], row[1], row[2],
                                                    row[3])
            by_biosample = r.setdefault(version, {})
            by_assay = by_biosample.setdefault(biosample, {})
            by_assay.setdefault(assay, []).append(accession)
        return r

    def _ortholog(self, j, accession):
        """Return ortholog information for ``accession``.

        When ``j["assembly"]`` is not ``"mm10"`` the result holds the
        ``Ortholog(..., "mm10")`` and ``Ortholog(..., "hg19")`` dicts for
        ``self.assembly``.  For ``"mm10"`` it holds the GRCh38 orthologs and,
        under ``"hg19"``, the hg19 results of each GRCh38 ortholog with
        duplicate accessions removed (first occurrence kept).
        """
        if j["assembly"] != "mm10":
            mm10 = Ortholog(self.pw, self.assembly, accession, "mm10")
            hg19 = Ortholog(self.pw, self.assembly, accession, "hg19")
            return {
                accession: {
                    "ortholog": mm10.as_dict(),
                    "hg19": hg19.as_dict()
                }
            }
        hg38 = Ortholog(self.pw, "mm10", accession, "GRCh38").as_dict()
        hg19 = []
        hg19accs = set()
        for ortholog in hg38:
            for result in Ortholog(self.pw, "GRCh38", ortholog["accession"],
                                   "hg19").as_dict():
                if result["accession"] not in hg19accs:
                    hg19accs.add(result["accession"])
                    hg19.append(result)
        return {accession: {"ortholog": hg38, "hg19": hg19}}

    def global_liftover(self, j, args):
        """Assemble the saturation, liftOver and cistrome global objects.

        The ``"cistrome_encode"`` entry is always an empty dict; the
        per-assembly data is stored under ``"cistrome_encode_<assembly>"``.
        """
        retval = {
            "saturation": {
                self.assembly:
                self.global_object({"name": "saturation"}, args),
                "GRCh38":
                self.external_global_object({"name": "saturation"}, args,
                                            "GRCh38"),
                "GRCh38_encode_cistrome":
                self.external_global_object(
                    {"name": "saturation_encode_cistrome"}, args, "GRCh38")
            },
            "cistrome_encode": {}
        }
        for a in ["hg19", "GRCh38"]:
            for b in ["hg19", "GRCh38"]:
                retval["%s_%s" % (a, b)] = self.global_object(
                    {"name": "liftOver_%s_%s" % (a, b)}, args)
            retval["cistrome_encode_%s" % a] = self.global_object(
                {"name": "encode_cistrome_%s" % a}, args)
        return retval

    def global_fantomcat(self, j, args):
        """Return the ``fantomcat`` and ``fantomcat_2kb`` global objects."""
        return {
            "main": self.global_object({"name": "fantomcat"}, args),
            "fantomcat_2kb": self.global_object({"name": "fantomcat_2kb"},
                                                args)
        }

    def ctcf_distr(self, j, args):
        """Return the ``ctcf_density_10000`` entry and TADs for ``j["chr"]``.

        Raises:
            Exception: if ``j["chr"]`` is not a key of the global object.
        """
        # NOTE(review): ``self.tads`` is never assigned in this class and this
        # method is not registered in ``self.actions``; calling it as-is
        # raises AttributeError unless ``tads`` is attached externally.
        result = self.global_object({"name": "ctcf_density_10000"}, args)
        if j["chr"] not in result:
            raise Exception("data_ws$DataWS::ctcf_distr: chr %s not valid" %
                            j["chr"])
        return {
            "data": {
                "results":
                result[j["chr"]],
                "tads":
                [[x[0] / 10000, x[1] / 10000]
                 for x in self.tads.get_chrom_btn(j["biosample"], j["chr"])]
            }
        }

    def global_object(self, j, args):
        """Return ``pgGlobal.select(j["name"])``; ``args`` is unused."""
        return self.pgGlobal.select(j["name"])

    def external_global_object(self, j, args, assembly):
        """Return ``pgGlobal.select_external(j["name"], assembly)``."""
        return self.pgGlobal.select_external(j["name"], assembly)

    def cre_table(self, j, args):
        """Return the CRE table for the request, annotated with nearby genes.

        Every entry of ``results["cres"]`` gains a ``"genesallpc"`` dict built
        from ``CRE.nearbyGenesPA()``.  ``"rfacets"`` comes from
        ``pgSearch.rfacets_active`` when ``j`` has a truthy ``"cellType"`` and
        is a fixed four-item list otherwise; ``"cts"`` is
        ``pgSearch.haveSCT(j)``.
        """
        chrom = checkChrom(self.assembly, j)
        results = self.pgSearch.creTable(j, chrom, j.get("coord_start", None),
                                         j.get("coord_end", None))
        for r in results["cres"]:
            accession = r["info"]["accession"]
            genesp, genesa = CRE(self.pgSearch, accession,
                                 self.cache).nearbyGenesPA()
            r["genesallpc"] = {
                "all": genesa,
                "pc": genesp,
                "accession": accession
            }
        if j.get("cellType"):
            results["rfacets"] = self.pgSearch.rfacets_active(j)
        else:
            results["rfacets"] = ["dnase", "promoter", "enhancer", "ctcf"]
        results["cts"] = self.pgSearch.haveSCT(j)
        return results

    def re_detail(self, j, args):
        """Run the detail sub-action ``args[0]`` for ``j["accession"]``.

        Raises:
            Exception: if ``args[0]`` is not in ``self.reDetailActions``.
        """
        action = args[0]
        if action not in self.reDetailActions:
            raise Exception("unknown action")
        return self.reDetailActions[action](j, j["accession"])

    def tfenrichment(self, j, args):
        """Return ``tfEnrichment.findenrichment`` for two compared nodes."""
        # NOTE(review): ``self.tfEnrichment`` is never assigned in this class
        # and this method is not registered in ``self.actions``.
        a = j["tree_nodes_compare"]
        tree_rank_method = j["tree_rank_method"]
        return self.tfEnrichment.findenrichment(tree_rank_method, a[0], a[1])

    def _re_detail_topTissues(self, j, accession):
        """Return ``{accession: CRE.topTissues()}``."""
        cre = CRE(self.pgSearch, accession, self.cache)
        ranks = cre.topTissues()
        return {accession: ranks}

    def fantom_cat(self, j, accession):
        """Return FANTOM CAT intersections, enhancers and CAGE rows.

        Intersection rows gain an ``"other_names"`` string made of the gene
        name (when it differs from the gene id) followed by the
        ``|``-separated aliases, joined with ``", "``.
        """

        def process(key):
            results = self.pgFantomCat.select_cre_intersections(accession, key)
            for result in results:
                result["other_names"] = result["genename"] if result[
                    "genename"] != result["geneid"] else ""
                if result["aliases"] != "":
                    if result["other_names"] != "":
                        result["other_names"] += ", "
                    result["other_names"] += ", ".join(
                        result["aliases"].split("|"))
            return results

        enhancers = [{
            "chr": a,
            "start": int(b),
            "stop": int(c),
            "score": float(d)
        } for a, b, c, d in self.pgFantomCat.select_enhancers(accession)]

        cage = [{
            "chr": a,
            "start": int(b),
            "stop": int(c),
            "strand": d,
            "score": float(e),
            "tssstart": int(f),
            "tssstop": int(g)
        } for a, b, c, d, e, f, g in self.pgFantomCat.select_cage(accession)]
        return {
            accession: {
                "fantom_cat": process("intersections"),
                "fantom_cat_twokb": process("twokb_intersections"),
                "enhancers": enhancers,
                "cage": cage
            }
        }

    def _re_detail_nearbyGenomic(self, j, accession):
        """Return SNPs, CREs, genes, TAD contents and VISTA ids near a CRE."""
        cre = CRE(self.pgSearch, accession, self.cache)
        # NOTE(review): the returned coordinate is not used here; the call is
        # kept in case it primes state on the CRE object - confirm.
        cre.coord()

        snps = cre.intersectingSnps(10000)  # 10 KB
        nearbyCREs = cre.distToNearbyCREs(1000000)  # 1 MB
        nearbyGenes = cre.nearbyGenes()
        genesInTad = cre.genesInTad()
        re_tads = cre.cresInTad()
        vista = cre.vista()

        return {
            accession: {
                "nearby_genes": nearbyGenes,
                "tads": genesInTad,
                "re_tads": re_tads,
                "nearby_res": nearbyCREs,
                "overlapping_snps": snps,
                "vistaids": vista
            }
        }

    def _re_detail_tfIntersection(self, j, accession):
        """Return ``{accession: CRE.peakIntersectCount()}``."""
        cre = CRE(self.pgSearch, accession, self.cache)
        peakIntersectCount = cre.peakIntersectCount()
        return {accession: peakIntersectCount}

    def _re_detail_cistromeIntersection(self, j, accession):
        """Return ``{accession: CRE.peakIntersectCount(eset="cistrome")}``."""
        cre = CRE(self.pgSearch, accession, self.cache)
        peakIntersectCount = cre.peakIntersectCount(eset="cistrome")
        return {accession: peakIntersectCount}

    def _re_detail_linkedGenes(self, j, accession):
        """Return the CRE's linked genes under ``"linked_genes"``."""
        cre = CRE(self.pgSearch, accession, self.cache)
        return {accession: {"linked_genes": cre.linkedGenes()}}

    def _re_detail_rampage(self, j, accession):
        """Return RAMPAGE data for the nearby gene with the smallest distance.

        Raises:
            ValueError: if ``CRE.nearbyPcGenes()`` returns nothing (``min`` of
                an empty sequence).
        """
        cre = CRE(self.pgSearch, accession, self.cache)
        nearbyGenes = cre.nearbyPcGenes()
        nearest = min(nearbyGenes, key=lambda x: x["distance"])
        rampage = Rampage(self.assembly, self.pgSearch, self.cache)
        ret = rampage.getByGene(nearest)
        return {accession: ret}

    def rampage(self, j, args):
        """Return ``{gene: Rampage.getByGeneApprovedSymbol(gene)}``."""
        rampage = Rampage(self.assembly, self.pgSearch, self.cache)
        gene = j["gene"]
        ret = rampage.getByGeneApprovedSymbol(gene)
        return {gene: ret}

    def bed_download(self, j, args):
        """Return ``CREdownload.bed(j)`` using ``Config.downloadDir``."""
        cd = CREdownload(self.pgSearch, Config.downloadDir)
        return cd.bed(j)

    def gwas_json_download(self, j, args):
        """Return ``CREdownload.gwas`` for ``j`` under a fresh UUID.

        Side effect: stores the generated UUID string in ``j["uuid"]``.
        """
        j["uuid"] = str(uuid.uuid4())
        cd = CREdownload(self.pgSearch, Config.downloadDir)
        return cd.gwas(j, j["uuid"])

    def json_download(self, j, args):
        """Return ``CREdownload.json(j)`` using ``Config.downloadDir``."""
        cd = CREdownload(self.pgSearch, Config.downloadDir)
        return cd.json(j)

    def cre_tf_dcc(self, j, args):
        """Return ``{target: pgSearch.tfTargetExps(...)}`` for the request.

        Raises:
            Exception: if ``accession`` or ``target`` is missing or falsy.
        """
        accession = j.get("accession", None)
        if not accession:
            raise Exception("invalid accession")
        target = j.get("target", None)
        if not target:
            raise Exception("invalid target")
        return {
            target:
            self.pgSearch.tfTargetExps(accession,
                                       target,
                                       eset=j.get("eset", None))
        }

    def cre_histone_dcc(self, j, args):
        """Return ``{target: pgSearch.histoneTargetExps(...)}``.

        Raises:
            Exception: if ``accession`` or ``target`` is missing or falsy.
        """
        accession = j.get("accession", None)
        if not accession:
            raise Exception("invalid accession")
        target = j.get("target", None)
        if not target:
            raise Exception("invalid target")
        return {
            target:
            self.pgSearch.histoneTargetExps(accession,
                                            target,
                                            eset=j.get("eset", None))
        }

    def _re_detail_miniPeaks(self, j, accession):
        """Return mini-peak rows and accessions for three fixed assays."""
        nbins = Config.minipeaks_nbins
        ver = Config.minipeaks_ver
        mp = MiniPeaks(self.assembly, self.pgSearch, self.cache, nbins, ver)
        rows, accessions = mp.getMinipeaksForAssays(
            ["dnase", "h3k27ac", "h3k4me3"], [accession])
        return {accession: {"rows": rows, "accessions": accessions}}

    def _re_detail_groundlevel(self, j, accession):
        """Fetch peak-service results overlapping the CRE's coordinate.

        Issues one HTTP GET per category and returns
        ``{accession: {category: response["results"]["all"]}}``.
        """
        # NOTE(review): the URL is fixed to GRCh38 regardless of
        # ``self.assembly``, and the request has no timeout - confirm both
        # are intended.
        cre = CRE(self.pgSearch, accession, self.cache)
        coord = cre.coord()

        def _dreq(url):
            return requests.get(
                url %
                (coord.chrom, coord.start, coord.end)).json()["results"]["all"]

        return {
            accession: {
                k: _dreq("https://api.wenglab.org/peaksws/GRCh38/" + k +
                         "/search/%s/%d/%d")
                for k in ["tf", "histone", "dnase", "3dinteractions", "cdhs"]
            }
        }

    def home_inputData(self, j, args):
        """Return ``PGHome.inputData()``."""
        # The connection wrapper is stored as ``self.pw`` by __init__; the
        # previous ``self.ps`` was never assigned and raised AttributeError.
        home = PGHome(self.pw)
        return home.inputData()