Esempio n. 1
0
    def _trackhub_url_info(self, j):
        """Resolve assembly, accession and a resized coordinate for a request.

        Stores ``j["assembly"]`` on ``self.assembly``. When ``j`` carries
        ``"coord_start"`` the coordinate is built from the ``coord_chrom`` /
        ``coord_start`` / ``coord_end`` entries; otherwise it is obtained
        from a ``CRE`` created for ``j["accession"]``. Either way the
        coordinate is resized by ``j["halfWindow"]`` before being returned.

        Returns:
            A ``(assembly, accession, coord)`` tuple.
        """
        assembly = j["assembly"]
        self.assembly = assembly
        # Built unconditionally, as before, even though only the CRE branch
        # consumes it.
        pgSearch = PGsearch(self.pw, assembly)

        if "coord_start" in j:
            window = Coord(j["coord_chrom"], j["coord_start"], j["coord_end"])
        else:
            window = CRE(pgSearch, j["accession"], self.cacheW[assembly]).coord()

        window.resize(j["halfWindow"])
        return assembly, j["accession"], window
    def parseSnp(self):
        """Translate the SNP id in ``self.loci`` into a resized ``Coord``.

        The id is looked up through ``self.dbsnps`` for ``self.assembly``.

        Returns:
            A ``Coord`` built from the first three fields of the single
            match and resized by ``self.halfWindow``, or ``None`` when the
            lookup yields no match or more than one match.
        """
        found = self.dbsnps.lookup(self.assembly, self.loci)

        if found and len(found) <= 1:
            hit = found[0]
            window = Coord(hit[0], hit[1], hit[2])
            window.resize(self.halfWindow)
            return window

        # Either nothing matched, or the id is ambiguous (the original note
        # here reads "search on UCSC").
        return None
    def parseSnp(self):
        """Resolve ``self.loci`` as a SNP id via ``self.dbsnps``.

        Returns:
            ``None`` unless the lookup for ``self.assembly`` produces exactly
            one match; otherwise a ``Coord`` made from that match's first
            three fields, resized by ``self.halfWindow``.
        """
        hits = self.dbsnps.lookup(self.assembly, self.loci)

        # No hit, or several hits (original note: "search on UCSC"): there
        # is no single position to return.
        if not hits or len(hits) > 1:
            return None

        only = hits[0]
        result = Coord(only[0], only[1], only[2])
        result.resize(self.halfWindow)
        return result
Esempio n. 4
0
    def ucsc_trackhub_url_snp(self, j, uuid):
        """Build the UCSC hgTracks URL and track hub URL for a SNP request.

        The browser position comes from the ``cre_start``/``cre_end`` entries
        of ``j["snp"]`` and the highlighted range from its ``snp_start`` /
        ``snp_end`` entries. The request is recorded with
        ``self.db.insertOrUpdate`` and the number it returns names the hub
        file. Also stores ``j["assembly"]`` on ``self.assembly``.

        Returns:
            A dict with keys ``"url"`` and ``"trackhubUrl"``.
        """
        assembly = j["assembly"]
        self.assembly = assembly
        # The instance is not used below; the construction is kept in case
        # it has side effects (e.g. validation) -- TODO confirm and drop.
        pgSearch = PGsearch(self.pw, assembly)

        snp = j["snp"]
        region = Coord(snp["chrom"], snp["cre_start"], snp["cre_end"])

        hubNum = self.db.insertOrUpdate(assembly, snp["accession"], uuid, j)

        hubPath = [j["host"], "ucsc_trackhub", uuid, "hub_" + str(hubNum) + ".txt"]
        trackhubUrl = '/'.join(hubPath)

        pieces = [
            "https://genome.ucsc.edu/cgi-bin/hgTracks?",
            "db=" + assembly,
            "&position=" + str(region),
            "&hubClear=" + trackhubUrl,
            # "%3A" is the percent-encoded ':' between chromosome and range.
            "&highlight=" + assembly + "." + region.chrom + "%3A"
            + str(snp["snp_start"]) + '-' + str(snp["snp_end"]),
        ]

        if "hg19" == assembly:
            pieces.append("&g=wgEncodeGencodeV19")
            pieces.append("&g=phastCons100way")

        return {"url": "".join(pieces), "trackhubUrl": trackhubUrl}
Esempio n. 5
0
    def ucsc_trackhub_url(self, j, uuid):
        """Build the UCSC hgTracks URL and track hub URL for a request.

        The browser position is the resized coordinate returned by
        ``_trackhub_url_info``; the highlighted range is the coordinate given
        directly by the ``coord_chrom``/``coord_start``/``coord_end`` entries
        of ``j``. The request is recorded with ``self.db.insertOrUpdate`` and
        the number it returns names the hub file.

        Returns:
            A dict with keys ``"url"`` and ``"trackhubUrl"``.
        """
        assembly, accession, coord = self._trackhub_url_info(j)
        hubNum = self.db.insertOrUpdate(assembly, accession, uuid, j)

        # NOTE(review): these keys are read unconditionally, although
        # _trackhub_url_info tolerates a missing "coord_start" -- confirm
        # every caller supplies them.
        highlighted = Coord(j["coord_chrom"], j["coord_start"], j["coord_end"])

        hubPath = [j["host"], "ucsc_trackhub", uuid, "hub_" + str(hubNum) + ".txt"]
        trackhubUrl = '/'.join(hubPath)

        pieces = [
            "https://genome.ucsc.edu/cgi-bin/hgTracks?",
            "db=" + assembly,
            "&position=" + str(coord),
            "&hubClear=" + trackhubUrl,
            # "%3A" is the percent-encoded ':' between chromosome and range.
            "&highlight=" + assembly + "." + highlighted.chrom + "%3A"
            + str(highlighted.start) + '-' + str(highlighted.end),
        ]

        if "hg19" == assembly:
            pieces.append("&g=wgEncodeGencodeV19")
            pieces.append("&g=phastCons100way")

        # FIXME (carried over from the original): nothing extra is appended
        # for "mm10" yet.

        return {"url": "".join(pieces), "trackhubUrl": trackhubUrl}
    def parseGene(self):
        """Resolve ``self.loci`` as a gene name into a resized ``Coord``.

        An exact lookup through ``self.genes`` is tried first. When it finds
        nothing, a fuzzy lookup is used only to suggest alternatives in
        ``self.userErrMsg``.

        Returns:
            A ``Coord`` built from the first three fields of the single
            exact match and resized by ``self.halfWindow``; ``None`` (with
            ``self.userErrMsg`` set) when there is no exact match or more
            than one.
        """
        matches = self.genes.lookup(self.assembly, self.loci)

        if not matches:
            suggestions = self.genes.fuzzy_lookup(self.assembly, self.loci)
            if suggestions:
                self.userErrMsg = "'{loci}' not found; potential matches: {genes}".format(
                    loci=self.loci, genes=", ".join(sorted(suggestions)))
            else:
                self.userErrMsg = "'{loci}' not found".format(loci=self.loci)
            return None

        if len(matches) > 1:
            # NOTE(review): the message announces that the first match is
            # used, yet None is returned -- confirm which one is intended.
            self.userErrMsg = "Multiple genomic positions found; using first found..."
            return None

        hit = matches[0]
        window = Coord(hit[0], hit[1], hit[2])
        window.resize(self.halfWindow)
        return window
    def parseGene(self):
        """Turn the gene name in ``self.loci`` into a resized ``Coord``.

        Returns:
            ``None`` with an explanation in ``self.userErrMsg`` when the
            exact lookup in ``self.genes`` finds no match (fuzzy matches, if
            any, are listed as suggestions) or several matches. Otherwise a
            ``Coord`` made from the single match's first three fields,
            resized by ``self.halfWindow``.
        """
        exact = self.genes.lookup(self.assembly, self.loci)

        if exact:
            if len(exact) > 1:
                # NOTE(review): the text says "using first found" but the
                # method gives up here -- confirm the intended behaviour.
                self.userErrMsg = "Multiple genomic positions found; using first found..."
                return None

            chrom, start, end = exact[0][0], exact[0][1], exact[0][2]
            coord = Coord(chrom, start, end)
            coord.resize(self.halfWindow)
            return coord

        # Nothing matched exactly: report it, offering fuzzy matches if any.
        nearby = self.genes.fuzzy_lookup(self.assembly, self.loci)
        if not nearby:
            self.userErrMsg = "'{loci}' not found".format(loci=self.loci)
            return None

        self.userErrMsg = "'{loci}' not found; potential matches: {genes}".format(
            loci=self.loci, genes=", ".join(sorted(nearby)))
        return None
    def getRankedPeakCoord(self):
        """Return the coordinate of the peak at a given rank for one tissue.

        ``self.loci`` is read as a 1-based rank: rank 1 is the first line of
        the selected epigenome's prediction file (the path reported by
        ``predictionFnp()`` with ``.bigBed`` replaced by ``.bed``).

        Returns:
            A ``Coord`` built from the first three tab-separated fields of
            the chosen line and resized by ``self.halfWindow``. ``None``,
            with ``self.userErrMsg`` set, when exactly one tissue is not
            selected, the rank is below 1, or the file has fewer lines than
            the rank.

        Raises:
            Exception: if the ``.bed`` file does not exist.
            ValueError: if ``self.loci`` is not an integer string.
        """
        if 1 != len(self.tissue_ids):
            self.userErrMsg = "Please only select one tissue"
            return None

        wepis = self.epigenomes.GetByAssemblyAndAssays(self.assembly,
                                                       self.assays)
        # Build a real list: on Python 3 filter() returns an iterator, which
        # supports neither len() nor indexing.
        wepis = [e for e in wepis.epis if e.web_id() in self.tissue_ids]

        if 1 != len(wepis):
            self.userErrMsg = "Please only select one tissue"
            return None

        wepi = wepis[0]

        fnp = wepi.predictionFnp().replace(".bigBed", ".bed")
        if not os.path.exists(fnp):
            raise Exception("file not found " + fnp)

        rank = int(self.loci) - 1

        if rank < 0:
            # The user-facing rank is 1-based (0 is rejected right here), so
            # the message must say 1, not 0.
            self.userErrMsg = "Ranks start at 1"
            return None

        row = None
        numPeaks = 0  # stays 0 for an empty file, so the message below is safe
        with open(fnp) as f:
            # Count lines from 1 so numPeaks is the number of lines read.
            for numPeaks, line in enumerate(f, 1):
                if numPeaks == rank + 1:
                    row = line
                    break
        if row is None:
            self.userErrMsg = "Only %d peaks in file" % numPeaks
            return None

        toks = row.rstrip().split('\t')
        c = Coord(toks[0], toks[1], toks[2])
        c.resize(self.halfWindow)
        return c
    def getRankedPeakCoord(self):
        """Look up the peak with the 1-based rank given in ``self.loci``.

        The peaks are the lines of the ``.bed`` file derived from the single
        selected epigenome's ``predictionFnp()`` path (``.bigBed`` replaced
        by ``.bed``); rank 1 is the first line.

        Returns:
            A ``Coord`` from the first three tab-separated fields of that
            line, resized by ``self.halfWindow``; or ``None`` with
            ``self.userErrMsg`` set if the tissue selection is not exactly
            one, the rank is below 1, or the file is too short.

        Raises:
            Exception: if the ``.bed`` file does not exist.
            ValueError: if ``self.loci`` is not an integer string.
        """
        if 1 != len(self.tissue_ids):
            self.userErrMsg = "Please only select one tissue"
            return None

        wepis = self.epigenomes.GetByAssemblyAndAssays(self.assembly, self.assays)
        # A list comprehension instead of filter(): filter() is lazy on
        # Python 3, where len() and [0] on its result raise TypeError.
        wepis = [e for e in wepis.epis if e.web_id() in self.tissue_ids]

        if 1 != len(wepis):
            self.userErrMsg = "Please only select one tissue"
            return None

        wepi = wepis[0]

        fnp = wepi.predictionFnp().replace(".bigBed", ".bed")
        if not os.path.exists(fnp):
            raise Exception("file not found " + fnp)

        rank = int(self.loci) - 1

        if rank < 0:
            # Input ranks are 1-based; "0" is the value being rejected here.
            self.userErrMsg = "Ranks start at 1"
            return None

        row = None
        linesRead = 0  # pre-set so an empty file reports 0 rather than failing
        with open(fnp) as f:
            for linesRead, line in enumerate(f, 1):
                # linesRead is 1-based, rank is 0-based.
                if linesRead - 1 == rank:
                    row = line
                    break
        if row is None:
            self.userErrMsg = "Only %d peaks in file" % linesRead
            return None

        toks = row.rstrip().split('\t')
        c = Coord(toks[0], toks[1], toks[2])
        c.resize(self.halfWindow)
        return c
Esempio n. 10
0
 def search(self):
     """Dispatch ``self.loci`` to the matching parser and return its result.

     The query is classified case-insensitively:

     * starts with ``chr``: parsed by ``Coord.parse``;
     * ``rs`` followed only by digits: SNP lookup via ``parseSnp``;
     * all digits: ranked peak via ``getRankedPeakCoord``;
     * anything else: gene lookup via ``parseGene``.

     Returns:
         Whatever the selected parser returns.
     """
     query = self.loci.lower()

     if query.startswith("chr"):
         # coordinate
         # NOTE(review): gene symbols beginning with "chr" (e.g. CHRNA1)
         # also land here; the formats Coord.parse accepts are not visible
         # from this method, so this branch is left as it was.
         return Coord.parse(self.loci)

     if query.startswith("rs") and query[2:].isdigit():
         # SNP: only "rs" + digits, so gene symbols that merely start with
         # "rs" (e.g. RSPO1) fall through to the gene lookup.
         return self.parseSnp()

     if self.loci.isdigit():
         # a ranked peak for a single selected tissue
         return self.getRankedPeakCoord()

     return self.parseGene()
Esempio n. 11
0
 def search(self):
     """Classify ``self.loci`` and resolve it with the matching parser.

     Matching is case-insensitive and tried in this order: a ``chr`` prefix
     goes to ``Coord.parse``; ``rs`` followed only by digits goes to
     ``parseSnp``; an all-digit string goes to ``getRankedPeakCoord``;
     everything else goes to ``parseGene``.

     Returns:
         The value produced by the chosen parser.
     """
     lowered = self.loci.lower()
     # Require digits after "rs": a bare prefix test would also capture gene
     # symbols such as RSPO1 and keep them from ever reaching parseGene().
     isSnpId = lowered.startswith("rs") and lowered[2:].isdigit()

     if lowered.startswith("chr"):
         # coordinate
         # NOTE(review): "chr"-prefixed gene symbols (e.g. CHRM1) are routed
         # here too -- confirm how Coord.parse treats them.
         coord = Coord.parse(self.loci)
     elif isSnpId:
         # SNP
         coord = self.parseSnp()
     elif self.loci.isdigit():
         # a ranked peak for a single selected tissue
         coord = self.getRankedPeakCoord()
     else:
         coord = self.parseGene()
     return coord