def _trackhub_url_info(self, j):
    """Pull the assembly, accession and viewing window out of a request.

    Side effect: ``self.assembly`` is set to ``j["assembly"]``.

    :param j: request mapping. Reads "assembly", "accession" and
        "halfWindow"; when "coord_start" is present it also reads
        "coord_chrom", "coord_start" and "coord_end".
    :returns: tuple ``(assembly, accession, coord)``; ``coord`` has had
        ``resize(j["halfWindow"])`` called on it.
    """
    assembly = j["assembly"]
    self.assembly = assembly

    # Built unconditionally (as before), although only the accession-based
    # branch below actually hands it to CRE.
    pgSearch = PGsearch(self.pw, assembly)

    if "coord_start" in j:
        # Caller supplied explicit coordinates.
        coord = Coord(j["coord_chrom"], j["coord_start"], j["coord_end"])
    else:
        # No coordinates given: derive them from the accession.
        coord = CRE(pgSearch, j["accession"], self.cacheW[assembly]).coord()

    coord.resize(j["halfWindow"])
    return assembly, j["accession"], coord
def parseSnp(self):
    """Resolve ``self.loci`` as a SNP identifier to a resized Coord.

    Looks the identifier up through ``self.dbsnps.lookup`` for
    ``self.assembly``.

    :returns: a Coord built from the first three fields of the single
        matching row, after ``resize(self.halfWindow)``; ``None`` when the
        lookup yields no row or more than one row.
    """
    matches = self.dbsnps.lookup(self.assembly, self.loci)

    # Nothing found, or an ambiguous result (original note for the
    # multi-hit case: "search on UCSC") -- either way there is no answer.
    if not matches or len(matches) > 1:
        return None

    row = matches[0]
    window = Coord(row[0], row[1], row[2])
    window.resize(self.halfWindow)
    return window
def ucsc_trackhub_url_snp(self, j, uuid):
    """Build the UCSC hgTracks URL for a SNP shown next to its cRE.

    The browser position is the cRE span taken from ``j["snp"]``; the
    highlighted region is the SNP span from the same mapping.

    Side effects: sets ``self.assembly`` and records the request through
    ``self.db.insertOrUpdate``; the value it returns becomes the hub number
    in the hub file name.

    :param j: request mapping. Reads "assembly", "host" and "snp" (itself
        a mapping with "chrom", "cre_start", "cre_end", "accession",
        "snp_start" and "snp_end").
    :param uuid: session identifier used as a path segment of the hub URL.
    :returns: dict with keys "url" and "trackhubUrl".
    """
    assembly = j["assembly"]
    self.assembly = assembly
    # The result is not used below; the construction is kept as-is in case
    # it has side effects.
    pgSearch = PGsearch(self.pw, assembly)

    snp = j["snp"]
    c = Coord(snp["chrom"], snp["cre_start"], snp["cre_end"])
    hubNum = self.db.insertOrUpdate(assembly, snp["accession"], uuid, j)

    trackhubUrl = "/".join(
        [j["host"], "ucsc_trackhub", uuid, "hub_" + str(hubNum) + ".txt"])

    # NOTE(review): values are concatenated into the query string without
    # URL-escaping, exactly as before.
    pieces = [
        "https://genome.ucsc.edu/cgi-bin/hgTracks?",
        "db=" + assembly,
        "&position=" + str(c),
        "&hubClear=" + trackhubUrl,
        "&highlight=" + assembly + "." + c.chrom + "%3A"
        + str(snp["snp_start"]) + "-" + str(snp["snp_end"]),
    ]
    if assembly == "hg19":
        pieces.append("&g=wgEncodeGencodeV19")
        pieces.append("&g=phastCons100way")

    return {"url": "".join(pieces), "trackhubUrl": trackhubUrl}
def ucsc_trackhub_url(self, j, uuid):
    """Build the UCSC hgTracks URL for a cRE request.

    The browser position is the resized window returned by
    ``self._trackhub_url_info``; the highlighted region is the un-resized
    span given by ``j["coord_chrom"]``/``j["coord_start"]``/``j["coord_end"]``.

    Side effect: records the request through ``self.db.insertOrUpdate``;
    the value it returns becomes the hub number in the hub file name.

    NOTE(review): the three ``coord_*`` keys are read unconditionally here,
    although ``_trackhub_url_info`` accepts requests without them -- such a
    request raises KeyError after the database call. Confirm whether that
    is intended.

    :param j: request mapping; also reads "host".
    :param uuid: session identifier used as a path segment of the hub URL.
    :returns: dict with keys "url" and "trackhubUrl".
    """
    assembly, accession, coord = self._trackhub_url_info(j)
    hubNum = self.db.insertOrUpdate(assembly, accession, uuid, j)

    # Un-resized span, used only for the highlight parameter.
    c = Coord(j["coord_chrom"], j["coord_start"], j["coord_end"])

    trackhubUrl = "/".join(
        [j["host"], "ucsc_trackhub", uuid, "hub_" + str(hubNum) + ".txt"])

    pieces = [
        "https://genome.ucsc.edu/cgi-bin/hgTracks?",
        "db=" + assembly,
        "&position=" + str(coord),
        "&hubClear=" + trackhubUrl,
        "&highlight=" + assembly + "." + c.chrom + "%3A"
        + str(c.start) + "-" + str(c.end),
    ]
    if assembly == "hg19":
        pieces.append("&g=wgEncodeGencodeV19")
        pieces.append("&g=phastCons100way")
    # FIXME (carried over): nothing extra is enabled for "mm10" yet.

    return {"url": "".join(pieces), "trackhubUrl": trackhubUrl}
def parseGene(self):
    """Resolve ``self.loci`` as a gene name to a resized Coord.

    Tries ``self.genes.lookup`` first. When that finds nothing,
    ``self.genes.fuzzy_lookup`` is consulted only to build a "potential
    matches" hint for the user.

    Side effect: sets ``self.userErrMsg`` on every path that returns None.

    :returns: a Coord built from the first three fields of the single
        matching row, after ``resize(self.halfWindow)``; ``None`` when
        there is no exact match or more than one.
    """
    hits = self.genes.lookup(self.assembly, self.loci)

    if not hits:
        suggestions = self.genes.fuzzy_lookup(self.assembly, self.loci)
        if suggestions:
            self.userErrMsg = "'{loci}' not found; potential matches: {genes}".format(
                loci=self.loci, genes=", ".join(sorted(suggestions)))
        else:
            self.userErrMsg = "'{loci}' not found".format(loci=self.loci)
        return None

    if len(hits) > 1:
        # NOTE(review): the message says the first hit is used, yet None is
        # returned -- confirm which of the two is intended.
        self.userErrMsg = "Multiple genomic positions found; using first found..."
        return None

    row = hits[0]
    window = Coord(row[0], row[1], row[2])
    window.resize(self.halfWindow)
    return window
def parseGene(self):
    """Turn the gene name in ``self.loci`` into a resized Coord.

    An exact lookup (``self.genes.lookup``) is attempted first; if it
    comes back empty, ``self.genes.fuzzy_lookup`` supplies candidate names
    that are only reported to the user, never used as the result.

    Side effect: ``self.userErrMsg`` is set whenever None is returned.

    :returns: Coord from fields 0..2 of the single exact match, after
        ``resize(self.halfWindow)``; otherwise ``None``.
    """
    exact = self.genes.lookup(self.assembly, self.loci)

    if exact:
        if len(exact) > 1:
            # NOTE(review): message promises "using first found" but the
            # method gives up here -- verify the intended behavior.
            self.userErrMsg = "Multiple genomic positions found; using first found..."
            return None
        first = exact[0]
        region = Coord(first[0], first[1], first[2])
        region.resize(self.halfWindow)
        return region

    # No exact match: report it, listing near matches when there are any.
    near = self.genes.fuzzy_lookup(self.assembly, self.loci)
    if not near:
        self.userErrMsg = "'{loci}' not found".format(loci=self.loci)
    else:
        candidates = ", ".join(sorted(near))
        self.userErrMsg = "'{loci}' not found; potential matches: {genes}".format(
            loci=self.loci, genes=candidates)
    return None
def getRankedPeakCoord(self):
    """Resolve a 1-based peak rank in ``self.loci`` to a resized Coord.

    Exactly one tissue must be selected (``self.tissue_ids``) and exactly
    one epigenome returned by ``self.epigenomes.GetByAssemblyAndAssays``
    must have a matching ``web_id()``. That epigenome's prediction file
    (its ``predictionFnp()`` path with ".bigBed" replaced by ".bed") is
    read as tab-separated text, and the line at the requested rank supplies
    the three Coord arguments.

    Side effect: sets ``self.userErrMsg`` on every path that returns None.

    :returns: Coord from the first three tab-separated fields of the
        selected line, after ``resize(self.halfWindow)``; ``None`` on a
        user-level error.
    :raises Exception: if the ".bed" prediction file does not exist.
    :raises ValueError: if ``self.loci`` is not an integer literal.
    """
    if 1 != len(self.tissue_ids):
        self.userErrMsg = "Please only select one tissue"
        return None

    wepis = self.epigenomes.GetByAssemblyAndAssays(self.assembly, self.assays)
    # Materialize a list: filter() is lazy on Python 3, where the len()
    # and [0] below would otherwise raise TypeError.
    wepis = [e for e in wepis.epis if e.web_id() in self.tissue_ids]
    if 1 != len(wepis):
        self.userErrMsg = "Please only select one tissue"
        return None
    wepi = wepis[0]

    fnp = wepi.predictionFnp().replace(".bigBed", ".bed")
    if not os.path.exists(fnp):
        raise Exception("file not found " + fnp)

    # User-facing ranks are 1-based; convert to a 0-based line index.
    rank = int(self.loci) - 1
    if rank < 0:
        self.userErrMsg = "Ranks start at 1"
        return None

    row = None
    numPeaks = 0  # lines seen so far; stays 0 for an empty file
    with open(fnp) as f:
        # Stream the file and stop at the wanted line instead of loading
        # every peak into memory.
        for numPeaks, line in enumerate(f, 1):
            if numPeaks - 1 == rank:
                row = line
                break
    if row is None:
        # The loop ran to the end, so numPeaks is the total line count.
        self.userErrMsg = "Only %d peaks in file" % numPeaks
        return None

    toks = row.rstrip().split('\t')
    c = Coord(toks[0], toks[1], toks[2])
    c.resize(self.halfWindow)
    return c
def search(self):
    """Dispatch ``self.loci`` to the matching resolver and return its result.

    Checked in this order:
      1. starts with "chr" (case-insensitive) -> ``Coord.parse``
      2. starts with "rs" (case-insensitive)  -> ``self.parseSnp``
      3. consists only of digits              -> ``self.getRankedPeakCoord``
         (a ranked peak for a single selected tissue)
      4. anything else                        -> ``self.parseGene``

    NOTE(review): gene names that begin with "chr" or "rs" are caught by
    the first two rules and never reach ``parseGene`` -- confirm whether
    that is acceptable.

    :returns: whatever the chosen resolver returns.
    """
    query = self.loci.lower()

    if query.startswith("chr"):
        return Coord.parse(self.loci)
    if query.startswith("rs"):
        return self.parseSnp()
    if self.loci.isdigit():
        return self.getRankedPeakCoord()
    return self.parseGene()