def _generate(self, allAssays):
    """Load mm10 and hg19 epigenome metadata and register them per assay set.

    The `if 0:` branch is a disabled one-off that fetched ChIP-seq TF
    annotations from the metadata service and wrote them to a pickle; the
    live `else:` branch reads that cached pickle instead.

    NOTE(review): the pickle file is trusted local cache here; never point
    outFnp at untrusted data (pickle.load executes arbitrary code).
    """
    if 0:  # disabled: regenerate the pickle cache from the metadata service
        # mouse
        m = MetadataWS(Datasets.all_mouse)
        mm10epis = m.chipseq_tf_annotations_mm10(date="2016-05-01")
        # human
        m = MetadataWS(Datasets.all_human)
        encodeHg19 = m.chipseq_tf_annotations_hg19(date="2016-05-01")
        outFnp = "/data/projects/encode/encyclopedia_v3/webEpigenomesLoader.cpickle"
        with open(outFnp, 'wb') as f:
            # two sequential dump() calls; the reader below load()s in the same order
            p = pickle.Pickler(f)
            p.dump(mm10epis)
            p.dump(encodeHg19)
        print("wrote", outFnp)
    else:
        # read the cached pickle shipped three directories above this file
        outFnp = os.path.join(os.path.dirname(__file__),
                              "../../../webEpigenomesLoader.cpickle")
        #outFnp = "/data/projects/encode/encyclopedia_v3/webEpigenomesLoader.cpickle"
        with open(outFnp, 'rb') as f:
            p = pickle.Unpickler(f)
            mm10epis = p.load()      # same order as the dump() calls above
            encodeHg19 = p.load()
        print("read", outFnp)
    for assays in allAssays:
        self._loadEpigenomes("mm10", mm10epis, assays)
    # hg19 combines the ENCODE pickle with ROADMAP epigenomes for this
    # histone mark / assay type (presumably set on self by __init__ — confirm)
    roadmapMetadata = RoadmapMetadata(self.histMark, self.assayType)
    roadmap = roadmapMetadata.epigenomes
    hg19epis = Epigenomes("ROADMAP + ENCODE", "hg19")
    hg19epis.epis = encodeHg19.epis + roadmap.epis
    for assays in allAssays:
        self._loadEpigenomes("hg19", hg19epis, assays)
def outputSubTrack(priority, assembly, bt, btn, expIDs, fnp, idx, total,
                   biosample_type, biosample_term_name, lookupByExp):
    """Write one biosample sub-track stanza file and return its subgroups.

    Looks up the experiments for *expIDs*, builds a Tracks container under a
    Parent named "<bt>_<btn>", writes the track lines to *fnp*, and returns
    tracks.subgroups().  The parent is marked active if any experiment in
    lookupByExp is active.  biosample_type / biosample_term_name are kept
    for interface compatibility with existing callers.
    """
    mw = MetadataWS(host=Host)
    exps = mw.exps(expIDs)
    # Active if any known experiment is active (was: build a list, then any()).
    isActive = any(lookupByExp[expID].isActive()
                   for expID in expIDs if expID in lookupByExp)
    if isActive:
        print("active biosample:", btn)
    parent = Parent(bt + '_' + btn, isActive)
    tracks = Tracks(assembly, parent, False)
    for exp in exps:
        cREs = {}
        if exp.encodeID in lookupByExp:
            cREs = lookupByExp[exp.encodeID].ccREs
        # NOTE(review): the original computed a per-experiment `active` flag
        # here but passed a hard-coded True to addExp; behavior preserved —
        # confirm whether `active` was meant to be passed instead.
        tracks.addExp(exp, True, cREs)
    Utils.ensureDir(fnp)
    with open(fnp, 'w') as f:
        f.writelines(tracks.lines(priority))
    printWroteNumLines(fnp, idx, 'of', total)
    return tracks.subgroups()
def main():
    """Manual smoke test: fetch one experiment and list its hg19 bigWigs."""
    expID = 'ENCSR000BCA'  # previously tried: ENCSR000SKS, ENCSR000BCE, ENCSR000AEC
    if 1:  # debug toggle: 1 = query metadata service, 0 = load local JSON
        mw = MetadataWS(host="http://192.168.1.46:9008/metadata")
        exp = mw.exps([expID])[0]
    else:
        exp = Exp.fromJsonFile(expID)
    matched = bigWigFilters("hg19", exp)
    print("found", len(matched))
    for bw in matched:
        print(bw, bw.bio_rep)
def process(args, expID):
    """Normalize the first matching bigWig of one experiment via normBin.

    Returns 0 when the normalized file already exists or the command ran;
    returns None when no bigWig matches or an exception was caught.

    Fixed: Python-2-only `print` statements and `except Exception, e`
    syntax, inconsistent with the print() calls used elsewhere in this file.
    """
    exp = MetadataWS.exp(expID)
    try:
        bigWigs = bigWigFilters(args.assembly, exp.files)
        if not bigWigs:
            return
        bigWig = bigWigs[0]
        bigWigFnp = bigWig.fnp()
        if os.path.exists(bigWig.normFnp()):
            # already normalized; nothing to do
            print("skipping", exp)
            return 0
        else:
            print("missing", bigWig.normFnp())
        bwAssembly = bigWig.assembly
        if not bigWigFnp:
            print(exp.getSingleBigWigSingleFnp(args))
            print("missing", exp)
        else:
            cmds = [normBin,
                    "--assembly=" + bwAssembly,
                    "--bwFnp=" + bigWig.normFnp(),
                    bigWigFnp]
            print(" ".join(cmds))
            print(Utils.runCmds(cmds))
        return 0
    except Exception as e:
        # best-effort batch worker: log and fall through (returns None)
        # so the remaining experiments still get processed
        print("bad " + str(e))
def process(args, expID):
    """Normalize the first matching bigWig of one experiment via normBin.

    Duplicate of the sibling process(); kept in sync.  Returns 0 on skip or
    successful command run, None when no bigWig matches or on error.

    Fixed: Python-2-only `print` statements and `except Exception, e`
    syntax, inconsistent with the print() calls used elsewhere in this file.
    """
    exp = MetadataWS.exp(expID)
    try:
        bigWigs = bigWigFilters(args.assembly, exp.files)
        if not bigWigs:
            return
        bigWig = bigWigs[0]
        bigWigFnp = bigWig.fnp()
        if os.path.exists(bigWig.normFnp()):
            # normalized output already present — skip
            print("skipping", exp)
            return 0
        else:
            print("missing", bigWig.normFnp())
        bwAssembly = bigWig.assembly
        if not bigWigFnp:
            print(exp.getSingleBigWigSingleFnp(args))
            print("missing", exp)
        else:
            cmds = [normBin,
                    "--assembly=" + bwAssembly,
                    "--bwFnp=" + bigWig.normFnp(),
                    bigWigFnp]
            print(" ".join(cmds))
            print(Utils.runCmds(cmds))
        return 0
    except Exception as e:
        # log-and-continue so one bad experiment doesn't stop the batch
        print("bad " + str(e))
def __init__(self, args, assembly, globalData, priority):
    """Hold the request context and open a metadata client for *assembly*."""
    self.args = args
    self.assembly = assembly
    self.globalData = globalData
    self.priority = priority
    # Resolve the dataset for this assembly and connect to the metadata service.
    self.mw = MetadataWS(dataset=Datasets.byAssembly(assembly), host=Host)
def _doImport(self):
    """Populate self.tableName with genome-browser tracks per nine-state cell type.

    For each cell type, collects the biosample term names behind its four
    signal files (dnase/h3k4me3/h3k27ac/ctcf), matches experiments with those
    biosamples, and inserts a JSON list of their bigWig/bigBed files.

    Fixed: `ns.iteritems()` is Python-2-only (replaced with .items());
    filter(lambda…) replaced with a comprehension; manual counter replaced
    with enumerate.
    """
    mc = None
    if Config.memcache:
        mc = MemCacheWrapper(Config.memcache)
    qd = QueryDCC(auth=False, cache=mc)
    m = MetadataWS.byAssembly(self.assembly)
    allExps = m.all_bigBeds_bigWigs(self.assembly)
    printt("found", len(allExps))
    ret = {}
    ns = self.pgSearch.loadNineStateGenomeBrowser()
    total = len(ns)
    for counter, (ctn, v) in enumerate(ns.items(), start=1):
        printt(counter, 'of', total, ctn)
        # gather the biosample term names behind this cell type's four files
        btns = set()
        for fileID in [v["dnase"], v["h3k4me3"], v["h3k27ac"], v["ctcf"]]:
            if 'NA' == fileID:
                continue  # missing assay for this cell type
            exp = qd.getExpFromFileID(fileID)
            btns.add(exp.biosample_term_name)
        exps = [e for e in allExps if e.biosample_term_name in btns]
        ret[ctn] = []
        for e in exps:
            q = {"expID": e.encodeID,
                 "assay_term_name": e.assay_term_name,
                 "target": e.target,
                 "tf": e.tf,
                 "bigWigs": [{"fileID": f.fileID,
                              "techRep": f.technical_replicates}
                             for f in e.files if f.isBigWig()],
                 "beds": [{"fileID": f.fileID,
                           "techRep": f.technical_replicates}
                          for f in e.files if f.isBigBed()]}
            ret[ctn].append(q)
        ret[ctn] = sorted(ret[ctn],
                          key=lambda q: (q["assay_term_name"], q["target"], q["tf"]))
        # tableName is project-internal (not user input); values are parameterized
        self.curs.execute(
            """
            INSERT INTO {tableName} (cellTypeName, tracks)
            VALUES (%s, %s)""".format(tableName=self.tableName),
            (ctn, json.dumps(ret[ctn])))
def makeTrackDb(self):
    """Build the GM12878 trackDb text: static header file + one stanza per experiment.

    Reads the trackhub.txt template, appends trackhubExp() output for every
    GM12878 experiment (sorted by assay, TF, lab), and returns the whole text.

    Fixed: Python-2-only StringIO.StringIO was used only to concatenate
    strings; replaced with a plain join (works on both Python 2 and 3).
    """
    trackhubFnp = os.path.join(os.path.dirname(__file__), "..", "views",
                               "interacting_gene", "trackhub.txt")
    with open(trackhubFnp) as f:
        fileLines = f.read()
    m = MetadataWS(Datasets.all_human)
    exps = m.biosample_term_name("GM12878")
    lines = [self.trackhubExp(exp)
             for exp in sorted(exps,
                               key=lambda x: (x.assay_term_name, x.tf, x.lab))]
    # keep only truthy stanzas; each gets a trailing newline, as before
    body = "".join(line + "\n" for line in lines if line)
    return fileLines + "\n" + body
def makeJobs(self):
    """Build a shuffled list of peak-processing job dicts for this assembly.

    Collects useful TF and histone ChIP-seq experiments, drops those with
    ERROR audits, and emits one job per experiment with its useful peak file.

    Fixed: `except Exception, e` is Python-2-only; `len(filter(...))` fails
    on Python 3 (filter returns an iterator) — replaced with a list
    comprehension; the original printed "missing" for a falsy bed but still
    queued the job (added `continue`); and the jobs list was built but never
    returned (added `return jobs`).
    """
    m = MetadataWS(Datasets.byAssembly(self.assembly))
    allExps = [(m.chipseq_tfs_useful(), "tf"),
               (m.chipseq_histones_useful(), "histone")]
    allExpsIndiv = []
    for exps, etype in allExps:
        printt("found", len(exps), etype)
        exps = [Exp.fromJsonFile(e.encodeID) for e in exps]
        exps = [e for e in exps if "ERROR" not in e.jsondata["audit"]]
        printt("found", len(exps), etype, "after removing ERROR audit exps")
        for exp in exps:
            allExpsIndiv.append((exp, etype))
    random.shuffle(allExpsIndiv)
    total = len(allExpsIndiv)
    jobs = []
    for i, (exp, etype) in enumerate(allExpsIndiv, start=1):
        try:
            bed = exp.getUsefulPeakFile(self.assembly)
            if not bed:
                printt("missing", exp)
                continue  # BUGFIX: original fell through and queued bed=None
            jobs.append({"exp": exp,    # this is an Exp
                         "bed": bed,    # this is an ExpFile
                         "i": i,
                         "total": total,
                         "assembly": self.assembly,
                         "etype": etype})
        except Exception as e:
            printt(str(e))
            printt("bad exp:", exp)
    return jobs  # BUGFIX: original never returned the jobs it built
def __init__(self, args, siteInfo):
    """Assemble the hg19 InteractingGene web-epigenome lookup table."""
    self.args = args
    self.ontology = Ontology()
    # NOTE(review): result unused below — kept in case the constructor has
    # required side effects; confirm and drop if not.
    m = MetadataWS(Datasets.all_mouse)
    combined = Epigenomes("ROADMAP + ENCODE", "hg19")
    combined.epis = InteractingGeneMetadata().epigenomes.epis
    byAssembly = {"hg19": combined}
    self.byAssemblyAssays = defaultdict(lambda: defaultdict(None))
    for assembly in ["hg19"]:
        for assays in ["InteractingGene"]:
            epis = byAssembly[assembly].GetByAssays(assays)
            if epis:
                wrapped = [WebEpigenome(self.args, epi, assays, self.ontology)
                           for epi in epis]
                self.byAssemblyAssays[assembly][assays] = WebEpigenomes(
                    self.args, assembly, assays, wrapped)