def publish():
    '''Publish the report and link result files into the web directory.

    After ``P.publish_report()`` has run, every exported file is
    symlinked to ``<web_dir>/<target directory>/<file name>``, where
    ``web_dir`` is read from ``PARAMS["web_dir"]``.  A UCSC track line
    is then printed for each exported ``.bam`` file, using the project
    id returned by ``P.getProjectId()``.

    Missing target directories are created, and a symlink whose target
    has disappeared is replaced instead of aborting the run.

    Returns nothing.
    '''
    # publish web pages
    P.publish_report()

    # publish additional data
    web_dir = PARAMS["web_dir"]
    project_id = P.getProjectId()

    # directory, files
    exportfiles = {
        "bamfiles": glob.glob("*.bam") + glob.glob("*.bam.bai"),
        "genesets": ["lincrna.gtf.gz", "abinitio.gtf.gz"],
        "classification": glob.glob("*.class.tsv.gz"),
        "differential_expression": glob.glob("*.cuffdiff.dir"),
    }

    bams = []

    for targetdir, filenames in exportfiles.items():
        for src in filenames:
            dest = "%s/%s/%s" % (web_dir, targetdir, src)
            if dest.endswith(".bam"):
                bams.append(dest)
            dest = os.path.abspath(dest)

            # os.symlink() does not create intermediate directories
            destdir = os.path.dirname(dest)
            if not os.path.exists(destdir):
                os.makedirs(destdir)

            # os.path.exists() is False for a dangling link, yet
            # os.symlink() would still fail on it - drop stale links first
            if os.path.islink(dest) and not os.path.exists(dest):
                os.remove(dest)

            if not os.path.lexists(dest):
                os.symlink(os.path.abspath(src), dest)

    # output ucsc links
    template = """track type=bam name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/bamfiles/%(filename)s"""
    for bam in bams:
        filename = os.path.basename(bam)
        track = P.snip(filename, ".bam")
        # a single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function
        print(template % {"track": track,
                          "project_id": project_id,
                          "filename": filename})
def publish():
    '''Publish the report and link alignment/coverage files for UCSC.

    After ``P.publish_report()`` has run, the ``*.genome.bam`` and
    ``*.prep.bam`` files (with their ``.bai`` indices) and the
    ``*.bigwig`` files found one directory below the working directory
    are symlinked to ``<web_dir>/<target directory>/<file name>``, where
    ``web_dir`` is read from ``PARAMS["web_dir"]``.

    A UCSC track line is printed for every linked file whose extension
    has an entry in ``ucsc_urls`` (``bam`` and ``bigwig``); index files
    are linked but produce no track line.

    Returns nothing.
    '''
    # publish web pages
    P.publish_report()

    # publish additional data
    web_dir = PARAMS["web_dir"]
    project_id = P.getProjectId()

    ucsc_urls = {
        "bam": """track type=bam name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/%(dirname)s/%(filename)s""",
        "bigwig": """track type=bigWig name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/%(dirname)s/%(filename)s""",
    }

    # directory, files, ucsc data type
    exportfiles = (
        ("bamfiles",
         glob.glob("*/*.genome.bam") + glob.glob("*/*.genome.bam.bai"),
         "bam"),
        ("bamfiles",
         glob.glob("*/*.prep.bam") + glob.glob("*/*.prep.bam.bai"),
         "bam"),
        ("medips",
         glob.glob("*/*.bigwig"),
         "bigwig"),
    )

    ucsc_files = []

    for targetdir, filenames, datatype in exportfiles:
        for src in filenames:
            filename = os.path.basename(src)
            dest = os.path.abspath(
                "%s/%s/%s" % (web_dir, targetdir, filename))

            # splitext() returns a (root, ext) pair and ext keeps its
            # leading dot; the ucsc_urls keys have none, so strip it
            # before the lookup
            suffix = os.path.splitext(filename)[1][1:]
            if suffix in ucsc_urls:
                ucsc_files.append((datatype, targetdir, filename))

            # os.symlink() does not create intermediate directories
            destdir = os.path.dirname(dest)
            if not os.path.exists(destdir):
                os.makedirs(destdir)

            # os.path.exists() is False for a dangling link, yet
            # os.symlink() would still fail on it - drop stale links first
            if os.path.islink(dest) and not os.path.exists(dest):
                os.remove(dest)

            if not os.path.lexists(dest):
                os.symlink(os.path.abspath(src), dest)

    # output ucsc links
    for ucsctype, dirname, filename in ucsc_files:
        # snip the dotted extension so the track name has no trailing "."
        track = P.snip(filename, "." + ucsctype)
        # a single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function
        print(ucsc_urls[ucsctype] % {"track": track,
                                     "project_id": project_id,
                                     "dirname": dirname,
                                     "filename": filename})
def publish():
    '''Publish the report and link exported interval files.

    Calls ``P.publish_report()`` and then symlinks every ``*.bed.gz``
    and ``*.bed.gz.tbi`` file found in ``<exportdir>/bed`` to
    ``<web_dir>/intervals/<file name>``; both directories are read from
    ``PARAMS``.  Target directories are created when missing, existing
    destinations are left alone, and ``E.warn`` is called for a target
    without any files.

    A UCSC track line is printed for each destination ending in
    ``.bam``.

    Returns nothing.
    '''
    # publish web pages
    P.publish_report()

    # publish additional data
    web_root = PARAMS["web_dir"]
    project = P.getProjectId()

    # target directory -> files to link
    bed_files = glob.glob(
        os.path.join(PARAMS["exportdir"], "bed", "*.bed.gz"))
    bed_indices = glob.glob(
        os.path.join(PARAMS["exportdir"], "bed", "*.bed.gz.tbi"))
    exports = {"intervals": bed_files + bed_indices}

    bam_links = []

    for subdir, sources in exports.items():
        if not sources:
            E.warn("no files for target '%s'" % targetdir_name(subdir)
                   if False else "no files for target '%s'" % subdir)

        for source in sources:
            link = "%s/%s/%s" % (web_root, subdir, os.path.basename(source))
            if link.endswith(".bam"):
                bam_links.append(link)

            link = os.path.abspath(link)
            parent = os.path.dirname(link)
            if not os.path.exists(parent):
                os.makedirs(parent)

            if os.path.exists(link):
                continue

            E.debug("creating symlink from %s to %s" % (source, link))
            os.symlink(os.path.abspath(source), link)

    # output ucsc links
    line = """track type=bam name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/bamfiles/%(filename)s"""
    for bam_link in bam_links:
        bam_name = os.path.basename(bam_link)
        print(line % {"track": P.snip(bam_name, ".bam"),
                      "project_id": project,
                      "filename": bam_name})
def publish():
    '''Build the report and expose the exported bed files on the web.

    Steps, in order:

    1. ``P.publish_report()`` publishes the web pages.
    2. All ``*.bed.gz`` files and their ``.tbi`` companions in
       ``<PARAMS["exportdir"]>/bed`` are symlinked into
       ``<PARAMS["web_dir"]>/intervals``.  The directory is created on
       demand and a destination that already exists is not touched.
    3. For every destination ending in ``.bam`` a UCSC track line is
       printed.

    ``E.warn`` is called when a target has no files at all.  Returns
    nothing.
    '''

    def link_into_web(source, destination):
        # create the parent directory on demand, then link unless the
        # destination is already present
        folder = os.path.dirname(destination)
        if not os.path.exists(folder):
            os.makedirs(folder)
        if not os.path.exists(destination):
            E.debug("creating symlink from %s to %s" % (source, destination))
            os.symlink(os.path.abspath(source), destination)

    # publish web pages
    P.publish_report()

    # publish additional data
    web_dir = PARAMS["web_dir"]
    project_id = P.getProjectId()

    # directory, files
    bed_pattern = os.path.join(PARAMS["exportdir"], "bed", "*.bed.gz")
    exportfiles = {
        "intervals": glob.glob(bed_pattern) + glob.glob(bed_pattern + ".tbi"),
    }

    published_bams = []
    for target in exportfiles:
        candidates = exportfiles[target]
        if len(candidates) < 1:
            E.warn("no files for target '%s'" % target)

        for path in candidates:
            published = "/".join((web_dir, target, os.path.basename(path)))
            if published.endswith(".bam"):
                published_bams.append(published)
            link_into_web(path, os.path.abspath(published))

    # output ucsc links
    for published in published_bams:
        filename = os.path.basename(published)
        track = P.snip(filename, ".bam")
        print("""track type=bam name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/bamfiles/%(filename)s""" % {
            "track": track,
            "project_id": project_id,
            "filename": filename,
        })
def publish_tracks(export_files, prefix="", project_id=None, project_name=None): '''publish a UCSC Track Hub. *export_files* is a dictionary of filetypes and files. *prefix* will be added to each track. ''' if not prefix: prefix = PARAMS.get("report_prefix", "") web_dir = PARAMS["web_dir"] if project_id is None: project_id = P.getProjectId() if project_name is None: project_name = P.getProjectName() src_export = os.path.abspath("export") dest_report = prefix + "report" dest_export = prefix + "export" hubdir = os.path.join(PARAMS["web_dir"], "ucsc") if not os.path.exists(hubdir): E.info("creating %s" % hubdir) os.mkdir(hubdir) # write the UCSC hub file hubfile = os.path.join(hubdir, "hub.txt") genomesfile = os.path.join(hubdir, "genomes.txt") trackdir = os.path.join(hubdir, PARAMS["genome"]) trackfile = os.path.join(hubdir, PARAMS["genome"], "trackDb.txt") trackrelpath = os.path.join(PARAMS["genome"], "trackDb.txt") if os.path.exists(hubfile): with IOTools.openFile(hubfile) as infile: hubdata = PipelineUCSC.readUCSCFile(infile) else: hubdata = [('hub', "CGAT-" + project_name), ('shortLabel', "CGAT-" + project_name), ('longLabel', "Data for CGAT project %s" % project_name), ('genomesFile', "genomes.txt"), ('email', '*****@*****.**')] E.info("writing to %s" % hubfile) with IOTools.openFile(hubfile, "w") as outfile: PipelineUCSC.writeUCSCFile(outfile, hubdata) # create the genomes.txt file - append to it if necessary. 
if os.path.exists(genomesfile): with IOTools.openFile(genomesfile) as infile: genomes = PipelineUCSC.readUCSCFile(infile) else: genomes = [] if ("genome", PARAMS["genome"]) not in genomes: genomes.append(("genome", PARAMS["genome"])) genomes.append(("trackDb", trackrelpath)) E.info("writing to %s" % genomesfile) with IOTools.openFile(genomesfile, "w") as outfile: PipelineUCSC.writeUCSCFile(outfile, genomes) # create the track data if not os.path.exists(trackdir): os.mkdir(trackdir) if os.path.exists(trackfile): E.debug('reading existing tracks from %s' % trackfile) with IOTools.openFile(trackfile) as infile: tracks = PipelineUCSC.readTrackFile(infile) else: tracks = [] tracks = collections.OrderedDict(tracks) def getName(name): if name.endswith(".bam"): return "bam", name elif name.endswith(".bw") or name.endswith(".bigwig"): return "bigWig", name else: return None, None for targetdir, filenames in export_files.items(): for src in filenames: dest = os.path.join(trackdir, prefix + os.path.basename(src)) dest = os.path.abspath(dest) # create a symlink if not os.path.exists(dest): try: os.symlink(os.path.abspath(src), dest) except OSError, msg: E.warn("could not create symlink from %s to %s: %s" % (os.path.abspath(src), dest, msg)) ucsctype, trackname = getName(os.path.basename(dest)) # ignore invalid types and other files (.bai files, ...) if ucsctype is None: continue tracks[trackname] = (("bigDataUrl", os.path.basename(dest)), ("shortLabel", trackname), ("longLabel", trackname), ("type", ucsctype))