def publish_tracks(export_files,
                   prefix="",
                   project_id=None,
                   project_name=None,
                   UCSC_ini=None):
    """Publish a UCSC Track Hub.

    This method takes a dictionary of file types associated with
    files. For each file, a symbolic link is created in the hub's
    genome directory, and every file with a recognised extension
    (``.bam``, ``.bw``/``.bigwig``, ``.bb``/``.bigbed``) is registered
    as a track.

    Information about the genome, the web directory, etc. is taken
    from the global configuration dictionary ``PARAMS``.

    For example, calling the following code in a pipeline::

        export_files = {
            "bamfiles": glob.glob("*/*.bam") + glob.glob("*/*.bam.bai"),
            "bigwigfiles": glob.glob("*/*.bw"),
        }
        publish_tracks(export_files)

    will create or refresh :file:`<web_dir>/ucsc/hub.txt` and
    :file:`<web_dir>/ucsc/genomes.txt` and link the files into
    :file:`<web_dir>/ucsc/<genome>/`.

    If you want to create group tracks and get them to inherit from a
    parent, you can supply a filename for a UCSC ini file. The ini
    file defines two types of parameters, parents and set_features.
    Parents define containers with a regex to identify the child
    tracks. Set_features add additional features to all tracks
    matching a regex. Parent and set_feature parameters are identified
    by their respective "parent" or "set_features" prefixes.

    For example, the following UCSC ini "test.ini" will create a
    parent multiWig track called "Test" with the UCSC options as
    defined in the values parameter. The values param must be a comma
    separated list of key:value pairs which are separated by a single
    space. The regex param for parent_test defines the child tracks
    which will be contained within "Test". The optional colour param
    defines the colours for the child tracks. Colours are defined
    using the brewer2mpl python module. Colour parameters must contain
    the name of the palette followed by the type of palette.

    The ini file below also defines a "set_features" parameter,
    "bigwigs". Set_features require a values and a regex parameter. In
    this case, the UCSC options in the values parameter will be added
    to all tracks matching the ".*bigwig$" regex. As above, the values
    param must be a comma separated list of key:value pairs which are
    separated by a single space. As above, an optional colour
    parameter can also be given.

    Note: colour palettes have a maximum number of allowable colours.
    To see the available palettes and their size, run::

        >import brewer2mpl
        >brewer2mpl.print_maps()

        >cat test.ini
        #######################
        #######################

        [parent_test]
        values=container multiWig,bigDataUrl Test,shortLabel Test,longLabel Test,type bigWig,viewLimits 0:160,visibility full,aggregate transparentOverlay,showSubtrackColorOnUi on,windowingFunction maximum,priority 1.2,configurable on,autoScale on,dragAndDrop subtracks
        regex=.*-Saline-.*bw$
        colour=Blues,Sequential

        #######################
        [set_features_bigwigs]
        values=configurable on,autoScale on,useScore on,visibility full
        regex=.*bigwig$
        colour=Oranges,Sequential
        #######################
        #######################

    NOTE(review): the code visible in this function only resolves the
    ini filename; nothing below reads the file. Likewise the collected
    track definitions are never written back to ``trackDb.txt`` here.
    Confirm whether those steps live elsewhere or are missing.

    Arguments
    ---------
    export_files : dict
        Dictionary mapping filetypes to files. Only the file lists
        are used; the keys are ignored.
    prefix : string
        Will be added to the name of each link/track. If empty,
        ``PARAMS["report_prefix"]`` is used (default ``""``).
    project_id : string
        The project identifier. If not given, it is obtained from
        :func:`getProjectId`.
    project_name : string
        The project name, typically the project number. If not given,
        it is obtained from :func:`getProjectName`.
    UCSC_ini : string
        Filename of a UCSC ini file as described above. If not given,
        ``PARAMS["ucsc_ini"]`` is used (default ``None``).
    """
    # the import is located here to avoid cyclical dependencies
    # between Local.py, Pipeline.py and PipelineUCSC.py
    import CGATPipelines.PipelineUCSC as PipelineUCSC

    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    if not UCSC_ini:
        # NOTE(review): resolved but not consumed by the code below.
        UCSC_ini = PARAMS.get("ucsc_ini", None)

    # NOTE(review): project_id is resolved (the lookup may raise) but is
    # not used by the code below.
    if project_id is None:
        project_id = getProjectId()
    if project_name is None:
        project_name = getProjectName()

    hubdir = os.path.join(PARAMS["web_dir"], "ucsc")
    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    genome = PARAMS["genome"]
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, genome)
    trackfile = os.path.join(trackdir, "trackDb.txt")
    # location of trackDb.txt relative to the hub directory, as recorded
    # in genomes.txt
    trackrelpath = os.path.join(genome, "trackDb.txt")

    # write the UCSC hub file, re-using the contents of an existing one
    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [('hub', "CGAT-" + project_name),
                   ('shortLabel', "CGAT-" + project_name),
                   ('longLabel', "Data for CGAT project %s" % project_name),
                   ('genomesFile', "genomes.txt"),
                   ('email', '*****@*****.**')]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", genome) not in genomes:
        genomes.append(("genome", genome))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug('reading existing tracks from %s' % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    tracks = collections.OrderedDict(tracks)

    def get_ucsc_type(name):
        """Return the UCSC track type for file *name*, or None."""
        if name.endswith(".bam"):
            return "bam"
        if name.endswith((".bw", ".bigwig")):
            return "bigWig"
        if name.endswith((".bb", ".bigbed")):
            return "bigBed"
        return None

    for filenames in export_files.values():
        for src in filenames:
            src_path = os.path.abspath(src)
            dest = os.path.abspath(
                os.path.join(trackdir, prefix + os.path.basename(src)))

            # create a symlink; failure is reported but not fatal
            if not os.path.exists(dest):
                try:
                    os.symlink(src_path, dest)
                except OSError as msg:
                    E.warn("could not create symlink from %s to %s: %s" %
                           (src_path, dest, msg))

            trackname = os.path.basename(dest)
            ucsctype = get_ucsc_type(trackname)
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue

            tracks[trackname] = (("bigDataUrl", trackname),
                                 ("shortLabel", trackname),
                                 ("longLabel", trackname),
                                 ("type", ucsctype))
def publish_tracks(export_files,
                   prefix="",
                   project_id=None,
                   project_name=None):
    """Publish a UCSC Track Hub.

    For each file in *export_files* a symbolic link is created in
    :file:`<web_dir>/ucsc/<genome>/`, and every file with a recognised
    extension (``.bam``, ``.bw``/``.bigwig``, ``.bb``/``.bigbed``) is
    registered as a track. The hub description
    (:file:`<web_dir>/ucsc/hub.txt`) and the genome list
    (:file:`<web_dir>/ucsc/genomes.txt`) are created, or re-written
    from their existing contents. The genome and web directory are
    taken from the global configuration dictionary ``PARAMS``.

    NOTE(review): the collected track definitions are never written
    back to ``trackDb.txt`` in this function. Confirm whether that
    step lives elsewhere or is missing.

    Arguments
    ---------
    export_files : dict
        Dictionary mapping filetypes to files. Only the file lists
        are used; the keys are ignored.
    prefix : string
        Will be added to the name of each link/track. If empty,
        ``PARAMS["report_prefix"]`` is used (default ``""``).
    project_id : string
        The project identifier. If not given, it is obtained from
        ``P.getProjectId()``.
    project_name : string
        The project name. If not given, it is obtained from
        ``P.getProjectName()``.
    """
    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    # NOTE(review): project_id is resolved (the lookup may raise) but is
    # not used by the code below.
    if project_id is None:
        project_id = P.getProjectId()
    if project_name is None:
        project_name = P.getProjectName()

    hubdir = os.path.join(PARAMS["web_dir"], "ucsc")
    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    genome = PARAMS["genome"]
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, genome)
    trackfile = os.path.join(trackdir, "trackDb.txt")
    # location of trackDb.txt relative to the hub directory, as recorded
    # in genomes.txt
    trackrelpath = os.path.join(genome, "trackDb.txt")

    # write the UCSC hub file, re-using the contents of an existing one
    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [('hub', "CGAT-" + project_name),
                   ('shortLabel', "CGAT-" + project_name),
                   ('longLabel', "Data for CGAT project %s" % project_name),
                   ('genomesFile', "genomes.txt"),
                   ('email', '*****@*****.**')]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", genome) not in genomes:
        genomes.append(("genome", genome))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug('reading existing tracks from %s' % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    tracks = collections.OrderedDict(tracks)

    def get_ucsc_type(name):
        """Return the UCSC track type for file *name*, or None."""
        if name.endswith(".bam"):
            return "bam"
        if name.endswith((".bw", ".bigwig")):
            return "bigWig"
        # bigBed is recognised for consistency with the variant of this
        # function that accepts a UCSC ini file.
        if name.endswith((".bb", ".bigbed")):
            return "bigBed"
        return None

    for filenames in export_files.values():
        for src in filenames:
            src_path = os.path.abspath(src)
            dest = os.path.abspath(
                os.path.join(trackdir, prefix + os.path.basename(src)))

            # create a symlink; failure is reported but not fatal
            if not os.path.exists(dest):
                try:
                    os.symlink(src_path, dest)
                except OSError as msg:
                    E.warn("could not create symlink from %s to %s: %s" %
                           (src_path, dest, msg))

            trackname = os.path.basename(dest)
            ucsctype = get_ucsc_type(trackname)
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue

            tracks[trackname] = (("bigDataUrl", trackname),
                                 ("shortLabel", trackname),
                                 ("longLabel", trackname),
                                 ("type", ucsctype))
def publish_tracks(export_files,
                   prefix="",
                   project_id=None,
                   project_name=None):
    """Publish a UCSC Track Hub.

    This method takes a dictionary of file types associated with
    files. For each file, a symbolic link is created in the hub's
    genome directory, and every file with a recognised extension
    (``.bam``, ``.bw``/``.bigwig``, ``.bb``/``.bigbed``) is registered
    as a track.

    Information about the genome, the web directory, etc. is taken
    from the global configuration dictionary ``PARAMS``.

    For example, calling the following code in a pipeline::

        export_files = {
            "bamfiles": glob.glob("*/*.bam") + glob.glob("*/*.bam.bai"),
            "bigwigfiles": glob.glob("*/*.bw"),
        }
        publish_tracks(export_files)

    will create or refresh :file:`<web_dir>/ucsc/hub.txt` and
    :file:`<web_dir>/ucsc/genomes.txt` and link the files into
    :file:`<web_dir>/ucsc/<genome>/`.

    NOTE(review): the collected track definitions are never written
    back to ``trackDb.txt`` in the visible part of this function.
    Confirm whether that step lives elsewhere or is missing.

    Arguments
    ---------
    export_files : dict
        Dictionary mapping filetypes to files. Only the file lists
        are used; the keys are ignored.
    prefix : string
        Will be added to the name of each link/track. If empty,
        ``PARAMS["report_prefix"]`` is used (default ``""``).
    project_id : string
        The project identifier. If not given, it is obtained from
        :func:`getProjectId`.
    project_name : string
        The project name, typically the project number. If not given,
        it is obtained from :func:`getProjectName`.
    """
    # the import is located here to avoid cyclical dependencies
    # between Local.py, Pipeline.py and PipelineUCSC.py
    import CGATPipelines.PipelineUCSC as PipelineUCSC

    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    # NOTE(review): project_id is resolved (the lookup may raise) but is
    # not used by the code below.
    if project_id is None:
        project_id = getProjectId()
    if project_name is None:
        project_name = getProjectName()

    hubdir = os.path.join(PARAMS["web_dir"], "ucsc")
    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    genome = PARAMS["genome"]
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, genome)
    trackfile = os.path.join(trackdir, "trackDb.txt")
    # location of trackDb.txt relative to the hub directory, as recorded
    # in genomes.txt
    trackrelpath = os.path.join(genome, "trackDb.txt")

    # write the UCSC hub file, re-using the contents of an existing one
    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [
            ("hub", "CGAT-" + project_name),
            ("shortLabel", "CGAT-" + project_name),
            ("longLabel", "Data for CGAT project %s" % project_name),
            ("genomesFile", "genomes.txt"),
            ("email", "*****@*****.**"),
        ]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", genome) not in genomes:
        genomes.append(("genome", genome))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug("reading existing tracks from %s" % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    tracks = collections.OrderedDict(tracks)

    def get_ucsc_type(name):
        """Return the UCSC track type for file *name*, or None."""
        if name.endswith(".bam"):
            return "bam"
        if name.endswith((".bw", ".bigwig")):
            return "bigWig"
        # bigBed is recognised for consistency with the variant of this
        # function that accepts a UCSC ini file.
        if name.endswith((".bb", ".bigbed")):
            return "bigBed"
        return None

    for filenames in export_files.values():
        for src in filenames:
            src_path = os.path.abspath(src)
            dest = os.path.abspath(
                os.path.join(trackdir, prefix + os.path.basename(src)))

            # create a symlink; failure is reported but not fatal
            if not os.path.exists(dest):
                try:
                    os.symlink(src_path, dest)
                except OSError as msg:
                    E.warn("could not create symlink from %s to %s: %s" %
                           (src_path, dest, msg))

            trackname = os.path.basename(dest)
            ucsctype = get_ucsc_type(trackname)
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue

            tracks[trackname] = (
                ("bigDataUrl", trackname),
                ("shortLabel", trackname),
                ("longLabel", trackname),
                ("type", ucsctype),
            )