コード例 #1
0
ファイル: pipeline_motifs.py プロジェクト: wbyu/CGATPipelines
def publish():
    '''Publish the report and link exported interval files for the web.

    After ``P.publish_report()`` has run, every ``*.bed.gz`` and
    ``*.bed.gz.tbi`` file found in ``<exportdir>/bed`` is symlinked to
    ``<web_dir>/intervals/<basename>``.  Missing destination directories
    are created and destinations that already exist are left alone.
    '''
    # the report itself
    P.publish_report()

    # additional data that is linked next to the report
    web_root = PARAMS["web_dir"]
    project_id = P.getProjectId()

    # target sub-directory -> list of files to link into it
    bed_dir = os.path.join(PARAMS["exportdir"], "bed")
    bed_files = glob.glob(os.path.join(bed_dir, "*.bed.gz"))
    index_files = glob.glob(os.path.join(bed_dir, "*.bed.gz.tbi"))
    exportfiles = {"intervals": bed_files + index_files}

    # destinations of .bam files; collected but not read again here
    bam_links = []

    for subdir, sources in exportfiles.items():
        if not sources:
            E.warn("no files for target '%s'" % subdir)

        for source in sources:
            link = "%s/%s/%s" % (web_root, subdir, os.path.basename(source))
            if link.endswith(".bam"):
                bam_links.append(link)

            link = os.path.abspath(link)
            parent = os.path.dirname(link)
            if not os.path.exists(parent):
                os.makedirs(parent)

            # never overwrite something that is already published
            if os.path.exists(link):
                continue

            E.debug("creating symlink from %s to %s" % (source, link))
            os.symlink(os.path.abspath(source), link)
コード例 #2
0
def publish():
    '''Publish the report and link exported interval files for the web.

    Calls ``P.publish_report()`` and then symlinks every ``*.bed.gz`` and
    ``*.bed.gz.tbi`` file found in ``<exportdir>/bed`` to
    ``<web_dir>/intervals/<basename>``.  The destination directory is
    created when it does not exist; destinations that already exist are
    left untouched.
    '''
    # publish web pages
    P.publish_report()

    # publish additional data
    web_dir = PARAMS["web_dir"]
    # NOTE(review): the project id is not used below; the call is kept
    # because it may fail for an unconfigured project - confirm before
    # removing it.
    project_id = P.getProjectId()

    # directory, files
    exportfiles = {
        "intervals":
        glob.glob(os.path.join(PARAMS["exportdir"], "bed", "*.bed.gz")) +
        glob.glob(os.path.join(PARAMS["exportdir"], "bed", "*.bed.gz.tbi")),
    }

    # .items() works on both Python 2 and Python 3, whereas
    # dict.iteritems() no longer exists in Python 3.
    for targetdir, filenames in exportfiles.items():
        if not filenames:
            E.warn("no files for target '%s'" % targetdir)
        for src in filenames:
            dest = os.path.abspath(
                "%s/%s/%s" % (web_dir, targetdir, os.path.basename(src)))
            destdir = os.path.dirname(dest)
            if not os.path.exists(destdir):
                os.makedirs(destdir)

            if not os.path.exists(dest):
                E.debug("creating symlink from %s to %s" % (src, dest))
                os.symlink(os.path.abspath(src), dest)
コード例 #3
0
def buildExperimentTable(infiles, outfile):
    '''Write a one-row, tab-separated experiment table to *outfile*.

    The single data row contains the fixed id "1", the project name, the
    project id ("unknown" when ``P.getProjectId()`` raises ValueError),
    the current working directory and the ``title`` entry of PARAMS
    (empty when absent).  *infiles* is not read.
    '''
    workdir = os.getcwd()

    try:
        project_id = P.getProjectId()
    except ValueError:
        project_id = "unknown"

    header = "id\tname\tproject_id\tdirectory\ttitle\n"

    with IOTools.openFile(outfile, "w") as table:
        table.write(header)
        fields = ("1",
                  P.getProjectName(),
                  project_id,
                  workdir,
                  PARAMS.get("title", ""))
        table.write("\t".join(fields) + "\n")
コード例 #4
0
def publish():
    '''Publish the report, link data files for the web and print UCSC tracks.

    Calls ``P.publish_report()``, then symlinks the ``*.genome.bam`` and
    ``*.prep.bam`` files (with their ``.bai`` indices) to
    ``<web_dir>/bamfiles`` and the ``*.bigwig`` files to
    ``<web_dir>/medips``.  For every linked file whose extension is a key
    of ``ucsc_urls`` (``bam`` or ``bigwig``) a UCSC custom-track line is
    printed to stdout; index files are linked but get no track line.
    '''

    # publish web pages
    P.publish_report()

    # publish additional data
    web_dir = PARAMS["web_dir"]
    project_id = P.getProjectId()

    ucsc_urls = {
        "bam":
        """track type=bam name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/%(dirname)s/%(filename)s""",
        "bigwig":
        """track type=bigWig name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/%(dirname)s/%(filename)s""",
    }

    # directory, files, ucsc datatype
    exportfiles = (
        ("bamfiles",
         glob.glob("*/*.genome.bam") + glob.glob("*/*.genome.bam.bai"), "bam"),
        ("bamfiles", glob.glob("*/*.prep.bam") + glob.glob("*/*.prep.bam.bai"),
         "bam"),
        ("medips", glob.glob("*/*.bigwig"), "bigwig"),
    )

    ucsc_files = []

    for targetdir, filenames, datatype in exportfiles:
        for src in filenames:
            filename = os.path.basename(src)
            dest = os.path.abspath("%s/%s/%s" % (web_dir, targetdir, filename))

            # os.path.splitext() returns a (root, ext) tuple; take the
            # extension and drop its leading dot so it can be compared
            # with the keys of ucsc_urls.  Comparing the tuple itself
            # never matched, so no track line was ever printed.
            suffix = os.path.splitext(src)[1][1:]
            if suffix in ucsc_urls:
                ucsc_files.append((datatype, targetdir, filename))

            # os.symlink() fails when the parent directory is missing
            destdir = os.path.dirname(dest)
            if not os.path.exists(destdir):
                os.makedirs(destdir)

            if not os.path.exists(dest):
                os.symlink(os.path.abspath(src), dest)

    # output ucsc links
    for ucsctype, dirname, filename in ucsc_files:
        filename = os.path.basename(filename)
        # NOTE(review): ucsctype carries no leading dot; if P.snip strips
        # exactly the given suffix the track name keeps a trailing "." -
        # confirm against P.snip.
        track = P.snip(filename, ucsctype)
        print(ucsc_urls[ucsctype] % {"track": track,
                                     "project_id": project_id,
                                     "dirname": dirname,
                                     "filename": filename})
コード例 #5
0
def publish():
    '''Publish the report, link data files for the web and print UCSC tracks.

    Calls ``P.publish_report()``, then symlinks the ``*.genome.bam`` and
    ``*.prep.bam`` files (with their ``.bai`` indices) to
    ``<web_dir>/bamfiles`` and the ``*.bigwig`` files to
    ``<web_dir>/medips``.  For every linked file whose extension is a key
    of ``ucsc_urls`` (``bam`` or ``bigwig``) a UCSC custom-track line is
    printed to stdout; index files are linked but get no track line.
    '''

    # publish web pages
    P.publish_report()

    # publish additional data
    web_dir = PARAMS["web_dir"]
    project_id = P.getProjectId()

    ucsc_urls = {
        "bam":
        """track type=bam name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/%(dirname)s/%(filename)s""",
        "bigwig":
        """track type=bigWig name="%(track)s" bigDataUrl=http://www.cgat.org/downloads/%(project_id)s/%(dirname)s/%(filename)s""",
    }

    # directory, files, ucsc datatype
    exportfiles = (
        ("bamfiles", glob.glob("*/*.genome.bam") +
         glob.glob("*/*.genome.bam.bai"), "bam"),
        ("bamfiles", glob.glob("*/*.prep.bam") +
         glob.glob("*/*.prep.bam.bai"), "bam"),
        ("medips", glob.glob("*/*.bigwig"), "bigwig"),
    )

    ucsc_files = []

    for targetdir, filenames, datatype in exportfiles:
        for src in filenames:
            filename = os.path.basename(src)
            dest = os.path.abspath("%s/%s/%s" % (web_dir, targetdir, filename))

            # os.path.splitext() returns a (root, ext) tuple; take the
            # extension and drop its leading dot so it can be compared
            # with the keys of ucsc_urls.  Comparing the tuple itself
            # never matched, so no track line was ever printed.
            suffix = os.path.splitext(src)[1][1:]
            if suffix in ucsc_urls:
                ucsc_files.append((datatype, targetdir, filename))

            # os.symlink() fails when the parent directory is missing
            destdir = os.path.dirname(dest)
            if not os.path.exists(destdir):
                os.makedirs(destdir)

            if not os.path.exists(dest):
                os.symlink(os.path.abspath(src), dest)

    # output ucsc links
    for ucsctype, dirname, filename in ucsc_files:
        filename = os.path.basename(filename)
        # NOTE(review): ucsctype carries no leading dot; if P.snip strips
        # exactly the given suffix the track name keeps a trailing "." -
        # confirm against P.snip.
        track = P.snip(filename, ucsctype)
        print(ucsc_urls[ucsctype] % {"track": track,
                                     "project_id": project_id,
                                     "dirname": dirname,
                                     "filename": filename})
コード例 #6
0
def publish_tracks(export_files,
                   prefix="",
                   project_id=None,
                   project_name=None):
    '''publish a UCSC Track Hub.

    *export_files* is a dictionary of filetypes and files.
    *prefix* will be added to each track.

    '''

    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    web_dir = PARAMS["web_dir"]
    if project_id is None:
        project_id = P.getProjectId()
    if project_name is None:
        project_name = P.getProjectName()

    src_export = os.path.abspath("export")
    dest_report = prefix + "report"
    dest_export = prefix + "export"

    hubdir = os.path.join(PARAMS["web_dir"], "ucsc")

    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    # write the UCSC hub file
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, PARAMS["genome"])
    trackfile = os.path.join(hubdir, PARAMS["genome"], "trackDb.txt")
    trackrelpath = os.path.join(PARAMS["genome"], "trackDb.txt")

    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [('hub', "CGAT-" + project_name),
                   ('shortLabel', "CGAT-" + project_name),
                   ('longLabel', "Data for CGAT project %s" % project_name),
                   ('genomesFile', "genomes.txt"),
                   ('email', '*****@*****.**')]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", PARAMS["genome"]) not in genomes:
        genomes.append(("genome", PARAMS["genome"]))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug('reading existing tracks from %s' % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    tracks = collections.OrderedDict(tracks)

    def getName(name):
        if name.endswith(".bam"):
            return "bam", name
        elif name.endswith(".bw") or name.endswith(".bigwig"):
            return "bigWig", name
        else:
            return None, None

    for targetdir, filenames in export_files.items():
        for src in filenames:
            dest = os.path.join(trackdir, prefix + os.path.basename(src))
            dest = os.path.abspath(dest)
            # create a symlink
            if not os.path.exists(dest):
                try:
                    os.symlink(os.path.abspath(src), dest)
                except OSError, msg:
                    E.warn("could not create symlink from %s to %s: %s" %
                           (os.path.abspath(src), dest, msg))
            ucsctype, trackname = getName(os.path.basename(dest))
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue
            tracks[trackname] = (("bigDataUrl", os.path.basename(dest)),
                                 ("shortLabel", trackname),
                                 ("longLabel", trackname),
                                 ("type", ucsctype))