Ejemplo n.º 1
0
def consolidate_output(job, config, output):
    """
    Combine the per-tool output tarballs into one tarball and write it to the output store.

    :param JobFunctionWrappingJob job: passed automatically by Toil
    :param Expando config: Dict-like object containing workflow options as attributes;
        ``config.uuid`` names the archive and ``config.filestore`` is handed to ``IOStore.get``
    :param dict(str, str) output: maps a tool name to the fileStore ID of that tool's tarball
    """
    # Collect all tarballs from fileStore, keyed by their folder inside the combined archive.
    # ``items()`` (rather than ``iteritems()``) works on both Python 2 and Python 3.
    tars = {}
    for tool, filestore_id in output.items():
        tars[os.path.join(config.uuid,
                          tool)] = job.fileStore.readGlobalFile(filestore_id)

    # Consolidate tarballs into one output tar, copying members as streams
    # instead of extracting them to disk first
    archive_name = config.uuid + '.tar.gz'
    out_tar = os.path.join(job.tempDir, archive_name)
    with tarfile.open(out_tar, 'w:gz') as f_out:
        for name, tar in tars.items():
            with tarfile.open(tar, 'r') as f_in:
                for tarinfo in f_in:
                    # Open the member before renaming it, as the original code did
                    f_in_file = f_in.extractfile(tarinfo)
                    # Members are flattened into <uuid>/<tool>/<basename>
                    tarinfo.name = os.path.join(
                        name, os.path.basename(tarinfo.name))
                    if f_in_file is None:
                        # extractfile() returns None for members without data
                        # (e.g. directories); closing(None) would raise on exit
                        f_out.addfile(tarinfo)
                        continue
                    with closing(f_in_file):
                        f_out.addfile(tarinfo, fileobj=f_in_file)

    # Move to output location (was ``congif.filestore``: a NameError at runtime)
    IOStore.get(config.filestore).write_output_file(out_tar, archive_name)
Ejemplo n.º 2
0
def rsync_to_store(job, name, options=None, url=None, file_ending=".gz"):
    """
    Rsync a remote database into a job-local directory and copy matching files to its store.

    :param job: job object passed in by the workflow; supplies the fileStore
    :param str name: database name; when it is a key of ``databases`` the stored
        (url, options) pair fills in whichever of ``url``/``options`` is falsy
    :param options: list or tuple of rsync command-line options
    :param str url: rsync source URL
    :param str file_ending: only keys ending with this suffix are written to the store
    :raises RuntimeError: if ``name`` is not in ``databases`` and ``url`` or ``options``
        is missing, or ``options`` is not a list/tuple
    """
    if name in databases:
        _url, _options = databases[name]
        url = url or _url
        options = options or _options
    elif url is None or options is None or not isinstance(options, (list,tuple)):
        # Message previously read "rync_rstore", which is not the name of any function here
        raise RuntimeError("Invalid call to rsync_to_store")

    work_dir = job.fileStore.getLocalTempDir()

    # Keep everything before the last ":" of the configured job store locator
    # and address a store named after the database
    prefix = job.fileStore.jobStore.config.jobStore.rsplit(":", 1)[0]
    jobStore = IOStore.get("{}:molmimic-{}".format(prefix, name))

    outpath = os.path.join(work_dir, name)
    if not os.path.isdir(outpath):
        os.makedirs(outpath)

    arguments = list(options)+[url, outpath]

    def add_to_jobstore(key):
        # presumably ``key`` is a path relative to ``outpath`` as reported by
        # call_rsync -- TODO confirm against call_rsync's callback contract
        if key and key.endswith(file_ending):
            full_path = os.path.join(outpath, key)
            if os.path.isfile(full_path):
                jobStore.write_output_file(full_path, key)

    call_rsync(arguments, callback=add_to_jobstore)
Ejemplo n.º 3
0
def download_mmdb(job, preemptable=True):
    """
    Download MMDB.h5 with wget and write it to the "ibis" output store.

    :param job: job object passed in by the workflow; supplies the fileStore
    :param bool preemptable: not read inside this function
    :raises subprocess.CalledProcessError: if wget exits with a non-zero status
    """
    work_dir = job.fileStore.getLocalTempDir()
    jobStore = IOStore.get(get_jobstore_name(job, "ibis"))

    # Reuse work_dir, like the other download_* functions, instead of asking
    # the private job._fileStore attribute for a second temporary directory
    mmdb_path = os.path.join(work_dir, "MMDB.h5")
    subprocess.check_call(["wget", "-N", "-O", mmdb_path,
        "https://www.dropbox.com/s/l42w7qq3kixq4v9/MMDB.h5?dl=0"])
    jobStore.write_output_file(mmdb_path, "MMDB.h5")
Ejemplo n.º 4
0
def download_ibis_inf(job, preemptable=True):
    """
    Download IBIS_inferred.h5 with wget and write it to the "ibis" output store.

    :param job: job object passed in by the workflow; supplies the fileStore
    :param bool preemptable: not read inside this function
    :raises subprocess.CalledProcessError: if wget exits with a non-zero status
    """
    scratch = job.fileStore.getLocalTempDir()
    store = IOStore.get(get_jobstore_name(job, "ibis"))

    filename = "IBIS_inferred.h5"
    source_url = "https://www.dropbox.com/s/0rzk2yyspurqbnw/IBIS_inferred.h5?dl=0"
    local_copy = os.path.join(scratch, filename)

    # Fetch into the job-local scratch directory, then hand the file to the store
    wget_cmd = ["wget", "-N", "-O", local_copy, source_url]
    subprocess.check_call(wget_cmd)
    store.write_output_file(local_copy, filename)
Ejemplo n.º 5
0
def download_ibis_obs(job, preemptable=True):
    """
    Download IBIS_observed.h5 with wget and write it to the "ibis" output store.

    :param job: job object passed in by the workflow; supplies the fileStore
    :param bool preemptable: not read inside this function
    :raises subprocess.CalledProcessError: if wget exits with a non-zero status
    """
    scratch = job.fileStore.getLocalTempDir()
    store = IOStore.get(get_jobstore_name(job, "ibis"))

    filename = "IBIS_observed.h5"
    source_url = "https://www.dropbox.com/s/47agor1gx0qewr0/IBIS_observed.h5?dl=0"
    local_copy = os.path.join(scratch, filename)

    # Fetch into the job-local scratch directory, then hand the file to the store
    wget_cmd = ["wget", "-N", "-O", local_copy, source_url]
    subprocess.check_call(wget_cmd)
    store.write_output_file(local_copy, filename)
Ejemplo n.º 6
0
def download_pdb(job):
    """
    Rsync the wwPDB "divided/pdb" structure tree into a job-local directory
    and add that directory to the "pdb" output store.

    :param job: job object passed in by the workflow; supplies the fileStore
    :raises subprocess.CalledProcessError: if rsync exits with a non-zero status
    """
    scratch = job.fileStore.getLocalTempDir()
    store = IOStore.get(get_jobstore_name(job, "pdb"))

    mirror_dir = os.path.join(scratch, "pdb")
    os.makedirs(mirror_dir)

    rsync_flags = ["-rlpt", "-v", "-z", "--delete", "--port=33444"]
    remote = "rsync.wwpdb.org::ftp_data/structures/divided/pdb/"
    subprocess.check_call(["rsync"] + rsync_flags + [remote, mirror_dir])

    add_directory_to_jobstore(store, mirror_dir)
Ejemplo n.º 7
0
def download_sifts(job, force=False):
    """
    Rsync the SIFTS split_xml tree from EBI into a job-local directory and
    add that directory to the "molmimic-sifts" store.

    :param job: job object passed in by the workflow; supplies the fileStore
    :param bool force: not read inside this function
    :raises subprocess.CalledProcessError: if rsync exits with a non-zero status
    """
    scratch = job.fileStore.getLocalTempDir()

    # Everything before the last ":" of the configured job store locator
    locator = job.fileStore.jobStore.config.jobStore
    prefix = locator.rsplit(":", 1)[0]
    store = IOStore.get("{}:molmimic-sifts".format(prefix))

    mirror_dir = os.path.join(scratch, "sifts")
    if not os.path.isdir(mirror_dir):
        os.makedirs(mirror_dir)

    rsync_flags = ["-vrltH", "--delete", "--stats", "-D", "--numeric-ids"]
    remote = "rsync.ebi.ac.uk::pub/databases/msd/sifts/split_xml/"
    subprocess.check_call(["rsync"] + rsync_flags + [remote, mirror_dir])

    add_directory_to_jobstore(store, mirror_dir)
Ejemplo n.º 8
0
def _parse_rsync_file_count(line):
    """Return the file count from an rsync "Number of files:" stats line, or None."""
    import re  # function-scope import: the file's import block is not visible here

    if not line.startswith("Number of files:"):
        return None
    # Newer rsync prints a breakdown such as
    # "Number of files: 1,234 (reg: 1,000, dir: 234)". The original indexing
    # targeted the "reg" field, so prefer it and fall back to the overall total
    # when no breakdown is present.
    match = (re.search(r"reg: ([\d,]+)", line)
             or re.search(r"Number of files: ([\d,]+)", line))
    if match is None:
        return None
    return int(match.group(1).replace(",", ""))


def compare_rsync_to_store(job, name, options=None, url=None, ending=".gz", no_retries=False, can_retry=True):
    """
    Compare the number of items in a database's store with the number of files
    rsync reports for the remote, and schedule a re-sync when the store has fewer.

    :param job: job object passed in by the workflow; supplies fileStore and logging
    :param str name: database name; when it is a key of ``databases`` the stored
        (url, options) pair fills in whichever of ``url``/``options`` is falsy
    :param options: list or tuple of rsync command-line options
    :param str url: rsync source URL
    :param str ending: file suffix forwarded to ``rsync_to_store``
    :param bool no_retries: not read inside this function
    :param bool can_retry: when true and the store is behind, add a child
        ``rsync_to_store`` job followed by one more comparison (with retry disabled)
    :return: tuple ``(num_saved, total_num)``; ``total_num`` is None when no
        "Number of files:" line could be parsed
    :raises RuntimeError: if ``name`` is not in ``databases`` and ``url`` or ``options``
        is missing, or ``options`` is not a list/tuple
    """
    if name in databases:
        _url, _options = databases[name]
        url = url or _url
        options = options or _options
    elif url is None or options is None or not isinstance(options, (list,tuple)):
        raise RuntimeError("Invalid call to compare_rsync_to_store")

    work_dir = job.fileStore.getLocalTempDir()

    prefix = job.fileStore.jobStore.config.jobStore.rsplit(":", 1)[0]
    jobStore = IOStore.get("{}:molmimic-{}".format(prefix, name))

    outpath = os.path.join(work_dir, name)
    if not os.path.isdir(outpath):
        os.makedirs(outpath)

    num_saved = jobStore.get_number_of_items()

    # --dry-run: only ask rsync for its statistics, transfer nothing
    arguments = list(options)+["--dry-run", "--stats", url, outpath]

    # Mutable cell so the callback can publish its result; assigning to a plain
    # local inside the closure (as before) never reached the enclosing scope
    found = {"total": None}

    def get_total_num(line, total=None):
        # ``total`` is accepted only to keep the (line, *args) call shape used below
        job.log(line)
        count = _parse_rsync_file_count(line)
        if count is None:
            return None
        job.log("SAVING LINE")
        found["total"] = count
        return True

    output = call_rsync(arguments, callback=get_total_num, args=[None])

    # Fallback: scan the returned output if the callback did not see the stats line
    if found["total"] is None and output:
        for line in output.split("\n"):
            get_total_num(line)

    total_num = found["total"]

    job.log("COMPARE Saved: {}, Total: {}".format(num_saved, total_num))
    job.log("OUTPUT: {}".format(output))
    # Comparing against None is always False on Python 2 and a TypeError on Python 3
    if can_retry and total_num is not None and num_saved<total_num:
        job.log("RERUNNING {}".format(name))
        sync_job = job.addChildJobFn(rsync_to_store, name, options, url, ending)
        sync_job.addFollowOnJobFn(compare_rsync_to_store, name, options, url, ending,
            can_retry=False)

    return num_saved, total_num
Ejemplo n.º 9
0
def download_consurf(job):
    """
    Download the ConSurfDB non-redundant PDB list and the per-PDB ConSurf files,
    and write both to the "ConSurf" output store.

    :param job: job object passed in by the workflow; supplies the fileStore
    :raises requests.HTTPError: if the list download returns an error status
    """
    import shutil, requests, zipfile
    # io.BytesIO is available on Python 2 and 3, unlike cStringIO
    from io import BytesIO
    from molmimic.parsers.Consurf import download_consurf as download_consurf_all

    work_dir = job.fileStore.getLocalTempDir()
    jobStore = IOStore.get(get_jobstore_name(job, "ConSurf"))

    #Download nr mapping
    pdb_nr = os.path.join(work_dir, "pdbaa_list.nr")
    r = requests.get("http://bental.tau.ac.il/new_ConSurfDB/ConSurfDB_list_feature.zip", stream=True)
    # Fail here on an HTTP error rather than later while parsing a non-zip body
    r.raise_for_status()
    with zipfile.ZipFile(BytesIO(r.content)) as z:
        # The zip member yields bytes, so the destination is opened in binary mode
        with z.open("pdbaa_list.nr") as zf, open(pdb_nr, "wb") as f:
            shutil.copyfileobj(zf, f)
    jobStore.write_output_file(pdb_nr, "pdbaa_list.nr")

    #Download all pdb consurf files
    consurf_path = os.path.join(work_dir, "ConSurf")
    # os.path.mkdirs does not exist; os.makedirs is the directory-creating call
    if not os.path.isdir(consurf_path):
        os.makedirs(consurf_path)
    # Call the imported parser helper, not this job function (which would recurse
    # with an unsupported keyword). NOTE(review): the consurf_path keyword is
    # carried over from the original call -- confirm it matches the helper's signature.
    download_consurf_all(consurf_path=consurf_path)
    add_directory_to_jobstore(jobStore, consurf_path)