def _prep_s3_directories(args, buckets):
    """Map input directories into stable S3 buckets and folders for storing files.
    """
    dirs = set([])

    def _get_dirs(fname, context, remap_dict):
        dirs.add(os.path.normpath(os.path.dirname(os.path.abspath(fname))))

    remap.walk_files(args, _get_dirs, {}, pass_dirs=True)
    work_dir, biodata_dir = _get_known_dirs(args)
    out = {}
    external_count = 0
    for d in sorted(dirs):
        if work_dir and d.startswith(work_dir):
            folder = d.replace(work_dir, "")
            folder = folder[1:] if folder.startswith("/") else folder
            out[d] = {"bucket": buckets["run"], "folder": folder}
        elif biodata_dir and d.startswith(biodata_dir):
            folder = d.replace(biodata_dir, "")
            folder = folder[1:] if folder.startswith("/") else folder
            out[d] = {"bucket": buckets["biodata"], "folder": folder}
        else:
            folder = os.path.join("externalmap", str(external_count))
            out[d] = {"bucket": buckets["run"], "folder": folder}
            external_count += 1
    return out
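
# Usage sketch (bucket names and paths here are hypothetical): directories
# under the work directory map to matching folders in the run bucket, known
# biodata directories map to the biodata bucket, and any other input
# directory gets a numbered "externalmap" folder in the run bucket.
#
#   buckets = {"run": "my-run-bucket", "biodata": "my-biodata-bucket"}
#   dir_to_s3 = _prep_s3_directories(args, buckets)
#   # => {"/mnt/work/align":  {"bucket": "my-run-bucket", "folder": "align"},
#   #     "/home/user/fastq": {"bucket": "my-run-bucket", "folder": "externalmap/0"}}
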
def _remap_dict_shared(workdir, new_workdir, args):
    """Prepare a remap dictionary with directories we should potential copy files from.
    """
    ignore_keys = set(["algorithm"])
    out = {workdir: new_workdir}
    def _update_remap(fname, context, remap_dict):
        """Updated list of directories we should potentially be remapping in.
        """
        if not fname.startswith(tuple(out.keys())) and context and context[0] not in ignore_keys:
            dirname = os.path.normpath(os.path.dirname(fname))
            local_dir = utils.safe_makedir(os.path.join(new_workdir, "external", str(len(out))))
            out[dirname] = local_dir
    remap.walk_files(args, _update_remap, {})
    return out
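
# For illustration (paths are hypothetical): the resulting dictionary maps
# the shared work directory to the new one, plus one numbered "external"
# directory per input directory that lives outside it.
#
#   remap_dict = _remap_dict_shared("/shared/work", "/tmp/bcbio-work-1234", args)
#   # => {"/shared/work": "/tmp/bcbio-work-1234",
#   #     "/shared/inputs/fastq": "/tmp/bcbio-work-1234/external/1"}
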
def to_s3(args, config):
    """Ship required processing files to S3 for running on non-shared filesystem Amazon instances.
    """
    dir_to_s3 = _prep_s3_directories(args, config["buckets"])
    conn = boto.connect_s3()
    args = _remove_empty(remap.walk_files(args, _remap_and_ship(conn), dir_to_s3, pass_dirs=True))
    return args
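
# ``_remap_and_ship`` is referenced above but not defined in this section.
# A minimal sketch of its expected shape, assuming the same ``os``/``boto``
# imports as the surrounding code: it returns a walk_files callback that
# uploads each local file to its mapped bucket and rewrites the argument
# path to the resulting s3:// location.
def _remap_and_ship_sketch(conn):
    """Hypothetical stand-in for the _remap_and_ship helper used by to_s3."""
    def _do_remap(fname, context, remap_dict):
        if os.path.isfile(fname):
            dirname = os.path.normpath(os.path.dirname(os.path.abspath(fname)))
            store = remap_dict[dirname]
            keyname = "%s/%s" % (store["folder"], os.path.basename(fname))
            bucket = conn.lookup(store["bucket"]) or conn.create_bucket(store["bucket"])
            # Only upload when the key is not already present in the bucket.
            if not bucket.get_key(keyname):
                bucket.new_key(keyname).set_contents_from_filename(fname)
            return "s3://%s/%s" % (store["bucket"], keyname)
        return fname
    return _do_remap
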
def _do(out):
    """Copy results back from the temporary work directory and clean up.

    Captures ``remap_dict``, ``parallel`` and ``workdir`` from an enclosing
    scope that is not shown in this section.
    """
    if remap_dict:
        # Invert the mapping so temporary paths point back to shared ones.
        new_remap_dict = {v: k for k, v in remap_dict.items()}
        new_out = (remap.walk_files(out, _remap_copy_file(parallel), new_remap_dict)
                   if out else None)
        if os.path.exists(workdir):
            shutil.rmtree(workdir)
        return new_out
    else:
        return out
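
# Typical use (hypothetical enclosing code): the closure above is created
# after processing in a temporary work directory and applied to the results,
# copying them back to the shared filesystem and deleting the temp space.
#
#   out = run_processing(new_args)  # hypothetical processing step
#   out = _do(out)                  # restore shared paths, remove workdir
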
def _create_workdir_shared(workdir, args, parallel, tmpdir=None):
    """Create a work directory given inputs from the shared filesystem.

    If tmpdir is not None, we create a local working directory within the
    temporary space so IO and processing occurs there, remapping the input
    argument paths as needed.
    """
    if not tmpdir:
        return workdir, {}, args
    else:
        new_workdir = utils.safe_makedir(os.path.join(tmpdir, "bcbio-work-%s" % uuid.uuid1()))
        remap_dict = _remap_dict_shared(workdir, new_workdir, args)
        new_args = remap.walk_files(args, _remap_copy_file(parallel), remap_dict)
        return new_workdir, remap_dict, new_args
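
# Usage sketch (paths are hypothetical): with a node-local scratch directory,
# IO happens locally and the remap dictionary records how to restore the
# original shared paths afterwards.
#
#   new_workdir, remap_dict, new_args = _create_workdir_shared(
#       "/shared/work", args, parallel, tmpdir="/scratch")
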
def _unpack_s3(bucket, args):
    """Create local directory in current directory with pulldowns from S3.
    """
    local_dir = utils.safe_makedir(os.path.join(os.getcwd(), bucket))
    remote_key = "s3://%s" % bucket

    def _get_s3(orig_fname, context, remap_dict):
        """Pull down s3 published data locally for processing.
        """
        if orig_fname.startswith(remote_key):
            if context[0] in ["reference", "genome_resources", "sam_ref"]:
                cur_dir = os.path.join(local_dir, "genomes")
            else:
                cur_dir = local_dir
            for fname in utils.file_plus_index(orig_fname):
                out_fname = fname.replace(remote_key, cur_dir)
                keyname = fname.replace(remote_key + "/", "")
                _transfer_s3(out_fname, keyname, bucket)
            return orig_fname.replace(remote_key, cur_dir)
        else:
            return orig_fname

    new_args = remap.walk_files(args, _get_s3, {remote_key: local_dir})
    return local_dir, new_args
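
# ``_transfer_s3`` is referenced above but not defined in this section. A
# minimal sketch under the same boto/utils assumptions: download a key from
# the bucket when the local copy does not already exist.
def _transfer_s3_sketch(out_fname, keyname, bucket):
    """Hypothetical stand-in for the _transfer_s3 helper used by _unpack_s3."""
    if not os.path.exists(out_fname):
        utils.safe_makedir(os.path.dirname(out_fname))
        key = boto.connect_s3().get_bucket(bucket).get_key(keyname)
        if key:
            key.get_contents_to_filename(out_fname)
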
def _prep_s3_directories(args):
    """Collect the unique parent directories referenced by the input arguments.

    Debugging variant of the bucket-aware version above: gathers the
    directory set and prints it rather than mapping it to S3 locations.
    """
    dirs = set([])
    def _get_dirs(fname, context, remap_dict):
        dirs.add(os.path.normpath(os.path.dirname(os.path.abspath(fname))))
    remap.walk_files(args, _get_dirs, {})
    print(dirs)