import os
import shutil
import uuid

import boto

# Project-internal helpers; these import paths are assumed from the surrounding
# bcbio codebase, which supplies utils.safe_makedir/file_plus_index and
# remap.walk_files used throughout this module.
from bcbio import utils
from bcbio.distributed import remap


def _prep_s3_directories(args, buckets):
    """Map input directories into stable S3 buckets and folders for storing files.
    """
    dirs = set([])

    def _get_dirs(fname, context, remap_dict):
        dirs.add(os.path.normpath(os.path.dirname(os.path.abspath(fname))))
    remap.walk_files(args, _get_dirs, {}, pass_dirs=True)
    work_dir, biodata_dir = _get_known_dirs(args)
    out = {}
    external_count = 0
    for d in sorted(dirs):
        if work_dir and d.startswith(work_dir):
            folder = d.replace(work_dir, "")
            folder = folder[1:] if folder.startswith("/") else folder
            out[d] = {"bucket": buckets["run"], "folder": folder}
        elif biodata_dir and d.startswith(biodata_dir):
            folder = d.replace(biodata_dir, "")
            folder = folder[1:] if folder.startswith("/") else folder
            out[d] = {"bucket": buckets["biodata"], "folder": folder}
        else:
            folder = os.path.join("externalmap", str(external_count))
            out[d] = {"bucket": buckets["run"], "folder": folder}
            external_count += 1
    return out
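# Illustrative only: the mapping returned by _prep_s3_directories has the shape
# below. The directories and bucket names are made-up assumptions, not output
# from a real run: files under the work directory map into the "run" bucket,
# biodata into the "biodata" bucket, and everything else into numbered
# externalmap folders in the run bucket.
_EXAMPLE_DIR_TO_S3 = {
    "/data/work/align/sample1": {"bucket": "example-run-bucket",
                                 "folder": "align/sample1"},
    "/data/biodata/genomes/Hsapiens/GRCh37/seq": {"bucket": "example-biodata-bucket",
                                                  "folder": "genomes/Hsapiens/GRCh37/seq"},
    "/home/user/other_inputs": {"bucket": "example-run-bucket",
                                "folder": "externalmap/0"},
}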
def _remap_dict_shared(workdir, new_workdir, args):
    """Prepare a remap dictionary with directories we should potentially copy files from.
    """
    ignore_keys = set(["algorithm"])
    out = {workdir: new_workdir}

    def _update_remap(fname, context, remap_dict):
        """Update the list of directories we should potentially be remapping in.
        """
        if not fname.startswith(tuple(out.keys())) and context and context[0] not in ignore_keys:
            dirname = os.path.normpath(os.path.dirname(fname))
            local_dir = utils.safe_makedir(os.path.join(new_workdir, "external", str(len(out))))
            out[dirname] = local_dir
    remap.walk_files(args, _update_remap, {})
    return out
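# Illustrative only: _remap_dict_shared returns a plain {original_dir: local_dir}
# mapping. The paths below are made-up assumptions showing the shape: the shared
# work directory maps to the transient work directory, and any other referenced
# input directories map to numbered "external" subdirectories created under it.
_EXAMPLE_REMAP_DICT = {
    "/shared/project/work": "/tmp/bcbio-work-1234",
    "/shared/project/inputs": "/tmp/bcbio-work-1234/external/1",
}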
def to_s3(args, config):
    """Ship required processing files to S3 for running on non-shared filesystem Amazon instances.
    """
    dir_to_s3 = _prep_s3_directories(args, config["buckets"])
    conn = boto.connect_s3()
    args = _remove_empty(remap.walk_files(args, _remap_and_ship(conn), dir_to_s3, pass_dirs=True))
    return args
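# Not the project's implementation: a minimal sketch of what the _remap_and_ship
# closure used in to_s3 could look like, assuming boto (version 2) uploads and
# pre-existing buckets. It returns a walk_files callback that pushes local files
# (plus any indexes) into the bucket/folder chosen by _prep_s3_directories and
# rewrites the argument path to an s3:// URI. Existing-key checks, multipart
# uploads and error handling are omitted.
def _remap_and_ship_sketch(conn):
    def _do_remap(fname, context, remap_dict):
        if os.path.exists(fname) and os.path.isfile(fname):
            dirname = os.path.normpath(os.path.dirname(os.path.abspath(fname)))
            store = remap_dict[dirname]
            bucket = conn.get_bucket(store["bucket"])
            for cur_fname in utils.file_plus_index(fname):
                keyname = "%s/%s" % (store["folder"], os.path.basename(cur_fname))
                key = bucket.new_key(keyname)
                key.set_contents_from_filename(cur_fname)
            return "s3://%s/%s/%s" % (store["bucket"], store["folder"], os.path.basename(fname))
        else:
            return fname
    return _do_remap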
def _do(out):
    # Closure fragment: remap_dict, parallel and workdir come from the enclosing
    # function (see _create_workdir_shared below). When a remap was set up, walk
    # the output with the inverted mapping to copy files back to their original
    # locations, then remove workdir; otherwise pass the output through untouched.
    if remap_dict:
        new_remap_dict = {v: k for k, v in remap_dict.items()}
        new_out = (remap.walk_files(out, _remap_copy_file(parallel), new_remap_dict)
                   if out else None)
        if os.path.exists(workdir):
            shutil.rmtree(workdir)
        return new_out
    else:
        return out
def _create_workdir_shared(workdir, args, parallel, tmpdir=None):
    """Create a work directory given inputs from the shared filesystem.

    If tmpdir is not None, we create a local working directory within the
    temporary space so IO and processing occurs there, remapping the input
    argument paths as needed.
    """
    if not tmpdir:
        return workdir, {}, args
    else:
        new_workdir = utils.safe_makedir(os.path.join(tmpdir, "bcbio-work-%s" % uuid.uuid1()))
        remap_dict = _remap_dict_shared(workdir, new_workdir, args)
        new_args = remap.walk_files(args, _remap_copy_file(parallel), remap_dict)
        return new_workdir, remap_dict, new_args
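# Illustrative only: how _create_workdir_shared is intended to pair with the _do
# cleanup closure above. run_fn, args, parallel and tmpdir are placeholders for
# whatever the caller provides; with tmpdir=None the original workdir and
# arguments pass through unchanged and _do leaves the output untouched.
#
#   workdir, remap_dict, args = _create_workdir_shared(orig_workdir, args, parallel, tmpdir)
#   out = run_fn(*args)
#   out = _do(out)  # remaps outputs back using the inverted remap_dict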
def _unpack_s3(bucket, args):
    """Create local directory in current directory with pulldowns from S3.
    """
    local_dir = utils.safe_makedir(os.path.join(os.getcwd(), bucket))
    remote_key = "s3://%s" % bucket

    def _get_s3(orig_fname, context, remap_dict):
        """Pull down s3 published data locally for processing.
        """
        if orig_fname.startswith(remote_key):
            if context[0] in ["reference", "genome_resources", "sam_ref"]:
                cur_dir = os.path.join(local_dir, "genomes")
            else:
                cur_dir = local_dir
            for fname in utils.file_plus_index(orig_fname):
                out_fname = fname.replace(remote_key, cur_dir)
                keyname = fname.replace(remote_key + "/", "")
                _transfer_s3(out_fname, keyname, bucket)
            return orig_fname.replace(remote_key, cur_dir)
        else:
            return orig_fname
    new_args = remap.walk_files(args, _get_s3, {remote_key: local_dir})
    return local_dir, new_args
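# Not the project's implementation: a minimal sketch of the _transfer_s3 helper
# referenced above, assuming boto (version 2) downloads. It pulls a single key
# from the bucket into the expected local path, skipping files already present;
# retries and progress reporting are omitted.
def _transfer_s3_sketch(out_fname, keyname, bucket):
    if not os.path.exists(out_fname):
        utils.safe_makedir(os.path.dirname(out_fname))
        conn = boto.connect_s3()
        key = conn.get_bucket(bucket).get_key(keyname)
        if key:
            key.get_contents_to_filename(out_fname)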