def load_s3(sample_config): """Move a sample configuration locally, providing remote upload. """ with objectstore.open(sample_config) as in_handle: config = yaml.load(in_handle) r_sample_config = objectstore.parse_remote(sample_config) config["upload"] = { "method": "s3", "dir": os.path.join(os.pardir, "final"), "bucket": r_sample_config.bucket, "folder": os.path.join(os.path.dirname(r_sample_config.key), "final") } region = r_sample_config.region or objectstore.default_region( sample_config) if region: config["upload"]["region"] = region if not os.access(os.pardir, os.W_OK | os.X_OK): raise IOError( "Cannot write to the parent directory of work directory %s\n" "bcbio wants to store prepared uploaded files to %s\n" "We recommend structuring your project in a project specific directory structure\n" "with a specific work directory (mkdir -p your-project/work && cd your-project/work)." % (os.getcwd(), os.path.join(os.pardir, "final"))) config = _add_jar_resources(config, sample_config) out_file = os.path.join( utils.safe_makedir(os.path.join(os.getcwd(), "config")), os.path.basename(r_sample_config.key)) with open(out_file, "w") as out_handle: yaml.dump(config, out_handle, default_flow_style=False, allow_unicode=False) return out_file
def _write_config_file(items, global_vars, template, project_name, out_dir, remotes):
    """Write configuration file, adding required top level attributes.
    """
    config_dir = utils.safe_makedir(os.path.join(out_dir, "config"))
    out_config_file = os.path.join(config_dir, "%s.yaml" % project_name)
    out = {"fc_date": datetime.datetime.now().strftime("%Y-%m-%d"),
           "fc_name": project_name,
           "upload": {"dir": "../final"},
           "details": items}
    if remotes.get("base"):
        r_base = objectstore.parse_remote(remotes.get("base"))
        out["upload"]["method"] = r_base.store
        out["upload"]["bucket"] = r_base.bucket
        out["upload"]["folder"] = os.path.join(r_base.key, "final") if r_base.key else "final"
        if r_base.region:
            out["upload"]["region"] = r_base.region
    if global_vars:
        out["globals"] = global_vars
    for k, v in template.iteritems():
        if k not in ["details"]:
            out[k] = v
    if os.path.exists(out_config_file):
        shutil.move(out_config_file,
                    out_config_file + ".bak%s" % datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    with open(out_config_file, "w") as out_handle:
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    return out_config_file
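# Usage sketch (hypothetical project name, bucket and sample item): builds a project
# configuration whose upload section points back at an S3 "base" directory. The item
# shape below is an assumption for illustration, not the only form bcbio accepts.
def _example_write_config():
    items = [{"description": "sample1",
              "files": ["s3://example-bucket/inputs/sample1_1.fq.gz"]}]
    return _write_config_file(items, global_vars={}, template={},
                              project_name="example-project", out_dir="example-project",
                              remotes={"base": "s3://example-bucket/example-project"})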
def _upload_biodata(gbuild, target, all_dirs):
    """Upload biodata for a specific genome build and target to S3.
    """
    if target == "seq":
        want_dirs = set(["coverage", "editing", "prioritization", "rnaseq",
                         "seq", "snpeff", "srnaseq", "validation",
                         "variation", "vep"])
        target_dirs = [x for x in all_dirs if x in want_dirs]
    else:
        target_dirs = [x for x in all_dirs if x == target]
    target_dirs = [os.path.join(gbuild, x) for x in target_dirs]
    fname = objectstore.BIODATA_INFO["s3"].format(build=gbuild, target=target)
    remotef = objectstore.parse_remote(fname)
    conn = objectstore.connect(fname)
    bucket = conn.get_bucket(remotef.bucket)
    key = bucket.get_key(remotef.key)
    if not key:
        keyname = remotef.key
        bucketname = remotef.bucket
        target_dirs = " ".join(target_dirs)
        cmd = ("tar -cvpf - {target_dirs} | pigz -c | "
               "gof3r put --no-md5 -k {keyname} -b {bucketname} "
               "-m x-amz-storage-class:REDUCED_REDUNDANCY -m x-amz-acl:public-read")
        do.run(cmd.format(**locals()),
               "Upload pre-prepared genome data: %s %s" % (gbuild, target))
def load_s3(sample_config):
    """Move a sample configuration locally, providing remote upload.
    """
    with objectstore.open(sample_config) as in_handle:
        config = yaml.safe_load(in_handle)
    r_sample_config = objectstore.parse_remote(sample_config)
    config["upload"] = {"method": "s3",
                        "dir": os.path.join(os.pardir, "final"),
                        "bucket": r_sample_config.bucket,
                        "folder": os.path.join(os.path.dirname(r_sample_config.key), "final")}
    region = r_sample_config.region or objectstore.default_region(sample_config)
    if region:
        config["upload"]["region"] = region
    if not os.access(os.pardir, os.W_OK | os.X_OK):
        raise IOError("Cannot write to the parent directory of work directory %s\n"
                      "bcbio wants to store prepared uploaded files to %s\n"
                      "We recommend structuring your project in a project specific directory structure\n"
                      "with a specific work directory (mkdir -p your-project/work && cd your-project/work)."
                      % (os.getcwd(), os.path.join(os.pardir, "final")))
    config = _add_jar_resources(config, sample_config)
    out_file = os.path.join(utils.safe_makedir(os.path.join(os.getcwd(), "config")),
                            os.path.basename(r_sample_config.key))
    with open(out_file, "w") as out_handle:
        yaml.dump(config, out_handle, default_flow_style=False, allow_unicode=False)
    return out_file
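# Usage sketch (hypothetical S3 path): fetches a remote sample YAML, rewrites its upload
# block to push results back to the same bucket under <prefix>/final, and writes the
# adjusted configuration under ./config in the current work directory.
def _example_load_s3():
    return load_s3("s3://example-bucket/configs/example-project.yaml")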
def file_size(file_ref, config=None):
    """Retrieve file size in Mb.
    """
    conn = objectstore.connect(file_ref)
    remote = objectstore.parse_remote(file_ref)
    bucket = conn.get_bucket(remote.bucket)
    key = bucket.lookup(remote.key)
    return key.size / (1024.0 * 1024.0)
def file_exists(file_ref, config):
    """Check for existence of a remote file, returning path if present.
    """
    conn = objectstore.connect(file_ref)
    remote = objectstore.parse_remote(file_ref)
    bucket = conn.get_bucket(remote.bucket)
    key = bucket.lookup(remote.key)
    if key:
        return file_ref
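# Usage sketch (hypothetical object reference): check that a remote file exists before
# asking for its size, since file_size assumes the key is present and would fail on a
# missing object.
def _example_remote_file_info(file_ref="s3://example-bucket/inputs/sample1.bam"):
    if file_exists(file_ref, config={}):
        return file_size(file_ref)
    return None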
def upload_file_boto(fname, remote_fname, mditems=None):
    """Upload a file using boto instead of external tools.
    """
    r_fname = objectstore.parse_remote(remote_fname)
    conn = objectstore.connect(remote_fname)
    bucket = conn.lookup(r_fname.bucket)
    if not bucket:
        bucket = conn.create_bucket(r_fname.bucket, location=objectstore.get_region(remote_fname))
    key = bucket.get_key(r_fname.key, validate=False)
    if mditems is None:
        mditems = {}
    if "x-amz-server-side-encryption" not in mditems:
        mditems["x-amz-server-side-encryption"] = "AES256"
    for name, val in mditems.iteritems():
        key.set_metadata(name, val)
    key.set_contents_from_filename(fname, encrypt_key=True)
def upload_file_boto(fname, remote_fname, mditems=None):
    """Upload a file using boto instead of external tools.
    """
    r_fname = objectstore.parse_remote(remote_fname)
    conn = objectstore.connect(remote_fname)
    bucket = conn.lookup(r_fname.bucket)
    if not bucket:
        bucket = conn.create_bucket(r_fname.bucket)
    key = bucket.get_key(r_fname.key, validate=False)
    if mditems is None:
        mditems = {}
    if "x-amz-server-side-encryption" not in mditems:
        mditems["x-amz-server-side-encryption"] = "AES256"
    for name, val in mditems.iteritems():
        key.set_metadata(name, val)
    key.set_contents_from_filename(fname, encrypt_key=True)
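# Usage sketch (hypothetical local file and destination): pushes a single result file to
# S3 with server-side encryption applied by default; extra metadata headers could be
# supplied through mditems if needed.
def _example_upload():
    upload_file_boto("run_summary.yaml",
                     "s3://example-bucket/example-project/final/run_summary.yaml")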
def _upload_biodata(gbuild, target, all_dirs):
    """Upload biodata for a specific genome build and target to S3.
    """
    if target == "seq":
        want_dirs = set(["rnaseq", "seq", "variation", "vep", "snpeff"])
        target_dirs = [x for x in all_dirs if (x.startswith("rnaseq-") or x in want_dirs)]
    else:
        target_dirs = [x for x in all_dirs if x == target]
    target_dirs = [os.path.join(gbuild, x) for x in target_dirs]
    fname = objectstore.BIODATA_INFO["s3"].format(build=gbuild, target=target)
    remotef = objectstore.parse_remote(fname)
    conn = objectstore.connect(fname)
    bucket = conn.get_bucket(remotef.bucket)
    key = bucket.get_key(remotef.key)
    if not key:
        keyname = remotef.key
        bucketname = remotef.bucket
        target_dirs = " ".join(target_dirs)
        cmd = ("tar -cvpf - {target_dirs} | pigz -c | "
               "gof3r put --no-md5 -k {keyname} -b {bucketname} "
               "-m x-amz-storage-class:REDUCED_REDUNDANCY -m x-amz-acl:public-read")
        do.run(cmd.format(**locals()),
               "Upload pre-prepared genome data: %s %s" % (gbuild, target))
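# Usage sketch (hypothetical genome directory): packages the sequence-associated
# directories for a build into one tarball and uploads it only when the target key is
# not already present. Assumes the current directory contains the build folder and that
# pigz and gof3r are available on the PATH.
def _example_upload_biodata():
    gbuild = "GRCh37"
    _upload_biodata(gbuild, "seq", os.listdir(gbuild))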