Example #1
def load_s3(sample_config):
    """Move a sample configuration locally, providing remote upload.
    """
    with objectstore.open(sample_config) as in_handle:
        config = yaml.safe_load(in_handle)
    r_sample_config = objectstore.parse_remote(sample_config)
    config["upload"] = {
        "method": "s3",
        "dir": os.path.join(os.pardir, "final"),
        "bucket": r_sample_config.bucket,
        "folder": os.path.join(os.path.dirname(r_sample_config.key), "final")
    }
    region = r_sample_config.region or objectstore.default_region(
        sample_config)
    if region:
        config["upload"]["region"] = region
    if not os.access(os.pardir, os.W_OK | os.X_OK):
        raise IOError(
            "Cannot write to the parent directory of work directory %s\n"
            "bcbio wants to store prepared uploaded files to %s\n"
            "We recommend structuring your project in a project specific directory structure\n"
            "with a specific work directory (mkdir -p your-project/work && cd your-project/work)."
            % (os.getcwd(), os.path.join(os.pardir, "final")))
    config = _add_jar_resources(config, sample_config)
    out_file = os.path.join(
        utils.safe_makedir(os.path.join(os.getcwd(), "config")),
        os.path.basename(r_sample_config.key))
    with open(out_file, "w") as out_handle:
        yaml.dump(config,
                  out_handle,
                  default_flow_style=False,
                  allow_unicode=False)
    return out_file
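A minimal usage sketch for load_s3, assuming bcbio's objectstore module has credentials to read the remote path; the bucket and key below are placeholders, not real locations.

# Hypothetical S3 path; requires AWS credentials objectstore can use.
sample_config = "s3://example-bucket/project1/sample_config.yaml"
local_config = load_s3(sample_config)
# local_config now points at ./config/sample_config.yaml, with an "upload"
# section targeting the project1/final folder in the same bucket.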
Example #2
def _write_config_file(items, global_vars, template, project_name, out_dir,
                       remotes):
    """Write configuration file, adding required top level attributes.
    """
    config_dir = utils.safe_makedir(os.path.join(out_dir, "config"))
    out_config_file = os.path.join(config_dir, "%s.yaml" % project_name)
    out = {"fc_date": datetime.datetime.now().strftime("%Y-%m-%d"),
           "fc_name": project_name,
           "upload": {"dir": "../final"},
           "details": items}
    if remotes.get("base"):
        r_base = objectstore.parse_remote(remotes.get("base"))
        out["upload"]["method"] = r_base.store
        out["upload"]["bucket"] = r_base.bucket
        out["upload"]["folder"] = os.path.join(r_base.key, "final") if r_base.key else "final"
        if r_base.region:
            out["upload"]["region"] = r_base.region
    if global_vars:
        out["globals"] = global_vars
    for k, v in template.items():
        if k not in ["details"]:
            out[k] = v
    if os.path.exists(out_config_file):
        shutil.move(out_config_file,
                    out_config_file + ".bak%s" % datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
    with open(out_config_file, "w") as out_handle:
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    return out_config_file
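For illustration, a hedged sketch of calling _write_config_file; every argument value below is a hypothetical placeholder. The function returns the path of the written <project_name>.yaml, backing up any existing copy first.

# All values are illustrative, not bcbio defaults.
items = [{"description": "sample1", "files": ["sample1.bam"]}]
out_config = _write_config_file(items, global_vars={}, template={},
                                project_name="project1", out_dir="/tmp/project1",
                                remotes={"base": "s3://example-bucket/project1"})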
Example #3
def _upload_biodata(gbuild, target, all_dirs):
    """Upload biodata for a specific genome build and target to S3.
    """
    if target == "seq":
        want_dirs = set([
            "coverage", "editing", "prioritization", "rnaseq", "seq", "snpeff",
            "srnaseq", "validation", "variation", "vep"
        ])
        target_dirs = [x for x in all_dirs if x in want_dirs]
    else:
        target_dirs = [x for x in all_dirs if x == target]
    target_dirs = [os.path.join(gbuild, x) for x in target_dirs]
    fname = objectstore.BIODATA_INFO["s3"].format(build=gbuild, target=target)
    remotef = objectstore.parse_remote(fname)
    conn = objectstore.connect(fname)
    bucket = conn.get_bucket(remotef.bucket)
    key = bucket.get_key(remotef.key)
    if not key:
        keyname = remotef.key
        bucketname = remotef.bucket
        target_dirs = " ".join(target_dirs)
        cmd = (
            "tar -cvpf - {target_dirs} | pigz -c | "
            "gof3r put --no-md5 -k {keyname} -b {bucketname} "
            "-m x-amz-storage-class:REDUCED_REDUNDANCY -m x-amz-acl:public-read"
        )
        do.run(cmd.format(**locals()),
               "Upload pre-prepared genome data: %s %s" % (gbuild, target))
Example #4
def load_s3(sample_config):
    """Move a sample configuration locally, providing remote upload.
    """
    with objectstore.open(sample_config) as in_handle:
        config = yaml.safe_load(in_handle)
    r_sample_config = objectstore.parse_remote(sample_config)
    config["upload"] = {"method": "s3",
                        "dir": os.path.join(os.pardir, "final"),
                        "bucket": r_sample_config.bucket,
                        "folder": os.path.join(os.path.dirname(r_sample_config.key), "final")}
    region = r_sample_config.region or objectstore.default_region(sample_config)
    if region:
        config["upload"]["region"] = region
    if not os.access(os.pardir, os.W_OK | os.X_OK):
        raise IOError("Cannot write to the parent directory of work directory %s\n"
                      "bcbio wants to store prepared uploaded files to %s\n"
                      "We recommend structuring your project in a project specific directory structure\n"
                      "with a specific work directory (mkdir -p your-project/work && cd your-project/work)."
                      % (os.getcwd(), os.path.join(os.pardir, "final")))
    config = _add_jar_resources(config, sample_config)
    out_file = os.path.join(utils.safe_makedir(os.path.join(os.getcwd(), "config")),
                            os.path.basename(r_sample_config.key))
    with open(out_file, "w") as out_handle:
        yaml.dump(config, out_handle, default_flow_style=False, allow_unicode=False)
    return out_file
Example #5
def file_size(file_ref, config=None):
    """Retrieve the file size in megabytes.
    """
    conn = objectstore.connect(file_ref)
    remote = objectstore.parse_remote(file_ref)
    bucket = conn.get_bucket(remote.bucket)
    key = bucket.lookup(remote.key)
    return key.size / (1024.0 * 1024.0)
Example #6
def file_exists(file_ref, config):
    """Check for existence of a remote file, returning the path if present.
    """
    conn = objectstore.connect(file_ref)
    remote = objectstore.parse_remote(file_ref)
    bucket = conn.get_bucket(remote.bucket)
    key = bucket.lookup(remote.key)
    if key:
        return file_ref
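A hedged sketch of the two helpers together; the object path is a placeholder and objectstore.connect needs valid AWS credentials.

ref = "s3://example-bucket/data/sample1.bam"  # hypothetical object
if file_exists(ref, config={}):
    print("size: %.1f MB" % file_size(ref))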
Example #7
def upload_file_boto(fname, remote_fname, mditems=None):
    """Upload a file using boto instead of external tools.
    """
    r_fname = objectstore.parse_remote(remote_fname)
    conn = objectstore.connect(remote_fname)
    bucket = conn.lookup(r_fname.bucket)
    if not bucket:
        bucket = conn.create_bucket(r_fname.bucket, location=objectstore.get_region(remote_fname))
    key = bucket.get_key(r_fname.key, validate=False)
    if mditems is None:
        mditems = {}
    if "x-amz-server-side-encryption" not in mditems:
        mditems["x-amz-server-side-encryption"] = "AES256"
    for name, val in mditems.items():
        key.set_metadata(name, val)
    key.set_contents_from_filename(fname, encrypt_key=True)
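A minimal invocation sketch for upload_file_boto; both paths are hypothetical, and the AES256 server-side encryption header is applied by default as shown above.

# Placeholder paths; boto credentials need write access to the bucket.
upload_file_boto("results/final.vcf.gz",
                 "s3://example-bucket/project1/final/final.vcf.gz")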
Example #8
def upload_file_boto(fname, remote_fname, mditems=None):
    """Upload a file using boto instead of external tools.
    """
    r_fname = objectstore.parse_remote(remote_fname)
    conn = objectstore.connect(remote_fname)
    bucket = conn.lookup(r_fname.bucket)
    if not bucket:
        bucket = conn.create_bucket(r_fname.bucket)
    key = bucket.get_key(r_fname.key, validate=False)
    if mditems is None:
        mditems = {}
    if "x-amz-server-side-encryption" not in mditems:
        mditems["x-amz-server-side-encryption"] = "AES256"
    for name, val in mditems.items():
        key.set_metadata(name, val)
    key.set_contents_from_filename(fname, encrypt_key=True)
Example #9
def _upload_biodata(gbuild, target, all_dirs):
    """Upload biodata for a specific genome build and target to S3.
    """
    if target == "seq":
        want_dirs = set(["rnaseq", "seq", "variation", "vep", "snpeff"])
        target_dirs = [x for x in all_dirs if (x.startswith("rnaseq-") or x in want_dirs)]
    else:
        target_dirs = [x for x in all_dirs if x == target]
    target_dirs = [os.path.join(gbuild, x) for x in target_dirs]
    fname = objectstore.BIODATA_INFO["s3"].format(build=gbuild, target=target)
    remotef = objectstore.parse_remote(fname)
    conn = objectstore.connect(fname)
    bucket = conn.get_bucket(remotef.bucket)
    key = bucket.get_key(remotef.key)
    if not key:
        keyname = remotef.key
        bucketname = remotef.bucket
        target_dirs = " ".join(target_dirs)
        cmd = ("tar -cvpf - {target_dirs} | pigz -c | "
               "gof3r put --no-md5 -k {keyname} -b {bucketname} "
               "-m x-amz-storage-class:REDUCED_REDUNDANCY -m x-amz-acl:public-read")
        do.run(cmd.format(**locals()), "Upload pre-prepared genome data: %s %s" % (gbuild, target))