Ejemplo n.º 1
0
def name_to_config(template):
    """Read template file into a dictionary to use as base for all samples.

    Handles well-known template names, pulled from GitHub repository and local
    files. The template is resolved, in order, as a remote object store
    reference, an existing local file, or a template name looked up in the
    bcbio-nextgen templates directory on GitHub.

    The template text is retrieved exactly once and the parsed configuration
    is built from that same text, so the two returned values always describe
    the same content.

    Returns a tuple of (config, txt_config): the parsed YAML and the raw
    template text.

    Raises ValueError if an existing local CSV file is passed in place of a
    YAML template, or if the named template cannot be retrieved from GitHub.
    """
    if objectstore.is_remote(template):
        with objectstore.open(template) as in_handle:
            txt_config = in_handle.read()
    elif os.path.isfile(template):
        if template.endswith(".csv"):
            raise ValueError("Expected YAML file for template and found CSV, are arguments switched? %s" % template)
        with open(template) as in_handle:
            txt_config = in_handle.read()
    else:
        base_url = "https://raw.github.com/chapmanb/bcbio-nextgen/master/config/templates/%s.yaml"
        try:
            # Only the download is inside the try: parsing problems should
            # surface as YAML errors, not as "template not found".
            with contextlib.closing(urllib2.urlopen(base_url % template)) as in_handle:
                txt_config = in_handle.read()
        except (urllib2.HTTPError, urllib2.URLError):
            raise ValueError("Could not find template '%s' locally or in standard templates on GitHub"
                             % template)
    # Templates may come from remote locations, so parse with safe_load: it
    # builds only plain YAML types and refuses arbitrary Python object tags.
    config = yaml.safe_load(txt_config)
    return config, txt_config
Ejemplo n.º 2
0
def load_s3(sample_config):
    """Move a sample configuration locally, providing remote upload.

    Reads the remote sample configuration, adds an "upload" section that
    targets the bucket and folder the configuration came from, and writes
    the result into a "config" directory under the current working directory.

    Returns the path of the local configuration file that was written.

    Raises IOError if the parent of the current working directory is not
    writable, since the upload "dir" is set to ../final.
    """
    with objectstore.open(sample_config) as in_handle:
        # The configuration is fetched from a remote store; safe_load builds
        # only plain YAML types and refuses arbitrary Python object tags.
        config = yaml.safe_load(in_handle)
    r_sample_config = objectstore.parse_remote(sample_config)
    config["upload"] = {
        "method": "s3",
        "dir": os.path.join(os.pardir, "final"),
        "bucket": r_sample_config.bucket,
        "folder": os.path.join(os.path.dirname(r_sample_config.key), "final")
    }
    # Prefer the region parsed from the remote reference, otherwise fall back
    # to the object store default; the key is omitted when neither is set.
    region = r_sample_config.region or objectstore.default_region(
        sample_config)
    if region:
        config["upload"]["region"] = region
    if not os.access(os.pardir, os.W_OK | os.X_OK):
        raise IOError(
            "Cannot write to the parent directory of work directory %s\n"
            "bcbio wants to store prepared uploaded files to %s\n"
            "We recommend structuring your project in a project specific directory structure\n"
            "with a specific work directory (mkdir -p your-project/work && cd your-project/work)."
            % (os.getcwd(), os.path.join(os.pardir, "final")))
    config = _add_jar_resources(config, sample_config)
    out_file = os.path.join(
        utils.safe_makedir(os.path.join(os.getcwd(), "config")),
        os.path.basename(r_sample_config.key))
    with open(out_file, "w") as out_handle:
        yaml.dump(config,
                  out_handle,
                  default_flow_style=False,
                  allow_unicode=False)
    return out_file
Ejemplo n.º 3
0
def load_s3(sample_config):
    """Fetch a remote sample configuration and save a local copy set up for S3 upload.

    The parsed configuration gains an "upload" section pointing at the bucket
    and folder of the remote reference, with ../final as the local directory.
    The result is written into a "config" directory under the current working
    directory, and the path of that file is returned.

    Raises IOError when the parent of the current working directory is not
    writable.
    """
    with objectstore.open(sample_config) as remote_handle:
        parsed = yaml.safe_load(remote_handle)
    remote = objectstore.parse_remote(sample_config)

    final_dir = os.path.join(os.pardir, "final")
    upload = {"method": "s3",
              "dir": final_dir,
              "bucket": remote.bucket,
              "folder": os.path.join(os.path.dirname(remote.key), "final")}
    parsed["upload"] = upload

    # Region from the remote reference wins; otherwise ask the object store.
    region = remote.region
    if not region:
        region = objectstore.default_region(sample_config)
    if region:
        upload["region"] = region

    parent_writable = os.access(os.pardir, os.W_OK | os.X_OK)
    if not parent_writable:
        message = ("Cannot write to the parent directory of work directory %s\n"
                   "bcbio wants to store prepared uploaded files to %s\n"
                   "We recommend structuring your project in a project specific directory structure\n"
                   "with a specific work directory (mkdir -p your-project/work && cd your-project/work)."
                   % (os.getcwd(), final_dir))
        raise IOError(message)

    parsed = _add_jar_resources(parsed, sample_config)

    config_dir = utils.safe_makedir(os.path.join(os.getcwd(), "config"))
    out_file = os.path.join(config_dir, os.path.basename(remote.key))
    with open(out_file, "w") as out_handle:
        yaml.dump(parsed, out_handle, default_flow_style=False, allow_unicode=False)
    return out_file
Ejemplo n.º 4
0
def open_fastq(in_file):
    """Open a FASTQ file for reading, using gzip if it is gzipped.

    Remote inputs are opened through the object store. Local files ending in
    .gz are opened with gzip in binary mode; every other local file is opened
    as plain text.

    Returns an open file handle.
    """
    if objectstore.is_remote(in_file):
        return objectstore.open(in_file)
    _, ext = os.path.splitext(in_file)
    if ext == ".gz":
        return gzip.open(in_file, 'rb')
    # Default to just opening it: limiting this to .fastq/.fq made the
    # function fall through and return None for any other extension.
    return open(in_file, "r")
Ejemplo n.º 5
0
def open_fastq(in_file):
    """Return a readable handle for a FASTQ file, remote, gzipped or plain.

    Remote inputs go through the object store, local .gz files are opened
    with gzip in binary mode, and anything else is opened as plain text.
    """
    if objectstore.is_remote(in_file):
        return objectstore.open(in_file)
    extension = os.path.splitext(in_file)[1]
    # .fastq/.fq and unrecognized extensions all share the plain-text path.
    return gzip.open(in_file, 'rb') if extension == ".gz" else open(in_file, "r")
Ejemplo n.º 6
0
def _pname_and_metadata(in_file):
    """Retrieve metadata and project name from the input metadata CSV file.

    Uses the input file name, without directory and extension, as the project
    name when the input is an existing local file or a remote reference.

    For back compatibility, accepts the project name as an input, providing no metadata.

    Returns a tuple of (project name, metadata, global variables).

    Raises ValueError if the input ends with .csv but is neither an existing
    local file nor a remote reference.
    """
    if os.path.isfile(in_file):
        with open(in_file) as in_handle:
            md, global_vars = _parse_metadata(in_handle)
        base = os.path.splitext(os.path.basename(in_file))[0]
    elif objectstore.is_remote(in_file):
        with objectstore.open(in_file) as in_handle:
            md, global_vars = _parse_metadata(in_handle)
        base = os.path.splitext(os.path.basename(in_file))[0]
    else:
        # A .csv argument that cannot be found is a missing metadata file,
        # not a project name; fail loudly instead of running with no metadata.
        if in_file.endswith(".csv"):
            raise ValueError("Did not find input metadata file: %s" % in_file)
        base, md, global_vars = _safe_name(in_file), {}, {}
    return _safe_name(base), md, global_vars
Ejemplo n.º 7
0
def _pname_and_metadata(in_file):
    """Work out the project name and metadata from a metadata CSV or a bare name.

    When the input is an existing local file or a remote reference, its
    contents are parsed for metadata and the file name (without directory and
    extension) becomes the project name. Otherwise, for back compatibility,
    the input is treated as a project name with no metadata.

    Returns a tuple of (project name, metadata, global variables, metadata
    file), where the metadata file is the input path for local files and None
    in every other case.

    Raises ValueError if the input ends with .csv but cannot be found.
    """
    is_local = os.path.isfile(in_file)
    if is_local or objectstore.is_remote(in_file):
        opener = open if is_local else objectstore.open
        with opener(in_file) as handle:
            md, global_vars = _parse_metadata(handle)
        stem = os.path.splitext(os.path.basename(in_file))[0]
        md_file = in_file if is_local else None
        return _safe_name(stem), md, global_vars, md_file

    if in_file.endswith(".csv"):
        raise ValueError("Did not find input metadata file: %s" % in_file)
    stem = os.path.splitext(os.path.basename(in_file))[0]
    # The name is sanitized twice on this path, as in the original flow.
    return _safe_name(_safe_name(stem)), {}, {}, None