def name_to_config(template):
    """Read template file into a dictionary to use as base for all samples.

    Three sources are tried in order: a remote object-store path, a local
    file, and finally a well-known template name pulled from the
    bcbio-nextgen GitHub repository.

    Args:
        template: Remote path, local file path, or standard template name.

    Returns:
        Tuple of (config, txt_config): the parsed YAML and the raw text it
        was parsed from.

    Raises:
        ValueError: If a local CSV file is supplied instead of YAML, or if
            the name cannot be retrieved from the GitHub templates.
    """
    # Each source is read exactly once; the parsed configuration is built
    # from the same text that is returned, so the two can never diverge and
    # remote templates are not fetched twice.
    if objectstore.is_remote(template):
        with objectstore.open(template) as in_handle:
            txt_config = in_handle.read()
    elif os.path.isfile(template):
        if template.endswith(".csv"):
            raise ValueError("Expected YAML file for template and found CSV, are arguments switched? %s" % template)
        with open(template) as in_handle:
            txt_config = in_handle.read()
    else:
        base_url = "https://raw.github.com/chapmanb/bcbio-nextgen/master/config/templates/%s.yaml"
        try:
            with contextlib.closing(urllib2.urlopen(base_url % template)) as in_handle:
                txt_config = in_handle.read()
        except (urllib2.HTTPError, urllib2.URLError):
            raise ValueError("Could not find template '%s' locally or in standard templates on GitHub" % template)
    # NOTE(review): templates may come from the network, so they are parsed
    # with safe_load (as load_s3 does) instead of yaml.load, which can build
    # arbitrary Python objects. Templates relying on Python-specific YAML
    # tags would no longer load -- confirm none exist.
    config = yaml.safe_load(txt_config)
    return config, txt_config
def load_s3(sample_config):
    """Move a sample configuration locally, providing remote upload.

    Reads the remote YAML configuration, adds an "upload" section pointing
    at a "final" folder next to the configuration in its S3 bucket, and
    writes the result into a "config" directory under the current working
    directory.

    Args:
        sample_config: Remote path of the sample configuration YAML.

    Returns:
        Path of the local configuration file that was written.

    Raises:
        IOError: If the parent of the current working directory is not
            writable and traversable.
    """
    # NOTE(review): the file comes from remote storage, so it is parsed with
    # safe_load rather than yaml.load, which can construct arbitrary Python
    # objects from tagged input.
    with objectstore.open(sample_config) as in_handle:
        config = yaml.safe_load(in_handle)
    r_sample_config = objectstore.parse_remote(sample_config)
    config["upload"] = {
        "method": "s3",
        "dir": os.path.join(os.pardir, "final"),
        "bucket": r_sample_config.bucket,
        "folder": os.path.join(os.path.dirname(r_sample_config.key), "final"),
    }
    # Prefer the region parsed from the remote path, otherwise the default.
    region = r_sample_config.region or objectstore.default_region(sample_config)
    if region:
        config["upload"]["region"] = region
    # The upload directory is ../final, so the parent directory must be usable.
    if not os.access(os.pardir, os.W_OK | os.X_OK):
        raise IOError(
            "Cannot write to the parent directory of work directory %s\n"
            "bcbio wants to store prepared uploaded files to %s\n"
            "We recommend structuring your project in a project specific directory structure\n"
            "with a specific work directory (mkdir -p your-project/work && cd your-project/work)."
            % (os.getcwd(), os.path.join(os.pardir, "final")))
    config = _add_jar_resources(config, sample_config)
    # presumably safe_makedir creates the directory and returns its path -- verify
    out_file = os.path.join(
        utils.safe_makedir(os.path.join(os.getcwd(), "config")),
        os.path.basename(r_sample_config.key))
    with open(out_file, "w") as out_handle:
        yaml.dump(config, out_handle, default_flow_style=False, allow_unicode=False)
    return out_file
def load_s3(sample_config):
    """Copy a remote sample configuration locally, set up for S3 upload.

    The remote YAML is loaded, given an "upload" section targeting a
    "final" folder beside the configuration in its bucket, passed through
    _add_jar_resources, and dumped into ./config under the configuration's
    own file name.

    Returns the path of the local file that was written. Raises IOError
    when the parent of the working directory cannot be written to.
    """
    with objectstore.open(sample_config) as remote_handle:
        config = yaml.safe_load(remote_handle)

    remote = objectstore.parse_remote(sample_config)
    final_dir = os.path.join(os.pardir, "final")
    upload = {"method": "s3",
              "dir": final_dir,
              "bucket": remote.bucket,
              "folder": os.path.join(os.path.dirname(remote.key), "final")}
    config["upload"] = upload

    # Region from the remote path wins; otherwise ask for the default one.
    region = remote.region
    if not region:
        region = objectstore.default_region(sample_config)
    if region:
        upload["region"] = region

    parent_usable = os.access(os.pardir, os.W_OK | os.X_OK)
    if not parent_usable:
        raise IOError("Cannot write to the parent directory of work directory %s\n"
                      "bcbio wants to store prepared uploaded files to %s\n"
                      "We recommend structuring your project in a project specific directory structure\n"
                      "with a specific work directory (mkdir -p your-project/work && cd your-project/work)."
                      % (os.getcwd(), final_dir))

    config = _add_jar_resources(config, sample_config)

    config_dir = utils.safe_makedir(os.path.join(os.getcwd(), "config"))
    out_file = os.path.join(config_dir, os.path.basename(remote.key))
    with open(out_file, "w") as local_handle:
        yaml.dump(config, local_handle, default_flow_style=False, allow_unicode=False)
    return out_file
def open_fastq(in_file):
    """Open a fastq file, using gzip if it is gzipped.

    Args:
        in_file: Remote path or local file name of the fastq file.

    Returns:
        An open handle: the object-store handle for remote inputs, a gzip
        handle in 'rb' mode for a ".gz" extension, and a regular handle in
        'r' mode for everything else.
    """
    if objectstore.is_remote(in_file):
        return objectstore.open(in_file)
    _, ext = os.path.splitext(in_file)
    if ext == ".gz":
        return gzip.open(in_file, 'rb')
    # Plain ".fastq"/".fq" files and any other extension are opened directly.
    # Unrecognized extensions used to fall through and return None, which
    # only failed later when the caller tried to use the handle.
    return open(in_file, 'r')
def open_fastq(in_file):
    """Return an open handle for a fastq file, decompressing gzip input.

    Remote inputs are opened through the object store. Local files ending
    in ".gz" are opened with gzip in 'rb' mode; every other local file,
    ".fastq" and ".fq" included, is opened in 'r' mode.
    """
    if objectstore.is_remote(in_file):
        return objectstore.open(in_file)

    extension = os.path.splitext(in_file)[1]
    if extension == ".gz":
        return gzip.open(in_file, 'rb')
    # Known fastq extensions and the default case open the file the same way.
    return open(in_file, "r")
def _pname_and_metadata(in_file):
    """Retrieve metadata and project name from the input metadata CSV file.

    Uses the input file name, without directory or extension, for the
    project name. For back compatibility, also accepts the project name
    itself as an input, providing no metadata.

    Args:
        in_file: Local or remote metadata CSV path, or a bare project name.

    Returns:
        Tuple of (project name, metadata, global variables).

    Raises:
        ValueError: If the input ends in ".csv" but is neither an existing
            local file nor a remote path.
    """
    if os.path.isfile(in_file):
        with open(in_file) as in_handle:
            md, global_vars = _parse_metadata(in_handle)
        base = os.path.splitext(os.path.basename(in_file))[0]
    elif objectstore.is_remote(in_file):
        with objectstore.open(in_file) as in_handle:
            md, global_vars = _parse_metadata(in_handle)
        base = os.path.splitext(os.path.basename(in_file))[0]
    else:
        # A missing or mistyped CSV path would otherwise be silently treated
        # as a project name with empty metadata.
        if in_file.endswith(".csv"):
            raise ValueError("Did not find input metadata file: %s" % in_file)
        base, md, global_vars = _safe_name(in_file), {}, {}
    return _safe_name(base), md, global_vars
def _pname_and_metadata(in_file):
    """Derive the project name and metadata from the metadata CSV input.

    The project name is the input's file name without directory or
    extension. A bare project name is still accepted for back
    compatibility and yields empty metadata.

    Returns a tuple of (project name, metadata, global variables, metadata
    file), where the metadata file is the input path for an existing local
    file and None otherwise. Raises ValueError when the input ends in
    ".csv" but is neither a local file nor a remote path.
    """
    md_file = None
    if os.path.isfile(in_file):
        with open(in_file) as local_handle:
            md, global_vars = _parse_metadata(local_handle)
        project = os.path.splitext(os.path.basename(in_file))[0]
        md_file = in_file
    elif objectstore.is_remote(in_file):
        with objectstore.open(in_file) as remote_handle:
            md, global_vars = _parse_metadata(remote_handle)
        project = os.path.splitext(os.path.basename(in_file))[0]
    elif in_file.endswith(".csv"):
        raise ValueError("Did not find input metadata file: %s" % in_file)
    else:
        # Bare project name: no metadata available.
        md = {}
        global_vars = {}
        stem = os.path.splitext(os.path.basename(in_file))[0]
        project = _safe_name(stem)
    return _safe_name(project), md, global_vars, md_file