def __init__(
    self,
    pipeline=None,
    release=None,
    outdir=None,
    compress_type=None,
    force=False,
    container=None,
    singularity_cache_only=False,
    parallel_downloads=4,
):
    """Initialise the downloader: store CLI options, prepare empty workflow
    state, and fetch the list of remote nf-core workflows.

    Args:
        pipeline (str): Name of the pipeline to download.
        release (str): Release tag or branch to download.
        outdir (str): Output directory for the download.
        compress_type (str): Archive compression type (None for no archive).
        force (bool): Overwrite an existing download.
        container (str): Container engine to fetch images for.
        singularity_cache_only (bool): Only populate the Singularity cache.
        parallel_downloads (int): Number of simultaneous image downloads.
    """
    # Options handed straight through from the caller
    self.pipeline = pipeline
    self.release = release
    self.outdir = outdir
    self.compress_type = compress_type
    self.force = force
    self.container = container
    self.singularity_cache_only = singularity_cache_only
    self.parallel_downloads = parallel_downloads

    # State that is resolved later in the download process
    self.output_filename = None
    self.wf_releases = {}
    self.wf_branches = {}
    self.wf_sha = None
    self.wf_download_url = None
    self.nf_config = {}
    self.containers = []

    # Fetch remote workflows
    self.wfs = nf_core.list.Workflows()
    self.wfs.get_remote_workflows()
def __init__(self, pipeline, release=None, singularity=False, outdir=None):
    """Store the download options and set up empty workflow metadata.

    Args:
        pipeline (str): Name of the pipeline to download.
        release (str): Release tag or branch to download.
        singularity (bool): Whether to also fetch Singularity images.
        outdir (str): Output directory for the download.
    """
    # Caller-supplied options
    self.pipeline = pipeline
    self.release = release
    self.singularity = singularity
    self.outdir = outdir

    # Workflow metadata, filled in once the remote pipeline is resolved
    for attr in ("wf_name", "wf_sha", "wf_download_url"):
        setattr(self, attr, None)
    self.config = {}
    self.containers = []
def __init__(self, pipeline, release=None, singularity=False, outdir=None, compress_type='tar.gz'):
    """Store the download options and set up empty workflow metadata.

    Args:
        pipeline (str): Name of the pipeline to download.
        release (str): Release tag or branch to download.
        singularity (bool): Whether to also fetch Singularity images.
        outdir (str): Output directory for the download.
        compress_type (str): Archive type; the string 'none' disables compression.
    """
    # Caller-supplied options
    self.pipeline = pipeline
    self.release = release
    self.singularity = singularity
    self.outdir = outdir
    self.output_filename = None
    # Normalise the 'none' sentinel to a real None (no compression)
    self.compress_type = None if compress_type == 'none' else compress_type

    # Workflow metadata, filled in once the remote pipeline is resolved
    self.wf_name = None
    self.wf_sha = None
    self.wf_download_url = None
    self.config = {}
    self.containers = []
def validate_schema(self, schema=None):
    """
    Check that the Schema is valid.

    Validates the schema (defaults to ``self.schema``) against JSON Schema
    Draft 7, then applies nf-core structural checks: every ``definitions``
    subschema must be referenced from ``allOf``, every ``allOf`` reference
    must resolve to a definition, parameter IDs must be unique across the
    top level and all definitions, and at least one parameter must exist.

    Args:
        schema (dict, optional): Schema to validate. Falls back to ``self.schema``.

    Returns:
        int: Number of parameters found.

    Raises:
        AssertionError: If any validation check fails.
    """
    if schema is None:
        schema = self.schema
    try:
        jsonschema.Draft7Validator.check_schema(schema)
        log.debug("JSON Schema Draft7 validated")
    except jsonschema.exceptions.SchemaError as e:
        raise AssertionError("Schema does not validate as Draft 7 JSON Schema:\n {}".format(e))

    param_keys = list(schema.get("properties", {}).keys())
    num_params = len(param_keys)

    # NOTE: validation uses explicit raises rather than `assert` statements so
    # the checks still run when Python is invoked with -O (asserts are stripped).
    # AssertionError is kept so existing callers catching it are unaffected.
    for d_key, d_schema in schema.get("definitions", {}).items():
        # Check that this definition is mentioned in allOf
        if "allOf" not in schema:
            raise AssertionError("Schema has definitions, but no allOf key")
        if not any(allOf["$ref"] == "#/definitions/{}".format(d_key) for allOf in schema["allOf"]):
            raise AssertionError("Definition subschema `{}` not included in schema `allOf`".format(d_key))
        for d_param_id in d_schema.get("properties", {}):
            # Check that we don't have any duplicate parameter IDs in different definitions
            if d_param_id in param_keys:
                raise AssertionError("Duplicate parameter found in schema `definitions`: `{}`".format(d_param_id))
            param_keys.append(d_param_id)
            num_params += 1

    # Check that everything in allOf exists
    for allOf in schema.get("allOf", []):
        if "definitions" not in schema:
            raise AssertionError("Schema has allOf, but no definitions")
        # Strip the "#/definitions/" prefix to recover the definition key
        def_key = allOf["$ref"][len("#/definitions/"):]
        if def_key not in schema["definitions"]:
            raise AssertionError("Subschema `{}` found in `allOf` but not `definitions`".format(def_key))

    # Check that the schema describes at least one parameter
    if num_params == 0:
        raise AssertionError("No parameters found in schema")
    return num_params
def find_container_images(self):
    """Collect container image names used by the downloaded workflow.

    DSL1 containers are taken from the resolved ``nextflow config`` output
    (any ``process.*.container`` key). DSL2 containers can't be found with
    ``nextflow config`` at the time of writing, so module files are scraped
    line by line for ``container "xxx"`` declarations; a match starting with
    ``http`` (a direct download URL) is preferred over the first match.
    Names containing dynamic Nextflow logic (``{...}``) are reported and
    skipped. The deduplicated, sorted result is stored on ``self.containers``.
    """
    log.debug("Fetching container names for workflow")

    # DSL1: use the linting code to parse the pipeline nextflow config and
    # pick out any config variables that look like a container declaration
    self.nf_config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, "workflow"))
    found = [
        v.strip('"').strip("'")
        for k, v in self.nf_config.items()
        if k.startswith("process.") and k.endswith(".container")
    ]

    # DSL2: recursively scrape every module file for `container "xxx"` lines
    container_re = re.compile(r"\s*container\s+[\"']([^\"']+)[\"']")
    modules_dir = os.path.join(self.outdir, "workflow", "modules")
    for subdir, dirs, files in os.walk(modules_dir):
        for fname in files:
            if not fname.endswith(".nf"):
                continue
            with open(os.path.join(subdir, fname), "r") as fh:
                hits = [m.group(1) for m in (container_re.match(line) for line in fh) if m]
            # Prefer the first http(s) URL; otherwise fall back to the first hit
            chosen = next((h for h in hits if h.startswith("http")), hits[0] if hits else None)
            if chosen is not None:
                found.append(chosen.strip('"').strip("'"))

    # Deduplicate and sort, then drop any names with dynamic Nextflow logic
    self.containers = []
    for container in sorted(set(found)):
        if "{" in container and "}" in container:
            log.error(
                f"[red]Container name [green]'{container}'[/] has dynamic Nextflow logic in name - skipping![/]"
            )
            log.info("Please use a 'nextflow run' command to fetch this container. Ask on Slack if you need help.")
        else:
            self.containers.append(container)

    log.info("Found {} container{}".format(len(self.containers), "s" if len(self.containers) > 1 else ""))
def find_container_images(self):
    """Find container image names for workflow.

    Starts by using `nextflow config` to pull out any process.container
    declarations. This works for DSL1. It should return a simple string
    with resolved logic.

    Second, we look for DSL2 containers. These can't be found with
    `nextflow config` at the time of writing, so we scrape the pipeline
    files. This returns raw source code that will likely need to be
    cleaned. If multiple containers are found, prioritise any prefixed
    with http for direct download.

    Example syntax:

    Early DSL2::

        if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
            container "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0"
        } else {
            container "quay.io/biocontainers/fastqc:0.11.9--0"
        }

    Later DSL2::

        container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
            'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
            'quay.io/biocontainers/fastqc:0.11.9--0' }"

    DSL1 / Special case DSL2::

        container "nfcore/cellranger:6.0.2"

    Side effects: populates ``self.nf_config`` (parsed pipeline config) and
    ``self.containers`` (sorted, deduplicated list of container strings).
    """
    log.debug("Fetching container names for workflow")
    containers_raw = []

    # Use linting code to parse the pipeline nextflow config
    # NOTE(review): assumes the workflow has already been downloaded to
    # <outdir>/workflow — confirm callers run the download step first
    self.nf_config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, "workflow"))

    # Find any config variables that look like a container (DSL1 style)
    for k, v in self.nf_config.items():
        if k.startswith("process.") and k.endswith(".container"):
            containers_raw.append(v.strip('"').strip("'"))

    # Recursive search through any DSL2 module files for container spec lines.
    for subdir, dirs, files in os.walk(os.path.join(self.outdir, "workflow", "modules")):
        for file in files:
            if file.endswith(".nf"):
                with open(os.path.join(subdir, file), "r") as fh:
                    # Look for any lines with `container = "xxx"`
                    this_container = None
                    contents = fh.read()
                    # re.S so a container string spanning multiple lines still matches
                    matches = re.findall(r"container\s*\"([^\"]*)\"", contents, re.S)
                    if matches:
                        for match in matches:
                            # Look for a http download URL.
                            # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/3809435/713980
                            url_regex = r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
                            url_match = re.search(url_regex, match, re.S)
                            if url_match:
                                this_container = url_match.group(0)
                                break  # Prioritise http, exit loop as soon as we find it
                            # No https download, is the entire container string a docker URI?
                            else:
                                # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/39672069/713980
                                docker_regex = r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(?<!-)(?:\.(?!-)[a-zA-Z0-9-]{1,63}(?<!-))*(?::[0-9]{1,5})?/)?((?![._-])(?:[a-z0-9._-]*)(?<![._-])(?:/(?![._-])[a-z0-9._-]*(?<![._-]))*)(?::(?![.-])[a-zA-Z0-9_.-]{1,128})?$"
                                docker_match = re.match(docker_regex, match.strip(), re.S)
                                if docker_match:
                                    # Keep looping: a later http match may still override this
                                    this_container = docker_match.group(0)
                                # Don't recognise this, throw a warning
                                else:
                                    log.error(f"[red]Cannot parse container string, skipping: [green]{match}")
                        # Save at most one container per module file
                        if this_container:
                            containers_raw.append(this_container)

    # Remove duplicates and sort
    self.containers = sorted(list(set(containers_raw)))

    log.info("Found {} container{}".format(len(self.containers), "s" if len(self.containers) > 1 else ""))