Ejemplo n.º 1
0
class Main(Step):
    """From multi-lane FASTQ files to merged, duplicate-marked BAMs,
    with conditional execution depending on QC metrics.

    To run locally, change into the project directory and type:

    $ python -m sampleqc run --manifest_filename manifest.tsv --project_name my_project
    """

    manifest_filename = Input(str)
    project_name = Input(str)
    qc_summary = Output(File)

    def execute(self):
        """Top-level execution method; the automation starts here."""

        # Set up the execution project and stage apps/reference files.
        Context().initialize(project_name=self.project_name)

        # Parse the manifest into a cohort (imports FASTQs, sets metadata).
        cohort = load_manifest(self.manifest_filename)

        # Kick off per-sample processing. Step outputs are promises, so
        # all samples are processed in parallel.
        processed_bams = []
        for sample in cohort.samples:
            step = ProcessSample(fastqs=sample.fastqs, name_=sample.id)
            processed_bams.append(step.processed_bam)

        # Aggregate BAM QC metrics and publish the summary file.
        upload = CollectAndUploadQCSummary(processed_bams=processed_bams)
        self.qc_summary = upload.uploaded_file
Ejemplo n.º 2
0
class WESsomatic(AppStep):
    """App wrapper that runs the somatic WES variant-calling workflow
    for one tumor/normal case and exposes annotated Mutect variants."""

    case_id = Input(str)
    tumor_bam = Input(File)
    normal_bam = Input(File)

    annotated_mutect_variants = Output(File)

    def execute(self):
        refs = Context().refs

        # Assemble app inputs: the case BAM pair plus staged references.
        task_inputs = {
            "tumor_reads": self.tumor_bam,
            "normal_reads": self.normal_bam,
            "target_bed": refs["v5_core"],
            "kgsnp_database": refs["g1k_snps"],
            "kgindel_database": refs["g1k_indels"],
            "mgindel_database": refs["hc_indels"],
            "snpEff_database": refs["snpeff"],
            "cosmic_database": refs["cosmic"],
            "cache_file": refs["vep"],
            "annotation_reference": refs["grch37_fasta"],
            "ExAC_database": refs["exac"],
            "input_tar_with_reference": refs["hg19_fasta"],
            "dbSNP": refs["dbsnp_138"],
        }

        self.run_task(
            app_name="wes_somatic",
            inputs=task_inputs,
            task_name="WESsomatic-" + self.case_id,
        )

        # Forward the annotated variants produced by the task.
        self.annotated_mutect_variants = self.task.outputs[
            "annotated_mutect_variants"]
Ejemplo n.º 3
0
class Main(Step):
    """Example automation that imports FASTq files from a volume, aligns
    them with BWA, and exports the resulting BAM files back to the
    volume. Inputs are the SB project name, the SB volume ID, and the
    volume source and destination directories.

    To run this automation on your computer, type the following command
    while inside the project root directory:

      python -m app run --project_name <project-name> [--volume_id <volume-id> --src_dir <source-directory> --dest_dir <destination-directory>]
    """

    project_name = Input(str, description="Name of SB execution project")
    volume_id = Input(
        str,
        description="ID of volume for file import and export",
        default="external-demos/volumes_api_demo_bucket",
    )
    src_dir = Input(
        str,
        description="Source directory on volume containing input FASTq files",
        default="",
    )
    dest_dir = Input(
        str,
        description=
        "Target directory on volume where outputs will be exported to",
        default="automation/import-run-export/result",
    )

    def execute(self):
        """Execution starts here."""

        # Set up the automation context (execution project + volume).
        ctx = Context().initialize(self.project_name, self.volume_id)

        # Import input FASTq files and group them into samples.
        samples = ImportFiles(src_dir=self.src_dir).samples

        # Align each sample with BWA. App outputs are promises, so the
        # samples are processed in parallel (lazy evaluation).
        bams = [
            BWAmem(f"BWAmem-{sample.sample_id}",
                   input_reads=sample.fastq_files).aligned_reads
            for sample in samples
        ]

        # Export all BAM files to the volume; the export step starts
        # executing as soon as every BAM promise has resolved.
        ExportFiles(files=bams,
                    to_volume=ctx.volume,
                    prefix=self.dest_dir,
                    overwrite=True)
Ejemplo n.º 4
0
class ProcessBam(Step):
    """Processes a single BAM file. Execution is conditioned both on an
    automation setting (static conditional) and on an alignment QC
    metric (dynamic conditional)."""

    input_bam = Input(File)
    processed_bam = Output(File)
    qc_metrics = Output(QCMetrics)

    def execute(self):
        """Runs alignment QC and, when appropriate, duplicate marking."""

        # Always compute alignment quality metrics and expose them.
        metrics_step = PicardAlignmentSummaryMetrics(input_bam=self.input_bam)
        self.qc_metrics = metrics_step.qc_metrics

        # Static conditional: the config flag is available immediately,
        # so this check does not block execution.
        if self.config_.skip_duplicate_marking:
            self.processed_bam = self.input_bam
            return

        # Dynamic conditional: reading the QC metrics blocks this thread
        # until the metrics computation has finished.
        if not bam_qc_metrics_ok(self.qc_metrics, self.config_):
            logging.info(f"Sample failed QC: {self.input_bam.name}")
            self.processed_bam = self.input_bam
            return

        # Passed all checks: mark duplicates, return the de-duped BAM.
        dedup = PicardMarkDuplicates(input_bam=self.input_bam)
        self.processed_bam = dedup.deduped_bam
Ejemplo n.º 5
0
class ProcessSample(Step):
    """Processes a single sample."""

    fastqs = Input(List[File])
    aligned_bam = Output(File)
    processed_bam = Output(File)
    bam_qc_metrics = Output(QCMetrics)

    def execute(self):
        """Trims, filters, aligns, and post-processes one sample."""

        # Trim all lanes (paired-end, with FastQC enabled).
        trimmed = Trimgalore(reads=self.fastqs, paired=True, fastqc=True)

        # Keep only FASTQ pairs that meet the quality cutoff.
        qc_filter = FilterFastq(input_fastq=trimmed.trimmed_reads)

        # Align the remaining lanes with BWA. Assigning the output here
        # immediately unblocks other steps waiting on the BAM promise,
        # even before this execute() finishes.
        self.aligned_bam = BWAmem(fastqs=qc_filter.pass_fastq).merged_bam

        # Post-process the BAM (conditional logic lives inside the step).
        bam_step = ProcessBam(input_bam=self.aligned_bam)

        # Expose the processed BAM and its QC metrics as outputs.
        self.processed_bam = bam_step.processed_bam
        self.bam_qc_metrics = bam_step.qc_metrics
Ejemplo n.º 6
0
class Trimgalore(AppStep):
    """App wrapper that runs Trim Galore on the platform."""

    reads = Input(List[File])
    paired = Input(bool)
    fastqc = Input(bool)
    trimmed_reads = Output(List[File])

    def execute(self):
        # Name the task after the sample ID of the first input file.
        sample_id = self.reads[0].metadata["sample_id"]

        self.run_task(
            app_name="trimgalore",
            inputs={
                "reads": self.reads,
                "paired": self.paired,
                "fastqc": self.fastqc,
            },
            task_name="Trimgalore-" + sample_id,
        )

        # Forward the trimmed reads produced by the task.
        self.trimmed_reads = self.task.outputs["trimmed_reads"]
Ejemplo n.º 7
0
class FilterFastq(Step):
    """Filters out FASTq files that do not meet the QC criteria."""

    input_fastq = Input(List[File])
    pass_fastq = Output(List[File])

    def execute(self):
        # Keep only files strictly larger than the configured minimum.
        min_size = self.config_.qc.min_fastq_size
        passing = []
        for fastq in self.input_fastq:
            if fastq.size > min_size:
                passing.append(fastq)
        self.pass_fastq = passing
Ejemplo n.º 8
0
class BAMprep(AppStep):
    """App wrapper that runs BAM preparation for one sample (alignment,
    dedup, recalibration, coverage/HS metrics) and additionally parses
    the median target coverage out of the HS metrics file."""

    sample_id = Input(str)
    fastq_files = Input(List[File])

    dedup_metrics = Output(File)
    recal_table = Output(File)
    alignment_metrics = Output(File)
    hs_metrics = Output(File)
    per_target_coverage = Output(File)
    output_bam = Output(File)
    median_target_coverage = Output(int)

    def execute(self):
        """Runs the bam_prep app and forwards its outputs."""
        ctx = Context()
        self.run_task(
            app_name="bam_prep",
            inputs={
                "input_reads": self.fastq_files,
                "input_tar_with_reference": ctx.refs["hg19_fasta"],
                "bait_bed": ctx.refs["sureselect_xt"],
                "target_bed": ctx.refs["v5_core"],
                "kgsnp_database": ctx.refs["g1k_snps"],
                "mgindel_database": ctx.refs["hc_indels"],
            },
            task_name="BAMprep-" + self.sample_id,
        )

        # Forward task outputs to step outputs.
        # Fix: the original assigned "hs_metrics" twice; the redundant
        # second assignment has been removed.
        self.dedup_metrics = self.task.outputs["dedup_metrics"]
        self.recal_table = self.task.outputs["recal_table"]
        self.alignment_metrics = self.task.outputs["alignment_metrics"]
        self.hs_metrics = self.task.outputs["hs_metrics"]
        self.per_target_coverage = self.task.outputs["per_target_coverage"]
        self.output_bam = self.task.outputs["output_bam"]

        # Derived output: parse median coverage from the HS metrics file.
        self.median_target_coverage = self.get_median_target_coverage(
            self.hs_metrics)

    def get_median_target_coverage(self, file):
        """Parses the median target coverage from an HS metrics file.

        Returns the integer in column index 23 of the metrics row (the
        line starting with "SureSelect"), or None when no such line
        exists. Column 23 presumably holds MEDIAN_TARGET_COVERAGE in
        this Picard HS-metrics layout — TODO confirm against the file
        header.
        """
        for line in file.content().split("\n"):
            if line.startswith("SureSelect"):
                return int(line.strip().split("\t")[23])
        return None
Ejemplo n.º 9
0
class Main(Step):
    """Counts words in a text file, one WordCounter step per line."""

    file_name = Input(str)

    def execute(self):
        # Fan out one WordCounter per line. Steps created in a loop
        # need unique explicit names, hence the index in the name.
        counts = []
        with open(str(self.file_name), "r") as f:
            for idx, line in enumerate(f):
                counter = WordCounter(f"counter{idx}", line=line)
                counts.append(counter.count)

        logging.info(f"Found {sum(counts)} words.")
Ejemplo n.º 10
0
class ProcessSample(Step):
    """Processes a single sample."""

    fastqs = Input(List[File])
    processed_bam = Output(ProcessedBam)

    def execute(self):
        # Trim -> filter -> align -> post-process, chained through step
        # output promises.
        trimmed = Trimgalore(reads=self.fastqs, paired=True, fastqc=True)
        fastq_filter = FilterFastq(input_fastq=trimmed.trimmed_reads)
        aligned = BWAmem(fastqs=fastq_filter.pass_fastq).merged_bam
        self.processed_bam = ProcessBam(input_bam=aligned).processed_bam
Ejemplo n.º 11
0
class ImportFilesAndGroupBySample(Step):
    """Finds FASTq files on a volume, imports them into the project,
    sets file metadata, and returns the updated files grouped by
    sample."""

    src_dir = Input(VolumeFolder)
    samples = Output(List[Sample])

    def execute(self):
        """Import, annotate, and group the input files."""
        imported_files = self.import_files_from_volume()
        updated_files = self.update_file_metadata(imported_files)
        self.samples = self.group_files_by_sample(updated_files)

    def import_files_from_volume(self):
        """Imports all fastq files found at the volume source location."""

        volume = SBApi().volumes.get(self.src_dir.volume_id)

        # Restrict to TCRBOA7 .fastq files under the configured prefix.
        fastq_paths = []
        for entry in volume.list(prefix=self.src_dir.prefix):
            if "TCRBOA7" in entry.location and entry.location.endswith(".fastq"):
                fastq_paths.append(entry.location)

        return FindOrImportFiles(
            filepaths=fastq_paths,
            from_volume=volume,
            to_project=Context().project,
        ).imported_files

    def update_file_metadata(self, files):
        """Sets file metadata in bulk for a list of files, derived from
        the file names. Bulk updates reduce API calls compared to
        per-file updates.
        Example filename: TCRBOA7-N-WEX-TEST.read1.fastq"""

        metadata = [
            {
                "sample_id": file.name.split("-WEX")[0],
                "paired_end": file.name.split("read")[1].split(".")[0],
            }
            for file in files
        ]

        return SetMetadataBulk(to_files=files, metadata=metadata).updated_files

    def group_files_by_sample(self, files):
        """Groups files into a list of Sample objects for easier
        downstream processing."""

        groups = {}
        for file in files:
            sid = file.metadata["sample_id"]
            # Create the sample bucket lazily on first sighting.
            if sid not in groups:
                groups[sid] = Sample(sid)
            groups[sid].fastq_files.append(file)

        return list(groups.values())
Ejemplo n.º 12
0
class PicardMarkDuplicates(AppStep):
    """App wrapper that runs Picard MarkDuplicates on one BAM file."""

    input_bam = Input(File)
    deduped_bam = Output(File)

    def execute(self):
        # Name the task after the BAM's sample ID metadata.
        sample_id = self.input_bam.metadata["sample_id"]

        self.run_task(
            app_name="markdup",
            inputs={"input_bam": [self.input_bam]},
            task_name="MarkDup-" + sample_id,
        )

        # Forward the de-duplicated BAM produced by the task.
        self.deduped_bam = self.task.outputs["deduped_bam"]
Ejemplo n.º 13
0
class MultiQC(AppStep):
    """App wrapper that aggregates QC reports with MultiQC and exposes
    both the HTML and PDF reports."""

    input_files = Input(List[File])
    config_files = Input(Optional[List[File]])
    sample_names = Input(Optional[File])

    html_report = Output(File)
    pdf_report = Output(File)

    def execute(self):
        # Fix: removed unused local `ctx = Context()` — no reference
        # files are needed by this app.
        self.run_task(
            app_name="multi_qc",
            inputs={
                "in_reports": self.input_files,
                "config": self.config_files,
                "sample_names": self.sample_names,
                # Request the PDF report in addition to the HTML one.
                "pdf": True,
            },
            task_name="MultiQC",
        )

        self.html_report = self.task.outputs["out_html"]
        self.pdf_report = self.task.outputs["out_pdf"]
Ejemplo n.º 14
0
class Main(Step):
    """From multi-lane FASTQ files to merged, duplicate-marked BAMs,
    with conditional execution depending on QC metrics.

    To run locally, change into the project directory and type:

    $ python -m sampleqc run --manifest_filename manifest.tsv --project_name my_project
    """

    manifest_filename = Input(str)
    project_name = Input(str)
    qc_summary = Output(File)

    def execute(self):
        """Top-level execution method; the automation starts here."""

        # Set up the execution project and stage apps/reference files.
        Context().initialize(project_name=self.project_name)

        # Parse the manifest into a cohort (imports FASTQs, sets metadata).
        cohort = load_manifest(self.manifest_filename)

        for sample in cohort.samples:
            # Process each sample in a separate step. Steps created in a
            # loop must be named explicitly to stay unique.
            ps = ProcessSample(f"Process-{sample.id}", fastqs=sample.fastqs)

            # Stash per-sample results for the aggregation step below.
            sample.aligned_bam = ps.aligned_bam
            sample.bam_qc_metrics = ps.bam_qc_metrics

        # Upload the QC metrics summary file to the SB platform and
        # expose the uploaded file as an output.
        all_metrics = [s.bam_qc_metrics for s in cohort.samples]
        self.qc_summary = CollectAndUploadQCSummary(
            qc_metrics=all_metrics
        ).uploaded_file
Ejemplo n.º 15
0
class CollectAndUploadQCSummary(Step):
    """Collects BAM QC metrics from all processed samples and uploads a
    summary file in tab-separated format to the SB project. Overwrites
    any existing file. Returns the uploaded file object."""

    qc_metrics = Input(List[QCMetrics])
    uploaded_file = Output(File)

    def execute(self):
        """Writes the QC summary TSV to a local path, then uploads it."""

        # NOTE: a fixed path (not a transient temporary file) is used on
        # purpose: the actual upload happens in another thread, so the
        # file must still exist after this method returns.
        temp_filename = tempfile.gettempdir() + "/bam_qc_metrics.tsv"

        # Fix: use a context manager so the handle is closed even if a
        # write fails (the original bare open()/close() leaked the
        # handle on exceptions).
        with open(temp_filename, "wt") as temp:
            # header row
            temp.write(
                "\t".join(
                    [
                        "sample_id",
                        "bam_file",
                        "pct_pf_reads_aligned",
                        "strand_balance",
                        "status",
                    ]
                )
                + "\n"
            )

            # one row per sample; reading qc values blocks until the
            # upstream metrics have been computed
            for qc in self.qc_metrics:
                temp.write(
                    "\t".join(
                        [
                            qc.bam_file.metadata["sample_id"],
                            qc.bam_file.name,
                            str(qc.pct_pf_reads_aligned),
                            str(qc.strand_balance),
                            "PASS" if bam_qc_metrics_ok(qc, self.config_) else "FAIL",
                        ]
                    )
                    + "\n"
                )

        # Upload the file to the platform (overwrites existing file).
        self.uploaded_file = UploadFile(
            local_path=temp_filename, to_project=Context().project, overwrite=True
        ).file
Ejemplo n.º 16
0
class BWAmem(AppStep):
    """App wrapper that aligns a sample's FASTQ files with BWA-MEM."""

    fastqs = Input(List[File])
    merged_bam = Output(File)

    def execute(self):
        ctx = Context()

        # Name the task after the sample ID of the first FASTQ file.
        sample_id = self.fastqs[0].metadata["sample_id"]

        self.run_task(
            app_name="bwa",
            inputs={
                "FASTQ": self.fastqs,
                "Input_reference": ctx.refs["bwa_bundle"],
            },
            task_name="BWAmem-" + sample_id,
        )

        # Forward the merged BAM produced by the task.
        self.merged_bam = self.task.outputs["merged_bam"]
Ejemplo n.º 17
0
class ProcessBam(Step):
    """Processes a single BAM file, including alignment QC.
    If duplicate marking is not required (static conditional) or the
    BAM failed alignment QC (dynamic conditional), the input BAM is
    returned without further processing. Otherwise duplicates are
    marked and the deduplicated BAM is returned."""

    input_bam = Input(File)
    processed_bam = Output(ProcessedBam)

    def execute(self):
        # Always run alignment QC; reading its metrics below blocks
        # until they are available.
        asm = PicardAlignmentSummaryMetrics(input_bam=self.input_bam)
        qc_passed = bam_qc_metrics_ok(asm.qc_metrics, self.config_)

        if not self.config_.skip_duplicate_marking and qc_passed:
            # Mark duplicates and return the de-duped BAM with metrics.
            md = PicardMarkDuplicates(input_bam=self.input_bam)
            self.processed_bam = ProcessedBam(md.deduped_bam, asm.qc_metrics)
        else:
            # Skip further processing; pass through the input BAM.
            self.processed_bam = ProcessedBam(self.input_bam, asm.qc_metrics)
Ejemplo n.º 18
0
class BWAmem(Step):
    """App wrapper step that runs BWA-MEM on the SB platform. The task
    is named after the sample ID metadata plus a timestamp."""

    input_reads = Input(List[File])
    aligned_reads = Output(File)

    def execute(self):
        ctx = Context()

        # Build a unique task name: sample ID + current timestamp.
        sample_id = self.input_reads[0].metadata["sample_id"]
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        task_name = "BWAmem - " + sample_id + " - " + timestamp

        task = FindOrCreateAndRunTask(
            new_name=task_name,
            inputs={
                "input_reads": self.input_reads,
                "reference_index_tar": ctx.refs["bwa_bundle"],
            },
            app=ctx.apps["bwa"],
            in_project=ctx.project,
        ).finished_task

        self.aligned_reads = task.outputs["aligned_reads"]
Ejemplo n.º 19
0
class PicardAlignmentSummaryMetrics(AppStep):
    """App wrapper that computes Picard alignment summary metrics for a
    BAM file and parses selected values into a QCMetrics object."""

    input_bam = Input(File)

    summary_metrics_file = Output(File)
    qc_metrics = Output(QCMetrics)

    def execute(self):
        ctx = Context()
        self.run_task(
            app_name="alignmentqc",
            inputs={
                "input_bam": self.input_bam,
                "reference": ctx.refs["reference_fasta"],
            },
            task_name="AlignmentQC-" + self.input_bam.metadata["sample_id"],
        )

        self.summary_metrics_file = self.task.outputs["summary_metrics"]
        self.qc_metrics = self.parse_qc_from_metrics_file()

        logging.info(
            f"pct_pf_reads_aligned: {self.qc_metrics.pct_pf_reads_aligned}")
        logging.info(f"strand balance: {self.qc_metrics.strand_balance}")

    def parse_qc_from_metrics_file(self):
        """Reads QC metrics from the Picard output file into a QCMetrics
        object. Returns None if no PAIR record is found."""

        # Stream the file chunk by chunk; the PAIR record is assumed to
        # lie entirely within a single chunk — TODO confirm chunking
        # guarantees of File.stream().
        for chunk in self.summary_metrics_file.stream():
            for line in chunk.decode("utf-8").split("\n"):
                if line.startswith("PAIR"):
                    record = line.strip().split("\t")
                    # Columns 6 and 19 presumably hold
                    # PCT_PF_READS_ALIGNED and STRAND_BALANCE in this
                    # Picard layout — verify against the file header.
                    return QCMetrics(
                        bam_file=self.input_bam,
                        pct_pf_reads_aligned=float(record[6]),
                        strand_balance=float(record[19]),
                    )
Ejemplo n.º 20
0
def generate_cwl_step(
    app, project, execute_method=run_task, import_suggested_files=True
):
    """
    Generates a Step object with Input and Output ports named the same
    as the given CWL app.

    :param app: App to wrap — an sb.App instance, or an app ID string
        (a string is resolved by copying the app into *project*)
    :param project: Project where the app resides
    :param execute_method: Execute method to be used in this step.
    :param import_suggested_files: Import suggested files in the project
    :return: Freyja Step
    """
    # Port dictionaries keyed by CWL input/output ID.
    input_dict = {}
    outp_dict = {}

    # Resolve the app: an sb.App is used as-is; a string ID is copied
    # into the target project first.
    if isinstance(app, sb.App):
        pass
    elif isinstance(app, str):
        app = FindOrCopyApp(app_id=app, to_project=project, name_=f"Copy {app}").app

    suggested_values = get_suggested_values(app)

    # Materialize suggested file values: either import them into the
    # project or drop them (set to None) when imports are disabled.
    for key in suggested_values:
        if isinstance(suggested_values[key], list):
            # NOTE(review): only the first element's type is inspected —
            # assumes homogeneous lists; confirm upstream guarantee.
            if isinstance(suggested_values[key][0], sb.File):
                if import_suggested_files:
                    suggested_values[key] = FindOrCopyFiles(
                        "Copying suggested file {} for {}".format(key, app.id),
                        files=suggested_values[key],
                        to_project=project,
                    ).copied_files
                else:
                    suggested_values[key] = None
        elif isinstance(suggested_values[key], sb.File):
            if import_suggested_files:
                suggested_values[key] = FindOrCopyFiles(
                    "Copying suggested file {} for {}".format(key, app.id),
                    files=[suggested_values[key]],
                    to_project=project,
                ).copied_files[0]
            else:
                suggested_values[key] = None
    cwl_version = app.raw["cwlVersion"]
    if cwl_version == "v1.0":
        # CWL v1.0: map each app input to an optional Input port of the
        # matching Python type, seeded with any suggested value.
        for inp in app.raw["inputs"]:
            inp_id = inp["id"]
            inp_type = inp["type"]
            value = None
            if inp_id in suggested_values:
                value = suggested_values[inp_id]
            if isinstance(inp_type, str) or isinstance(inp_type, dict):
                # Membership tests against CWLTypes.* — presumably each
                # is a collection of accepted CWL type descriptors.
                if inp_type in CWLTypes.File:
                    input_dict[inp_id] = Input(Optional[sb.File], default=value)
                elif inp_type in CWLTypes.Array:
                    input_dict[inp_id] = Input(Optional[list], default=value)
                elif inp_type in CWLTypes.String:
                    input_dict[inp_id] = Input(Optional[str], default=value)
                elif inp_type in CWLTypes.Int:
                    input_dict[inp_id] = Input(Optional[int], default=value)
                elif inp_type in CWLTypes.Bool:
                    input_dict[inp_id] = Input(Optional[bool], default=value)
                elif inp_type in CWLTypes.Float:
                    input_dict[inp_id] = Input(Optional[float], default=value)
            elif isinstance(inp_type, list):
                # A list type presumably encodes an optional type such
                # as ["null", <type>]; element [1] is the real type —
                # confirm against the CWL schema.
                if inp_type[1] in CWLTypes.Array:
                    input_dict[inp_id] = Input(Optional[list], default=value)
                elif inp_type[1]["type"] == "enum":
                    input_dict[inp_id] = Input(Optional[str], default=value)

            else:
                # NOTE(review): this branch looks unreachable — str,
                # dict, and list types are all handled above. Kept as a
                # fallback for exotic schemas.
                if inp["type"][1]["type"] == "enum":
                    input_dict[inp_id] = Input(Optional[str], default=value)

        # Map each app output to an optional Output port.
        for outp in app.raw["outputs"]:
            outp_id = outp["id"]
            outp_type = get_type(outp_id, app.raw)
            if outp_type in CWLTypes.File:
                outp_dict[outp_id] = Output(Optional[sb.File])
            elif outp_type in CWLTypes.Array:
                outp_dict[outp_id] = Output(Optional[list])
            elif outp_type in CWLTypes.String:
                outp_dict[outp_id] = Output(Optional[str])
            elif outp_type in CWLTypes.Int:
                outp_dict[outp_id] = Output(Optional[int])
            elif outp_type in CWLTypes.Bool:
                outp_dict[outp_id] = Output(Optional[bool])
            elif outp_type in CWLTypes.Float:
                outp_dict[outp_id] = Output(Optional[float])

    elif cwl_version == "sbg:draft-2":
        # sbg:draft-2: IDs carry a leading "#" which is stripped, and
        # "null" entries in the type list mark optional inputs.
        for inp in app.raw["inputs"]:
            inp_id = inp["id"][1:]
            inp_type = [t for t in inp["type"] if t != "null"][0]
            value = None
            if inp_id in suggested_values:
                value = suggested_values[inp_id]
            if inp_type in CWLDraft2Types.File:
                input_dict[inp_id] = Input(Optional[sb.File], default=value)
            elif inp_type in CWLDraft2Types.Array:
                input_dict[inp_id] = Input(Optional[list], default=value)
            elif inp_type in CWLDraft2Types.String:
                input_dict[inp_id] = Input(Optional[str], default=value)
            elif inp_type in CWLDraft2Types.Int:
                input_dict[inp_id] = Input(Optional[int], default=value)
            elif inp_type in CWLDraft2Types.Bool:
                input_dict[inp_id] = Input(Optional[bool], default=value)
            elif inp_type in CWLDraft2Types.Float:
                input_dict[inp_id] = Input(Optional[float], default=value)
            elif "type" in inp_type:
                # Complex type descriptor (dict-like): enum/array/record.
                if inp_type["type"] == "enum":
                    input_dict[inp_id] = Input(Optional[str], default=value)
                elif inp_type["type"] == "array":
                    input_dict[inp_id] = Input(Optional[list], default=value)
                elif inp_type["type"] == "record":
                    input_dict[inp_id] = Input(Optional[dict], default=value)

        for outp in app.raw["outputs"]:
            outp_id = outp["id"][1:]
            outp_type = get_type(outp["id"], app.raw)
            if outp_type in CWLDraft2Types.File:
                outp_dict[outp_id] = Output(Optional[sb.File])
            elif outp_type in CWLDraft2Types.Array:
                outp_dict[outp_id] = Output(Optional[list])
            elif outp_type in CWLDraft2Types.String:
                outp_dict[outp_id] = Output(Optional[str])
            elif outp_type in CWLDraft2Types.Int:
                outp_dict[outp_id] = Output(Optional[int])
            elif outp_type in CWLDraft2Types.Bool:
                outp_dict[outp_id] = Output(Optional[bool])
            elif outp_type in CWLDraft2Types.Float:
                outp_dict[outp_id] = Output(Optional[float])
            elif "type" in outp_type:
                # NOTE(review): enum and array are not mutually
                # exclusive here (two separate `if`s) — an enum output
                # may be overwritten by the array check; likely intended
                # as elif but preserved as-is.
                if outp_type["type"] == "enum":
                    outp_dict[outp_id] = Output(Optional[str])
                if outp_type["type"] == "array":
                    outp_dict[outp_id] = Output(Optional[list])
    else:
        # Unsupported CWL version: log and abort.
        logger.error(f"CWL version not recognised: {cwl_version}")
        raise Exception(f"CWL version not recognised: {cwl_version}")

    # Always expose the resolved app and project as implicit inputs,
    # then assemble the dynamic Step class.
    input_dict["app_"] = Input(sb.App, default=app)
    input_dict["project_"] = Input(sb.Project, default=project)
    subt = Step.new(
        inputs=input_dict, outputs=outp_dict, execute=execute_method, cls_name="RunApp"
    )
    return subt
Ejemplo n.º 21
0
class WordCounter(Step):
    """Counts whitespace-separated words in a single line of text."""

    line = Input(str)
    count = Output(int)

    def execute(self):
        # split() with no arguments collapses runs of whitespace, so
        # this counts words regardless of spacing.
        tokens = self.line.strip().split()
        self.count = len(tokens)
Ejemplo n.º 22
0
class Main(Step):
    """Imports FASTq files from a cloud bucket, aligns them with BWA,
    and exports the resulting BAM files back to a cloud bucket location.

    To run this automation from your local computer, type the following
    command while inside the project root directory:

      python -m app run --project_name <project-name> [--src_dir <location>] [--dest_dir <location>]

    whereas <location> refers to a cloud bucket directory in format
    <sb-volume-id>:<bucket-prefix>. If not provided, the location
    defaults specified in the automation code are used.
    """

    project_name = Input(
        str,
        name="Project name",
        description=
        "Name of platform project. Re-uses existing project if found, otherwise create new one.",
    )
    src_dir = Input(
        VolumeFolder,
        name="Input folder",
        description="Cloud bucket location containing input FASTq files.",
        default="external-demos/volumes_api_demo_bucket:inputs",
    )
    dest_dir = Input(
        VolumeFolder,
        name="Output folder",
        description=
        "Cloud bucket location for result files. Overwrites already existing files.",
        default=
        "external-demos/volumes_api_demo_bucket:automation/import-run-export/result",
    )

    project = Output(
        Project,
        name="Analysis project",
        description="SB project in which processing took place.",
    )
    bams = Output(
        List[File],
        name="BAM files",
        description="BAM files containing aligned reads.",
    )

    def execute(self):
        """Execution starts here."""

        # Initialize the context singleton used throughout the automation.
        ctx = Context().initialize(self.project_name)

        # Stage input FASTq files, set metadata, and group by sample.
        samples = ImportFilesAndGroupBySample(src_dir=self.src_dir).samples

        # Align each sample with BWA. App outputs are promises, so the
        # samples are processed in parallel (lazy evaluation).
        aligned = []
        for sample in samples:
            # Steps created in a loop must be named explicitly (unique).
            bwa = BWAmem(f"BWAmem-{sample.sample_id}",
                         input_reads=sample.fastq_files)
            aligned.append(bwa.aligned_reads)
        self.bams = aligned

        # Export all BAM files to the volume; the export step starts
        # executing as soon as every BAM promise has resolved.
        export_volume = SBApi().volumes.get(self.dest_dir.volume_id)
        ExportFiles(
            files=self.bams,
            to_volume=export_volume,
            prefix=self.dest_dir.prefix,
            overwrite=True,
        )

        # Expose the analysis project as an output.
        self.project = ctx.project