Esempio n. 1
0
    def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url,
                 provenance):
        """
        Store the service configuration and create the client objects.

        scratch_dir, workspace_url, callback_url, srv_wiz_url and provenance
        are each kept on the instance under the same name.

        provenance must support len() and indexing. When it is non-empty and
        its first entry has a 'subactions' key, the version to run is taken
        from get_version_from_subactions('kb_Bwa', ...); otherwise the
        version stays at 'release'. The chosen version is printed.
        """
        self.scratch_dir = scratch_dir
        self.workspace_url = workspace_url
        self.callback_url = callback_url
        self.srv_wiz_url = srv_wiz_url
        self.provenance = provenance

        # Start from the 'release' tag; the provenance subactions may pin a
        # more specific version (per the original note: an exact hash when
        # available).
        self.my_version = 'release'
        has_subactions = (len(provenance) > 0
                          and 'subactions' in provenance[0])
        if has_subactions:
            self.my_version = self.get_version_from_subactions(
                'kb_Bwa', provenance[0]['subactions'])
        print('Running kb_Bwa version = ' + self.my_version)

        # Clients and runners used by the other methods of this class.
        self.ws = Workspace(self.workspace_url)
        self.bwa = BwaRunner(self.scratch_dir)
        self.parallel_runner = KBParallel(self.callback_url)
        self.qualimap = kb_QualiMap(self.callback_url)
Esempio n. 2
0
    def run_batch(self, reads_refs, params):
        """
        Run HISAT2 once per reads object through KBParallel and collect the
        resulting alignments into a single alignment set.

        reads_refs: list of dicts, one per reads object, each shaped like
            {
                "ref": reads object reference,
                "condition": condition for that ref (string, optional)
            }
        params: dict of run parameters. This method reads "sampleset_ref",
            "alignmentset_suffix" and "ws_name", and optionally "condition".
            A copy of params is sent with every task, with "build_report",
            "sampleset_ref" and "condition" overridden per reads object.

        Returns a tuple (alignments, output_ref): alignments maps each reads
        reference to the alignment entry reported for it by its task, and
        output_ref is whatever upload_alignment_set returns for the set.

        Raises RuntimeError as soon as a task result has a non-zero
        "is_error" flag.
        """
        sampleset_ref = params["sampleset_ref"]
        # Name of the input sample set; used as the prefix of the output
        # alignment set name.
        set_name = get_object_names(
            [sampleset_ref], self.workspace_url)[sampleset_ref]

        # One KBParallel task per reads object.
        tasks = []
        for reads_ref in reads_refs:
            task_params = dict(params)  # copy, so params itself is untouched
            task_params.update({
                "build_report": 0,
                "sampleset_ref": reads_ref["ref"],
                "condition": reads_ref.get("condition", "unspecified"),
            })
            tasks.append({
                "module_name": "kb_hisat2",
                "function_name": "run_hisat2",
                "version": self.my_version,
                "parameters": task_params,
            })

        # For local testing, uncomment the two concurrency settings below.
        batch_run_params = {
            "tasks": tasks,
            "runner": "parallel",
            # "concurrent_local_tasks": 3,
            # "concurrent_njsw_tasks": 0,
            "max_retries": 2,
        }
        results = KBParallel(self.callback_url).run_batch(
            batch_run_params)["results"]

        alignment_items = []
        alignments = {}
        # Results are matched to tasks and reads_refs by position.
        for idx, result in enumerate(results):
            package = result["result_package"]
            if result["is_error"] != 0:
                raise RuntimeError(
                    "Failed a parallel run of HISAT2! {}".format(
                        package["error"]))
            reads_ref = tasks[idx]["parameters"]["sampleset_ref"]
            alignment = package["result"][0]["alignment_objs"][reads_ref]
            # NOTE(review): the label falls back to params["condition"]
            # before "unspecified", while the task's own "condition" above
            # falls back straight to "unspecified" - confirm this difference
            # is intended.
            label = reads_refs[idx].get(
                "condition", params.get("condition", "unspecified"))
            alignment_items.append({
                "ref": alignment["ref"],
                "label": label,
            })
            alignments[reads_ref] = alignment

        # Build the final alignment set.
        output_ref = self.upload_alignment_set(
            alignment_items,
            set_name + params["alignmentset_suffix"],
            params["ws_name"])
        return (alignments, output_ref)