Ejemplo n.º 1
0
def run(madeline_process: Process, ped_stream: List[str]):
    """Run madeline and generate a file with the results.

    Args:
        madeline_process: object whose ``run_command`` is called with the madeline arguments.
        ped_stream: lines of madeline input; they are newline-joined into a temporary file.

    Returns:
        The path (str) of the ``madeline.xml`` file inside a freshly created temporary
        directory, with the madeline script tag removed from its content.
    """
    output_dir = mkdtemp()
    output_prefix = f"{output_dir}/madeline"
    # The output is expected at "<prefix>.xml" once the command has run.
    out_path = f"{output_prefix}.xml"

    # write the input to a temp file
    with NamedTemporaryFile("w") as in_file:
        in_file.write("\n".join(ped_stream))
        # flush so the content is on disk before the command reads the file by name
        in_file.flush()
        madeline_call = [
            "--color",
            "--nolabeltruncation",
            "--outputprefix",
            output_prefix,
            in_file.name,
        ]
        madeline_process.run_command(madeline_call)

    with open(out_path, "r") as output:
        svg_content = output.read()

    # strip away the script tag; str.replace returns a new string, so the result must be kept
    script_tag = '<script type="text/javascript" xlink:href=' '"javascript/madeline.js"></script>'
    svg_content = svg_content.replace(script_tag, "")

    with open(out_path, "w") as out_handle:
        out_handle.write(svg_content)

    return out_path
Ejemplo n.º 2
0
    def __init__(self, config: dict):
        """Create the sbatch process wrapper and read the crunchy settings.

        Args:
            config: nested mapping; reads ``config["crunchy"]["slurm"]`` (keys
                "account", "conda_env", "mail_user") and
                ``config["crunchy"]["cram_reference"]``.
        """
        self.process = Process("sbatch")

        crunchy_settings = config["crunchy"]
        slurm_settings = crunchy_settings["slurm"]
        self.slurm_account = slurm_settings["account"]
        self.crunchy_env = slurm_settings["conda_env"]
        self.mail_user = slurm_settings["mail_user"]
        self.reference_path = crunchy_settings["cram_reference"]
Ejemplo n.º 3
0
class ShippingAPI:
    """Wrapper used to talk to the command line tool shipping.

    Shipping is used as a unified tool for deploying in the Clinical Genomics environments
    """

    def __init__(self, config: Dict[str, str]):
        """Store the config and build the process wrapper.

        Args:
            config: mapping providing the keys "host_config" and "binary_path".
        """
        self.config = config
        self.host_config = config["host_config"]
        self.binary_path = config["binary_path"]
        process_options = {
            "binary": str(self.binary_path),
            "config": self.host_config,
            "config_parameter": "--host-config",
        }
        self.process = Process(**process_options)
        self.dry_run = False

    def set_dry_run(self, dry_run: bool) -> None:
        """Log and store the new dry run setting."""
        LOG.info("Set dry run to %s", dry_run)
        self.dry_run = dry_run

    def deploy(self, app_name: str, app_config: Path = None):
        """Deploy a tool according to the specifications in the config files.

        Args:
            app_name: name of the tool; passed as "--tool-name" when no app config is given.
            app_config: optional path passed as "--app-config" instead of the tool name.
        """
        LOG.info("Deploying the %s software", app_name)
        if app_config:
            LOG.info("Use app config %s", app_config)
            command = ["--app-config", str(app_config)]
        else:
            command = ["--tool-name", app_name]
        command.append("deploy")

        self.process.run_command(command, dry_run=self.dry_run)
        # Forward everything the process wrapper reports on stderr to the log.
        for stderr_line in self.process.stderr_lines():
            LOG.info(stderr_line)
Ejemplo n.º 4
0
def test_process_run_invalid_command():
    """Running a Process built on the ``false`` binary raises ``CalledProcessError``."""
    # GIVEN a process wrapping the binary "false"
    failing_process = Process(binary="false")

    # WHEN running the command
    # THEN assert an exception is raised
    with pytest.raises(CalledProcessError):
        failing_process.run_command()
Ejemplo n.º 5
0
 def __init__(self, config: Dict[str, str]):
     """Store the config and build the process wrapper for the configured binary.

     Args:
         config: mapping providing the keys "host_config" and "binary_path".
     """
     self.config = config
     self.host_config = config["host_config"]
     self.binary_path = config["binary_path"]
     process_options = {
         "binary": str(self.binary_path),
         "config": self.host_config,
         "config_parameter": "--host-config",
     }
     self.process = Process(**process_options)
     self.dry_run = False
Ejemplo n.º 6
0
    def __init__(self, config: dict, analysis_type: str = "wgs"):
        """Pick the loqusdb binary and config for the analysis type and build the process.

        Args:
            config: mapping with a "loqusdb" section and, for "wes", a "loqusdb-wes" section;
                each section provides "config_path" and "binary_path".
            analysis_type: "wes" switches to the "loqusdb-wes" section.
        """
        super().__init__()

        self.analysis_type = analysis_type

        # The default section is always read first, also when "wes" overrides it below.
        default_section = config["loqusdb"]
        self.loqusdb_config = default_section["config_path"]
        self.loqusdb_binary = default_section["binary_path"]

        if self.analysis_type == "wes":
            wes_section = config["loqusdb-wes"]
            self.loqusdb_config = wes_section["config_path"]
            self.loqusdb_binary = wes_section["binary_path"]

        self.process = Process(self.loqusdb_binary, self.loqusdb_config)
Ejemplo n.º 7
0
    def __init__(self, config: CGConfig):
        """Collect the APIs and GISAID settings from the config and build the cli process."""
        self.housekeeper_api: HousekeeperAPI = config.housekeeper_api
        self.lims_api: LimsAPI = config.lims_api
        self.status_db: Store = config.status_db

        # All GISAID specific settings live under ``config.gisaid``.
        gisaid_settings = config.gisaid
        self.gisaid_submitter: str = gisaid_settings.submitter
        self.upload_password: str = gisaid_settings.upload_password
        self.upload_cid: str = gisaid_settings.upload_cid
        self.gisaid_binary: str = gisaid_settings.binary_path
        self.gisaid_log_dir: str = gisaid_settings.log_dir
        self.log_watch: str = gisaid_settings.logwatch_email

        self.email_base_settings = config.email_base_settings
        self.mutant_root_dir = Path(config.mutant.root)

        self.process = Process(binary=self.gisaid_binary)
Ejemplo n.º 8
0
def ls_process():
    """Return a Process wrapping the ``ls`` binary."""
    return Process(binary="ls")
Ejemplo n.º 9
0
def echo_process():
    """Return a Process wrapping the ``echo`` binary."""
    return Process(binary="echo")
Ejemplo n.º 10
0
 def process(self) -> Process:
     """Return the cached Process, building it from the microsalt settings on first use."""
     if self._process:
         return self._process

     microsalt_settings = self.config.microsalt
     self._process = Process(
         binary=microsalt_settings.binary_path,
         environment=microsalt_settings.conda_env,
     )
     return self._process
Ejemplo n.º 11
0
 def process(self) -> Process:
     """Return the cached Process, building it from the MIP settings on first use."""
     if self._process:
         return self._process

     # The binary is the script followed by the pipeline name.
     mip_binary = f"{self.script} {self.mip_pipeline}"
     self._process = Process(
         binary=mip_binary,
         config=self.config.mip_rd_rna.mip_config,
         environment=self.conda_env,
     )
     return self._process
Ejemplo n.º 12
0
def test_process():
    """A freshly created Process exposes its binary, its repr and an empty stdout."""
    # GIVEN the name of the 'ls' binary
    ls_binary = "ls"

    # WHEN instantiating a Process with the binary
    new_process = Process(binary=ls_binary)

    # THEN assert the instantiation worked as expected
    assert new_process.binary == ls_binary
    assert repr(new_process) == "Process:base_call:['ls']"
    assert new_process.stdout == ""
Ejemplo n.º 13
0
 def __init__(self):
     """Create the sbatch process wrapper and start with dry run disabled."""
     sbatch_process = Process("sbatch")
     self.process: Process = sbatch_process
     self.dry_run: bool = False
Ejemplo n.º 14
0
class LoqusdbAPI:
    """Wrapper around the loqusdb command line tool."""

    def __init__(self, config: dict, analysis_type: str = "wgs"):
        """Pick the loqusdb binary and config for the analysis type and build the process.

        Args:
            config: mapping with a "loqusdb" section and, for "wes", a "loqusdb-wes" section;
                each section provides "config_path" and "binary_path".
            analysis_type: "wes" switches to the "loqusdb-wes" section.
        """
        super().__init__()

        self.analysis_type = analysis_type

        # The default section is always read first, also when "wes" overrides it below.
        default_section = config["loqusdb"]
        self.loqusdb_config = default_section["config_path"]
        self.loqusdb_binary = default_section["binary_path"]

        if self.analysis_type == "wes":
            wes_section = config["loqusdb-wes"]
            self.loqusdb_config = wes_section["config_path"]
            self.loqusdb_binary = wes_section["binary_path"]

        self.process = Process(self.loqusdb_binary, self.loqusdb_config)

    def load(
        self,
        family_id: str,
        ped_path: str,
        vcf_path: str,
        gbcf_path: str,
        vcf_sv_path: str = None,
    ) -> dict:
        """Add observations from a VCF.

        Returns:
            ``{"variants": n}`` where n is parsed from the last stderr line that contains
            "inserted" (the integer after the final ":"), or 0 when no such line exists.
        """
        command = [
            "load",
            "-c",
            family_id,
            "-f",
            ped_path,
            "--variant-file",
            vcf_path,
            "--check-profile",
            gbcf_path,
            "--hard-threshold",
            "0.95",
            "--soft-threshold",
            "0.90",
        ]
        # Structural variants are only passed along for "wgs".
        if self.analysis_type == "wgs" and vcf_sv_path:
            command += ["--sv-variants", vcf_sv_path]

        self.process.run_command(command)

        inserted_count = 0
        for stderr_line in self.process.stderr_lines():
            message = stderr_line.split("INFO")[-1].strip()
            if "inserted" not in message:
                continue
            inserted_count = int(message.split(":")[-1].strip())

        return {"variants": inserted_count}

    def get_case(self, case_id: str) -> dict:
        """Find a case in the database by case id.

        Raises:
            CaseNotFoundError: when the loqusdb command prints nothing on stdout.
        """
        self.process.run_command(["cases", "-c", case_id, "--to-json"])
        raw_output = self.process.stdout

        # If case not in loqusdb, stdout of loqusdb command will be empty.
        if not raw_output:
            raise CaseNotFoundError(f"Case {case_id} not found in loqusdb")

        matching_cases = json.loads(raw_output)
        return matching_cases[0]

    def get_duplicate(self, vcf_file: str) -> dict:
        """Find matching profiles in loqusdb.

        Returns:
            The decoded JSON printed by the profile command, or an empty dict when the
            command prints nothing.
        """
        profile_command = ["profile", "--check-vcf", vcf_file, "--profile-threshold", "0.95"]

        try:
            self.process.run_command(profile_command)
        except CalledProcessError:
            # Log the failure, then let the caller deal with the original error.
            LOG.critical("Could not run profile command")
            raise

        raw_output = self.process.stdout
        if raw_output:
            return json.loads(raw_output)

        LOG.info("No duplicates found")
        return {}

    def __repr__(self):
        return f"LoqusdbAPI(binary={self.loqusdb_binary},config={self.loqusdb_config})"
Ejemplo n.º 15
0
class MadelineAPI:
    """Interface to madeline, tool to generate pedigree pictures"""

    def __init__(self, config: dict):
        """Resolve the madeline executable from ``config["madeline_exe"]`` and wrap it."""
        executable = pathlib.Path(config["madeline_exe"])
        self.madeline_binary = str(executable.absolute())
        self.process = Process(self.madeline_binary)

    @staticmethod
    def make_ped(family_id: str, samples: List[dict]) -> Iterable[str]:
        """Yield lines that are used as madeline input.

        The first line is the tab separated header; every following line is one tab
        separated row per sample. Missing or falsy values are written as ".".
        """
        header = (
            "FamilyId",
            "IndividualId",
            "Gender",
            "Father",
            "Mother",
            "Deceased",
            "Proband",
            "Affected",
        )
        gender_codes = {"male": "M", "female": "F"}
        affected_codes = {"affected": "Y", "unaffected": "N"}
        LOG.info("Generating madeline input lines")

        yield "\t".join(header)

        for sample in samples:
            yield "\t".join(
                (
                    family_id,
                    sample["sample"],
                    gender_codes.get(sample["sex"]) or ".",
                    sample.get("father") or ".",
                    sample.get("mother") or ".",
                    "Y" if sample.get("deceased") else ".",
                    "Y" if sample.get("proband") else ".",
                    affected_codes.get(sample.get("status")) or ".",
                )
            )

    @staticmethod
    def strip_script_tag(content: str) -> str:
        """Return content with the madeline javascript script tag removed."""
        script_tag = '<script type="text/javascript" xlink:href="javascript/madeline.js"></script>'
        return content.replace(script_tag, "")

    def run(self, family_id: str, samples: List[dict], out_path: str = None) -> pathlib.Path:
        """Run madeline and generate a file with the results.

        Args:
            family_id: family identifier written in every input row.
            samples: sample dicts turned into input rows by ``make_ped``.
            out_path: where the result is expected; defaults to ``madeline.xml`` in a new
                temporary directory.

        Returns:
            Path to the result file, rewritten without the script tag.
        """
        if out_path:
            out_path = pathlib.Path(out_path)
        else:
            out_path = pathlib.Path(tempfile.mkdtemp()) / "madeline.xml"

        # The prefix handed to madeline is the output path without its suffix.
        output_prefix = str(out_path.with_suffix(""))
        LOG.info("Generate madeline output to %s", out_path)

        ped_lines = self.make_ped(family_id, samples)

        # write the input to a temp file
        with tempfile.NamedTemporaryFile("w") as ped_file:
            ped_file.write("\n".join(ped_lines))
            ped_file.flush()
            self.process.run_command(
                parameters=[
                    "--color",
                    "--nolabeltruncation",
                    "--outputprefix",
                    output_prefix,
                    ped_file.name,
                ]
            )

        cleaned_content = self.strip_script_tag(out_path.read_text())
        out_path.write_text(cleaned_content)

        return out_path

    def __repr__(self):
        return f"MadelineApi({self.madeline_binary})"
Ejemplo n.º 16
0
 def __init__(self, config: dict):
     """Resolve the madeline executable from ``config["madeline_exe"]`` and wrap it."""
     executable = pathlib.Path(config["madeline_exe"])
     self.madeline_binary = str(executable.absolute())
     self.process = Process(self.madeline_binary)
Ejemplo n.º 17
0
class CrunchyAPI:
    """API that submits BAM to CRAM compression jobs through sbatch."""

    def __init__(self, config: dict):
        """Create the sbatch process wrapper and read the crunchy settings.

        Args:
            config: nested mapping; reads ``config["crunchy"]["slurm"]`` (keys
                "account", "conda_env", "mail_user") and
                ``config["crunchy"]["cram_reference"]``.
        """
        self.process = Process("sbatch")

        crunchy_settings = config["crunchy"]
        slurm_settings = crunchy_settings["slurm"]
        self.slurm_account = slurm_settings["account"]
        self.crunchy_env = slurm_settings["conda_env"]
        self.mail_user = slurm_settings["mail_user"]
        self.reference_path = crunchy_settings["cram_reference"]

    def bam_to_cram(self, bam_path: Path, ntasks: int, mem: int, dry_run: bool = False):
        """Compress BAM file into CRAM.

        Builds an sbatch script (header plus body) for the BAM file and submits it, or
        only logs it when dry_run is set.
        """
        cram_path = self.get_cram_path_from_bam(bam_path)
        flag_path = self.get_flag_path(file_path=cram_path)
        pending_path = self.get_pending_path(file_path=bam_path)

        header = self._get_slurm_header(
            job_name=bam_path.name + "_bam_to_cram",
            account=self.slurm_account,
            log_dir=bam_path.parent,
            mail_user=self.mail_user,
            conda_env=self.crunchy_env,
            ntasks=ntasks,
            mem=mem,
        )
        body = self._get_slurm_bam_to_cram(
            bam_path=bam_path,
            cram_path=cram_path,
            flag_path=flag_path,
            pending_path=pending_path,
            reference_path=self.reference_path,
        )

        self._submit_sbatch(sbatch_content="\n".join((header, body)), dry_run=dry_run)

    def _submit_sbatch(self, sbatch_content: str, dry_run: bool = False):
        """Submit slurm job, or only log the script when dry_run is set."""
        if dry_run:
            LOG.info("Would submit following to slurm:\n\n%s", sbatch_content)
            return

        with tempfile.NamedTemporaryFile(mode="w+t") as script_file:
            script_file.write(sbatch_content)
            # flush so sbatch sees the full script when it reads the file by name
            script_file.flush()
            self.process.run_command([script_file.name])
            LOG.info(self.process.stderr)
            LOG.info(self.process.stdout)

    def is_cram_compression_done(self, bam_path: Path) -> bool:
        """Check if CRAM compression already done for BAM file.

        Done means that the CRAM file, at least one of its index files and the flag file
        all exist.
        """
        cram_path = self.get_cram_path_from_bam(bam_path)
        flag_path = self.get_flag_path(file_path=cram_path)

        if not cram_path.exists():
            LOG.info("No cram-file for %s", bam_path)
            return False

        index_candidates = self.get_index_path(cram_path)
        single_suffix_index = index_candidates["single_suffix"]
        double_suffix_index = index_candidates["double_suffix"]
        if not (single_suffix_index.exists() or double_suffix_index.exists()):
            LOG.info("No index-file for %s", cram_path)
            return False

        if not flag_path.exists():
            LOG.info("No %s file for %s", FLAG_PATH_SUFFIX, cram_path)
            return False

        return True

    def is_cram_compression_pending(self, bam_path: Path) -> bool:
        """Check if cram compression has started, but not yet finished"""
        is_pending = self.get_pending_path(file_path=bam_path).exists()
        if is_pending:
            LOG.info("Cram compression is pending for %s", bam_path)
        return is_pending

    def is_bam_compression_possible(self, bam_path: Path) -> bool:
        """Check if CRAM compression for BAM file is possible"""
        bam_is_missing = bam_path is None or not bam_path.exists()
        if bam_is_missing:
            LOG.warning("Could not find bam %s", bam_path)
            return False

        already_compressed = self.is_cram_compression_done(bam_path=bam_path)
        if already_compressed:
            LOG.info("cram compression already exists for %s", bam_path)
            return False

        return True

    @staticmethod
    def get_flag_path(file_path):
        """Get path to 'finished' flag"""
        return file_path.with_suffix(FLAG_PATH_SUFFIX)

    @staticmethod
    def get_pending_path(file_path):
        """Gives path to pending-flag path"""
        return file_path.with_suffix(PENDING_PATH_SUFFIX)

    @staticmethod
    def get_index_path(file_path):
        """Get possible paths for index

        Args:
            file_path (Path): path to BAM or CRAM
        Returns (dict): path with single_suffix, e.g. .bai
            and path with double_suffix, e.g. .bam.bai
        """
        if file_path.suffix == BAM_SUFFIX:
            index_suffix = BAM_INDEX_SUFFIX
        else:
            index_suffix = CRAM_INDEX_SUFFIX

        return {
            "single_suffix": file_path.with_suffix(index_suffix),
            "double_suffix": file_path.with_suffix(file_path.suffix + index_suffix),
        }

    @staticmethod
    def get_cram_path_from_bam(bam_path: Path) -> Path:
        """Get corresponding CRAM file path from bam file path.

        Raises:
            ValueError: when bam_path does not have the BAM suffix.
        """
        if bam_path.suffix != BAM_SUFFIX:
            LOG.error("%s does not end with %s", bam_path, BAM_SUFFIX)
            raise ValueError
        return bam_path.with_suffix(CRAM_SUFFIX)

    @staticmethod
    def _get_slurm_header(
        job_name: str,
        log_dir: str,
        account: str,
        mail_user: str,
        conda_env: str,
        ntasks: int,
        mem: int,
    ) -> str:
        """Fill in the sbatch header template."""
        return SBATCH_HEADER_TEMPLATE.format(
            job_name=job_name,
            account=account,
            log_dir=log_dir,
            conda_env=conda_env,
            mail_user=mail_user,
            ntasks=ntasks,
            mem=mem,
        )

    @staticmethod
    def _get_slurm_bam_to_cram(
        bam_path: str,
        cram_path: str,
        flag_path: str,
        pending_path: str,
        reference_path: str,
    ) -> str:
        """Fill in the sbatch body template for a BAM to CRAM conversion."""
        return SBATCH_BAM_TO_CRAM.format(
            bam_path=bam_path,
            cram_path=cram_path,
            flag_path=flag_path,
            pending_path=pending_path,
            reference_path=reference_path,
        )
Ejemplo n.º 18
0
 def process(self):
     """Return the cached Process, building it from the balsamic binary path on first use."""
     if self._process:
         return self._process

     balsamic_binary = self.config.balsamic.binary_path
     self._process = Process(balsamic_binary)
     return self._process
Ejemplo n.º 19
0
class GisaidAPI:
    """Interface with the GISAID cli upload.

    Collects sample information through the status database, LIMS and Housekeeper, writes
    the csv, fasta and log files used for an upload, runs the GISAID cli and stores the
    parsed accession numbers in the completion file of the case.
    """

    def __init__(self, config: CGConfig):
        """Collect the APIs and GISAID settings from the config and build the cli process."""
        self.housekeeper_api: HousekeeperAPI = config.housekeeper_api
        self.lims_api: LimsAPI = config.lims_api
        self.status_db: Store = config.status_db
        self.gisaid_submitter: str = config.gisaid.submitter
        self.upload_password: str = config.gisaid.upload_password
        self.upload_cid: str = config.gisaid.upload_cid
        self.gisaid_binary: str = config.gisaid.binary_path
        self.gisaid_log_dir: str = config.gisaid.log_dir
        self.log_watch: str = config.gisaid.logwatch_email
        self.email_base_settings = config.email_base_settings
        self.mutant_root_dir = Path(config.mutant.root)

        self.process = Process(binary=self.gisaid_binary)

    def get_completion_file_from_hk(self, case_id: str) -> File:
        """Find the completion file in Housekeeper and return it.

        Raises:
            HousekeeperFileMissingError: when no file tagged "komplettering" is found in
                the latest version of the case bundle.
        """
        completion_file: Optional[File] = self.housekeeper_api.find_file_in_latest_version(
            case_id=case_id, tags=["komplettering"]
        )
        if not completion_file:
            msg = f"completion file missing for bundle {case_id}"
            raise HousekeeperFileMissingError(message=msg)
        return completion_file

    def get_completion_dataframe(self, completion_file: File) -> pd.DataFrame:
        """Read the completion file into a dataframe, drop duplicates, and return it.

        Only rows whose "provnummer" matches SARS_COV_REGEX are kept.
        """
        completion_df = pd.read_csv(completion_file.full_path, index_col=None, header=0)
        completion_df.drop_duplicates(inplace=True)
        completion_df = completion_df[completion_df["provnummer"].str.contains(SARS_COV_REGEX)]
        return completion_df

    def get_gisaid_sample_list(self, case_id: str) -> List[models.Sample]:
        """Get list of Sample objects eligible for upload.

        The samples are the unique "provnummer" values of the filtered completion file,
        looked up by name in the status database.
        NOTE(review): the original docstring states that eligible samples reached 20x
        coverage for >95% bases; that criterion is not checked here - presumably it decides
        which samples end up in the completion file.
        """
        completion_file = self.get_completion_file_from_hk(case_id=case_id)
        completion_df = self.get_completion_dataframe(completion_file=completion_file)
        sample_names = list(completion_df["provnummer"].unique())
        return [
            self.status_db.get_sample_by_name(name=sample_name) for sample_name in sample_names
        ]

    def get_gisaid_fasta_path(self, case_id: str) -> Path:
        """Get path to gisaid fasta: <mutant root>/<case_id>/results/<case_id>.fasta"""
        return Path(self.mutant_root_dir, case_id, "results", f"{case_id}.fasta")

    def get_gisaid_csv_path(self, case_id: str) -> Path:
        """Get path to gisaid csv: <mutant root>/<case_id>/results/<case_id>.csv"""
        return Path(self.mutant_root_dir, case_id, "results", f"{case_id}.csv")

    def get_gisaid_samples(self, case_id: str) -> List[GisaidSample]:
        """Get list of Gisaid sample objects, one per sample eligible for upload.

        Collection date, region and original lab information are fetched from LIMS.
        """
        samples: List[models.Sample] = self.get_gisaid_sample_list(case_id=case_id)
        gisaid_samples = []
        for sample in samples:
            sample_id: str = sample.internal_id
            LOG.info(f"Creating GisaidSample for {sample_id}")
            gisaid_sample = GisaidSample(
                case_id=case_id,
                cg_lims_id=sample_id,
                covv_subm_sample_id=sample.name,
                submitter=self.gisaid_submitter,
                fn=f"{case_id}.fasta",
                covv_collection_date=str(
                    self.lims_api.get_sample_attribute(lims_id=sample_id, key="collection_date")
                ),
                region=self.lims_api.get_sample_attribute(lims_id=sample_id, key="region"),
                region_code=self.lims_api.get_sample_attribute(
                    lims_id=sample_id, key="region_code"
                ),
                covv_orig_lab=self.lims_api.get_sample_attribute(
                    lims_id=sample_id, key="original_lab"
                ),
                covv_orig_lab_addr=self.lims_api.get_sample_attribute(
                    lims_id=sample_id, key="original_lab_address"
                ),
            )
            gisaid_samples.append(gisaid_sample)
        return gisaid_samples

    def create_gisaid_fasta(self, gisaid_samples: List[GisaidSample], case_id: str) -> None:
        """Write a new fasta with headers adjusted for the gisaid upload.

        An existing gisaid fasta of the case is overwritten in place; otherwise the file is
        written to the default path and added to Housekeeper.

        Raises:
            HousekeeperFileMissingError: when a sample has no "consensus-sample" fasta.
        """
        gisaid_fasta_file = self.housekeeper_api.find_file_in_latest_version(
            case_id=case_id, tags=["gisaid-fasta", case_id]
        )
        if gisaid_fasta_file:
            gisaid_fasta_path = gisaid_fasta_file.full_path
        else:
            gisaid_fasta_path: Path = self.get_gisaid_fasta_path(case_id=case_id)

        fasta_lines: List[str] = []

        for sample in gisaid_samples:
            fasta_file: File = self.housekeeper_api.find_file_in_latest_version(
                case_id=case_id, tags=[sample.cg_lims_id, "consensus-sample"]
            )
            if not fasta_file:
                raise HousekeeperFileMissingError(
                    message=f"No fasta file found for sample {sample.cg_lims_id}"
                )
            with open(str(fasta_file.full_path)) as handle:
                # Stream the file; every header line is replaced by the virus name.
                for line in handle:
                    if line.startswith(">"):
                        fasta_lines.append(f">{sample.covv_virus_name}\n")
                    else:
                        fasta_lines.append(line)

        with open(gisaid_fasta_path, "w") as write_file_obj:
            write_file_obj.writelines(fasta_lines)

        if gisaid_fasta_file:
            return

        self.housekeeper_api.add_and_include_file_to_latest_version(
            case_id=case_id, file=gisaid_fasta_path, tags=["gisaid-fasta", case_id]
        )

    def create_gisaid_csv(self, gisaid_samples: List[GisaidSample], case_id: str) -> None:
        """Create csv file for gisaid upload.

        An existing gisaid csv of the case is replaced in place; otherwise the file is
        written to the default path and added to Housekeeper.
        """
        samples_df = pd.DataFrame(
            data=[gisaid_sample.dict() for gisaid_sample in gisaid_samples],
            columns=HEADERS,
        )

        gisaid_csv_file = self.housekeeper_api.find_file_in_latest_version(
            case_id=case_id, tags=["gisaid-csv", case_id]
        )
        if gisaid_csv_file:
            LOG.info(f"GISAID CSV for case {case_id} exists, will be replaced")
            gisaid_csv_path = gisaid_csv_file.full_path
        else:
            gisaid_csv_path = self.get_gisaid_csv_path(case_id=case_id)
        samples_df.to_csv(gisaid_csv_path, sep=",", index=False)

        if gisaid_csv_file:
            return

        self.housekeeper_api.add_and_include_file_to_latest_version(
            case_id=case_id, file=gisaid_csv_path, tags=["gisaid-csv", case_id]
        )

    def create_gisaid_log_file(self, case_id: str) -> None:
        """Make sure the case bundle has a gisaid log file in Housekeeper.

        Raises:
            ValueError: when the configured gisaid log directory does not exist.
        """
        gisaid_log_file = self.housekeeper_api.get_files(
            bundle=case_id, tags=["gisaid-log", case_id]
        ).first()
        if gisaid_log_file:
            LOG.info("GISAID log exists in case bundle in Housekeeper")
            return

        log_file_path = Path(self.gisaid_log_dir, case_id).with_suffix(".log")
        if not log_file_path.parent.exists():
            raise ValueError(f"Gisaid log dir: {self.gisaid_log_dir} doesnt exist")
        if not log_file_path.exists():
            log_file_path.touch()
        self.housekeeper_api.add_and_include_file_to_latest_version(
            case_id=case_id, file=log_file_path, tags=["gisaid-log", case_id]
        )

    def create_gisaid_files_in_housekeeper(self, case_id: str) -> None:
        """Create all gisaid files in Housekeeper, if needed."""
        gisaid_samples = self.get_gisaid_samples(case_id=case_id)
        self.create_gisaid_csv(gisaid_samples=gisaid_samples, case_id=case_id)
        self.create_gisaid_fasta(gisaid_samples=gisaid_samples, case_id=case_id)
        self.create_gisaid_log_file(case_id=case_id)

    def authenticate_gisaid(self):
        """Run the cli authentication command with the configured cid, user and password."""
        load_call: list = [
            "CoV",
            "authenticate",
            "--cid",
            self.upload_cid,
            "--user",
            self.gisaid_submitter,
            "--pass",
            self.upload_password,
        ]
        self.process.run_command(parameters=load_call)

    def upload_results_to_gisaid(self, case_id: str) -> None:
        """Load batch data to GISAID using the gisaid cli.

        The cli logs to a temporary file in the gisaid log directory; after the upload that
        temporary log is merged into the gisaid log of the case and removed.
        """
        # Resolve every input and authenticate before the temporary log is created, so a
        # failure in one of these steps does not leave an orphan file in the log directory.
        gisaid_csv_path = self.housekeeper_api.find_file_in_latest_version(
            case_id=case_id, tags=["gisaid-csv", case_id]
        ).full_path

        gisaid_fasta_path = self.housekeeper_api.find_file_in_latest_version(
            case_id=case_id, tags=["gisaid-fasta", case_id]
        ).full_path

        gisaid_log_path = (
            self.housekeeper_api.get_files(bundle=case_id, tags=["gisaid-log", case_id])
            .first()
            .full_path
        )

        self.authenticate_gisaid()

        # Only a unique path is needed here: the cli is handed the file name. Closing the
        # handle right away keeps it from leaking when the upload command raises.
        with tempfile.NamedTemporaryFile(
            dir=self.gisaid_log_dir, mode="w+", delete=False
        ) as temp_log_file:
            temp_log_name: str = temp_log_file.name

        load_call: list = [
            "--logfile",
            temp_log_name,
            "CoV",
            "upload",
            "--csv",
            gisaid_csv_path,
            "--fasta",
            gisaid_fasta_path,
        ]
        self.process.run_command(parameters=load_call)
        self.append_log(temp_log=Path(temp_log_name), gisaid_log=Path(gisaid_log_path))
        if self.process.stderr:
            LOG.info(f"gisaid stderr:\n{self.process.stderr}")
        if self.process.stdout:
            LOG.info(f"gisaid stdout:\n{self.process.stdout}")

    def append_log(self, temp_log: Path, gisaid_log: Path) -> None:
        """Merge the temp log into the gisaid log and delete the temp file.

        Both files hold a JSON list. The entries of the temp log are placed before the
        existing entries of the gisaid log; an empty gisaid log is treated as no entries.
        """
        with open(str(temp_log.absolute()), "r") as open_temp_log:
            new_log_data: List = json.load(open_temp_log)
            if gisaid_log.stat().st_size != 0:
                with open(str(gisaid_log.absolute()), "r") as open_gisaid_log:
                    old_log_data: List = json.load(open_gisaid_log)
                    new_log_data.extend(old_log_data)

        with open(str(gisaid_log.absolute()), "w") as open_gisaid_log:
            json.dump(new_log_data, open_gisaid_log)
        temp_log.unlink()

    def get_accession_numbers(self, case_id: str) -> Dict[str, str]:
        """Parse accession numbers and sample ids from the gisaid log file of the case.

        Log entries with code "epi_isl_id" are used as is; "validation_error" entries that
        mention "existing_ids" are reduced to their first ";"-separated part plus the first
        UPLOADED_REGEX_MATCH hit. All other entries are ignored.
        """
        LOG.info("Parsing accession numbers from log file")
        accession_numbers = {}
        log_file = Path(
            self.housekeeper_api.get_files(bundle=case_id, tags=["gisaid-log", case_id])
            .first()
            .full_path
        )
        if log_file.stat().st_size != 0:
            # Use a separate name for the handle so the path is not shadowed.
            with open(str(log_file.absolute())) as log_handle:
                log_data = json.load(log_handle)
            for log in log_data:
                code = log.get("code")
                message = log.get("msg")
                if code == "epi_isl_id":
                    log_message = message
                elif code == "validation_error" and "existing_ids" in message:
                    log_message = (
                        f'{message.split(";")[0]}; '
                        f"{re.findall(UPLOADED_REGEX_MATCH, message)[0]}"
                    )
                else:
                    continue
                accession_obj = GisaidAccession(log_message=log_message)
                accession_numbers[accession_obj.sample_id] = accession_obj.accession_nr
        return accession_numbers

    def update_completion_file(self, case_id: str) -> None:
        """Update the completion file with the accession numbers parsed from the log.

        Every "provnummer" in the completion file must have an accession number; a missing
        one raises KeyError.
        """
        completion_file = self.get_completion_file_from_hk(case_id=case_id)
        accession_dict = self.get_accession_numbers(case_id=case_id)
        completion_df = self.get_completion_dataframe(completion_file=completion_file)
        completion_df["GISAID_accession"] = completion_df["provnummer"].apply(
            lambda x: accession_dict[x]
        )
        completion_df.to_csv(
            completion_file.full_path,
            sep=",",
            index=False,
        )

    def upload(self, case_id: str) -> None:
        """Upload results to gisaid and save the accession numbers in the completion file.

        Nothing is uploaded when every row of the completion file already has a value in
        the "GISAID_accession" column.
        """
        completion_file = self.get_completion_file_from_hk(case_id=case_id)
        completion_df = self.get_completion_dataframe(completion_file=completion_file)
        if len(completion_df["GISAID_accession"].dropna()) == len(completion_df["provnummer"]):
            LOG.info("All samples already uploaded")
            return

        self.create_gisaid_files_in_housekeeper(case_id=case_id)
        self.upload_results_to_gisaid(case_id=case_id)
        self.update_completion_file(case_id=case_id)
Ejemplo n.º 20
0
 def process(self) -> Process:
     """Return the cached Process, building it from the fluffy binary path on first use."""
     if self._process:
         return self._process

     fluffy_binary = self.config.fluffy.binary_path
     self._process = Process(binary=fluffy_binary)
     return self._process
Ejemplo n.º 21
0
    def __init__(self, config: dict):
        """Read the chanjo config path and binary path and build the process wrapper.

        Args:
            config: mapping with a "chanjo" section providing "config_path" and "binary_path".
        """
        chanjo_settings = config["chanjo"]
        self.chanjo_config = chanjo_settings["config_path"]
        self.chanjo_binary = chanjo_settings["binary_path"]
        self.process = Process(self.chanjo_binary, self.chanjo_config)
Ejemplo n.º 22
0
class ChanjoAPI:
    """Interface to Chanjo, the coverage analysis tool."""

    def __init__(self, config: dict):
        """Read the chanjo config path and binary path and build the process wrapper.

        Args:
            config: mapping with a "chanjo" section providing "config_path" and "binary_path".
        """
        chanjo_settings = config["chanjo"]
        self.chanjo_config = chanjo_settings["config_path"]
        self.chanjo_binary = chanjo_settings["binary_path"]
        self.process = Process(self.chanjo_binary, self.chanjo_config)

    def upload(
        self,
        sample_id: str,
        sample_name: str,
        group_id: str,
        group_name: str,
        bed_file: str,
    ):
        """Upload coverage for a sample by running the "load" command on the bed file."""
        self.process.run_command(
            [
                "load",
                "--sample",
                sample_id,
                "--name",
                sample_name,
                "--group",
                group_id,
                "--group-name",
                group_name,
                "--threshold",
                "10",
                bed_file,
            ]
        )

    def sample(self, sample_id: str) -> dict:
        """Fetch sample from the database.

        Returns:
            The first entry of the decoded command output whose "id" equals sample_id, or
            None when there is no such entry.
        """
        self.process.run_command(["db", "samples", "-s", sample_id])
        candidates = json.loads(self.process.stdout)
        return next(
            (candidate for candidate in candidates if candidate["id"] == sample_id),
            None,
        )

    def delete_sample(self, sample_id: str):
        """Delete sample from database."""
        self.process.run_command(["db", "remove", sample_id])

    def omim_coverage(self, samples: List[str]) -> dict:
        """Calculate omim coverage for samples.

        Each element of samples is indexed with "id" to get the sample id that is passed
        to the command.
        """
        command = ["calculate", "coverage", "--omim"]
        command.extend(
            argument for sample in samples for argument in ("-s", sample["id"])
        )
        self.process.run_command(command)
        return json.loads(self.process.stdout)

    def sample_coverage(self, sample_id: str, panel_genes: list) -> dict:
        """Calculate coverage for a sample over a gene panel.

        The genes are written one per line to a temporary file that is handed to the
        command with "-f".

        Returns:
            The entry for sample_id in the decoded command output (None when absent).
        """
        with tempfile.NamedTemporaryFile(mode="w+t") as gene_file:
            gene_file.write("\n".join(map(str, panel_genes)))
            # flush so the command sees all genes when it reads the file by name
            gene_file.flush()
            self.process.run_command(
                ["calculate", "coverage", "-s", sample_id, "-f", gene_file.name]
            )
        coverage_by_sample = json.loads(self.process.stdout)
        return coverage_by_sample.get(sample_id)