Example #1
0
    def __init__(self, config):
        """Connect to the Scout database and prepare the Scout command line wrapper.

        Args:
            config (dict): must hold a "scout" section with the keys "database",
                "database_name", "binary_path" and "config_path"
        """
        scout_config = config["scout"]

        # NOTE(review): the timeout is given in milliseconds, 20 looks very short - confirm
        client = MongoClient(scout_config["database"], serverSelectionTimeoutMS=20)
        database = client[scout_config["database_name"]]
        super(ScoutAPI, self).__init__(database)

        self.process = Process(
            binary=scout_config["binary_path"], config=scout_config["config_path"]
        )
Example #2
0
class HermesApi:
    """Class to communicate with hermes"""

    def __init__(self, config: dict):
        """Create the process wrapper used to run the hermes binary.

        Args:
            config: must hold a "hermes" section with the key "binary_path"
        """
        self.process = Process(config["hermes"]["binary_path"])

    def convert_deliverables(
        self,
        deliverables_file: Path,
        pipeline: str,
        analysis_type: Optional[str] = None,
    ) -> CGDeliverables:
        """Convert deliverables file in raw pipeline format to CG format with hermes

        Args:
            deliverables_file: path to the deliverables file produced by the pipeline
            pipeline: name of the pipeline, passed to hermes with "--pipeline"
            analysis_type: passed to hermes with "--analysis-type" when given

        Returns:
            The stdout of the hermes command parsed into a CGDeliverables object
        """
        LOG.info("Converting pipeline deliverables to CG deliverables")
        convert_command = [
            "convert",
            "deliverables",
            "--pipeline",
            pipeline,
            str(deliverables_file),
        ]
        if analysis_type:
            convert_command.extend(["--analysis-type", analysis_type])
        self.process.run_command(convert_command)

        return CGDeliverables.parse_raw(self.process.stdout)

    def create_housekeeper_bundle(
        self,
        bundle_name: str,
        deliverables: Path,
        pipeline: str,
        analysis_type: Optional[str] = None,
        created: Optional[datetime] = None,
    ) -> hk_models.InputBundle:
        """Convert pipeline deliverables to housekeeper bundle ready to be inserted into hk

        Args:
            bundle_name: name of the bundle to create
            deliverables: path to the deliverables file produced by the pipeline
            pipeline: name of the pipeline that produced the deliverables
            analysis_type: optional analysis type forwarded to the conversion
            created: optional creation date stored on the bundle

        Returns:
            The bundle built from the converted deliverables
        """
        cg_deliverables: CGDeliverables = self.convert_deliverables(
            deliverables_file=deliverables,
            pipeline=pipeline,
            analysis_type=analysis_type,
        )
        return self.get_housekeeper_bundle(
            deliverables=cg_deliverables, created=created, bundle_name=bundle_name
        )

    @staticmethod
    def get_housekeeper_bundle(
        deliverables: CGDeliverables,
        bundle_name: str,
        created: Optional[datetime] = None,
    ) -> hk_models.InputBundle:
        """Convert a deliverables object to a housekeeper object

        Args:
            deliverables: converted deliverables, every entry in 'files' becomes a bundle file
            bundle_name: name of the bundle
            created: creation date, only added to the bundle when given

        Returns:
            The housekeeper input bundle
        """
        bundle_info = {
            "name": bundle_name,
            "files": [file_info.dict() for file_info in deliverables.files],
        }
        # Leave the key out entirely when no date was given
        if created:
            bundle_info["created"] = created

        return hk_models.InputBundle(**bundle_info)
Example #3
0
    def __init__(self, config):
        """Set up the process wrapper used to run the Scout command line tool.

        Args:
            config (dict): must hold a "scout" section with "binary_path" and "config_path"
        """
        scout_config = config["scout"]
        self.process = Process(
            binary=scout_config["binary_path"], config=scout_config["config_path"]
        )
Example #4
0
class ScoutAPI:

    """Interface to Scout."""

    def __init__(self, config):
        """Set up the process wrapper used to run the Scout command line tool.

        Args:
            config (dict): must hold a "scout" section with "binary_path" and "config_path"
        """
        binary_path = config["scout"]["binary_path"]
        config_path = config["scout"]["config_path"]
        self.process = Process(binary=binary_path, config=config_path)

    def upload(self, scout_load_config: Path, threshold: int = 5, force: bool = False):
        """Load analysis of a new family into Scout.

        A case that already exists is only updated when 'force' is set or when the
        analysis date in the load config is newer than the one of the existing case.
        Otherwise a warning is logged and nothing is loaded.

        Args:
            scout_load_config (Path):   Path to the yaml file with the Scout load config
            threshold         (int):    Not used by this method
            force             (bool):   Update an existing case regardless of analysis date
        """
        with open(scout_load_config, "r") as stream:
            data = yaml.safe_load(stream)
        scout_load_config_object: ScoutLoadConfig = ScoutLoadConfig(**data)
        existing_case: Optional[ScoutExportCase] = self.get_case(
            case_id=scout_load_config_object.family
        )
        load_command = ["load", "case", str(scout_load_config)]
        if existing_case:
            if force or scout_load_config_object.analysis_date > existing_case.analysis_date:
                load_command.append("--update")
                LOG.info("update existing Scout case")
            else:
                existing_date = existing_case.analysis_date.date()
                LOG.warning("analysis of case already loaded: %s", existing_date)
                return
        LOG.debug("load new Scout case")
        self.process.run_command(load_command)
        LOG.debug("Case loaded successfully to Scout")

    def update_alignment_file(self, case_id: str, sample_id: str, alignment_path: Path):
        """Update alignment file for individual in case

        Args:
            case_id        (string):    Case identifier
            sample_id      (string):    Identifier of the individual, passed as "--ind-id"
            alignment_path (Path):      Path to the alignment file
        """
        parameters = [
            "update",
            "individual",
            "--case-id",
            case_id,
            "--ind-id",
            sample_id,
            "--alignment-path",
            str(alignment_path),
        ]
        self.process.run_command(parameters=parameters)

    def export_panels(self, panels: List[str], build: str = GENOME_BUILD_37) -> List[str]:
        """Pass through to export of a list of gene panels.

        Args:
            panels (list):      Identifiers of the panels to export
            build  (string):    Genome build, passed as "--build" when set

        Return list of lines in bed format. The list is empty when the command
        fails or prints nothing.
        """
        export_panels_command = ["export", "panel", "--bed"]
        export_panels_command.extend(panels)

        if build:
            export_panels_command.extend(["--build", build])

        try:
            self.process.run_command(export_panels_command)
        except CalledProcessError:
            LOG.info("Could not find panels")
            return []

        if not self.process.stdout:
            return []
        return list(self.process.stdout_lines())

    def get_genes(self, panel_id: str, build: Optional[str] = None) -> list:
        """Fetch panel genes.

        Args:
            panel_id (str): unique id for the panel
            build (str): passed to the export command as "--build" when set

        Returns:
            panel genes: list of dicts with the keys "hgnc_id" (int) and "hgnc_symbol".
                The list is empty when the command fails or prints nothing.
        """
        # This can be run from CLI with `scout export panel <panel_id>`
        export_panel_command = ["export", "panel", panel_id]
        if build:
            export_panel_command.extend(["--build", build])

        try:
            self.process.run_command(export_panel_command)
        except CalledProcessError:
            LOG.info("Could not find panel %s", panel_id)
            return []

        if not self.process.stdout:
            return []

        panel_genes = []
        for gene_line in self.process.stdout_lines():
            # Lines starting with '#' are skipped
            if gene_line.startswith("#"):
                continue
            gene_info = gene_line.strip().split("\t")
            # A gene line needs at least two tab separated columns: id and symbol
            if len(gene_info) < 2:
                continue
            panel_genes.append({"hgnc_id": int(gene_info[0]), "hgnc_symbol": gene_info[1]})

        return panel_genes

    def get_causative_variants(self, case_id: str) -> List[Variant]:
        """
        Get causative variants for a case

        Returns an empty list when the command fails or prints nothing.
        """
        # These commands can be run with `scout export variants`
        get_causatives_command = ["export", "variants", "--json", "--case-id", case_id]
        try:
            self.process.run_command(get_causatives_command)
        except CalledProcessError:
            LOG.warning("Could not find case %s in scout", case_id)
            return []

        if not self.process.stdout:
            return []
        return [Variant(**variant_info) for variant_info in json.loads(self.process.stdout)]

    def get_case(self, case_id: str) -> Optional[ScoutExportCase]:
        """Fetch a case from Scout, None is returned when no case was found"""
        cases: List[ScoutExportCase] = self.get_cases(case_id=case_id)
        if not cases:
            return None
        return cases[0]

    def get_cases(
        self,
        case_id: Optional[str] = None,
        reruns: bool = False,
        finished: bool = False,
        status: Optional[str] = None,
        days_ago: Optional[int] = None,
    ) -> List[ScoutExportCase]:
        """Interact with cases existing in the database.

        Only one of 'case_id', 'status' and 'finished' is used, in that order of
        precedence. 'reruns' and 'days_ago' are added on top of that.

        Returns:
            cases (list): exported cases, empty when the command fails or prints nothing
        """
        # These commands can be run with `scout export cases`
        get_cases_command = ["export", "cases", "--json"]
        if case_id:
            get_cases_command.extend(["--case-id", case_id])

        elif status:
            get_cases_command.extend(["--status", status])

        elif finished:
            get_cases_command.append("--finished")

        if reruns:
            LOG.info("Fetching cases that are reruns")
            get_cases_command.append("--reruns")

        if days_ago:
            get_cases_command.extend(["--within-days", str(days_ago)])

        try:
            self.process.run_command(get_cases_command)
        except CalledProcessError:
            LOG.info("Could not find cases")
            return []

        if not self.process.stdout:
            return []

        cases = []
        for case_export in json.loads(self.process.stdout):
            LOG.info("Validating case %s", case_export.get("_id"))
            case_obj = ScoutExportCase(**case_export)
            cases.append(case_obj)
        return cases

    def get_solved_cases(self, days_ago: int) -> List[ScoutExportCase]:
        """
        Get cases solved within chosen timespan

        Args:
            days_ago (int): Maximum days ago a case has been solved

        Return:
            cases (list): list of cases
        """
        return self.get_cases(status="solved", days_ago=days_ago)

    def upload_delivery_report(self, report_path: str, case_id: str, update: bool = False) -> None:
        """Load a delivery report into a case in the database

        The "--update" flag is only passed to Scout when 'update' is 'True'.
        A failing command is logged as a warning and not raised.

        Args:
            report_path (string):       Path to delivery report
            case_id     (string):       Case identifier
            update      (bool):         If an existing report should be replaced

        Returns:
            Nothing

        """
        # This command can be run with `scout load delivery-report <CASE-ID> <REPORT-PATH>`
        upload_command = ["load", "delivery-report", case_id, report_path]

        if update:
            upload_command.append("--update")

        try:
            LOG.info("Uploading delivery report %s to case %s", report_path, case_id)
            self.process.run_command(upload_command)
        except CalledProcessError:
            LOG.warning("Something went wrong when uploading delivery report")

    def upload_fusion_report(
        self, case_id: str, report_path: str, research: bool, update: bool
    ) -> None:
        """Load a fusion report into a case in the database

        Args:
            report_path (string):       Path to fusion report
            case_id     (string):       Case identifier
            research    (bool):         Research report
            update      (bool):         If an existing report should be replaced
        Returns:
            Nothing
        Raises:
            ScoutUploadError: when the Scout command fails
        """

        # This command can be run with
        # `scout load gene-fusion-report [-r] <case_id> <path/to/research_gene_fusion_report.pdf>`
        upload_command = ["load", "gene-fusion-report"]

        if research:
            upload_command.append("--research")

        if update:
            upload_command.append("--update")

        upload_command.extend([case_id, report_path])

        try:
            LOG.info("Uploading fusion report %s to case %s", report_path, case_id)
            self.process.run_command(upload_command)
        except CalledProcessError as error:
            # Keep the failed command as the explicit cause of the upload error
            raise ScoutUploadError("Something went wrong when uploading fusion report") from error

    def upload_splice_junctions_bed(self, file_path: str, case_id: str, customer_sample_id: str):
        """Load a splice junctions bed file into a case in the database

        Args:
            file_path           (string):       Path to splice junctions bed file
            case_id             (string):       Case identifier
            customer_sample_id  (string):       Customers sample identifier
        Returns:
            Nothing
        Raises:
            ScoutUploadError: when the Scout command fails

        """

        # This command can be run with
        # `scout update individual -c <case_id> -n <customer_sample_id> splice_junctions_bed
        #   <path/to/junction_file.bed>`
        upload_command = [
            "update",
            "individual",
            "-c",
            case_id,
            "-n",
            customer_sample_id,
            "splice_junctions_bed",
            file_path,
        ]

        try:
            LOG.info("Uploading splice junctions bed file %s to case %s", file_path, case_id)
            self.process.run_command(upload_command)
        except CalledProcessError as error:
            raise ScoutUploadError(
                "Something went wrong when uploading splice junctions bed file"
            ) from error

    def upload_rna_coverage_bigwig(self, file_path: str, case_id: str, customer_sample_id: str):
        """Load a rna coverage bigwig file into a case in the database

        Args:
            file_path           (string):       Path to rna coverage bigwig file
            case_id             (string):       Case identifier
            customer_sample_id  (string):       Customers sample identifier
        Returns:
            Nothing
        Raises:
            ScoutUploadError: when the Scout command fails

        """

        # This command can be run with
        # `scout update individual -c <case_id> -n <customer_sample_id> rna_coverage_bigwig
        #         <path/to/coverage_file.bigWig>`
        upload_command = [
            "update",
            "individual",
            "-c",
            case_id,
            "-n",
            customer_sample_id,
            "rna_coverage_bigwig",
            file_path,
        ]

        try:
            LOG.info("Uploading rna coverage bigwig file %s to case %s", file_path, case_id)
            self.process.run_command(upload_command)
        except CalledProcessError as error:
            raise ScoutUploadError(
                "Something went wrong when uploading rna coverage bigwig file"
            ) from error
Example #5
0
class ScoutAPI(MongoAdapter):

    """Interface to Scout."""

    def __init__(self, config):
        """Connect to the Scout database and prepare the Scout command line wrapper.

        Args:
            config (dict): must hold a "scout" section with the keys "database",
                "database_name", "binary_path" and "config_path"
        """
        scout_config = config["scout"]

        # NOTE(review): the timeout is given in milliseconds, 20 looks very short - confirm
        client = MongoClient(scout_config["database"], serverSelectionTimeoutMS=20)
        super().__init__(client[scout_config["database_name"]])

        self.process = Process(
            binary=scout_config["binary_path"], config=scout_config["config_path"]
        )

    def upload(self, data: dict, threshold: int = 5, force: bool = False):
        """Load analysis of a new family into Scout.

        A case that already exists is only reloaded when 'force' is set or when the
        analysis date of the new data is later than the one stored on the case.

        Args:
            data      (dict):   Case data, "rank_score_threshold" is set on this dict in place
            threshold (int):    Value stored under "rank_score_threshold"
            force     (bool):   Reload an existing case regardless of analysis date
        """
        data["rank_score_threshold"] = threshold
        config_data = parse_case_data(config=data)
        existing_case = self.case(
            institute_id=config_data["owner"], display_name=config_data["family_name"]
        )

        if not existing_case:
            LOG.debug("load new Scout case")
            load_scout(self, config_data)
            LOG.debug("Case loaded successfully to Scout")
            return

        if force or config_data["analysis_date"] > existing_case["analysis_date"]:
            LOG.info("update existing Scout case")
            load_scout(self, config_data, update=True)
            return

        existing_date = existing_case["analysis_date"].date()
        LOG.warning("analysis of case already loaded: %s", existing_date)

    def update_alignment_file(self, case_id: str, sample_id: str, alignment_path: Path):
        """Update alignment file for individual in case

        Args:
            case_id        (string):    Case identifier
            sample_id      (string):    Identifier of the individual, passed as "--ind-id"
            alignment_path (Path):      Path to the alignment file
        """
        parameters = ["update", "individual"]
        parameters += ["--case-id", case_id]
        parameters += ["--ind-id", sample_id]
        parameters += ["--alignment-path", str(alignment_path)]
        self.process.run_command(parameters=parameters)

    def export_panels(self, panels: List[str], versions=None):
        """Pass through to export of a list of gene panels.

        The call is handed to 'scout_export_panels' together with this adapter.
        """
        exported = scout_export_panels(self, panels, versions)
        return exported

    def get_genes(self, panel_id: str, version: str = None) -> list:
        """Fetch panel genes.

        Args:
            panel_id (str): unique id for the panel
            version (str): version of the panel. If 'None' latest version will be returned

        Returns:
            panel genes: whatever is stored under "genes" on the panel
        """
        panel_obj = self.gene_panel(panel_id=panel_id, version=version)
        # NOTE(review): fails with AttributeError if the lookup returns None - confirm
        genes = panel_obj.get("genes")
        return genes

    def get_cases(
        self,
        case_id=None,
        institute=None,
        reruns=None,
        finished=None,
        causatives=None,
        research_requested=None,
        is_research=None,
        status=None,
    ):
        """Interact with cases existing in the database.

        When 'case_id' is given only that case is looked up and all other filters
        are ignored. The result is then a list holding the case, or an empty list.
        Without 'case_id' the remaining filters are passed on to 'self.cases'.
        """
        if case_id:
            case_obj = self.case(case_id=case_id)
            return [case_obj] if case_obj else []

        return self.cases(
            collaborator=institute,
            reruns=reruns,
            finished=finished,
            has_causatives=causatives,
            research_requested=research_requested,
            is_research=is_research,
            status=status,
        )

    def get_causative_variants(self, case_id=None, collaborator=None):
        """
        Get causative variants for a case

        Every id returned by 'self.get_causatives' is looked up with 'self.variant'.
        """
        causatives = []
        for causative_id in self.get_causatives(institute_id=collaborator, case_id=case_id):
            causatives.append(self.variant(causative_id))

        return causatives

    def get_solved_cases(self, days_ago):
        """
        Get cases solved within chosen timespan

        Args:
            days_ago (int): Maximum days ago a case has been solved

        Return:
            cases: result of the query on the case collection
        """
        earliest = dt.datetime.now() - dt.timedelta(days=days_ago)

        # Look up 'mark_causative' events added since specified number days ago
        recent_events = self.event_collection.find(
            {
                "category": "case",
                "verb": "mark_causative",
                "created_at": {"$gte": earliest},
            }
        )

        # Find what cases these events concern, every case only once
        case_ids = list({event["case"] for event in recent_events})

        # Find these cases in the database
        # NOTE(review): presumably a database cursor rather than a list - confirm
        return self.case_collection.find({"_id": {"$in": case_ids}})

    def upload_delivery_report(self, report_path: str, case_id: str, update: bool = False):
        """Load a delivery report into a case in the database

        The work is handed over to 'load_delivery_report' with this adapter.

        Args:
            report_path (string):       Path to delivery report
            case_id     (string):       Case identifier
            update      (bool):         If an existing report should be replaced

        Returns:
            The return value of 'load_delivery_report'

        """
        result = load_delivery_report(
            adapter=self, case_id=case_id, report_path=report_path, update=update
        )
        return result
Example #6
0
 def __init__(self, config: dict):
     """Create the process wrapper used to run the hermes binary.

     Args:
         config: must hold a "hermes" section with the key "binary_path"
     """
     hermes_config = config["hermes"]
     self.process = Process(hermes_config["binary_path"])
Example #7
0
 def __init__(self, config: dict):
     """Create the process wrapper for the genotype binary, dry run is off by default.

     Args:
         config: must hold a "genotype" section with "binary_path" and "config_path"
     """
     genotype_config = config["genotype"]
     self.process = Process(
         binary=genotype_config["binary_path"],
         config=genotype_config["config_path"],
     )
     self.dry_run = False
Example #8
0
class GenotypeAPI:
    """Interface with Genotype app.

    The config should contain a 'genotype' key:

        { 'binary_path': '/path/to/genotype', 'config_path': '/path/to/config' }
    """

    def __init__(self, config: dict):
        """Create the process wrapper for the genotype binary, dry run is off by default."""
        genotype_config = config["genotype"]
        self.process = Process(
            binary=genotype_config["binary_path"],
            config=genotype_config["config_path"],
        )
        self.dry_run = False

    def set_dry_run(self, dry_run: bool) -> None:
        """Set the dry run state"""
        self.dry_run = dry_run

    def _run(self, parameters: list) -> None:
        """Run the genotype binary with 'parameters' using the current dry run state"""
        self.process.run_command(parameters=parameters, dry_run=self.dry_run)

    def _export(self, parameters: list) -> str:
        """Run an export command and return its stdout, empty output is an error"""
        self._run(parameters)
        output = self.process.stdout
        # If sample not in genotype db, stdout of genotype command will be empty.
        if not output:
            raise CaseNotFoundError("samples not found in genotype db")
        return output

    def upload(self, bcf_path: str, samples_sex: dict, force: bool = False) -> None:
        """Upload genotypes for a family of samples.

        Args:
            bcf_path: path to the file with genotypes that is loaded
            samples_sex: maps sample id to a dict with the keys "pedigree" and "analysis"
            force: add "--force" to the load command
        """
        upload_parameters = ["load", str(bcf_path)]
        if force:
            upload_parameters.append("--force")

        LOG.info("loading VCF genotypes for sample(s): %s", ", ".join(samples_sex))
        self._run(upload_parameters)

        for sample_id, sexes in samples_sex.items():
            # This is the sample sex specified by the customer
            self.update_sample_sex(sample_id, sexes["pedigree"])
            # This is the predicted sex based on variant calls from the pipeline
            self.update_analysis_sex(sample_id, sex=sexes["analysis"])

    def update_sample_sex(self, sample_id: str, sex: str) -> None:
        """Update the sex for a sample in the genotype tool"""
        LOG.debug("Set sex for sample %s to %s", sample_id, sex)
        self._run(["add-sex", sample_id, "-s", sex])

    def update_analysis_sex(self, sample_id: str, sex: str) -> None:
        """Update the predicted sex for a sample based on genotype analysis in the genotype tool"""
        LOG.debug(
            "Set predicted sex for sample %s to %s for the sequence analysis", sample_id, sex
        )
        self._run(["add-sex", sample_id, "-a", "sequence", sex])

    def export_sample(self, days: int = 0) -> str:
        """Export sample info.

        Raises:
            CaseNotFoundError: when the command prints nothing
        """
        return self._export(["export-sample", "-d", str(days)])

    def export_sample_analysis(self, days: int = 0) -> str:
        """Export analysis.

        Raises:
            CaseNotFoundError: when the command prints nothing
        """
        return self._export(["export-sample-analysis", "-d", str(days)])

    def __str__(self):
        return "GenotypeAPI(dry_run: {})".format(self.dry_run)
Example #9
0
 def __init__(self, config: dict):
     """Store the vogue config and binary paths and create the process wrapper.

     Args:
         config: must hold a "vogue" section with "config_path" and "binary_path"
     """
     super(VogueAPI, self).__init__()
     vogue_section = config["vogue"]
     self.vogue_config = vogue_section["config_path"]
     self.vogue_binary = vogue_section["binary_path"]
     self.process = Process(binary=self.vogue_binary, config=self.vogue_config)
Example #10
0
class VogueAPI:
    """
    API for vogue
    """

    def __init__(self, config: dict):
        """Store the vogue config and binary paths and create the process wrapper.

        Args:
            config: must hold a "vogue" section with "config_path" and "binary_path"
        """
        super().__init__()
        vogue_section = config["vogue"]
        self.vogue_config = vogue_section["config_path"]
        self.vogue_binary = vogue_section["binary_path"]
        self.process = Process(binary=self.vogue_binary, config=self.vogue_config)

    def _run_and_log(self, parameters: list) -> None:
        """Run vogue with 'parameters' and log every stderr line of the finished command"""
        self.process.run_command(parameters=parameters)
        for line in self.process.stderr_lines():
            LOG.info("vogue output: %s", line)

    def load_genotype_data(self, genotype_dict: dict) -> None:
        """Load genotype data from a dict, the dict is passed to vogue as a json string."""
        self._run_and_log(["load", "genotype", "-s", json.dumps(genotype_dict)])

    def load_apptags(self, apptag_list: list) -> None:
        """Load apptags from a list, the list is passed to vogue as a json string."""
        self._run_and_log(["load", "apptag", json.dumps(apptag_list)])

    def load_samples(self, days: int) -> None:
        """Running vogue load samples."""
        self._run_and_log(["load", "sample", "-d", str(days)])

    def load_flowcells(self, days: int) -> None:
        """Running vogue load flowcells."""
        self._run_and_log(["load", "flowcell", "-d", str(days)])

    def load_reagent_labels(self, days: int) -> None:
        """Running vogue load reagent_labels."""
        self._run_and_log(["load", "reagent_labels", "-d", str(days)])

    def load_bioinfo_raw(self, load_bioinfo_inputs: dict) -> None:
        """Running vogue load bioinfo raw.

        Every option below is filled with the value stored under the paired key
        in 'load_bioinfo_inputs'.
        """
        option_keys = (
            ("--sample-list", "samples"),
            ("--analysis-result", "analysis_result_file"),
            ("--analysis-type", "analysis_type"),
            ("--analysis-case", "analysis_case_name"),
            ("--workflow-version", "analysis_workflow_version"),
            ("--case-analysis-type", "case_analysis_type"),
            ("--analysis-workflow", "analysis_workflow_name"),
        )
        load_bioinfo_raw_call = ["load", "bioinfo", "raw"]
        for option, key in option_keys:
            load_bioinfo_raw_call.extend([option, load_bioinfo_inputs[key]])

        self.process.run_command(parameters=load_bioinfo_raw_call)

    def load_bioinfo_process(self, load_bioinfo_inputs: dict, cleanup_flag: bool) -> None:
        """Running load bioinfo process.

        Every option below is filled with the value stored under the paired key
        in 'load_bioinfo_inputs'. "--cleanup" is added when 'cleanup_flag' is set.
        """
        option_keys = (
            ("--analysis-type", "analysis_type"),
            ("--analysis-case", "analysis_case_name"),
            ("--analysis-workflow", "analysis_workflow_name"),
            ("--workflow-version", "analysis_workflow_version"),
            ("--case-analysis-type", "case_analysis_type"),
        )
        load_bioinfo_process_call = ["load", "bioinfo", "process"]
        for option, key in option_keys:
            load_bioinfo_process_call.extend([option, load_bioinfo_inputs[key]])

        if cleanup_flag:
            load_bioinfo_process_call.append("--cleanup")

        self.process.run_command(parameters=load_bioinfo_process_call)

    def load_bioinfo_sample(self, load_bioinfo_inputs: dict) -> None:
        """Running load bioinfo sample for the case stored under "analysis_case_name"."""
        case_name = load_bioinfo_inputs["analysis_case_name"]
        load_bioinfo_sample_call = ["load", "bioinfo", "sample", "--analysis-case", case_name]

        self.process.run_command(parameters=load_bioinfo_sample_call)