Ejemplo n.º 1
0
def generate_deliverables_file(context, dry, config_path, case_id):
    """Generate a deliverables file for the case_id.

    Runs ``balsamic report deliver`` inside the configured conda environment.

    Args:
        context: click context; ``context.obj`` supplies the ``balsamic`` settings
            (``conda_env`` and ``root``), the ``store_api`` and the ``analysis_api``.
        dry: when truthy, only echo the command and return ``SUCCESS``.
        config_path: path to the sample config. When falsy it is looked up through
            ``analysis_api.get_config_path`` and must point to an existing file.
        case_id: identifier of the case to generate deliverables for.

    Returns:
        ``SUCCESS`` for a dry run, otherwise the ``subprocess.CompletedProcess`` of the
        executed command.

    Raises:
        CgError: if the case is not found in the store.
        FileNotFoundError: if the looked-up sample config does not exist.
    """

    conda_env = context.obj["balsamic"]["conda_env"]
    root_dir = Path(context.obj["balsamic"]["root"])
    case_obj = context.obj["store_api"].family(case_id)
    analysis_api = context.obj["analysis_api"]

    if not case_obj:
        raise CgError(f"Case {case_id} not found")

    if not config_path:
        config_path = analysis_api.get_config_path(root_dir, case_id)
        if not config_path.is_file():
            raise FileNotFoundError(
                f"Missing the sample-config file for {case_id}: {config_path}")

    # The whole pipeline call is wrapped in `bash -c '...'` so that the conda
    # environment is activated in the same shell that runs balsamic.
    # NOTE(review): conda_env and config_path are interpolated unquoted into a shell
    # string; they are assumed to come from trusted configuration.
    command = (f"bash -c 'source activate {conda_env}; balsamic report deliver"
               f" --sample-config {config_path}'")

    if dry:
        click.echo(command)
        return SUCCESS

    process = subprocess.run(command, shell=True)

    # subprocess.run returns a CompletedProcess; the exit status lives in
    # `returncode`, comparing the object itself to SUCCESS is never true.
    if process.returncode == SUCCESS:
        LOG.info("Created deliverables file")

    return process
Ejemplo n.º 2
0
def analysis(context, case_id, deliverables_file_path, config_path):
    """Bundle the files of a finished analysis and store them in Housekeeper.

    Missing paths are resolved from the case: the deliverables file path is taken from
    the analysis API (and the file is generated when it does not exist yet), the config
    path from ``get_config_path``.

    Raises:
        CgError: if the case is not found in the store.
        StoreError: if gathering/bundling the files fails; both Housekeeper and the
            status store are rolled back first.
    """

    store = context.obj["store_api"]
    family = store.family(case_id)
    balsamic_root = Path(context.obj["balsamic"]["root"])
    balsamic_api = context.obj["analysis_api"]

    if not family:
        raise CgError(f"Case {case_id} not found")

    if not deliverables_file_path:
        deliverables_file_path = balsamic_api.get_deliverables_file_path(
            case_id, balsamic_root)
        deliverables_exist = os.path.isfile(deliverables_file_path)
        if not deliverables_exist:
            # Produce the deliverables file on the fly through the sibling command.
            context.invoke(generate_deliverables_file, case_id=case_id)

    if not config_path:
        config_path = get_config_path(balsamic_root, case_id)

    housekeeper = context.obj["hk_api"]

    try:
        stored_analysis = gather_files_and_bundle_in_housekeeper(
            config_path, deliverables_file_path, housekeeper, store, family)
    except Exception:
        # Undo anything half-written in either database before reporting the failure.
        housekeeper.rollback()
        store.rollback()
        raise StoreError(sys.exc_info()[0])
    else:
        store.add_commit(stored_analysis)
        LOG.info("Included files in Housekeeper")
Ejemplo n.º 3
0
 def check_username(self) -> None:
     """Ensure the current OS user is the configured FOHM uploader.

     Nothing is checked on a dry run.

     Raises:
         CgError: if the logged-in user differs from ``config.fohm.valid_uploader``.
     """
     if self._dry_run or getpass.getuser() == self.config.fohm.valid_uploader:
         return
     raise CgError(
         f"Cannot upload to FOHM as {getpass.getuser()}, please log in as {self.config.fohm.valid_uploader}"
     )
Ejemplo n.º 4
0
 def resolve_panel_bed(self, panel_bed: Optional[str]) -> Optional[str]:
     """Translate a panel bed argument into a bed file name.

     A value ending in ``.bed`` is returned unchanged; any other non-empty value is
     looked up as a bed version in the status database and its filename is returned.
     An empty or missing value yields ``None``.

     Raises:
         CgError: if the value is neither a ``.bed`` path nor a known bed version.
     """
     if not panel_bed:
         return None
     if panel_bed.endswith(".bed"):
         return panel_bed
     known_version = self.status_db.bed_version(panel_bed)
     if known_version:
         return known_version.filename
     raise CgError("Please provide a valid panel shortname or a path to panel.bed file!")
Ejemplo n.º 5
0
 def get_inbox_path(self, ticket_id: int) -> Path:
     """Return ``<delivery_path>/<customer id>/inbox/<ticket_id>`` for a ticket.

     The customer id is taken from the first case linked to the ticket.

     Raises:
         CgError: if no cases are found for the ticket.
     """
     ticket_cases = self.get_all_cases_from_ticket(ticket_id=ticket_id)
     if ticket_cases:
         owner_id = ticket_cases[0].customer.internal_id
         customer_root = self.delivery_path / owner_id
         return customer_root / "inbox" / str(ticket_id)
     raise CgError(
         f"The customer id was not identified since no cases for ticket_id {ticket_id} was found"
     )
Ejemplo n.º 6
0
 def get_source_and_destination_paths(self,
                                      ticket_id: int) -> Dict[str, Path]:
     """Return the rsync source and destination paths for a ticket.

     The customer id is taken from the first case linked to the ticket.

     Returns:
         A dict with two entries:
         ``delivery_source_path`` -> ``<delivery_path>/<customer id>/inbox/<ticket_id>``
         ``rsync_destination_path`` -> ``<destination_path>/<customer id>/inbox``

     Raises:
         CgError: if no cases are found for the ticket.
     """
     cases = self.get_all_cases_from_ticket(ticket_id=ticket_id)
     if not cases:
         LOG.warning("Could not find any cases for ticket_id %s", ticket_id)
         # Carry the reason in the exception too, so callers that only see the
         # error (and not the log) know what went wrong.
         raise CgError(f"Could not find any cases for ticket_id {ticket_id}")
     customer_id: str = cases[0].customer.internal_id
     return {
         "delivery_source_path": Path(
             self.delivery_path, customer_id, "inbox", str(ticket_id)),
         "rsync_destination_path": Path(
             self.destination_path, customer_id, "inbox"),
     }
Ejemplo n.º 7
0
    def concatenate(self, ticket_id: int, dry_run: bool) -> None:
        """Concatenate the files of every sample folder in a ticket's customer inbox.

        For each non-empty sub-folder of the inbox and for each read direction (1 and 2)
        the matching files are concatenated into one output file. The size of the output
        is compared with the summed size of its inputs, and the inputs are removed only
        when the sizes match.

        Args:
            ticket_id: ticket whose inbox is processed.
            dry_run: when True, only log what would be concatenated.

        Raises:
            CgError: if the concatenated file size differs from the total input size.
        """
        customer_inbox: Path = self.get_inbox_path(ticket_id=ticket_id)
        date: datetime.datetime = self.generate_date_tag(ticket_id=ticket_id)
        if not customer_inbox.exists():
            if dry_run:
                LOG.info("Dry run, nothing will be concatenated in: %s", customer_inbox)
            else:
                LOG.info("The path %s do not exist, nothing will be concatenated", customer_inbox)
            return
        for dir_path in customer_inbox.iterdir():
            # Check the entry type first: listing the contents of a regular file
            # raises NotADirectoryError.
            if not dir_path.is_dir():
                continue
            if not any(dir_path.iterdir()):
                LOG.info("Empty folder found: %s", dir_path)
                continue
            for read_direction in [1, 2]:
                same_direction: List[Path] = self.get_current_read_direction(
                    dir_path=dir_path, read_direction=read_direction
                )
                total_size: int = self.get_total_size(files=same_direction)
                output: Path = self.generate_output_filename(
                    date=date, dir_path=dir_path, read_direction=read_direction
                )
                if dry_run:
                    for file in same_direction:
                        LOG.info("Dry run activated, %s will not be appended to %s", file, output)
                    continue
                LOG.info("Concatenating sample: %s", dir_path.name)
                self.concatenate_same_read_direction(reads=same_direction, output=output)
                concatenated_size = output.stat().st_size
                if total_size != concatenated_size:
                    # Keep the inputs: they are only removed after the size check passes.
                    raise CgError("WARNING data lost in concatenation")

                LOG.info(
                    "QC PASSED: Total size for files used in concatenation match the size of the concatenated file"
                )
                self.remove_files(reads=same_direction)
Ejemplo n.º 8
0
 def get_folders_to_deliver(self, case_id: str, sample_files_present: bool,
                            case_files_present: bool) -> List[str]:
     """Return the folder names to transfer for a case.

     Sample names come first (when sample files are present), followed by the case name
     (when case files are present).

     Raises:
         CgError: if neither sample nor case files are present.
     """
     if not (sample_files_present or case_files_present):
         nothing_to_transfer = "Since no file parameter is true, no files will be transferred"
         LOG.error(nothing_to_transfer)
         raise CgError(nothing_to_transfer)
     sample_folders: List[str] = []
     if sample_files_present:
         for sample in self.status_db.get_samples_by_family_id(family_id=case_id):
             sample_folders.append(sample.name)
     case_folders: List[str] = (
         [self.status_db.family(case_id).name] if case_files_present else []
     )
     return sample_folders + case_folders
Ejemplo n.º 9
0
def import_apptags(
    store: Store,
    excel_path: str,
    prep_category: str,
    sign: str,
    sheet_name: str,
    tag_column: int,
    activate: bool,
    inactivate: bool,
):
    """Syncs all applications from the specified excel file

    Reads the application tags from one column of an orderform sheet and compares them
    with the applications in the store:

    * every tag in the orderform must exist in the store with the given prep category;
      archived ones are un-archived when ``activate`` is set, otherwise a warning is logged
    * active applications of the prep category that are absent from the orderform are
      archived when ``inactivate`` is set, otherwise a warning is logged

    Args:
        store: status database handle.
        excel_path: path to the orderform excel file.
        prep_category: prep category the orderform applications must belong to.
        sign: signature written into the application comment on a change.
        sheet_name: name of the sheet holding the tags.
        tag_column: zero-based index of the column holding the tags.
        activate: un-archive archived applications found in the orderform.
        inactivate: archive active applications missing from the orderform.

    Raises:
        CgError: if no tags are found, a tag is unknown to the store, or an application
            has a different prep category than expected.
    """

    orderform_application_tags = []

    for raw_row in XlFileHelper.get_raw_cells_from_xl(excel_path, sheet_name,
                                                      tag_column):
        tag = _get_tag_from_column(raw_row, tag_column)
        logging.info("Found: %s in orderform", tag)
        orderform_application_tags.append(tag)

    if not orderform_application_tags:
        raise CgError(
            f"No applications found in column {tag_column} (zero-based), exiting")

    # Minute resolution; strftime is used instead of slicing str(datetime), which
    # cuts at the wrong place whenever the microsecond part happens to be zero.
    timestamp_format = "%Y-%m-%d %H:%M"

    for orderform_application_tag in orderform_application_tags:
        application_obj = store.application(tag=orderform_application_tag)

        if not application_obj:
            raise CgError(
                f"Application {orderform_application_tag} was not found")

        if application_obj.prep_category != prep_category:
            raise CgError(
                f"{orderform_application_tag} prep_category, "
                f"expected: {prep_category} was: {application_obj.prep_category}")

        if not application_obj.is_archived:
            logging.info("%s is already active, no need to activate it",
                         application_obj)
        elif activate:
            # Same "<timestamp> <action> by <sign>" layout as the archiving branch below.
            application_obj.comment = (
                f"{application_obj.comment}"
                f"\n{datetime.now().strftime(timestamp_format)} "
                f"Application un-archived by {sign}")
            application_obj.is_archived = False
            logging.info("Un-archiving %s", application_obj)
        else:
            logging.warning(
                "%s is marked as archived but is used in the orderform, consider "
                "activating it",
                application_obj,
            )

    all_active_apps_for_category = store.applications(category=prep_category,
                                                      archived=False)
    orderform_tag_lookup = set(orderform_application_tags)

    for active_application in all_active_apps_for_category:
        if active_application.tag in orderform_tag_lookup:
            logging.info(
                "%s was found in orderform tags, no need to archive it",
                active_application)
        elif inactivate:
            active_application.is_archived = True
            active_application.comment = (
                f"{active_application.comment}"
                f"\n{datetime.now().strftime(timestamp_format)} "
                f"Application archived by {sign}")
            logging.info("Archiving %s", active_application)
        else:
            logging.warning(
                "%s is marked as active but is not used in the orderform, "
                "consider archiving it",
                active_application,
            )

    if not activate and not inactivate:
        # Report-only mode: discard anything pending in the session.
        logging.info("no change mode requested, rolling back transaction")
        store.rollback()
    else:
        logging.info(
            "all applications successfully synced, committing transaction")
        store.commit()
Ejemplo n.º 10
0
class RsyncAPI(MetaAPI):
    """Helpers for delivering data with rsync.

    Holds the delivery settings read from the config and offers helpers to compose
    rsync commands, resolve the source/destination paths of a ticket and register the
    resulting slurm job in Trailblazer.
    """

    def __init__(self, config: CGConfig):
        """Read the delivery settings from *config*."""
        super().__init__(config)
        self.delivery_path: str = config.delivery_path
        self.destination_path: str = config.data_delivery.destination_path
        self.covid_destination_path: str = config.data_delivery.covid_destination_path
        self.covid_report_path: str = config.data_delivery.covid_report_path
        self.base_path: Path = Path(config.data_delivery.base_path)
        self.account: str = config.data_delivery.account
        # Starts out equal to base_path; set_log_dir() swaps in a timestamped
        # sub-folder the first time it is called.
        self.log_dir: Path = Path(config.data_delivery.base_path)
        self.mail_user: str = config.data_delivery.mail_user
        # NOTE(review): the `or` fallback only covers a falsy mapped value; whether an
        # unmapped account should also fall back depends on what SLURM_ACCOUNT_TO_QOS
        # is (a plain dict would raise KeyError here) — confirm.
        self.slurm_quality_of_service: str = SLURM_ACCOUNT_TO_QOS[
            self.account] or SlurmQos.LOW
        self.pipeline: str = Pipeline.RSYNC

    @property
    def trailblazer_config_path(self) -> Path:
        """Return Path to trailblazer config"""
        return self.log_dir / "slurm_job_ids.yaml"

    @property
    def rsync_processes(self) -> Iterable[Path]:
        """Yield existing rsync processes, i.e. the entries of the base path"""
        yield from self.base_path.iterdir()

    @staticmethod
    def format_covid_destination_path(covid_destination_path: str,
                                      customer_id: str) -> str:
        """Return destination path of covid report

        *covid_destination_path* is a %-style template taking the customer id.
        """
        return covid_destination_path % customer_id

    @staticmethod
    def get_trailblazer_config(slurm_job_id: int) -> Dict[str, List[str]]:
        """Return dictionary of slurm job IDs"""
        return {"jobs": [str(slurm_job_id)]}

    @staticmethod
    def write_trailblazer_config(content: dict, config_path: Path) -> None:
        """Write slurm job IDs to a .YAML file used as the trailblazer config"""
        LOG.info(f"Writing slurm jobs to {config_path.as_posix()}")
        with config_path.open("w") as yaml_file:
            yaml.safe_dump(content, yaml_file, indent=4, explicit_start=True)

    @staticmethod
    def process_ready_to_clean(before: dt.datetime, process: Path) -> bool:
        """Return True if the ctime of *process* is earlier than *before*"""
        ctime: dt.datetime = dt.datetime.fromtimestamp(process.stat().st_ctime)
        return before > ctime

    @staticmethod
    def concatenate_rsync_commands(folder_list: List[str],
                                   source_and_destination_paths: Dict[str,
                                                                      Path],
                                   ticket_id: int) -> str:
        """Concatenates the rsync commands for each folder to be transferred

        Every folder is synced from ``<delivery_source_path>/<folder>`` to
        ``<rsync_destination_path>/<ticket_id>``. An empty folder list gives an
        empty string.
        """
        commands: List[str] = []
        for folder in folder_list:
            source_path: Path = source_and_destination_paths[
                "delivery_source_path"] / folder
            destination_path: Path = source_and_destination_paths[
                "rsync_destination_path"] / str(ticket_id)
            commands.append(
                RSYNC_COMMAND.format(source_path=source_path,
                                     destination_path=destination_path))
        return "".join(commands)

    def set_log_dir(self, folder_prefix: str) -> None:
        """Point log_dir at a new ``<folder_prefix>_<timestamp>`` folder under base_path.

        Only has an effect while log_dir still equals base_path, so a log dir that
        has already been set is kept.
        """
        if self.log_dir.as_posix() == self.base_path.as_posix():
            timestamp = dt.datetime.now()
            timestamp_str = timestamp.strftime("%y%m%d_%H_%M_%S_%f")
            folder_name = Path("_".join([folder_prefix, timestamp_str]))
            LOG.info(f"Setting log dir to: {self.base_path / folder_name}")
            self.log_dir: Path = self.base_path / folder_name

    def get_all_cases_from_ticket(self, ticket_id: int) -> List[models.Family]:
        """Return all cases the status database links to the ticket"""
        return self.status_db.get_cases_from_ticket(ticket_id=ticket_id).all()

    def get_source_and_destination_paths(self,
                                         ticket_id: int) -> Dict[str, Path]:
        """Return the rsync source and destination paths for a ticket.

        The customer id is taken from the first case linked to the ticket.

        Returns:
            A dict with ``delivery_source_path``
            (``<delivery_path>/<customer id>/inbox/<ticket_id>``) and
            ``rsync_destination_path`` (``<destination_path>/<customer id>/inbox``).

        Raises:
            CgError: if no cases are found for the ticket.
        """
        cases: List[models.Family] = self.get_all_cases_from_ticket(
            ticket_id=ticket_id)
        if not cases:
            LOG.warning("Could not find any cases for ticket_id %s", ticket_id)
            # Carry the reason in the exception as well as in the log.
            raise CgError(
                f"Could not find any cases for ticket_id {ticket_id}")
        customer_id: str = cases[0].customer.internal_id
        return {
            "delivery_source_path": Path(
                self.delivery_path, customer_id, "inbox", str(ticket_id)),
            "rsync_destination_path": Path(
                self.destination_path, customer_id, "inbox"),
        }

    def add_to_trailblazer_api(self, tb_api: TrailblazerAPI, slurm_job_id: int,
                               ticket_id: int, dry_run: bool) -> None:
        """Add rsync process to trailblazer

        Writes the slurm job id to the trailblazer config file and registers a pending
        analysis that uses the ticket id as case id. Nothing is done on a dry run.
        """
        if dry_run:
            return
        self.write_trailblazer_config(
            content=self.get_trailblazer_config(slurm_job_id),
            config_path=self.trailblazer_config_path,
        )
        tb_api.add_pending_analysis(
            case_id=str(ticket_id),
            analysis_type="other",
            config_path=self.trailblazer_config_path.as_posix(),
            out_dir=self.log_dir.as_posix(),
            slurm_quality_of_service=self.slurm_quality_of_service,
            email=self.mail_user,
            data_analysis=Pipeline.RSYNC,
        )

    def format_covid_report_path(self, case: models.Family,
                                 ticket_id: int) -> str:
        """Return the first file matching the covid report pattern of a case.

        ``covid_report_path`` is a %-style glob template taking the case internal id
        and the ticket id.

        Raises:
            CgError: if no file matches the pattern.
        """
        report_pattern: str = self.covid_report_path % (str(
            case.internal_id), ticket_id)
        covid_report_options: List[str] = glob.glob(report_pattern)
        if not covid_report_options:
            message = f"No report file could be found with path {report_pattern}!"
            LOG.error(message)
            raise CgError(message)
        return covid_report_options[0]
Ejemplo n.º 11
0
 def verify_case_path_exists(self, case_id: str) -> None:
     """Check that the working directory of the case exists.

     Raises:
         CgError: if the path returned by ``get_case_path`` does not exist.
     """
     if not self.get_case_path(case_id=case_id).exists():
         LOG.error("Working directory path for %s does not exist", case_id)
         # Give the exception the same explanation as the log line, like the other
         # verify_* checks do.
         raise CgError(f"Working directory path for {case_id} does not exist")
Ejemplo n.º 12
0
 def check_analysis_ongoing(self, case_id: str) -> None:
     """Refuse to continue while Trailblazer reports the latest analysis as ongoing.

     Raises:
         CgError: if the latest analysis of the case is still ongoing.
     """
     still_running = self.trailblazer_api.is_latest_analysis_ongoing(case_id=case_id)
     if not still_running:
         return
     LOG.warning(f"{case_id} : analysis is still ongoing - skipping")
     raise CgError(f"Analysis still ongoing in Trailblazer for case {case_id}")
Ejemplo n.º 13
0
 def verify_analysis_finish_file_exists(self, case_id: str):
     """Check that the analysis_finish file of the case exists.

     Raises:
         CgError: if the file is missing.
     """
     finish_file = Path(self.get_analysis_finish_path(case_id=case_id))
     if finish_file.exists():
         return
     raise CgError(f"No analysis_finish file found for case {case_id}")
Ejemplo n.º 14
0
 def verify_case_config_file_exists(self, case_id: str):
     """Check that the config file of the case exists.

     Raises:
         CgError: if the file is missing.
     """
     case_config = Path(self.get_case_config_path(case_id=case_id))
     if case_config.exists():
         return
     raise CgError(f"No config file found for case {case_id}")
Ejemplo n.º 15
0
 def verify_deliverables_file_exists(self, case_id: str) -> None:
     """Check that the deliverables file of the case exists.

     Raises:
         CgError: if the file is missing.
     """
     deliverables_file = Path(self.get_deliverables_file_path(case_id=case_id))
     if deliverables_file.exists():
         return
     raise CgError(f"No deliverables file found for case {case_id}")