def generate_deliverables_file(context, dry, config_path, case_id):
    """Generate a deliverables file for the case_id.

    Runs ``balsamic report deliver`` inside the configured conda environment.
    With ``dry`` set, the command is only echoed and SUCCESS is returned.

    Raises:
        CgError: if the case is unknown to the store.
        FileNotFoundError: if no sample-config file exists for the case.
    """
    conda_env = context.obj["balsamic"]["conda_env"]
    root_dir = Path(context.obj["balsamic"]["root"])
    case_obj = context.obj["store_api"].family(case_id)
    analysis_api = context.obj["analysis_api"]
    if not case_obj:
        raise CgError(f"Case {case_id} not found")
    if not config_path:
        config_path = analysis_api.get_config_path(root_dir, case_id)
    # Normalize to Path: a user-supplied config_path may arrive as a plain string.
    config_path = Path(config_path)
    if not config_path.is_file():
        raise FileNotFoundError(f"Missing the sample-config file for {case_id}: {config_path}")
    # The trailing single quote below closes the `bash -c '` opened in `command`.
    command_str = f" report deliver" f" --sample-config {config_path}'"
    command = [f"bash -c 'source activate {conda_env}; balsamic"]
    command.extend(command_str.split(" "))
    if dry:
        click.echo(" ".join(command))
        return SUCCESS
    # NOTE(review): shell=True with interpolated paths is shell-injection prone;
    # paths come from trusted config here, but an argv list would be safer.
    process = subprocess.run(" ".join(command), shell=True)
    # Bug fix: compare the exit code, not the CompletedProcess object itself --
    # the old `process == SUCCESS` was always False, so this log never fired.
    if process.returncode == SUCCESS:
        LOG.info("Created deliverables file")
    return process
def analysis(context, case_id, deliverables_file_path, config_path):
    """Store a finished analysis in Housekeeper.

    Locates (or generates) the deliverables file and the sample config for the
    case, bundles the files in Housekeeper, and commits the new analysis to the
    status database.

    Raises:
        CgError: if the case is unknown to the store.
        StoreError: if bundling in Housekeeper fails; both the Housekeeper and
            status-db sessions are rolled back first.
    """
    status = context.obj["store_api"]
    case_obj = status.family(case_id)
    root_dir = Path(context.obj["balsamic"]["root"])
    analysis_api = context.obj["analysis_api"]
    if not case_obj:
        raise CgError(f"Case {case_id} not found")
    if not deliverables_file_path:
        deliverables_file_path = analysis_api.get_deliverables_file_path(case_id, root_dir)
        if not os.path.isfile(deliverables_file_path):
            context.invoke(generate_deliverables_file, case_id=case_id)
    if not config_path:
        config_path = get_config_path(root_dir, case_id)
    hk_api = context.obj["hk_api"]
    try:
        new_analysis = gather_files_and_bundle_in_housekeeper(
            config_path, deliverables_file_path, hk_api, status, case_obj
        )
    except Exception as error:
        hk_api.rollback()
        status.rollback()
        # Bug fix: chain the original exception instead of discarding it --
        # previously StoreError received only the exception *type* from
        # sys.exc_info()[0] and the traceback context was lost.
        raise StoreError(error) from error
    status.add_commit(new_analysis)
    LOG.info("Included files in Housekeeper")
def check_username(self) -> None:
    """Abort with CgError unless running as the configured FOHM uploader.

    A dry run skips the check entirely.
    """
    if self._dry_run:
        return
    current_user = getpass.getuser()
    if current_user == self.config.fohm.valid_uploader:
        return
    raise CgError(
        f"Cannot upload to FOHM as {getpass.getuser()}, please log in as {self.config.fohm.valid_uploader}"
    )
def resolve_panel_bed(self, panel_bed: Optional[str]) -> Optional[str]:
    """Resolve a capture-kit argument to a bed filename.

    A value ending in ``.bed`` is treated as an explicit path and returned
    unchanged; any other value is looked up as a bed-version shortname in the
    status database. Returns None when no panel_bed was given.
    """
    if not panel_bed:
        return None
    if panel_bed.endswith(".bed"):
        return panel_bed
    bed_version = self.status_db.bed_version(panel_bed)
    if not bed_version:
        raise CgError("Please provide a valid panel shortname or a path to panel.bed file!")
    return bed_version.filename
def get_inbox_path(self, ticket_id: int) -> Path:
    """Return the customer inbox path for a ticket.

    Raises CgError when the ticket has no cases, since the customer cannot be
    resolved without at least one case.
    """
    ticket_cases: List[models.Family] = self.get_all_cases_from_ticket(ticket_id=ticket_id)
    if not ticket_cases:
        raise CgError(
            f"The customer id was not identified since no cases for ticket_id {ticket_id} was found"
        )
    customer: str = ticket_cases[0].customer.internal_id
    return self.delivery_path / customer / "inbox" / str(ticket_id)
def get_source_and_destination_paths(self, ticket_id: int) -> Dict[str, Path]:
    """Return the delivery source inbox and the rsync destination for a ticket.

    Raises CgError when no cases are connected to the ticket.
    """
    ticket_cases: List[models.Family] = self.get_all_cases_from_ticket(ticket_id=ticket_id)
    if not ticket_cases:
        LOG.warning("Could not find any cases for ticket_id %s", ticket_id)
        raise CgError()
    customer: str = ticket_cases[0].customer.internal_id
    return {
        "delivery_source_path": Path(self.delivery_path, customer, "inbox", str(ticket_id)),
        "rsync_destination_path": Path(self.destination_path, customer, "inbox"),
    }
def concatenate(self, ticket_id: int, dry_run: bool) -> None:
    """Concatenate fastq files per sample folder and read direction.

    For every sample folder in the ticket's customer inbox, concatenates the
    R1 and R2 files into one file each, verifies that the byte sizes match,
    and removes the source files. In dry-run mode only logs what would happen.

    Raises:
        CgError: if the concatenated file size differs from the total input size.
    """
    customer_inbox: Path = self.get_inbox_path(ticket_id=ticket_id)
    date: datetime.datetime = self.generate_date_tag(ticket_id=ticket_id)
    if not customer_inbox.exists() and dry_run:
        LOG.info("Dry run, nothing will be concatenated in: %s", customer_inbox)
        return
    if not customer_inbox.exists():
        LOG.info("The path %s do not exist, nothing will be concatenated", customer_inbox)
        return
    for dir_path in customer_inbox.iterdir():
        # Bug fix: check is_dir() BEFORE os.listdir() -- listing a regular
        # file raises NotADirectoryError, so the old order could crash on
        # any non-directory entry in the inbox.
        if not dir_path.is_dir():
            continue
        if len(os.listdir(dir_path)) == 0:
            LOG.info("Empty folder found: %s", dir_path)
            continue
        for read_direction in [1, 2]:
            same_direction: List[Path] = self.get_current_read_direction(
                dir_path=dir_path, read_direction=read_direction
            )
            total_size: int = self.get_total_size(files=same_direction)
            output: Path = self.generate_output_filename(
                date=date, dir_path=dir_path, read_direction=read_direction
            )
            if dry_run:
                for file in same_direction:
                    # Lazy %-args instead of eager % formatting.
                    LOG.info("Dry run activated, %s will not be appended to %s", file, output)
            else:
                LOG.info("Concatenating sample: %s", dir_path.name)
                self.concatenate_same_read_direction(reads=same_direction, output=output)
            if dry_run:
                continue
            # QC: the concatenated file must account for every input byte.
            concatenated_size = output.stat().st_size
            if total_size != concatenated_size:
                raise CgError("WARNING data lost in concatenation")
            LOG.info(
                "QC PASSED: Total size for files used in concatenation match the size of the concatenated file"
            )
            self.remove_files(reads=same_direction)
def get_folders_to_deliver(
    self, case_id: str, sample_files_present: bool, case_files_present: bool
) -> List[str]:
    """Returns a list of all the folder names depending if sample and/or case data is to be transferred"""
    if not (sample_files_present or case_files_present):
        LOG.error("Since no file parameter is true, no files will be transferred")
        raise CgError("Since no file parameter is true, no files will be transferred")
    folders: List[str] = []
    if sample_files_present:
        samples = self.status_db.get_samples_by_family_id(family_id=case_id)
        folders.extend(sample.name for sample in samples)
    if case_files_present:
        folders.append(self.status_db.family(case_id).name)
    return folders
def import_apptags(
    store: Store,
    excel_path: str,
    prep_category: str,
    sign: str,
    sheet_name: str,
    tag_column: int,
    activate: bool,
    inactivate: bool,
):
    """Syncs all applications from the specified excel file.

    Reads the application tags from the orderform sheet, verifies each tag
    exists in the store with the expected prep category, optionally
    un-archives tags used in the orderform (``activate``) and archives active
    tags no longer present in it (``inactivate``). The transaction is only
    committed when one of the two change modes was requested; otherwise it is
    rolled back.

    Raises:
        CgError: when no tags are found, a tag is unknown, or a tag has the
            wrong prep category.
    """
    orderform_application_tags = []
    for raw_row in XlFileHelper.get_raw_cells_from_xl(excel_path, sheet_name, tag_column):
        tag = _get_tag_from_column(raw_row, tag_column)
        logging.info("Found: %s in orderform", tag)
        orderform_application_tags.append(tag)

    if not orderform_application_tags:
        message = "No applications found in column %s (zero-based), exiting" % tag_column
        raise CgError(message)

    for orderform_application_tag in orderform_application_tags:
        application_obj = store.application(tag=orderform_application_tag)
        if not application_obj:
            message = "Application %s was not found" % orderform_application_tag
            raise CgError(message)

        if application_obj.prep_category != prep_category:
            message = "%s prep_category, expected: %s was: %s" % (
                orderform_application_tag,
                prep_category,
                application_obj.prep_category,
            )
            raise CgError(message)

        if not application_obj.is_archived:
            logging.info("%s is already active, no need to activate it", application_obj)
            continue
        if activate:
            # Bug fix: a separating space was missing between the timestamp and
            # the note text, unlike the archiving branch below.
            application_obj.comment = (
                f"{application_obj.comment}\n{str(datetime.now())[:-10]} "
                f"Application un-archived by {sign}"
            )
            application_obj.is_archived = False
            logging.info("Un-archiving %s", application_obj)
        else:
            logging.warning(
                "%s is marked as archived but is used in the orderform, consider "
                "activating it",
                application_obj,
            )

    all_active_apps_for_category = store.applications(category=prep_category, archived=False)
    for active_application in all_active_apps_for_category:
        if active_application.tag in orderform_application_tags:
            logging.info(
                "%s was found in orderform tags, no need to archive it", active_application
            )
            continue
        if inactivate:
            active_application.is_archived = True
            active_application.comment = (
                f"{active_application.comment}"
                f"\n{str(datetime.now())[:-10]} "
                f"Application archived by {sign}"
            )
            logging.info("Archiving %s", active_application)
        else:
            logging.warning(
                "%s is marked as active but is not used in the orderform, "
                "consider archiving it",
                active_application,
            )

    if not activate and not inactivate:
        logging.info("no change mode requested, rolling back transaction")
        store.rollback()
    else:
        logging.info("all applications successfully synced, committing transaction")
        store.commit()
class RsyncAPI(MetaAPI):
    """API for delivering customer data with rsync and tracking the jobs in Trailblazer."""

    def __init__(self, config: CGConfig):
        super().__init__(config)
        self.delivery_path: str = config.delivery_path
        self.destination_path: str = config.data_delivery.destination_path
        self.covid_destination_path: str = config.data_delivery.covid_destination_path
        self.covid_report_path: str = config.data_delivery.covid_report_path
        self.base_path: Path = Path(config.data_delivery.base_path)
        self.account: str = config.data_delivery.account
        # log_dir starts at base_path; set_log_dir() moves it to a timestamped folder.
        self.log_dir: Path = Path(config.data_delivery.base_path)
        self.mail_user: str = config.data_delivery.mail_user
        # Bug fix: .get() survives an account missing from the mapping instead
        # of raising KeyError; a falsy mapped value still falls back to the low
        # QOS, preserving the original `or` semantics.
        self.slurm_quality_of_service: str = (
            SLURM_ACCOUNT_TO_QOS.get(self.account) or SlurmQos.LOW
        )
        self.pipeline: str = Pipeline.RSYNC

    @property
    def trailblazer_config_path(self) -> Path:
        """Return Path to trailblazer config"""
        return self.log_dir / "slurm_job_ids.yaml"

    @property
    def rsync_processes(self) -> Iterable[Path]:
        """Yield existing rsync processes"""
        yield from self.base_path.iterdir()

    @staticmethod
    def format_covid_destination_path(covid_destination_path: str, customer_id: str) -> str:
        """Return destination path of covid report"""
        # covid_destination_path is a %-template with one slot for the customer id.
        return covid_destination_path % customer_id

    @staticmethod
    def get_trailblazer_config(slurm_job_id: int) -> Dict[str, List[str]]:
        """Return dictionary of slurm job IDs"""
        return {"jobs": [str(slurm_job_id)]}

    @staticmethod
    def write_trailblazer_config(content: dict, config_path: Path) -> None:
        """Write slurm job IDs to a .YAML file used as the trailblazer config"""
        LOG.info(f"Writing slurm jobs to {config_path.as_posix()}")
        with config_path.open("w") as yaml_file:
            yaml.safe_dump(content, yaml_file, indent=4, explicit_start=True)

    @staticmethod
    def process_ready_to_clean(before: dt.datetime, process: Path) -> bool:
        """Return True if analysis is old enough to be cleaned"""
        ctime: dt.datetime = dt.datetime.fromtimestamp(process.stat().st_ctime)
        return before > ctime

    @staticmethod
    def concatenate_rsync_commands(
        folder_list: List[str],
        source_and_destination_paths: Dict[str, Path],
        ticket_id: int,
    ) -> str:
        """Concatenates the rsync commands for each folder to be transferred"""
        commands = ""
        for folder in folder_list:
            source_path: Path = source_and_destination_paths["delivery_source_path"] / folder
            destination_path: Path = source_and_destination_paths[
                "rsync_destination_path"
            ] / str(ticket_id)
            commands += RSYNC_COMMAND.format(
                source_path=source_path, destination_path=destination_path
            )
        return commands

    def set_log_dir(self, folder_prefix: str) -> None:
        """Move log_dir to a unique timestamped folder; only the first call takes effect."""
        if self.log_dir.as_posix() == self.base_path.as_posix():
            timestamp = dt.datetime.now()
            timestamp_str = timestamp.strftime("%y%m%d_%H_%M_%S_%f")
            folder_name = Path("_".join([folder_prefix, timestamp_str]))
            LOG.info(f"Setting log dir to: {self.base_path / folder_name}")
            self.log_dir: Path = self.base_path / folder_name

    def get_all_cases_from_ticket(self, ticket_id: int) -> List[models.Family]:
        """Return all cases connected to the ticket."""
        cases: List[models.Family] = self.status_db.get_cases_from_ticket(
            ticket_id=ticket_id
        ).all()
        return cases

    def get_source_and_destination_paths(self, ticket_id: int) -> Dict[str, Path]:
        """Return the delivery source inbox and the rsync destination for a ticket.

        Raises CgError when the ticket has no cases.
        """
        cases: List[models.Family] = self.get_all_cases_from_ticket(ticket_id=ticket_id)
        source_and_destination_paths = {}
        if not cases:
            LOG.warning("Could not find any cases for ticket_id %s", ticket_id)
            raise CgError()
        customer_id: str = cases[0].customer.internal_id
        source_and_destination_paths["delivery_source_path"] = Path(
            self.delivery_path, customer_id, "inbox", str(ticket_id)
        )
        source_and_destination_paths["rsync_destination_path"] = Path(
            self.destination_path, customer_id, "inbox"
        )
        return source_and_destination_paths

    def add_to_trailblazer_api(
        self, tb_api: TrailblazerAPI, slurm_job_id: int, ticket_id: int, dry_run: bool
    ) -> None:
        """Add rsync process to trailblazer"""
        if dry_run:
            return
        self.write_trailblazer_config(
            content=self.get_trailblazer_config(slurm_job_id),
            config_path=self.trailblazer_config_path,
        )
        tb_api.add_pending_analysis(
            case_id=str(ticket_id),
            analysis_type="other",
            config_path=self.trailblazer_config_path.as_posix(),
            out_dir=self.log_dir.as_posix(),
            slurm_quality_of_service=self.slurm_quality_of_service,
            email=self.mail_user,
            data_analysis=Pipeline.RSYNC,
        )

    def format_covid_report_path(self, case: models.Family, ticket_id: int) -> str:
        """Return a formatted of covid report path"""
        covid_report_options: List[str] = glob.glob(
            self.covid_report_path % (str(case.internal_id), ticket_id)
        )
        if not covid_report_options:
            LOG.error(
                f"No report file could be found with path"
                f" {self.covid_report_path % (str(case.internal_id), ticket_id)}!"
            )
            raise CgError()
        return covid_report_options[0]
def verify_case_path_exists(self, case_id: str) -> None:
    """Raise CgError when the case working directory is missing."""
    case_path = self.get_case_path(case_id=case_id)
    if case_path.exists():
        return
    LOG.error("Working directory path for %s does not exist", case_id)
    raise CgError()
def check_analysis_ongoing(self, case_id: str) -> None:
    """Raise CgError if Trailblazer reports the latest analysis as still running."""
    if not self.trailblazer_api.is_latest_analysis_ongoing(case_id=case_id):
        return
    LOG.warning(f"{case_id} : analysis is still ongoing - skipping")
    raise CgError(f"Analysis still ongoing in Trailblazer for case {case_id}")
def verify_analysis_finish_file_exists(self, case_id: str):
    """Raise CgError when the analysis_finish file for the case is absent."""
    finish_path = Path(self.get_analysis_finish_path(case_id=case_id))
    if finish_path.exists():
        return
    raise CgError(f"No analysis_finish file found for case {case_id}")
def verify_case_config_file_exists(self, case_id: str):
    """Raise CgError when the case config file is absent."""
    config_file = Path(self.get_case_config_path(case_id=case_id))
    if config_file.exists():
        return
    raise CgError(f"No config file found for case {case_id}")
def verify_deliverables_file_exists(self, case_id: str) -> None:
    """Raise CgError when the deliverables file for the case is absent."""
    deliverables = Path(self.get_deliverables_file_path(case_id=case_id))
    if deliverables.exists():
        return
    raise CgError(f"No deliverables file found for case {case_id}")