def test_fastq_to_spring_sbatch(
    crunchy_config_dict: dict,
    compression_object: CompressionData,
    sbatch_process: Process,
    sbatch_job_number: int,
    caplog,
):
    """Test that fastq_to_spring writes an sbatch file, returns the job number and sets pending"""
    caplog.set_level(logging.DEBUG)

    # GIVEN a crunchy API whose SLURM API uses the sbatch process fixture
    api = CrunchyAPI(crunchy_config_dict)
    api.slurm_api.process = sbatch_process

    # GIVEN the sbatch path derived from the SPRING path and run name
    expected_sbatch: Path = get_fastq_to_spring_sbatch_path(
        log_dir=get_log_dir(compression_object.spring_path),
        run_name=compression_object.run_name,
    )

    # GIVEN that neither the sbatch file nor the pending path exists yet
    assert not expected_sbatch.is_file()
    assert compression_object.pending_exists() is False

    # WHEN compressing the FASTQ files to SPRING
    returned_job: int = api.fastq_to_spring(compression_obj=compression_object)

    # THEN the sbatch file has been written
    assert expected_sbatch.is_file()

    # THEN the job number of the submitted sbatch job is returned
    assert returned_job == sbatch_job_number

    # THEN the pending path has been created
    assert compression_object.pending_exists() is True
def test_spring_to_fastq(
    compression_object: CompressionData,
    spring_metadata_file: Path,
    crunchy_config_dict: dict,
    mocker,
):
    """Test the SPRING to FASTQ method

    SlurmAPI.submit_sbatch is patched, so the test checks that decompression
    triggers a submission and that the pending path gets created.
    """
    # GIVEN an existing SPRING metadata file
    assert spring_metadata_file.exists()

    # GIVEN a crunchy API where the sbatch submission is patched
    submit_sbatch_patch = mocker.patch.object(SlurmAPI, "submit_sbatch")
    api = CrunchyAPI(crunchy_config_dict)

    # GIVEN that the pending path does not exist
    assert compression_object.pending_exists() is False

    # WHEN calling spring_to_fastq on the compression object
    api.spring_to_fastq(compression_obj=compression_object)

    # THEN the sbatch submission was called
    submit_sbatch_patch.assert_called()

    # THEN the pending path was created
    assert compression_object.pending_exists() is True
def fixture_compression_object(
    fastq_stub: Path, original_fastq_data: CompressionData
) -> CompressionData:
    """Return a compression data object built on fastq_stub, with both original FASTQ files copied in"""
    compression_data = CompressionData(fastq_stub)
    # Copy first and second read file from the original location to the working location
    for fastq_attribute in ("fastq_first", "fastq_second"):
        source = getattr(original_fastq_data, fastq_attribute)
        destination = getattr(compression_data, fastq_attribute)
        shutil.copy(str(source), str(destination))
    return compression_data
def test_get_run_name():
    """Test that run_name equals the last component of the stub path"""
    # GIVEN a run name and a stub path ending with that run name
    expected_run_name = "a_run"
    stub_path = Path("/path/to/dir") / expected_run_name

    # GIVEN a compression data object created from the stub path
    compression_data = CompressionData(stub_path)

    # WHEN fetching the run name
    fetched_run_name = compression_data.run_name

    # THEN the fetched run name is the expected one
    assert fetched_run_name == expected_run_name
def get_spring_paths(version_obj: hk_models.Version) -> List[CompressionData]:
    """Return compression data objects for the SPRING files of a version

    Files are fetched with get_hk_files_dict using the "spring" tag. Every
    file with the suffix ".spring" yields one CompressionData created from
    the file path with that suffix removed. An empty list is returned when
    no files dictionary is available.
    """
    tagged_files = get_hk_files_dict(tags=["spring"], version_obj=version_obj)
    if tagged_files is None:
        return []
    return [
        CompressionData(spring_file.with_suffix(""))
        for spring_file in tagged_files
        if spring_file.suffix == ".spring"
    ]
def get_compression_data(fastq_files: List[Path]) -> List[CompressionData]:
    """Return one compression data object per unique FASTQ run

    The stub returned by get_fastq_stub identifies the run a FASTQ file
    belongs to. Files without a stub are logged and skipped. Objects are
    returned in the order their run was first encountered.
    """
    seen_runs = set()
    run_objects = []
    for fastq_path in fastq_files:
        stub = get_fastq_stub(fastq_path)
        if stub is None:
            LOG.info("Invalid FASTQ name %s", fastq_path)
            continue
        run_key = str(stub)
        if run_key in seen_runs:
            # A file from this run has already produced an object
            continue
        seen_runs.add(run_key)
        run_objects.append(CompressionData(stub))
    return run_objects
def check_fastqs(compression_obj: CompressionData) -> bool:
    """Check if the FASTQ files of a run have the correct status

    The checks are performed in this order, stopping at the first failure:
        - Are both FASTQ paths absolute?
        - Does the FASTQ pair pass compression_obj.pair_exists()?
        - Is any of the files hardlinked more than once?
        - Is any of the files a symbolic link?
        - Is the first FASTQ file older than FASTQ_DATETIME_DELTA?

    Returns:
        True if all checks pass, otherwise False
    """
    fastq_paths = (compression_obj.fastq_first, compression_obj.fastq_second)

    # Both paths must be absolute, one relative path is enough to reject the run
    if not all(compression_obj.is_absolute(fastq_path) for fastq_path in fastq_paths):
        return False

    if not compression_obj.pair_exists():
        return False

    # Check if any file is hardlinked multiple times
    if any(compression_obj.get_nlinks(fastq_path) > 1 for fastq_path in fastq_paths):
        LOG.info("More than 1 inode to same file for %s", compression_obj.run_name)
        return False

    # Check if any FASTQ file is a symlink (soft link)
    if any(compression_obj.is_symlink(fastq_path) for fastq_path in fastq_paths):
        LOG.info("Run %s has symbolic link, skipping run", compression_obj.run_name)
        return False

    # Only the change date of the first FASTQ file is compared to the delta
    date_changed = compression_obj.get_change_date(compression_obj.fastq_first)
    today = datetime.datetime.now()

    # Check if date is older than FASTQ_DELTA
    if date_changed > today - FASTQ_DATETIME_DELTA:
        LOG.info("FASTQ files are not old enough")
        return False

    return True
def fixture_original_fastq_data(fastq_dir: Path, run_name) -> CompressionData:
    """Return a compression object whose stub is the run name inside the original FASTQ directory"""
    original_stub = fastq_dir / run_name
    return CompressionData(original_stub)