def fixed_tempfolder_processor(self, tmpdir, num_cores):
    """
    Build a processor whose temporary folder is pinned to the test folder.

    The constructor is given only the basic values it requires. Afterward,
    the processor's ``temp_folder`` is overwritten with the pytest-managed
    folder, since that is where the dummy per-chunk reads files were
    already ensured to exist. Without this, the processor would search
    for chunk outputs in the temporary folder it created for itself
    (inside whatever tempfolder parent it was constructed with).

    Both locations get cleaned up: the processor registers its own
    temporary folder for removal, and pytest removes the folder it owns.

    Parameters
    ----------
    tmpdir : py._path.local.LocalPath
        Folder in which the chunks' dummy output files have been placed.
    num_cores : int
        Parameterized number of cores for the test case.

    Returns
    -------
    pararead.ParaReadProcessor
        Fresh processor that looks for chunk files within ``tmpdir``.

    """
    outfile = tmpdir.join("test-output.txt").strpath
    proc = IdentityProcessor(
        PATH_ALIGNED_FILE, cores=num_cores, outfile=outfile)
    # Redirect chunk-file lookup to where the dummy files actually live.
    proc.temp_folder = tmpdir.strpath
    return proc
def test_creates_fresh_reads_file(self, path_reads_file,
                                  require_aligned, remove_reads_file):
    """ It is register_files() that creates the pysam reads file. """

    # remove_reads_file is requested solely to clear the module-scoped map.
    # by_chromosome=False is passed explicitly so that it cannot be what
    # dictates the requirement regarding aligned reads.
    unaligned_ok = not require_aligned
    proc = IdentityProcessor(
        path_reads_file=path_reads_file, action="test",
        allow_unaligned=unaligned_ok, by_chromosome=False)

    # Before registration, asking for the reads file is a usage error.
    with pytest.raises(CommandOrderException):
        proc.readsfile

    # Registration brings the pysam reads file instance into existence.
    proc.register_files()
    reads = proc.readsfile

    # Validate both the type and the content of the new reads file.
    assert isinstance(reads, AlignmentFile)
    observed_count = sum(1 for _ in reads)
    assert NUM_READS_BY_FILE[path_reads_file] == observed_count
def test_different_format(self, tmpdir, filetype, combined_output_type,
                          which_names, extant_files, num_cores):
    """ Chunk contents really are combined; formats are allowed to differ. """

    # The processor is built by hand so the output type can be controlled.
    outfile_name = "testfile.{}".format(combined_output_type)
    outfile = tmpdir.join(outfile_name).strpath
    proc = IdentityProcessor(
        PATH_ALIGNED_FILE, cores=num_cores, outfile=outfile,
        intermediate_output_type=filetype)
    proc.temp_folder = tmpdir.strpath

    # Decide on one distinctive line per chunk file, then write each.
    line_by_path = {
        chunk_path: "file{}: {}\n".format(index, chunk_path)
        for index, chunk_path in enumerate(extant_files)
    }
    for chunk_path, content in line_by_path.items():
        with open(chunk_path, 'w') as chunk_file:
            chunk_file.write(content)

    # As a control, the combined output must not exist beforehand.
    assert not os.path.exists(outfile)
    chunk_names = self.CHUNK_NAMES[which_names]
    proc.combine(chunk_names, strict=True)
    assert os.path.isfile(outfile)

    # The combined file should hold exactly the lines that were written.
    with open(outfile, 'r') as combined:
        observed = set(combined.readlines())
    expected = set(line_by_path.values())
    assert expected == observed
def test_nothing_to_combine(self, tmpdir, path_logs_file,
                            num_cores, error_if_missing):
    """ Having no output at all to combine is enough to merit a warning. """

    # Build the processor, noting how many log lines exist up front.
    outfile = tmpdir.join("output.txt").strpath
    proc = IdentityProcessor(
        PATH_ALIGNED_FILE, cores=num_cores, outfile=outfile)
    count_before = len(loglines(path_logs_file))

    # Attempt to combine with no chromosomes at all.
    proc.combine(good_chromosomes=[], strict=error_if_missing)
    records = loglines(path_logs_file)

    # Exactly one new record should have appeared, and it's a warning.
    num_new_records = len(records) - count_before
    assert 1 == num_new_records
    first_new_record = records[count_before]
    assert "WARN" in first_new_record
def identity_processor(request, num_cores, tmpdir):
    """
    Supply a basic processor for quickly testing some behavior.

    The unaligned reads file is used only if the requesting test case
    both declares the alignment-status fixture and that fixture's value
    is falsy; in every other case the aligned reads file is used.

    Parameters
    ----------
    request : pytest.fixtures.SubRequest
        Test case requesting the fixture parameterization.
    num_cores : int
        Number of cores, passed along to the processor.
    tmpdir : py._path.local.LocalPath
        Folder in which the placeholder output file path is formed.

    Returns
    -------
    pararead.ParaReadProcessor
        A very basic processor, returning elements with no or very
        trivial modification(s) for speed.

    """
    # Membership is checked first so that the fixture value is fetched
    # only when the test case actually declares that fixture.
    wants_unaligned = (
        IS_ALIGNED_PARAM_NAME in request.fixturenames
        and not request.getfixturevalue(IS_ALIGNED_PARAM_NAME)
    )
    reads_path = PATH_UNALIGNED_FILE if wants_unaligned else PATH_ALIGNED_FILE
    outfile = tmpdir.join("placeholder-testfile.txt").strpath
    return IdentityProcessor(reads_path, cores=num_cores, outfile=outfile)
def test_adds_pysam_kwargs(self, require_aligned,
                           pysam_kwargs, remove_reads_file):
    """ An unaligned input BAM needs check_sq=False in order to be created. """

    # remove_reads_file is requested solely to clear the module-scoped map.
    # by_chromosome=False is passed explicitly so that it cannot be what
    # dictates the requirement regarding aligned reads.
    proc = IdentityProcessor(
        path_reads_file=PATH_UNALIGNED_FILE, action="test",
        allow_unaligned=not require_aligned, by_chromosome=False)

    if not require_aligned:
        # Registration is performed just for effect; the absence of
        # an exception is what constitutes passing here.
        proc.register_files(**pysam_kwargs)
        return

    # When aligned reads are required, registration must fail; which
    # exception is expected depends on whether pysam kwargs were given.
    if pysam_kwargs:
        expected_error = MissingHeaderException
    else:
        expected_error = ValueError
    with pytest.raises(expected_error):
        proc.register_files(**pysam_kwargs)
def test_requires_outfile_or_action(self, filepath):
    """ Either an explicit output file, or an action to derive one, is required. """
    # Only the reads file path is given: neither outfile nor action.
    expecting_value_error = pytest.raises(ValueError)
    with expecting_value_error:
        IdentityProcessor(filepath)