def test_checkdir_with_str(temporary_folder):
    """`utilities.checkdir` must create the folder when handed its path as a `str`."""
    target = temporary_folder / "target"

    # Precondition: the folder must be absent, otherwise the test proves nothing.
    assert not target.exists()

    # Pass the path as a plain string rather than a `Path` object.
    utilities.checkdir(str(target))

    assert target.exists()
def run(self, output_folder: Path, *reads) -> programio.FastQCOutput:
    """
        Runs the program on the given reads unless the expected output is already present.
    Parameters
    ----------
    output_folder: Path
        Folder the output is written to. Passed through `utilities.checkdir` before use.
    *reads
        The read files forwarded to `get_command` and `get_output`.

    Returns
    -------
    programio.FastQCOutput
        The output object built by `get_output`, whether or not the command was run.
    """
    utilities.checkdir(output_folder)

    command = self.get_command(output_folder, reads)
    output = self.get_output(output_folder, reads)

    # Nothing to do when the output from a previous run is already present.
    if output.exists():
        return output

    systemio.command_runner.run(command, output_folder, srun=False)
    return output
def sample_variant_calling(
        reference: Path,
        samples: List[sampleio.SampleReads],
        project_folder: Path,
        ispop: bool = False,
        threads: int = 16):
    """
        Performs simple variant calling between the supplied reference and the given samples.
    Parameters
    ----------
    reference: Path
        The reference file to call variants against. Must point to an existing file.
    samples: List[sampleio.SampleReads]
        Contains the source reads for variant calling. Trimming is not performed at this stage.
    project_folder: Path
        The folder to use for the overall project. Each sample gets its own subfolder
        (named after the sample) containing a `breseq` folder.
    ispop: bool; default False
        Whether to run variant calling as populations or clones.
    threads: int; default 16
        The number of threads passed to the breseq workflow.

    Returns
    -------
    list
        One entry per sample, in input order, holding whatever `breseq.Breseq.run` returned.

    Raises
    ------
    ValueError
        If the reference or the samples fail validation.
    """
    # First validate the input parameters.
    # The samples are only checked when the reference is valid (short-circuit `or`).
    if not utilities.verify_file_exists(reference) or not sampleio.verify_samples(samples):
        message = "Something went wrong when validating the variant calling parameters!"
        raise ValueError(message)

    # Set up the environment
    utilities.checkdir(project_folder)
    systemio.command_runner.set_command_log(project_folder / "commandlog_variant_calling.sh")
    systemio.command_runner.write_command_to_commandlog(['module', 'load', 'breseq'])

    breseq_workflow = breseq.Breseq(reference, threads=threads, population=ispop)
    # NOTE(review): presumably verifies that breseq can be run before looping - confirm.
    breseq_workflow.test()

    results = []
    # Count from 1 so the progress message reads "1 of N" ... "N of N" instead of "0 of N".
    for index, sample in enumerate(samples, start=1):
        logger.info(
            f"Running variant calling on sample {index} of {len(samples)}: {sample.name}"
        )
        sample_folder = utilities.checkdir(project_folder / sample.name)
        breseq_folder = sample_folder / "breseq"

        result = breseq_workflow.run(breseq_folder, sample.forward, sample.reverse)
        results.append(result)

    return results
def main_shelly():
    """
        Trims and then calls variants (as populations) for every sample listed in
        `~/projects/shelly/samples.tsv`, using `~/projects/shelly/T4.gbff` as the reference.
        All results are written under `~/projects/shelly/output`.
    """
    logger.info(" Running the large dataset of samples.")

    shelly_folder = Path.home() / "projects" / "shelly"
    output_folder = utilities.checkdir(shelly_folder / "output")

    reference_file = shelly_folder / "T4.gbff"
    sample_table = shelly_folder / "samples.tsv"
    raw_samples = sampleio.get_samples_from_table(sample_table)

    # Trim the reads, then convert each trimming result back into a sample.
    trimming_results = trim(raw_samples, output_folder)
    trimmed_samples = [trimmed.as_sample() for trimmed in trimming_results]

    # Call variants
    sample_variant_calling(reference_file, trimmed_samples, output_folder, ispop=True)
def process_reference(
        self, reference: Union[str, Path, sampleio.SampleReads, programio.TrimmomaticOutput]
) -> Path:
    """
        Resolves `reference` to the path of an assembly, assembling it first when needed.
    Parameters
    ----------
    reference: Union[str, Path, sampleio.SampleReads, programio.TrimmomaticOutput]
        Either the path to an existing assembly file, the path to a folder of reads,
        or an object describing a set of reads. A `str` is treated as a filesystem path.

    Returns
    -------
    Path
        `reference` itself when it already points to a file, otherwise the `contigs`
        of the assembly produced by the assembly workflow.
    """
    # The signature accepts `str`, but the checks below only recognise `Path`.
    # Without this coercion a string path would reach `reference.name` and fail.
    if isinstance(reference, str):
        reference = Path(reference)

    # If the reference already points to an assembly, return it without modification.
    if isinstance(reference, Path) and reference.is_file():
        return reference

    # Otherwise, need to convert the input to the path to an assembly.
    # Since the assembly does not yet exist, the input must be a reference to a set of reads.
    # Coerce this reference to either SampleReads or TrimmomaticOutput, which can then be
    # fed through the AssemblyWorkflow.

    # Test whether `reference` is a folder pointing to a set of reads.
    if isinstance(reference, Path) and reference.is_dir():
        reference = get_reads_from_folder(reference)

    # `reference` should now be one of the accepted `Reads` objects.
    assembly_workflow = read_assembly.AssemblyWorkflow(self.project_folder)
    reference_assembly = assembly_workflow.assemble_sample(
        reference, utilities.checkdir(self.project_folder / reference.name))

    return reference_assembly.contigs
def __init__(self, project_folder: Path):
    """Stores the project folder as returned by `utilities.checkdir`."""
    checked_folder = utilities.checkdir(project_folder)
    self.project_folder = checked_folder
def reads_trimmed(project_folder) -> Path:
    """
        Copies `DATA_FOLDER/outputs/trimmomatic` into `project_folder/reads` using
        `utilities.copydir` and returns the destination folder.
    """
    trimmomatic_folder = DATA_FOLDER / "outputs" / "trimmomatic"
    reads_folder = utilities.checkdir(project_folder / "reads")

    utilities.copydir(trimmomatic_folder, reads_folder)
    return reads_folder
def reads_raw(project_folder) -> Path:
    """
        Copies `DATA_FOLDER/inputs/reads` into `project_folder/reads` using
        `utilities.copydir` and returns the destination folder.
    """
    raw_reads_folder = DATA_FOLDER / "inputs" / "reads"
    reads_folder = utilities.checkdir(project_folder / "reads")

    utilities.copydir(raw_reads_folder, reads_folder)
    return reads_folder
def project_folder(tmp_path) -> Path:
    """Returns `tmp_path/project` after passing it through `utilities.checkdir`."""
    return utilities.checkdir(tmp_path / "project")