Esempio n. 1
0
def test_checkdir_with_str(temporary_folder):
    """Check that `utilities.checkdir` creates a directory given as a plain string."""
    target = temporary_folder / "target"

    # Precondition: the directory must be absent, otherwise the final
    # assertion would pass without `checkdir` doing anything.
    assert not target.exists()

    # Pass the path as `str` rather than as a path object.
    utilities.checkdir(str(target))

    assert target.exists()
Esempio n. 2
0
    def run(self, output_folder: Path, *reads) -> programio.FastQCOutput:
        """
        Run the program on `reads`, skipping the call when the output already exists.

        Parameters
        ----------
        output_folder: Path
            Passed through `utilities.checkdir` before anything else, then used
            as the location handed to the command runner.
        *reads
            The read files; forwarded as a tuple to `get_command` and `get_output`.

        Returns
        -------
        programio.FastQCOutput
            The object produced by `self.get_output`, whether or not the command
            had to be executed.
        """
        utilities.checkdir(output_folder)
        program_command = self.get_command(output_folder, reads)
        expected_output = self.get_output(output_folder, reads)

        # Results from a previous run are reused instead of being regenerated.
        if expected_output.exists():
            return expected_output

        systemio.command_runner.run(program_command, output_folder, srun=False)
        return expected_output
Esempio n. 3
0
def sample_variant_calling(reference: Path,
                           samples: List[sampleio.SampleReads],
                           project_folder: Path,
                           ispop: bool = False):
    """
    Perform simple variant calling between the supplied reference and the given samples.

    Parameters
    ----------
    reference: Path
        The reference the samples are compared against. Must point to an existing file.
    samples: List[sampleio.SampleReads]
        Contains the source reads for variant calling. Trimming is not performed at this stage.
    project_folder: Path
        The folder to use for the overall project. The command log and one
        sub-folder per sample (named after the sample) are placed inside it.
    ispop: bool; default False
        Whether to run variant calling as populations or clones.

    Returns
    -------
    list
        One entry per sample, in input order; each entry is whatever
        `breseq.Breseq.run` returned for that sample.

    Raises
    ------
    ValueError
        If the reference file or the samples fail validation.
    """
    # First validate the input parameters. `and` short-circuits, so the samples
    # are only inspected once the reference is known to exist.
    inputs_are_valid = (utilities.verify_file_exists(reference)
                        and sampleio.verify_samples(samples))
    if not inputs_are_valid:
        message = "Something went wrong when validating the variant calling parameters!"
        raise ValueError(message)

    # Set up the environment
    utilities.checkdir(project_folder)

    systemio.command_runner.set_command_log(project_folder /
                                            "commandlog_variant_calling.sh")
    systemio.command_runner.write_command_to_commandlog(
        ['module', 'load', 'breseq'])

    breseq_workflow = breseq.Breseq(reference, threads=16, population=ispop)
    breseq_workflow.test()

    results = []
    total_samples = len(samples)
    # Count from 1 so the progress message reads "1 of N" ... "N of N" instead
    # of starting at 0 and never reaching N.
    for index, sample in enumerate(samples, start=1):
        logger.info(
            f"Running variant calling on sample {index} of {total_samples}: {sample.name}"
        )
        sample_folder = utilities.checkdir(project_folder / sample.name)
        breseq_folder = sample_folder / "breseq"

        result = breseq_workflow.run(breseq_folder, sample.forward,
                                     sample.reverse)
        results.append(result)
    return results
Esempio n. 4
0
def main_shelly():
    """
    Trim and variant-call the samples listed for the "shelly" project.

    Everything is read from `~/projects/shelly`: the sample table
    (`samples.tsv`) and the reference (`T4.gbff`). Trimming and variant calling
    both write into the `output` sub-folder, and variant calling is run with
    `ispop=True`.
    """
    logger.info(" Running the large dataset of samples.")

    shelly_folder = Path.home() / "projects" / "shelly"
    output_folder = utilities.checkdir(shelly_folder / "output")

    raw_samples = sampleio.get_samples_from_table(shelly_folder / "samples.tsv")

    # Trim the reads, then convert each trimming result back into a sample.
    trimmed_samples = [
        trimmed.as_sample() for trimmed in trim(raw_samples, output_folder)
    ]

    # Call variants
    sample_variant_calling(
        shelly_folder / "T4.gbff",
        trimmed_samples,
        output_folder,
        ispop=True,
    )
Esempio n. 5
0
    def process_reference(
        self, reference: Union[str, Path, sampleio.SampleReads,
                               programio.TrimmomaticOutput]
    ) -> Path:
        """
        Resolve `reference` to the path of an assembly, assembling reads if needed.

        Parameters
        ----------
        reference: Union[str, Path, sampleio.SampleReads, programio.TrimmomaticOutput]
            - A path (`str` or `Path`) to an existing file: returned as a `Path`
              without further processing.
            - A path (`str` or `Path`) to an existing folder: the reads are
              loaded with `get_reads_from_folder` and then assembled.
            - A reads object: assembled directly.

        Returns
        -------
        Path
            Either the supplied file itself or the `contigs` attribute of the
            newly generated assembly.
        """
        # The signature accepts plain strings, but every check below needs a
        # `Path`; without this a string would skip both checks and then fail
        # on `reference.name`.
        if isinstance(reference, str):
            reference = Path(reference)

        if isinstance(reference, Path):
            # If the reference already points to an assembly, return it without modification.
            if reference.is_file():
                return reference

            # Otherwise the assembly does not exist yet, so the input must refer to a set of
            # reads. A folder is coerced into a reads object that can be fed through the
            # AssemblyWorkflow.
            if reference.is_dir():
                reference = get_reads_from_folder(reference)

        # `reference` should now be one of the accepted reads objects.
        assembly_workflow = read_assembly.AssemblyWorkflow(self.project_folder)
        reference_assembly = assembly_workflow.assemble_sample(
            reference,
            utilities.checkdir(self.project_folder / reference.name))

        return reference_assembly.contigs
Esempio n. 6
0
 def __init__(self, project_folder: Path):
     """Store the project folder after passing it through `utilities.checkdir`."""
     # Keep the value `checkdir` hands back, not the raw argument.
     checked_folder = utilities.checkdir(project_folder)
     self.project_folder = checked_folder
def reads_trimmed(project_folder) -> Path:
    """Copy the bundled `outputs/trimmomatic` data into `<project_folder>/reads` and return that folder."""
    trimmomatic_data = DATA_FOLDER / "outputs" / "trimmomatic"
    reads_folder = utilities.checkdir(project_folder / "reads")
    utilities.copydir(trimmomatic_data, reads_folder)
    return reads_folder
def reads_raw(project_folder) -> Path:
    """Copy the bundled `inputs/reads` data into `<project_folder>/reads` and return that folder."""
    raw_data = DATA_FOLDER / "inputs" / "reads"
    reads_folder = utilities.checkdir(project_folder / "reads")
    utilities.copydir(raw_data, reads_folder)
    return reads_folder
def project_folder(tmp_path) -> Path:
    """Return the result of `utilities.checkdir` for a `project` folder under `tmp_path`."""
    return utilities.checkdir(tmp_path / "project")