def test_humann2_rna_dna_norm_witten_bell_tsv(self):
    """
    Run humann2_rna_dna_norm on the dna and rna tsv inputs
    Use the witten_bell method and compare each output file with its
    expected counterpart
    """

    # scratch folder and the basename shared by all output files
    work_dir = utils.create_temp_folder("rna_dna_norm_witten_bell")
    output_basename = os.path.join(work_dir, "rna_dna_norm")

    # assemble and run the normalization command
    norm_command = ["humann2_rna_dna_norm"]
    norm_command += ["--input_dna", cfg.rna_dna_norm_dna_input]
    norm_command += ["--input_rna", cfg.rna_dna_norm_rna_input]
    norm_command += ["--output_basename", output_basename]
    norm_command += ["--method", "witten_bell"]
    utils.run_command(norm_command)

    # each extension is paired, by position, with its expected file
    extension_to_expected = zip(cfg.rna_dna_norm_file_names,
                                cfg.rna_dna_norm_witten_bell_output_files)
    for extension, expected_file in extension_to_expected:
        produced_file = output_basename + extension
        matches = utils.files_almost_equal(produced_file, expected_file)
        self.assertTrue(matches)

    # clean up the scratch folder
    utils.remove_temp_folder(work_dir)
def test_humann2_rna_dna_norm_log_10_tsv(self):
    """
    Run humann2_rna_dna_norm on the dna and rna tsv inputs
    Request a log transform with base 10 and compare each output file
    with its expected counterpart
    """

    # scratch folder and the basename shared by all output files
    work_dir = utils.create_temp_folder("rna_dna_norm_log_10")
    output_basename = os.path.join(work_dir, "rna_dna_norm")

    # assemble and run the normalization command
    norm_command = ["humann2_rna_dna_norm"]
    norm_command += ["--input_dna", cfg.rna_dna_norm_dna_input]
    norm_command += ["--input_rna", cfg.rna_dna_norm_rna_input]
    norm_command += ["--output_basename", output_basename]
    norm_command += ["--log_transform", "--log_base", "10"]
    utils.run_command(norm_command)

    # the comparison is "almost equal" so that differing numeric precision
    # in the calculations does not fail the test
    extension_to_expected = zip(cfg.rna_dna_norm_file_names,
                                cfg.rna_dna_norm_log_10_output_files)
    for extension, expected_file in extension_to_expected:
        produced_file = output_basename + extension
        matches = utils.files_almost_equal(produced_file, expected_file)
        self.assertTrue(matches)

    # clean up the scratch folder
    utils.remove_temp_folder(work_dir)
def test_trimmomatic_fastqc_start_no_reference_database_paired_end(self):
    """
    Run kneaddata on paired end input without a reference database
    Pass --run-fastqc-start and check the fastqc, log and paired trim
    output files are reported as expected
    """

    # output folder for this run
    output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # the same fastq file is given as both mates of the pair
    kneaddata_args = ["kneaddata"]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--output", output_dir]
    kneaddata_args.append("--run-fastqc-start")
    utils.run_kneaddata(kneaddata_args)

    # output names are all derived from the input file basename
    basename = utils.file_basename(cfg.fastq_file)
    fastqc_outputs = [
        os.path.join("fastqc", basename + cfg.fastqc_extensions[0]),
        os.path.join("fastqc", basename + cfg.fastqc_extensions[1]),
    ]
    trim_outputs = [
        basename + cfg.log_extension,
        basename + cfg.paired_trim_extensions[0],
        basename + cfg.paired_trim_extensions[1],
    ]
    expected_output_files = fastqc_outputs + trim_outputs

    # every check yields a (result, failure message) pair
    for passed, detail in utils.check_output(expected_output_files,
                                             output_dir):
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann_split_tables_tsv(self):
    """
    Test splitting a tsv file with humann_split_tables

    Runs humann_split_table on the multi-sample gene families table and
    compares each split file with the expected table for its sample.
    """

    input_file = cfg.multi_sample_genefamilies

    # create a temp directory
    temp_directory = utils.create_temp_folder("split_tables_tsv")

    # split the file
    utils.run_command([
        "humann_split_table", "--input", input_file, "--output",
        temp_directory, "--verbose"
    ])

    # expected table for each sample number; the keys are strings because
    # they are matched against a character taken from the file name
    # (comparing that character with an int never matches)
    expected_by_sample = {
        "1": cfg.multi_sample_genefamilies_split1,
        "2": cfg.multi_sample_genefamilies_split2,
    }

    # pair each split file with the expected file for its sample
    file_pairs = []
    for output_name in os.listdir(temp_directory):
        # NOTE(review): assumes the sample number is the last character
        # of the file name once the extension is dropped - confirm against
        # the names humann_split_table writes
        stem = os.path.splitext(os.path.basename(output_name))[0]
        expected_file = expected_by_sample.get(stem[-1:])
        if expected_file is not None:
            file_pairs.append(
                [os.path.join(temp_directory, output_name), expected_file])

    # without this check the test passes when no file is compared at all
    self.assertTrue(
        file_pairs,
        "No split output files matched a sample number in: " +
        temp_directory)

    for temp_file, expected_file in file_pairs:
        self.assertTrue(utils.files_almost_equal(temp_file, expected_file))

    # remove the temp folder
    utils.remove_temp_folder(temp_directory)
def test_humann2_fastq_biom_output_pathways(self):
    """
    Run humann2 on the demo fastq input with biom output and gap fill off
    Read the pathway abundance biom file and check the set of pathway
    identifiers matches the expected set
    """

    # output folder for this run
    output_dir = utils.create_temp_folder("fastq")

    # run humann2 writing biom formatted output
    humann2_args = ["humann2"]
    humann2_args += ["--input", cfg.demo_fastq]
    humann2_args += ["--output", output_dir]
    humann2_args += ["--output-format", "biom"]
    humann2_args += ["--gap-fill", "off"]
    utils.run_humann2(humann2_args)

    # read the pathway abundance table back in
    biom_path = os.path.join(output_dir, "demo_pathabundance.biom")
    table_lines = utils.read_biom_table(biom_path)

    # keep the text before the first ":" of the first tab-delimited column
    # for every line that mentions "PWY"
    pathways_found = set()
    for line in table_lines:
        if "PWY" in line:
            first_column = line.split("\t")[0]
            pathways_found.add(first_column.split(":")[0])

    self.assertEqual(pathways_found,
                     cfg.expected_demo_output_files_biom_pathways)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_trf_only_paired_end(self):
    """
    Run kneaddata on paired end input with --run-trf and --bypass-trim
    Check the log and paired repeats-removed output files are reported
    as expected
    """

    # output folder for this run
    output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # the same fastq file is given as both mates of the pair
    kneaddata_args = ["kneaddata"]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--output", output_dir]
    kneaddata_args += ["--run-trf", "--bypass-trim"]
    utils.run_kneaddata(kneaddata_args)

    # output names are all derived from the input file basename
    basename = utils.file_basename(cfg.fastq_file)
    expected_suffixes = (
        cfg.log_extension,
        cfg.paired_repeats_removed_extensions[0],
        cfg.paired_repeats_removed_extensions[1],
    )
    expected_output_files = [basename + suffix
                             for suffix in expected_suffixes]

    # every check yields a (result, failure message) pair
    for passed, detail in utils.check_output(expected_output_files,
                                             output_dir):
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann2_gene_families_biom_input(self):
    """
    Run humann2 with a biom formatted gene families file as the input
    Check the output files expected for a gene families input are reported
    """

    # output folder for this run
    output_dir = utils.create_temp_folder("gene_families")

    # run humann2 on the biom gene families file
    humann2_args = ["humann2"]
    humann2_args += ["--input", cfg.demo_gene_families_biom]
    humann2_args += ["--output", output_dir]
    utils.run_humann2(humann2_args)

    # the gene families file was the input, so the expected list used here
    # is the one for a gene families input rather than the full demo list
    output_checks = utils.check_output(
        cfg.expected_demo_output_files_genefamilies_input, output_dir)
    for passed, detail in output_checks:
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_trimmomatic_and_trf_no_reference_database_single_end(self):
    """
    Run kneaddata on single end input without a reference database
    Pass --run-trf and check the log, single trim and repeats-removed
    output files are reported as expected
    """

    # output folder for this run
    output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # single end run with trf enabled
    kneaddata_args = ["kneaddata"]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--output", output_dir]
    kneaddata_args.append("--run-trf")
    utils.run_kneaddata(kneaddata_args)

    # output names are all derived from the input file basename
    basename = utils.file_basename(cfg.fastq_file)
    expected_suffixes = (
        cfg.log_extension,
        cfg.single_trim_extension,
        cfg.repeats_removed_extension,
    )
    expected_output_files = [basename + suffix
                             for suffix in expected_suffixes]

    # every check yields a (result, failure message) pair
    for passed, detail in utils.check_output(expected_output_files,
                                             output_dir):
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_trimmomatic_bowtie2_database_and_trf_paired_end_remove_intermedite_temp_output(
        self):
    """
    Run kneaddata on paired end input with a bowtie2 database provided
    Pass --run-trf and --no-discordant, without --store-temp-output
    Check the expected output files and the total number of entries in
    the output folder
    """

    # output folder for this run
    output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # the same fastq file is given as both mates of the pair
    kneaddata_args = ["kneaddata"]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--output", output_dir]
    kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
    kneaddata_args += ["--run-trf", "--no-discordant"]
    utils.run_kneaddata(kneaddata_args)

    # output names are derived from the input file basename
    basename = utils.file_basename(cfg.fastq_file)
    filtered_file_basename = utils.get_filtered_file_basename(
        basename, cfg.bowtie2_db_folder, "bowtie2", True)

    non_empty_suffixes = (
        cfg.log_extension,
        cfg.paired_trim_extensions[0],
        cfg.paired_trim_extensions[1],
        cfg.final_extensions_paired[0],
        cfg.final_extensions_paired[1],
        cfg.paired_repeats_removed_extensions[0],
        cfg.paired_repeats_removed_extensions[1],
    )
    expected_non_empty_output_files = [basename + suffix
                                       for suffix in non_empty_suffixes]

    # every check yields a (result, failure message) pair
    for passed, detail in utils.check_output(
            expected_non_empty_output_files, output_dir):
        self.assertTrue(passed, detail)

    # the contaminated read files are allowed to be empty, so they only
    # count towards the number of entries in the output folder
    expected_output_files = expected_non_empty_output_files + [
        filtered_file_basename + cfg.paired_contaminated_extension[0],
        filtered_file_basename + cfg.paired_contaminated_extension[1],
    ]

    # the output folder must hold exactly as many entries as expected
    entries_found = os.listdir(output_dir)
    self.assertEqual(len(entries_found), len(expected_output_files))

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_trimmomatic_bowtie2_paired_end_remove_intermedite_temp_output_discordant_trf(
        self):
    """
    Test running the default flow of trimmomatic on paired end input
    with a bowtie2 database provided (the same database folder is passed
    to --reference-db twice)
    Test running without --store-temp-output
    Test with discordant alignments (--no-discordant is not passed)
    Test with TRF
    """

    # create a temp directory for output
    tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # run kneaddata test
    command = [
        "kneaddata", "--input", cfg.fastq_file, "--input",
        cfg.fastq_pair_file, "--output", tempdir, "--reference-db",
        cfg.bowtie2_db_folder, "--reference-db", cfg.bowtie2_db_folder,
        "--run-trf"
    ]
    utils.run_kneaddata(command)

    # get the basename of the input file
    basename = utils.file_basename(cfg.fastq_file)

    expected_non_empty_output_files = [
        basename + cfg.log_extension,
        basename + cfg.paired_trim_extensions[0],
        basename + cfg.paired_trim_extensions[1],
        basename + cfg.final_extensions_paired[0],
        basename + cfg.final_extensions_paired[1],
        basename + cfg.paired_repeats_removed_extensions[0],
        basename + cfg.paired_repeats_removed_extensions[1]
    ]

    # check the output files are as expected
    for expression, message in utils.check_output(
            expected_non_empty_output_files, tempdir):
        self.assertTrue(expression, message)

    # only entries with a non-zero size are counted, so output files that
    # are written but empty do not affect the comparison
    output_paths = [
        os.path.join(tempdir, name) for name in os.listdir(tempdir)
    ]
    non_empty_output_files = [
        path for path in output_paths if os.path.getsize(path)
    ]
    self.assertEqual(len(non_empty_output_files),
                     len(expected_non_empty_output_files))

    # remove the temp directory
    utils.remove_temp_folder(tempdir)
def test_trimmomatic_bowtie2_two_databases_paired_end_serial(self):
    """
    Run kneaddata on paired end input with --reference-db given twice
    (both times the same bowtie2 database folder)
    Pass --no-discordant and --serial
    Check the expected output files and that the output folder holds more
    entries than the expected list
    """

    # output folder for this run
    output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # the same fastq file is given as both mates of the pair
    kneaddata_args = ["kneaddata"]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--output", output_dir]
    kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
    kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
    kneaddata_args += ["--no-discordant", "--serial"]
    utils.run_kneaddata(kneaddata_args)

    # output names are derived from the input file basename
    basename = utils.file_basename(cfg.fastq_file)
    filtered_file_basename = utils.get_filtered_file_basename(
        basename, cfg.bowtie2_db_folder, "bowtie2")

    non_empty_suffixes = (
        cfg.log_extension,
        cfg.paired_trim_extensions[0],
        cfg.paired_trim_extensions[1],
        cfg.final_extensions_paired[0],
        cfg.final_extensions_paired[1],
    )
    expected_non_empty_output_files = [basename + suffix
                                       for suffix in non_empty_suffixes]

    # every check yields a (result, failure message) pair
    for passed, detail in utils.check_output(
            expected_non_empty_output_files, output_dir):
        self.assertTrue(passed, detail)

    # the contaminated read files are allowed to be empty, so they only
    # count towards the number of entries in the output folder
    expected_output_files = expected_non_empty_output_files + [
        filtered_file_basename + cfg.paired_contaminated_extension[0],
        filtered_file_basename + cfg.paired_contaminated_extension[1],
    ]

    # NOTE(review): this is a strict "greater than" comparison; if the
    # intent is "at least the expected files" it may need to be
    # assertGreaterEqual - confirm
    entries_found = os.listdir(output_dir)
    self.assertGreater(len(entries_found), len(expected_output_files))

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann_m8(self):
    """
    Run humann on the demo m8 input file
    Check the expected demo output files are reported
    """

    # output folder for this run
    output_dir = utils.create_temp_folder("m8")

    # run humann on the m8 file
    humann_args = ["humann"]
    humann_args += ["--input", cfg.demo_m8]
    humann_args += ["--output", output_dir]
    utils.run_humann(humann_args)

    # every check yields a (result, failure message) pair
    output_checks = utils.check_output(cfg.expected_demo_output_files,
                                       output_dir)
    for passed, detail in output_checks:
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_trimmomatic_bowtie2_database_and_trf_single_end_gzipped_input(
        self):
    """
    Run kneaddata on gzipped single end input with a bowtie2 database
    Pass --store-temp-output and --run-trf
    Check the trim, alignment, final and repeats-removed output files are
    reported as expected
    """

    # output folder for this run
    output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # single end run on the gzipped fastq file
    kneaddata_args = ["kneaddata"]
    kneaddata_args += ["--input", cfg.fastq_file_gzipped]
    kneaddata_args += ["--output", output_dir]
    kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
    kneaddata_args += ["--store-temp-output", "--run-trf"]
    utils.run_kneaddata(kneaddata_args)

    # the expected list uses the basename of the gzipped input as one
    # entry; all other names use the basename of the plain fastq file
    gzipped_basename = utils.file_basename(cfg.fastq_file_gzipped)
    basename = utils.file_basename(cfg.fastq_file)
    filtered_file_basename = utils.get_filtered_file_basename(
        basename, cfg.bowtie2_db_folder, "bowtie2")

    expected_output_files = [gzipped_basename]
    expected_output_files.append(basename + cfg.log_extension)
    expected_output_files.append(basename + cfg.single_trim_extension)
    expected_output_files.append(
        filtered_file_basename + cfg.clean_extension)
    expected_output_files.append(
        filtered_file_basename + cfg.contaminated_extension)
    expected_output_files.append(
        filtered_file_basename + cfg.sam_extension)
    expected_output_files.append(basename + cfg.final_extension)
    expected_output_files.append(basename + cfg.repeats_removed_extension)

    # every check yields a (result, failure message) pair
    for passed, detail in utils.check_output(expected_output_files,
                                             output_dir):
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann_fasta_bypass_prescreen(self):
    """
    Run humann on the demo fasta input file with --bypass-prescreen
    Check the expected demo output files are reported
    """

    # output folder for this run
    output_dir = utils.create_temp_folder("fasta_bypass_prescreen")

    # run humann on the fasta file
    humann_args = ["humann"]
    humann_args += ["--input", cfg.demo_fasta]
    humann_args += ["--output", output_dir]
    humann_args.append("--bypass-prescreen")
    utils.run_humann(humann_args)

    # every check yields a (result, failure message) pair
    output_checks = utils.check_output(cfg.expected_demo_output_files,
                                       output_dir)
    for passed, detail in output_checks:
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_trimmomatic_bowtie2_database_fastqc_end_single_end(self):
    """
    Run kneaddata on single end input with a bowtie2 database provided
    Pass --store-temp-output and --run-fastqc-end
    Check the fastqc, trim, alignment and final output files are reported
    as expected
    """

    # output folder for this run
    output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # single end run with fastqc requested at the end
    kneaddata_args = ["kneaddata"]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--output", output_dir]
    kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
    kneaddata_args += ["--store-temp-output", "--run-fastqc-end"]
    utils.run_kneaddata(kneaddata_args)

    # the fastqc names are built from the basename of the final output
    # file, the other names from the basename of the input file
    basename = utils.file_basename(cfg.fastq_file)
    final_basename = utils.file_basename(basename + cfg.final_extension)
    filtered_file_basename = utils.get_filtered_file_basename(
        basename, cfg.bowtie2_db_folder, "bowtie2")

    fastqc_outputs = [
        os.path.join("fastqc", final_basename + cfg.fastqc_extensions[0]),
        os.path.join("fastqc", final_basename + cfg.fastqc_extensions[1]),
    ]
    workflow_outputs = [
        basename + cfg.log_extension,
        basename + cfg.single_trim_extension,
        filtered_file_basename + cfg.clean_extension,
        filtered_file_basename + cfg.contaminated_extension,
        filtered_file_basename + cfg.sam_extension,
        basename + cfg.final_extension,
    ]
    expected_output_files = fastqc_outputs + workflow_outputs

    # every check yields a (result, failure message) pair
    for passed, detail in utils.check_output(expected_output_files,
                                             output_dir):
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann_fastq_biom_output(self):
    """
    Run humann on the demo fastq input file with biom output requested
    Check the expected biom demo output files are reported
    """

    # output folder for this run
    output_dir = utils.create_temp_folder("fastq")

    # run humann writing biom formatted output
    humann_args = ["humann"]
    humann_args += ["--input", cfg.demo_fastq]
    humann_args += ["--output", output_dir]
    humann_args += ["--output-format", "biom"]
    utils.run_humann(humann_args)

    # every check yields a (result, failure message) pair
    output_checks = utils.check_output(
        cfg.expected_demo_output_files_biom, output_dir)
    for passed, detail in output_checks:
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann_fastq_custom_taxonomic_profile(self):
    """
    Run humann on the demo fastq input file
    Supply the demo bugs list through --taxonomic-profile and check the
    expected demo output files are reported
    """

    # output folder for this run
    output_dir = utils.create_temp_folder("fastq_custom_taxonomic_profile")

    # run humann with the custom taxonomic profile
    humann_args = ["humann"]
    humann_args += ["--input", cfg.demo_fastq]
    humann_args += ["--output", output_dir]
    humann_args += ["--taxonomic-profile", cfg.demo_bugs_list]
    utils.run_humann(humann_args)

    # every check yields a (result, failure message) pair
    output_checks = utils.check_output(cfg.expected_demo_output_files,
                                       output_dir)
    for passed, detail in output_checks:
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann2_split_tables_tsv(self):
    """
    Test splitting a multi-sample table with humann2_split_tables

    The input used here is the biom formatted multi-sample gene families
    file, and the split files checked are the expected biom split files.
    """

    input_file = cfg.multi_sample_genefamilies_biom

    # create a temp directory
    temp_directory = utils.create_temp_folder("split_tables_biom")

    # split the file
    utils.run_command([
        "humann2_split_table", "--input", input_file, "--output",
        temp_directory, "--verbose"
    ])

    # test the split files are as expected
    # check_output is used the same way as in the other tests: it is given
    # the whole collection of file names and each (expression, message)
    # pair it produces is asserted, rather than asserting on its return
    # value for a single file name
    for expression, message in utils.check_output(
            cfg.multi_sample_split_files_biom, temp_directory):
        self.assertTrue(expression, message)

    # remove the temp folder
    utils.remove_temp_folder(temp_directory)
def test_bowtie2_only_single_end(self):
    """
    Run kneaddata on single end input with a bowtie2 database provided
    Pass --store-temp-output and --bypass-trim
    Check the log, alignment and final output files are reported as
    expected
    """

    # output folder for this run
    output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

    # single end run with the trim step bypassed
    kneaddata_args = ["kneaddata"]
    kneaddata_args += ["--input", cfg.fastq_file]
    kneaddata_args += ["--output", output_dir]
    kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
    kneaddata_args += ["--store-temp-output", "--bypass-trim"]
    utils.run_kneaddata(kneaddata_args)

    # output names are derived from the input file basename
    basename = utils.file_basename(cfg.fastq_file)
    filtered_file_basename = utils.get_filtered_file_basename(
        basename, cfg.bowtie2_db_folder, "bowtie2")

    expected_output_files = [basename + cfg.log_extension]
    expected_output_files.append(
        filtered_file_basename + cfg.clean_extension)
    expected_output_files.append(
        filtered_file_basename + cfg.contaminated_extension)
    expected_output_files.append(
        filtered_file_basename + cfg.sam_extension)
    expected_output_files.append(basename + cfg.final_extension)

    # every check yields a (result, failure message) pair
    for passed, detail in utils.check_output(expected_output_files,
                                             output_dir):
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann2_fastq_bypass_translated_search(self):
    """
    Run humann2 on the demo fastq input file with
    --bypass-translated-search
    Check the expected demo output files are reported
    """

    # output folder for this run
    output_dir = utils.create_temp_folder("fastq_bypass_translated_search")

    # run humann2 with the translated search bypassed
    humann2_args = ["humann2"]
    humann2_args += ["--input", cfg.demo_fastq]
    humann2_args += ["--output", output_dir]
    humann2_args.append("--bypass-translated-search")
    utils.run_humann2(humann2_args)

    # every check yields a (result, failure message) pair
    output_checks = utils.check_output(cfg.expected_demo_output_files,
                                       output_dir)
    for passed, detail in output_checks:
        self.assertTrue(passed, detail)

    # clean up the output folder
    utils.remove_temp_folder(output_dir)
def test_humann2_strain_profile_tsv(self):
    """
    Test the tsv file entries running humann2_strain_profile
    Test with critical mean and critical count values

    The command is run from inside a temp folder and the original working
    directory is always restored afterwards, including when the command or
    an assertion fails.
    """

    # create a temp folder
    tempdir = utils.create_temp_folder("strain_profile")

    # move to this folder as the output files will be created in the
    # current working folder
    current_working_directory = os.getcwd()
    try:
        try:
            os.chdir(tempdir)
        except EnvironmentError:
            print("Warning: Unable to move to temp directory: " + tempdir)

        # run the command
        utils.run_command([
            "humann2_strain_profiler", "--input", cfg.strain_profile_input,
            "--critical_mean", "1", "--critical_count", "2"
        ])

        # check the output files are as expected
        # allow for varying precision in the calculations with almost equal
        for file_name, expected_output_file in zip(
                cfg.strain_profile_file_names,
                cfg.strain_profile_m1_n2_output_files):
            self.assertTrue(
                utils.files_almost_equal(os.path.join(tempdir, file_name),
                                         expected_output_file))
    finally:
        # the working directory is process-wide state, so restore it even
        # on failure to avoid affecting tests that run afterwards
        os.chdir(current_working_directory)

    # remove the temp file
    utils.remove_temp_folder(tempdir)