def test_minimizers(self):
    """
    ganon build with and without minimizers (window size), comparing the size of both filters
    """
    params = self.default_params.copy()
    ibf_files = []
    # First pass builds with minimizers (window size 23), second pass without (window size 0)
    for window_size, run_name in ((23, "test_minimizers"), (0, "test_without_minimizers")):
        params["db_prefix"] = self.results_dir + run_name
        params["window_size"] = window_size

        cfg = Config("build", **params)
        build_ok = ganon.main(cfg=cfg)
        self.assertTrue(build_ok, "ganon build exited with an error")

        # General sanity check of results
        res = build_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon build has inconsistent results")
        ibf_files.append(params["db_prefix"] + ".ibf")

    ibf_with_minimizers, ibf_without_minimizers = ibf_files

    # The filter built with minimizers is expected to be the smaller file
    size_with = os.path.getsize(ibf_with_minimizers)
    size_without = os.path.getsize(ibf_without_minimizers)
    self.assertTrue(size_with < size_without,
                    "Filter with minimizers should be smaller")
def test_header(self):
    """
    Test ganon table with the "lineage" and "taxid" header options
    """
    params = self.default_params.copy()
    params["output_file"] = self.results_dir + "test_header.tsv"

    # (header option, per-column check, failure message)
    header_checks = (
        # lineage headers must contain the "|" separator
        ("lineage", lambda col: "|" in col,
         "ganon table headers are wrong (lineage)"),
        # taxid headers are purely numeric for this specific test
        ("taxid", lambda col: col.isdigit(),
         "ganon table headers are wrong (taxid)"),
    )
    for header, column_ok, failure_msg in header_checks:
        params["header"] = header

        cfg = Config("table", **params)
        self.assertTrue(ganon.main(cfg=cfg),
                        "ganon table exited with an error")

        # General sanity check of results
        res = table_sanity_check_and_parse(vars(cfg))
        self.assertIsNotNone(res, "ganon table has inconsistent results")

        column_flags = [column_ok(c) for c in res["out_pd"].columns.values]
        self.assertTrue(all(column_flags), failure_msg)
def main(which: str = None, cfg=None, **kwargs):
    """
    Run one ganon sub-command.

    Three entry points:
      - main(): no arguments, a Config is created with which=None
        (per the original notes, the configuration is then parsed from sys.argv)
      - main(which, **kwargs): e.g. main("build", db_prefix="test", ...),
        a Config is generated from the arguments and run
      - main(cfg=cfg): run directly with an existing Config()

    Returns False when validation or path set-up fails, or when cfg.which
    is not a supported sub-command; otherwise returns whatever the
    sub-command function returns.
    """
    if cfg is None:
        cfg = Config(which, **kwargs)

    # Validate
    if not cfg.validate():
        return False

    # Set paths
    if not cfg.set_paths():
        return False

    # Reject unsupported sub-commands up front. Without this check the
    # if/elif chain below leaves "ret" unassigned and "return ret" raises
    # UnboundLocalError instead of reporting a failure.
    if cfg.which not in ('build', 'update', 'classify', 'report'):
        import sys
        print("Unknown ganon command: " + str(cfg.which), file=sys.stderr)
        return False

    tx_total = time.time()

    print_log("- - - - - - - - - -", cfg.quiet)
    print_log("   _  _  _  _  _   ", cfg.quiet)
    print_log("  (_|(_|| |(_)| |  ", cfg.quiet)
    print_log("   _|   v. " + str(cfg.version), cfg.quiet)
    print_log("- - - - - - - - - -", cfg.quiet)

    if cfg.which == 'build':
        ret = build(cfg)
    elif cfg.which == 'update':
        ret = update(cfg)
    elif cfg.which == 'classify':
        ret = classify(cfg)
    else:
        # only 'report' is left after the check above
        ret = report(cfg)

    print_log(
        "Total elapsed time: " + str("%.2f" % (time.time() - tx_total)) +
        " seconds.", cfg.quiet)

    return ret
def test_update_complete_add_remove(self):
    """
    Test run update complete adding and removing sequences (reusing same bins for new sequences)
    """
    params = self.default_params.copy()
    params["output_db_prefix"] = self.results_dir + "test_update_complete_add_remove"
    params["update_complete"] = True
    params["seq_info_file"] = data_dir + "update/bacteria_half_virus_seqinfo.txt"
    # .copy() is shallow: build a new list instead of extending in place,
    # otherwise the list held by self.default_params is modified as well
    params["input_files"] = params["input_files"] + [
        data_dir + "build/bacteria_NC_010333.1.fasta.gz",
        data_dir + "build/bacteria_NC_017164.1.fasta.gz"
    ]

    # Build config from params
    cfg = Config("update", **params)
    # Run
    self.assertTrue(ganon.main(cfg=cfg),
                    "ganon update exited with an error")
    # General sanity check of results
    res = update_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon update has inconsistent results")

    # Specific test - should have 6 taxid targets (2 bacteria, 4 viruses)
    self.assertEqual(res["bins_pd"]["taxid"].drop_duplicates().shape[0], 6,
                     "update failed to add new sequences")
    # Should re-use bins and reach max 18 (41 before)
    self.assertEqual(res["bins_pd"]["binid"].max(), 18,
                     "bins were not re-used")
    self.assertEqual(res["map_pd"]["binid"].max(), 18,
                     "bins were not re-used")

    # Classify against updated index
    params_classify = {
        "db_prefix": params["output_db_prefix"],
        "single_reads": [data_dir + "vir.sim.1.fq", data_dir + "bac.sim.1.fq"],
        "abs_cutoff": 0,
        "output_lca": True,
        "output_all": True,
        "quiet": True,
        # own prefix, so the classification output does not overwrite
        # files written under the generic "test_default" prefix
        "output_prefix": self.results_dir + "test_update_complete_add_remove"
    }
    # Build config from params
    cfg_classify = Config("classify", **params_classify)
    # Run
    self.assertTrue(ganon.main(cfg=cfg_classify),
                    "ganon classify exited with an error")
    # General sanity check of results
    res = classify_sanity_check_and_parse(vars(cfg_classify))
    self.assertIsNotNone(res, "ganon classify has inconsistent results")

    # Specific - no matches on the removed entries (taxid 1052684)
    self.assertFalse(res["all_pd"]["target"].isin(["1052684"]).any(),
                     "ganon classify has inconsistent results")
    # should return Viruses and Bacteria matches on the updated index
    superkingdoms = res["tre_pd"][res["tre_pd"]["rank"] == "superkingdom"]
    self.assertTrue(
        superkingdoms["name"].isin(["Bacteria", "Viruses"]).all(),
        "classification on updated index failed")
def test_update_complete_add(self):
    """
    Test run update complete adding sequences only
    """
    params = self.default_params.copy()
    params["output_db_prefix"] = self.results_dir + "test_update_complete_add"
    params["update_complete"] = True
    params["seq_info_file"] = data_dir + "update/bacteria_virus_seqinfo.txt"
    # .copy() is shallow: build a new list instead of extending in place,
    # otherwise the list held by self.default_params is modified as well
    params["input_files"] = params["input_files"] + [
        data_dir + "build/bacteria_NC_010333.1.fasta.gz",
        data_dir + "build/bacteria_NC_017164.1.fasta.gz",
        data_dir + "build/bacteria_NC_017163.1.fasta.gz",
        data_dir + "build/bacteria_NC_017543.1.fasta.gz"
    ]

    # Build config from params
    cfg = Config("update", **params)
    # Run
    self.assertTrue(ganon.main(cfg=cfg),
                    "ganon update exited with an error")
    # General sanity check of results
    res = update_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon update has inconsistent results")

    # Specific - check if number of bins increased
    self.assertTrue(res["map_pd"].binid.max() > 41, "no bins were added")

    # Classify simulated reads against updated index
    params_classify = {
        "db_prefix": params["output_db_prefix"],
        "single_reads": [data_dir + "vir.sim.1.fq", data_dir + "bac.sim.1.fq"],
        "abs_cutoff": 0,
        "output_lca": True,
        "output_all": True,
        "quiet": True,
        "output_prefix": self.results_dir + "test_update_complete_add"
    }
    # Build config from params
    cfg_classify = Config("classify", **params_classify)
    # Run
    self.assertTrue(ganon.main(cfg=cfg_classify),
                    "ganon classify exited with an error")
    # General sanity check of results
    res = classify_sanity_check_and_parse(vars(cfg_classify))
    self.assertIsNotNone(res, "ganon classify has inconsistent results")

    # Specific test - should return Viruses and Bacteria matches on the updated index
    superkingdoms = res["tre_pd"][res["tre_pd"]["rank"] == "superkingdom"]
    self.assertTrue(
        superkingdoms["name"].isin(["Bacteria", "Viruses"]).all(),
        "classification on updated index failed")
def test_update_complete_remove(self):
    """
    Test run update complete removing sequences only
    """
    params = self.default_params.copy()
    params["output_db_prefix"] = self.results_dir + "test_update_complete_remove"
    params["update_complete"] = True
    params["seq_info_file"] = data_dir + "update/bacteria_half_seqinfo.txt"
    params["input_files"] = [
        data_dir + "build/bacteria_NC_010333.1.fasta.gz",
        data_dir + "build/bacteria_NC_017164.1.fasta.gz"
    ]

    # Build config from params
    cfg = Config("update", **params)
    # Run
    self.assertTrue(ganon.main(cfg=cfg),
                    "ganon update exited with an error")
    # General sanity check of results
    res = update_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon update has inconsistent results")

    # Specific - keep only two entries
    self.assertEqual(res["bins_pd"]["seqid"].drop_duplicates().shape[0], 2,
                     "sequences not removed from bins")
    self.assertEqual(res["map_pd"]["target"].drop_duplicates().shape[0], 2,
                     "sequences not removed from .map")

    # Classify against reduced updated index
    params_classify = {
        "db_prefix": params["output_db_prefix"],
        # passed as a list, like the other classify calls of these tests
        "single_reads": [data_dir + "bac.sim.1.fq"],
        "abs_cutoff": 0,
        "output_lca": True,
        "output_all": True,
        "quiet": True,
        # own prefix, so the classification output does not overwrite
        # files written under the generic "test_default" prefix
        "output_prefix": self.results_dir + "test_update_complete_remove"
    }
    # Build config from params
    cfg_classify = Config("classify", **params_classify)
    # Run
    self.assertTrue(ganon.main(cfg=cfg_classify),
                    "ganon classify exited with an error")
    # General sanity check of results
    res = classify_sanity_check_and_parse(vars(cfg_classify))
    self.assertIsNotNone(res, "ganon classify has inconsistent results")

    # Specific - only matches on remaining sequences (taxids 366602 and 470)
    self.assertTrue(res["all_pd"]["target"].isin(["366602", "470"]).all(),
                    "ganon classify has inconsistent results")
def test_no_rank_no_root_unclassified(self):
    """
    Test ganon table without a specific --rank, without root, and with an unclassified label
    """
    failure_msg = "ganon table without rank failed"
    params = self.default_params.copy()
    params.update({
        "output_file": self.results_dir + "test_no_rank_no_root_unc.tsv",
        "rank": "",
        "header": "lineage",
        "no_root": True,
        "unclassified_label": "unclassified",
    })

    cfg = Config("table", **params)
    self.assertTrue(ganon.main(cfg=cfg), "ganon table exited with an error")

    # General sanity check of results
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")

    # 50 columns are expected in the output table
    self.assertEqual(res["out_pd"].columns.values.size, 50, failure_msg)

    # Spot-check some headers: lineages no longer start at the root ("1|"),
    # and the unclassified label shows up as a column
    for expected_header in ("2", "2|1239|909932|1843489|31977", "unclassified"):
        self.assertTrue(expected_header in res["out_pd"].columns.values,
                        failure_msg)

    # Sum of counts should be total of all reads, with remaining root matches counted as unclassified
    total_counts = res["out_pd"].sum().sum()
    self.assertEqual(total_counts, 3786439, failure_msg)
def test_duplicated(self):
    """
    Test duplicated entries on update
    """
    params = self.default_params.copy()
    params["output_db_prefix"] = self.results_dir + "test_duplicated"
    # .copy() is shallow: build a new list instead of extending in place,
    # otherwise the list held by self.default_params is modified as well
    params["input_files"] = params["input_files"] + [
        data_dir + "build/bacteria_NC_010333.1.fasta.gz",
        data_dir + "build/bacteria_NC_017164.1.fasta.gz",
        data_dir + "build/bacteria_NC_017163.1.fasta.gz",
        data_dir + "build/bacteria_NC_017543.1.fasta.gz"
    ]
    params["seq_info_file"] = data_dir + "update/bacteria_virus_seqinfo.txt"

    # Build config from params
    cfg = Config("update", **params)
    # Run
    self.assertTrue(ganon.main(cfg=cfg),
                    "ganon update exited with an error")
    # General sanity check of results
    res = update_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon update has inconsistent results")

    # Specific test - check if there are no duplicates in the datastructure
    self.assertFalse(res["tax_pd"]["taxid"].duplicated().any(),
                     "duplicated entries on .tax after update")

    # Check if new map has any target from before in new bins
    # (new bins are the ones with a binid above the previous maximum)
    map_before = parse_map(params["db_prefix"] + ".map")
    new_targets = res["map_pd"][
        res["map_pd"]["binid"] > map_before.binid.max()]
    self.assertFalse(
        map_before["target"].isin(new_targets["target"]).any(),
        "duplicated entries on .map after update")
def test_specialization_file_single(self):
    """
    ganon build --specialization file, with all inputs merged into one single file (online: eutils)
    """
    params = self.default_params.copy()

    # Merge every default input file into one and use it as the only input
    merged_file = self.results_dir + "merged_input_files.fasta.gz"
    merge_gz(params["input_files"], merged_file)
    params["input_files"] = merged_file
    params["db_prefix"] = self.results_dir + "test_specialization_file_single"
    params["specialization"] = "file"

    cfg = Config("build", **params)
    self.assertTrue(ganon.main(cfg=cfg), "ganon build exited with an error")

    # General sanity check of results
    res = build_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon build has inconsistent results")

    # Count the entries with rank "file" on the .tax
    file_rank_entries = sum(res["tax_pd"]["rank"] == "file")
    self.assertEqual(file_rank_entries, 4,
                     "failed to use file name as specialization")

    # Every target on the .map has to start with "NC_"
    # (original note: file specialization fails and is replaced by the sequence accession)
    target_is_accession = res["map_pd"]["target"].map(
        lambda target: target.startswith("NC_"))
    self.assertTrue(target_is_accession.all(),
                    "failed to use sequence accession as specialization")
def test_multiple_rep_files_split_hierachy(self):
    """
    Test run with multiple rep files as input, splitting the output by hierarchy
    """
    report_dir = data_dir + "report/"
    params = self.default_params.copy()
    params.update({
        "rep_files": [report_dir + "results.rep", report_dir + "results2.rep"],
        "output_prefix": self.results_dir + "test_multiple_rep_files_split_hierachy_",
        "split_hierarchy": True,
    })

    cfg = Config("report", **params)
    self.assertTrue(ganon.main(cfg=cfg), "ganon report exited with an error")

    # General sanity check of results
    res = report_sanity_check_and_parse(vars(cfg), sum_full_percentage=False)
    self.assertIsNotNone(res, "ganon report has inconsistent results")

    # 2+4 outputs are expected (6 hierarchies in total)
    n_reports = len(res)
    self.assertEqual(n_reports, 6,
                     "ganon report did not generate multiple report files")
def test_split_hierachy(self):
    """
    Test run splitting the report by hierarchy
    """
    params = self.default_params.copy()
    params.update({
        "output_prefix": self.results_dir + "test_split_hierachy",
        "rep_files": [data_dir + "report/results2.rep"],
        "split_hierarchy": True,
    })

    cfg = Config("report", **params)
    self.assertTrue(ganon.main(cfg=cfg), "ganon report exited with an error")

    # General sanity check of results
    res = report_sanity_check_and_parse(vars(cfg), sum_full_percentage=False)
    self.assertIsNotNone(res, "ganon report has inconsistent results")

    # Add up the root percentage of every split report
    total_root_split = 0
    for r in res.values():
        tre = r["tre_pd"]
        total_root_split += tre[tre['rank'] == "root"]["cumulative_perc"].values[0]

    # Unclassified is added only once, taken from the last report iterated above
    last_tre = r["tre_pd"]
    total_root_split += last_tre[
        last_tre['rank'] == "unclassified"]["cumulative_perc"].values[0]

    # values reported on root of the split reports should add up to 100
    self.assertEqual(int(total_root_split), 100,
                     "ganon report with wrong root counts")
def test_no_rank(self):
    """
    Test ganon table without a specific --rank (lineage headers)
    """
    failure_msg = "ganon table without rank failed"
    params = self.default_params.copy()
    params.update({
        "output_file": self.results_dir + "test_no_rank.tsv",
        "rank": "",
        "header": "lineage",
    })

    cfg = Config("table", **params)
    self.assertTrue(ganon.main(cfg=cfg), "ganon table exited with an error")

    # General sanity check of results
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")

    # 50 columns are expected in the output table
    self.assertEqual(res["out_pd"].columns.values.size, 50, failure_msg)

    # Spot-check some lineage headers
    for expected_header in ("1|2", "1|2|1239|909932|1843489|31977"):
        self.assertTrue(expected_header in res["out_pd"].columns.values,
                        failure_msg)

    # Sum of counts should be total of all reads
    total_counts = res["out_pd"].sum().sum()
    self.assertEqual(total_counts, 1973568, failure_msg)
def test_bin_fragment_overlap_length(self):
    """
    Test changing bin, fragment and overlap length
    """
    params = self.default_params.copy()
    params["db_prefix"] = self.results_dir + "test_bin_fragment_overlap_length"
    params["bin_length"] = 5692
    params["fragment_length"] = 667
    params["overlap_length"] = 349

    # Build config from params
    cfg = Config("build", **params)
    # Run
    self.assertTrue(ganon.main(cfg=cfg), "ganon build exited with an error")
    # General sanity check of results
    res = build_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon build has inconsistent results")

    # Specific test
    # Check max size of fragments on bins (a fragment may carry the overlap on top)
    max_fragment = max(res["bins_pd"]["length"])
    self.assertLessEqual(
        max_fragment,
        params["fragment_length"] + params["overlap_length"],
        "Fragment greater than max.")

    # Check max size of bins. The "length" column is selected before
    # summing so that the other (non-numeric) columns are not aggregated.
    bin_lengths = res["bins_pd"].groupby("binid")["length"].sum()
    self.assertLessEqual(max(bin_lengths), params["bin_length"],
                         "Bin length greater than max.")
def test_specialization_custom(self):
    """
    ganon build --specialization custom (specialization taken from the --seq-info-file)
    """
    params = self.default_params.copy()
    params.update({
        "db_prefix": self.results_dir + "test_specialization_custom",
        "specialization": "custom",
    })

    cfg = Config("build", **params)
    self.assertTrue(ganon.main(cfg=cfg), "ganon build exited with an error")

    # General sanity check of results
    res = build_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon build has inconsistent results")

    # Every specialization of the seq. info has to be present on bins, .map and .tax
    on_bins = res["seq_info"]["specialization"].isin(
        res["bins_pd"]["specialization"])
    self.assertTrue(on_bins.all(), "Missing assembly ids on bins")

    on_map = res["seq_info"]["specialization"].isin(
        res["map_pd"]["target"].drop_duplicates())
    self.assertTrue(on_map.all(), "Missing assembly ids on .map")

    on_tax = res["seq_info"]["specialization"].isin(
        res["tax_pd"]["taxid"].drop_duplicates())
    self.assertTrue(on_tax.all(), "Missing assembly ids on .tax")
def test_min_count_and_percentages(self):
    """
    Test run with min_percentage and min_count together
    """
    params = self.default_params.copy()
    params.update({
        "output_prefix": self.results_dir + "test_min_count_and_percentages",
        "min_percentage": 0.2,
        "min_count": 50,
    })

    cfg = Config("report", **params)
    self.assertTrue(ganon.main(cfg=cfg), "ganon report exited with an error")

    # General sanity check of results
    res = report_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon report has inconsistent results")

    # Rows not selected by idx_base must not have a cumulative count below min_count
    counts = res["tre_pd"][~res["idx_base"]]["cumulative"]
    self.assertTrue((counts >= params["min_count"]).all(),
                    "ganon report failed filtering with --min-count")

    # ... nor a cumulative percentage below min_percentage
    percentages = res["tre_pd"][~res["idx_base"]]["cumulative_perc"]
    self.assertTrue((percentages >= params["min_percentage"]).all(),
                    "ganon report failed filtering with --min-percentage")
def test_minimizers(self):
    """
    Test classify run against a database built with minimizers
    """
    # Database built with a window size (minimizers), used only by this test
    minimizers_db = self.results_dir + "base_build_minimizers"
    build_dir = data_dir + "build/"
    build_params = {
        "taxdump_file": [data_dir + "mini_nodes.dmp",
                         data_dir + "mini_names.dmp"],
        "input_files": [
            build_dir + "bacteria_NC_010333.1.fasta.gz",
            build_dir + "bacteria_NC_017164.1.fasta.gz",
            build_dir + "bacteria_NC_017163.1.fasta.gz",
            build_dir + "bacteria_NC_017543.1.fasta.gz",
        ],
        "seq_info_file": build_dir + "bacteria_seqinfo.txt",
        "write_seq_info_file": True,
        "window_size": 23,
        "rank": "species",
        "quiet": True,
        "db_prefix": minimizers_db,
    }
    cfg_build = Config("build", **build_params)
    self.assertTrue(ganon.main(cfg=cfg_build),
                    "ganon build exited with an error")

    # General sanity check of the build results
    res_build = build_sanity_check_and_parse(vars(cfg_build))
    self.assertIsNotNone(res_build, "ganon build has inconsistent results")

    # Classify against the database just built
    params = self.default_params.copy()
    params.update({
        "output_prefix": self.results_dir + "test_minimizers",
        "db_prefix": minimizers_db,
        "rel_cutoff": 0.75,
        "rel_filter": 0.1,
    })
    cfg = Config("classify", **params)
    self.assertTrue(ganon.main(cfg=cfg),
                    "ganon classify exited with an error")

    # General sanity check of the classify results
    res = classify_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon classify has inconsistent results")
def test_default(self):
    """
    ganon update with default parameters and an empty taxdump_file (online: eutils, taxdump)
    """
    updated_prefix = self.results_dir + "test_default"
    params = self.default_params.copy()
    params["output_db_prefix"] = updated_prefix
    params["taxdump_file"] = []

    cfg = Config("update", **params)
    self.assertTrue(ganon.main(cfg=cfg), "ganon update exited with an error")

    # General sanity check of the update results
    res = update_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon update has inconsistent results")

    # Classify simulated reads against the updated index
    params_classify = {
        "db_prefix": params["output_db_prefix"],
        "single_reads": [data_dir + "vir.sim.1.fq",
                         data_dir + "bac.sim.1.fq"],
        "max_error": 0,
        "output_all": True,
        "quiet": True,
        "output_lca": True,
        "output_prefix": self.results_dir + "test_default",
    }
    cfg_classify = Config("classify", **params_classify)
    self.assertTrue(ganon.main(cfg=cfg_classify),
                    "ganon classify exited with an error")

    # General sanity check of the classify results
    res = classify_sanity_check_and_parse(vars(cfg_classify))
    self.assertIsNotNone(res, "ganon classify has inconsistent results")

    # Specific test - superkingdom entries have to be Bacteria or Viruses on the updated index
    superkingdoms = res["tre_pd"][res["tre_pd"]["rank"] == "superkingdom"]
    only_expected = superkingdoms["name"].isin(["Bacteria", "Viruses"]).all()
    self.assertTrue(only_expected, "classification on updated index failed")
def test_default_offline(self):
    """
    ganon build with default parameters
    """
    params = {**self.default_params,
              "db_prefix": self.results_dir + "test_default"}

    cfg = Config("build", **params)
    build_ok = ganon.main(cfg=cfg)
    self.assertTrue(build_ok, "ganon build exited with an error")

    # Generated files must pass the general sanity check
    res = build_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon build has inconsistent results")
def classify(cfg):
    """
    Run ganon-classify with the options of cfg and, when an output prefix
    is set, generate a report from the resulting .rep file.

    Without output prefix the captured stdout is printed and True is returned;
    with output prefix the return value of report() is returned.
    """
    print_log("Classifying reads (ganon-classify)", cfg.quiet)

    # Every entry is either a complete command-line fragment or an empty
    # string; unset options are kept as empty entries so the joined command
    # is unchanged
    command = [cfg.path_exec['classify']]
    command.append("--single-reads " + ",".join(cfg.single_reads) if cfg.single_reads else "")
    command.append("--paired-reads " + ",".join(cfg.paired_reads) if cfg.paired_reads else "")

    # One file per database prefix for each of the three database extensions
    for ext in ("ibf", "map", "tax"):
        db_files = [prefix + "." + ext for prefix in cfg.db_prefix]
        command.append("--" + ext + " " + ",".join(db_files))

    command.append("--hierarchy-labels " + ",".join(cfg.hierarchy_labels) if cfg.hierarchy_labels else "")

    # Options holding a list of values, given as comma-separated strings
    multi_value_options = (
        ("--max-error", cfg.max_error),
        ("--min-kmers", cfg.min_kmers),
        ("--max-error-unique", cfg.max_error_unique),
        ("--strata-filter", cfg.strata_filter),
    )
    for flag, values in multi_value_options:
        command.append(flag + " " + ",".join([str(v) for v in values]) if values else "")

    command.extend([
        "--offset " + str(cfg.offset) if cfg.offset else "",
        "--output-prefix " + cfg.output_prefix if cfg.output_prefix else "",
        "--output-all" if cfg.output_all else "",
        "--output-unclassified" if cfg.output_unclassified else "",
        "--output-single" if cfg.output_single else "",
        "--threads " + str(cfg.threads) if cfg.threads else "",
        # n_reads / n_batches are only skipped when they are None
        "--n-reads " + str(cfg.n_reads) if cfg.n_reads is not None else "",
        "--n-batches " + str(cfg.n_batches) if cfg.n_batches is not None else "",
        "--verbose" if cfg.verbose else "",
        "--quiet" if cfg.quiet else "",
    ])
    run_ganon_classify = " ".join(command)

    stdout, stderr = run(run_ganon_classify)

    if not cfg.output_prefix:
        # No output prefix: the captured stdout is printed here
        print(stdout)
        print_log(stderr, cfg.quiet)
        return True

    print_log(stderr, cfg.quiet)

    # Output prefix set: report on the .rep file written under that prefix
    report_cfg = Config("report",
                        db_prefix=cfg.db_prefix,
                        rep_file=cfg.output_prefix + ".rep",
                        output_prefix=cfg.output_prefix,
                        ranks=cfg.ranks,
                        output_format="tsv",
                        verbose=cfg.verbose,
                        quiet=cfg.quiet)
    return report(report_cfg)
def test_default(self):
    """
    ganon table with default parameters
    """
    params = {**self.default_params,
              "output_file": self.results_dir + "test_default.tsv"}

    cfg = Config("table", **params)
    table_ok = ganon.main(cfg=cfg)
    self.assertTrue(table_ok, "ganon table exited with an error")

    # Generated table must pass the general sanity check
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")
def test_specialization_on_default(self):
    """
    ganon update --specialization custom on a previously generated index without specialization
    """
    params = self.default_params.copy()
    params["db_prefix"] = data_dir + "bacteria_default"
    params["output_db_prefix"] = self.results_dir + "test_specialization_on_default"
    params["specialization"] = "custom"

    # Build config from params
    cfg = Config("update", **params)
    # Should not run: the update is expected to fail, so the assertion
    # message describes the unexpected success
    self.assertFalse(ganon.main(cfg=cfg),
                     "ganon update ran but it should fail")
def test_default(self):
    """
    ganon report with default parameters
    """
    params = {**self.default_params,
              "output_prefix": self.results_dir + "test_default"}

    cfg = Config("report", **params)
    report_ok = ganon.main(cfg=cfg)
    self.assertTrue(report_ok, "ganon report exited with an error")

    # Generated report must pass the general sanity check
    res = report_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon report has inconsistent results")
def test_invalid_rank(self):
    """
    ganon build --rank xyz (invalid rank); the build is still expected to succeed
    """
    params = {**self.default_params,
              "db_prefix": self.results_dir + "test_invalid_rank",
              "rank": "xyz"}

    cfg = Config("build", **params)
    build_ok = ganon.main(cfg=cfg)
    self.assertTrue(build_ok, "ganon build exited with an error")

    # Generated files must pass the general sanity check
    res = build_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon build has inconsistent results")
def test_min_percentage(self):
    """
    Test ganon table with --min-percentage
    """
    params = self.default_params.copy()
    params["output_file"] = self.results_dir + "test_min_percentage.tsv"
    params["min_percentage"] = 0.01

    # Build config from params
    cfg = Config("table", **params)
    # Run
    self.assertTrue(ganon.main(cfg=cfg), "ganon table exited with an error")
    # General sanity check of results
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")

    # Every value has to be either zero or at least min_percentage
    # (axis=None reduces over the whole table at once)
    table = res["out_pd"]
    values_ok = ((table == 0) | (table >= params["min_percentage"])).all(axis=None)
    self.assertTrue(values_ok, "ganon table min percentage filter failed")
def test_rank(self):
    """
    Test ganon table restricted to one rank (superkingdom)
    """
    params = {**self.default_params,
              "output_file": self.results_dir + "test_rank.tsv",
              "rank": "superkingdom"}

    cfg = Config("table", **params)
    table_ok = ganon.main(cfg=cfg)
    self.assertTrue(table_ok, "ganon table exited with an error")

    # General sanity check of results
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")

    # A single column is expected for the chosen rank (original note: just bacteria)
    n_columns = res["out_pd"].columns.values.size
    self.assertEqual(n_columns, 1, "ganon table rank selection failed")
def test_input_directory(self):
    """
    ganon table with default parameters, reading inputs from a directory filtered by extension
    """
    params = self.default_params.copy()
    params["output_file"] = self.results_dir + "test_input_directory.tsv"

    # Swap the explicit file list for a directory + extension
    del params["tre_files"]
    params.update({
        "input_directory": data_dir + "table/",
        "input_extension": ".tre",
    })

    cfg = Config("table", **params)
    table_ok = ganon.main(cfg=cfg)
    self.assertTrue(table_ok, "ganon table exited with an error")

    # Generated table must pass the general sanity check
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")
def test_names_with(self):
    """
    Test ganon table with --names-with
    """
    params = {**self.default_params,
              "output_file": self.results_dir + "test_names_with.tsv",
              "names_with": "Prevotella"}

    cfg = Config("table", **params)
    table_ok = ganon.main(cfg=cfg)
    self.assertTrue(table_ok, "ganon table exited with an error")

    # General sanity check of results
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")

    # Every reported column name has to contain "Prevotella"
    only_prevotella = all(
        "Prevotella" in name for name in res["out_pd"].columns.values)
    self.assertTrue(only_prevotella, "ganon table names with filter failed")
def test_names(self):
    """
    Test ganon table with --names
    """
    params = self.default_params.copy()
    params["output_file"] = self.results_dir + "test_names.tsv"
    params["names"] = "Veillonella tobetsuensis"

    # Build config from params
    cfg = Config("table", **params)
    # Run
    self.assertTrue(ganon.main(cfg=cfg), "ganon table exited with an error")
    # General sanity check of results
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")

    # should output only the species with the name Veillonella tobetsuensis.
    # Compared as plain lists: comparing the column array against a string
    # gives an element-wise result whose truth value is only defined for a
    # single column, so any other number of columns raised an error instead
    # of failing the assertion.
    self.assertEqual(list(res["out_pd"].columns.values),
                     ["Veillonella tobetsuensis"],
                     "ganon table names filter failed")
def test_taxids_relative(self):
    """
    Test ganon table with --taxids given for a taxid that is not on the chosen rank
    """
    params = {**self.default_params,
              "output_file": self.results_dir + "test_taxids_relative.tsv",
              "taxids": "838"}  # genus: Prevotella

    cfg = Config("table", **params)
    table_ok = ganon.main(cfg=cfg)
    self.assertTrue(table_ok, "ganon table exited with an error")

    # General sanity check of results
    res = table_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon table has inconsistent results")

    # Only species of the genus 838 (Prevotella) are expected:
    # every column name has to contain "Prevotella"
    only_prevotella = all(
        "Prevotella" in name for name in res["out_pd"].columns.values)
    self.assertTrue(only_prevotella, "ganon table taxids filter failed")
def test_input_directory(self):
    """
    ganon build reading the input files from a directory filtered by extension
    """
    params = self.default_params.copy()
    params["db_prefix"] = self.results_dir + "test_input_directory"

    # Swap the explicit file list for a directory + extension
    del params["input_files"]
    params.update({
        "input_directory": data_dir + "build/",
        "input_extension": ".fasta.gz",
    })

    cfg = Config("build", **params)
    build_ok = ganon.main(cfg=cfg)
    self.assertTrue(build_ok, "ganon build exited with an error")

    # Generated files must pass the general sanity check
    res = build_sanity_check_and_parse(vars(cfg))
    self.assertIsNotNone(res, "ganon build has inconsistent results")