def setUp(self):
    """Build an isolated clusterfinder config and a dummy record with PFAM hits."""
    self.config = build_config(
        ["--cf-create-clusters",
         "--cf-mean-threshold", "0.6",
         "--cf-min-cds", "5",
         "--cf-min-pfams", "5"],
        modules=[clusterfinder], isolated=True)
    update_config({"enabled_cluster_types": []})
    self.record = DummyRecord(seq=Seq("A" * 2000))
    # (start, end, probability, pfam id) for each dummy CDS/PFAM pair
    domain_data = [
        (10, 20, 0.1, 'PF77777'), (30, 40, 0.3, 'PF00106'),
        (50, 60, 0.4, 'PF00107'), (60, 70, 0.7, 'PF00109'),
        (70, 80, 0.98, 'PF08484'), (90, 100, 0.8, 'PF02401'),
        (100, 110, 0.32, 'PF04369'), (110, 120, 1.0, 'PF00128'),
        (130, 140, 0.2, 'PF77776'), (500, 505, None, 'PF77775'),
        (1010, 1020, 0.1, 'PF77774'), (1030, 1040, 0.3, 'PF00106'),
        (1050, 1060, 0.4, 'PF00107'), (1060, 1070, 0.7, 'PF00109'),
        (1070, 1080, 0.98, 'PF08484'), (1090, 1100, 0.8, 'PF02401'),
        (1100, 1110, 0.32, 'PF04369'), (1110, 1120, 1.0, 'PF00128'),
    ]
    for begin, stop, prob, pfam_name in domain_data:
        loc = FeatureLocation(begin, stop, strand=1)
        self.record.add_cds_feature(
            CDSFeature(loc, locus_tag=str(begin), translation="A"))
        domain = PFAMDomain(loc, "dummy_description", protein_start=begin + 1,
                            protein_end=stop - 1, identifier=pfam_name, tool="test")
        domain.domain_id = "pfam_%d" % begin
        domain.probability = prob
        self.record.add_pfam_domain(domain)
def prepare_output_directory(name: str, input_file: str) -> None:
    """ Ensure the output directory exists and is usable

        Raises an exception if the directory is unusable, or if results
        are not being reused and the directory is not empty

        Arguments:
            name: the path of the directory
            input_file: the path of the input file

        Returns:
            None
    """
    # if not supplied, set the output directory to be the sequence name
    input_prefix = os.path.splitext(os.path.basename(input_file))[0]
    if not name:
        name = os.path.abspath(input_prefix)
        update_config({"output_dir": name})

    if os.path.exists(name):
        if not os.path.isdir(name):
            raise RuntimeError("Output directory %s exists and is not a directory" % name)
        # not empty (apart from a possible input dir), and not reusing its results
        if not input_file.endswith(".json") and \
                list(filter(_ignore_patterns, glob.glob(os.path.join(name, "*")))):
            raise RuntimeError("Output directory contains other files, aborting for safety")
        else:  # --reuse
            # stale per-region genbank files would otherwise mix with regenerated ones
            logging.debug("Removing existing region genbank files")
            for genbank in glob.glob(os.path.join(name, "*.region???.gbk")):
                os.remove(genbank)
        logging.debug("Reusing output directory: %s", name)
    else:
        logging.debug("Creating output directory: %s", name)
        os.mkdir(name)
def test_nisin_minimal(self):
    """A minimal run must create outputs, and reuse must recreate deleted HTML."""
    output_dir = self.default_options.output_dir
    # the output directory must start out empty
    assert not list(glob.glob(os.path.join(output_dir, "*")))
    # with neither a sequence nor prior results, the run must die
    with self.assertRaisesRegex(
            ValueError, "No sequence file or prior results to read"):
        run_antismash(None, self.default_options)
    # and must not have created any files in the process
    assert not list(glob.glob(os.path.join(output_dir, "*")))
    # an ordinary run should produce the expected outputs
    run_antismash(get_path_to_nisin_genbank(), self.default_options)
    self.check_output_files()
    # removing the html file and reusing results must recreate it
    os.unlink(os.path.join(self.default_options.output_dir, "index.html"))
    update_config({
        "reuse_results": os.path.join(self.default_options.output_dir, "nisin.json")
    })
    run_antismash(None, self.default_options)
    self.check_output_files()
def test_nisin_fasta_only(self):
    """Genefinding must only run when a real tool is selected, and records
       without genes must be marked as skipped."""
    config.update_config({"genefinding_tool": "none"})
    filepath = path.get_full_path(__file__, "data", "nisin.fasta")
    recs = record_processing.parse_input_sequence(filepath)
    assert len(recs) == 1
    assert not recs[0].get_cds_features()
    # with the tool disabled, preprocessing fails without running genefinding
    with self.assertRaisesRegex(AntismashInputError, "all records skipped"):
        record_processing.pre_process_sequences(recs, self.options, self.genefinding)
    assert not self.genefinding.was_run
    assert not recs[0].get_cds_features()
    # with a (dummy) tool enabled, genefinding runs but still finds nothing
    recs[0].skip = False
    config.update_config({"genefinding_tool": "not-none"})
    # due to no genes actually being marked, it'll raise an error
    with self.assertRaisesRegex(AntismashInputError, "all records skipped"):
        record_processing.pre_process_sequences(recs, self.options, self.genefinding)
    # but genefinding was still run
    assert self.genefinding.was_run
    # still no features because we used dummy genefinding
    for rec in recs:
        assert not rec.get_cds_features()
        assert rec.skip.lower() == "no genes found"
def test_nisin_complete(self):
    """TTA results must regenerate from stored results and respect the threshold."""
    with TemporaryDirectory() as output_dir:
        args = ["--minimal", "--enable-tta", "--tta-threshold", "0",
                "--output-dir", output_dir, helpers.get_path_to_nisin_genbank()]
        options = build_config(args, isolated=True,
                               modules=antismash.get_all_modules())
        antismash.run_antismash(helpers.get_path_to_nisin_genbank(), options)

        # regen the results
        update_config({"reuse_results": os.path.join(output_dir, "nisin.json")})
        prior_results = read_data(None, options)
        record = prior_results.records[0]
        results = prior_results.results[0]
        regenned = tta.regenerate_previous_results(
            results.get("antismash.modules.tta"), record, options)
        assert isinstance(regenned, tta.TTAResults)
        assert len(regenned.features) == 174

        # raise the threshold above the gc_content and ensure regenned has no hits
        update_config({"tta_threshold": 0.65})
        regenned = tta.regenerate_previous_results(
            results.get("antismash.modules.tta"), record, options)
        assert isinstance(regenned, tta.TTAResults)
        assert not regenned.features
def setUp(self):
    """Build an isolated clusterfinder config and a dummy record with PFAM hits."""
    self.config = build_config(
        ["--cf-create-clusters",
         "--cf-mean-threshold", "0.6",
         "--cf-min-cds", "5",
         "--cf-min-pfams", "5"],
        modules=[clusterfinder], isolated=True)
    update_config({"enabled_cluster_types": []})
    self.record = DummyRecord(seq=Seq("A" * 2000))
    # (start, end, probability, pfam id) for each dummy CDS/PFAM pair
    domain_data = [
        (10, 20, 0.1, 'FAKE007'), (30, 40, 0.3, 'PF00106'),
        (50, 60, 0.4, 'PF00107'), (60, 70, 0.7, 'PF00109'),
        (70, 80, 0.98, 'PF08484'), (90, 100, 0.8, 'PF02401'),
        (100, 110, 0.32, 'PF04369'), (110, 120, 1.0, 'PF00128'),
        (130, 140, 0.2, 'FAKE234'), (500, 505, None, 'FAKE505'),
        (1010, 1020, 0.1, 'FAKE007'), (1030, 1040, 0.3, 'PF00106'),
        (1050, 1060, 0.4, 'PF00107'), (1060, 1070, 0.7, 'PF00109'),
        (1070, 1080, 0.98, 'PF08484'), (1090, 1100, 0.8, 'PF02401'),
        (1100, 1110, 0.32, 'PF04369'), (1110, 1120, 1.0, 'PF00128'),
    ]
    for begin, stop, prob, pfam_name in domain_data:
        loc = FeatureLocation(begin, stop)
        self.record.add_cds_feature(CDSFeature(loc, locus_tag=str(begin)))
        domain = PFAMDomain(loc, "dummy_description")
        domain.db_xref.append(pfam_name)
        domain.probability = prob
        self.record.add_pfam_domain(domain)
def test_limit_to_record_complete(self):
    """A record filter matching nothing must abort preprocessing."""
    recs = self.read_double_nisin()
    config.update_config({"limit_to_record": "bad_id"})
    with self.assertRaisesRegex(AntismashInputError, "no sequences matched filter"):
        record_processing.pre_process_sequences(recs, self.options, self.genefinding)
def prepare_output_directory(name: str, input_file: str) -> None:
    """ Ensure the output directory exists and is usable

        Raises an exception if the directory is unusable

        Arguments:
            name: the path of the directory
            input_file: the path of the input file

        Returns:
            None
    """
    # if not supplied, set the output directory to be the sequence name
    if not name:
        name = os.path.abspath(os.path.splitext(os.path.basename(input_file))[0])
        update_config({"output_dir": name})

    if os.path.exists(name):
        if not os.path.isdir(name):
            raise RuntimeError("Output directory %s exists and is not a directory" % name)
        logging.debug("Reusing output directory: %s", name)
    else:
        logging.debug("Creating output directory: %s", name)
        os.mkdir(name)
def test_check_prereqs_missing_executables(self):
    """With no executables configured, prerequisite checking must fail."""
    options = build_config(["--check-prereqs"], isolated=True,
                           modules=get_all_modules())
    # wipe out any detected executables so every prereq check fails
    update_config({"executables": Namespace()})
    mock("antismash.config.get_config", returns=options)
    assert hasattr(get_config(), "executables")
    assert not get_config().executables.__dict__
    with self.assertRaisesRegex(RuntimeError, "failing prereq"):
        antismash.main.check_prerequisites(get_all_modules(), options)
def test_limit_to_record_partial(self):
    """Records not matching the id filter must be skipped, others kept."""
    recs = self.read_double_nisin()
    assert all(rec.skip is None for rec in recs)
    config.update_config({"limit_to_record": recs[0].id})
    # change the first record's id so it no longer matches the filter
    recs[0].id += "_changed"
    record_processing.pre_process_sequences(recs, self.options, self.genefinding)
    assert not recs[1].skip
    assert recs[0].skip.startswith("did not match filter")
def test_nisin_fasta_gff(self):
    """A supplied GFF3 must provide gene calls without running genefinding."""
    fasta = path.get_full_path(__file__, "data", "nisin.fasta")
    gff = path.get_full_path(__file__, "data", "nisin.gff3")
    config.update_config({"genefinding_gff3": gff})
    recs = record_processing.parse_input_sequence(fasta, gff_file=gff)
    record_processing.pre_process_sequences(recs, self.options, self.genefinding)
    assert not self.genefinding.was_run
    assert len(recs[0].get_cds_features()) == 11
def test_limit(self):
    """A record limit must skip all records past the limit and set the trigger."""
    recs = self.read_double_nisin()
    assert all(rec.skip is None for rec in recs)
    assert not self.options.triggered_limit
    config.update_config({"limit": 1})
    record_processing.pre_process_sequences(recs, self.options, self.genefinding)
    assert recs[0].skip is None
    assert recs[1].skip.startswith("skipping all but first 1")
    assert self.options.triggered_limit
def run_antismash(self, filename, expected):
    """Run clusterblast on the given file and check the expected hit count."""
    with TemporaryDirectory() as output_dir:
        update_config({"output_dir": output_dir})
        results = helpers.run_and_regenerate_results_for_module(
            filename, clusterblast, self.options)
        update_config({"output_dir": ""})
        results, global_results = self.get_results(results)
        assert len(results.region_results) == 1
        region = results.region_results[0]
        assert len(region.ranking) == expected  # will change if database does
        self.check_svgs(global_results, expected, output_dir)
    return results
def canonical_base_filename(input_file: str, directory: str, options: ConfigType) -> str:
    """Generate a canonical base filename if one isn't specified in the options."""
    base = options.output_basename
    if not base:
        # derive the base name from the input file, stripping any
        # compression extension as well as the format extension
        base, extension = os.path.splitext(os.path.basename(input_file))
        if extension.lower() in (".gz", ".bz", ".xz"):
            base, _ = os.path.splitext(base)
    update_config({"output_basename": base})
    return os.path.join(directory, base)
def test_from_json_higher_bitscore(self):
    """Raising the cutoff between runs must drop hits scoring below it."""
    json = self.create_results().to_json()
    assert get_config().rre_cutoff == 25.
    raised_cutoff = 35.
    # only the first hit should survive the raised cutoff
    assert self.hits[0].score > raised_cutoff
    assert self.hits[1].score < raised_cutoff
    update_config({"rre_cutoff": raised_cutoff})
    result = RREFinderResults.from_json(json, self.record)
    assert len(result.hits_by_cds) == 1
    assert result.hits_by_cds[self.hits[0].locus_tag] == [self.hits[0]]
    assert len(result.hits_by_protocluster) == 1
    assert result.hits_by_protocluster[1] == [self.hits[0].locus_tag]
def test_from_json_higher_min_length(self):
    """Raising the minimum length between runs must drop hits shorter than it."""
    json = self.create_results().to_json()
    assert get_config().rre_min_length == 50
    raised_length = 80
    # only the second hit should survive the raised minimum length
    assert len(self.hits[0]) < raised_length
    assert len(self.hits[1]) > raised_length
    update_config({"rre_min_length": raised_length})
    regenned = RREFinderResults.from_json(json, self.record)
    assert len(regenned.hits_by_cds) == 1
    assert regenned.hits_by_cds[self.hits[1].locus_tag] == [self.hits[1]]
    assert len(regenned.hits_by_protocluster) == 1
    assert regenned.hits_by_protocluster[2] == [self.hits[1].locus_tag]
def setUp(self):
    """Configure a fungal glimmerhmm genefinding run and locate the test data."""
    args = ['--taxon', 'fungi',
            '--genefinding-tool', 'glimmerhmm',
            '--cpus', '1']
    self.options = update_config(get_simple_options(genefinding, args))
    self.data_location = get_full_path(__file__, "data")
def test_valid_args(self):
    """Parsed arguments must land on the namespace and the Config singleton."""
    # make sure args go through to the Namespace default object
    parsed = self.core_parser.parse_args(['--taxon', 'fungi'])
    assert parsed.taxon == 'fungi'
    # make sure they propagate to the Config singleton
    cfg = update_config(parsed)
    assert cfg.taxon == 'fungi'
def setUp(self):
    """Build a minimal config with TTA enabled and remember the prior config."""
    built = build_config(["--minimal", "--enable-tta", "--tta-threshold", "0"],
                         isolated=True, modules=antismash.get_all_modules())
    # keep the previous config so tearDown can restore it
    self.old_config = get_config().__dict__
    self.options = update_config(built)
def setUp(self):
    """Set up a dummy genefinding module that records whether it was run."""
    class DummyModule:
        # stand-in for the genefinding module: never finds genes,
        # but tracks whether run_on_record was invoked
        def __init__(self):
            self.was_run = False

        def get_arguments(self):
            args = config.args.ModuleArgs("genefinding", "genefinding")
            args.add_option("gff3", default="", type=str,
                            help="dummy", dest="gff3")
            return args

        def run_on_record(self, *_args, **_kwargs):
            self.was_run = True

    self.genefinding = DummyModule()
    self.options = config.build_config(["--cpus", "1"], isolated=True,
                                       modules=[self.genefinding])
    config.update_config({"triggered_limit": False})
def setUp(self):
    """Build the clusterblast options and sanity check the module is usable."""
    built = build_config(self.get_args(), isolated=True,
                         modules=get_all_modules())
    # keep the previous config so tearDown can restore it
    self.old_config = get_config().__dict__
    self.options = update_config(built)
    assert clusterblast.check_prereqs(self.options) == []
    assert clusterblast.check_options(self.options) == []
    assert clusterblast.is_enabled(self.options)
def setUp(self):
    """Locate the diamond database fixtures and build an isolated config."""
    data = lambda name: path.get_full_path(__file__, "data", name)
    self.format0_file = data("format0.dmnd")
    self.format1_file = data("format1.dmnd")
    self.empty = data("empty.dmnd")
    built = build_config([], isolated=True, modules=get_all_modules())
    # keep the previous config so tearDown can restore it
    self.old_config = get_config().__dict__
    self.options = update_config(built)
def test_namespace_initialisation(self):
    """Config must be initialisable from an argparse Namespace."""
    ns = Namespace()
    ns.taxon = 'fungi'
    cfg = update_config(ns)
    assert cfg.taxon == 'fungi'
    # a new constructor should keep the value
    assert get_config().taxon == 'fungi'
def test_assignment_proofing(self):
    """Direct attribute assignment on a Config object must be rejected."""
    cfg = update_config({'taxon': 'fungi'})
    assert cfg.taxon == 'fungi'
    # changing values in a Config object is invalid
    with self.assertRaises(RuntimeError):
        cfg.taxon = 'bacteria'
    # and verify it wasn't changed
    assert cfg.taxon == 'fungi'
def test_get(self):
    """Config.get must mirror attribute access and honour defaults."""
    cfg = update_config({'a': 1, 'b': None})
    # check attribute and get are the same
    assert cfg.a == cfg.get('a')
    # an existing None value must win over the supplied default
    assert cfg.get('b', 3) is None  # since b exists
    # missing keys fall back to the default (or None without one)
    assert cfg.get('c') is None     # since c doesn't
    assert cfg.get('c', 3) == 3     # now with default as 3
def test_long_names(self):
    """Record names over 16 chars must be shortened unless explicitly allowed."""
    record = self.read_nisin()[0]
    # a 16 character name is within limits and must survive untouched
    record.id = "A" * 16
    record.name = record.id
    self.run_on_records([record])
    assert record.id == record.name == "A" * 16
    # 17 characters exceeds the limit and must be truncated
    record.id = "A" * 17
    record.name = record.id
    self.run_on_records([record])
    assert len(record.id) <= 16
    assert len(record.name) <= 16
    # unless long headers are explicitly allowed
    config.update_config({"allow_long_headers": True})
    record.id = "A" * 17
    record.name = record.id
    self.run_on_records([record])
    assert record.id == record.name == "A" * 17
def test_trees_complete(self):
    """smCOG tree results must regenerate, but fail when an image is missing."""
    with TemporaryDirectory() as output_dir:
        args = ["--minimal", "--enable-genefunctions", "--smcog-trees",
                "--output-dir", output_dir, helpers.get_path_to_nisin_genbank()]
        options = build_config(args, isolated=True,
                               modules=antismash.get_all_modules())
        antismash.run_antismash(helpers.get_path_to_nisin_genbank(), options)
        with open(os.path.join(output_dir, "nisin.json")) as res_file:
            assert "antismash.modules.smcog_trees" in res_file.read()
        tree_files = list(glob.glob(os.path.join(output_dir, "smcogs", "*.png")))
        assert len(tree_files) == 7
        sample_tree = tree_files[0]

        # regen the results
        update_config({"reuse_results": os.path.join(output_dir, "nisin.json")})
        prior_results = read_data(None, options)
        record = prior_results.records[0]
        results = prior_results.results[0]
        tree_results = results["antismash.modules.smcog_trees"]
        smcogs_results = smcog_trees.regenerate_previous_results(
            tree_results, record, options)
        assert len(smcogs_results.tree_images) == 7
        assert os.path.exists(sample_tree)

        os.unlink(sample_tree)
        assert not os.path.exists(sample_tree)

        # attempt to regen the results, the deleted tree image will prevent it
        prior_results = read_data(None, options)
        record = prior_results.records[0]
        results = prior_results.results[0]
        smcogs_results = smcog_trees.regenerate_previous_results(
            tree_results, record, options)
        assert smcogs_results is None
def test_mounted_at_runtime(self, _mocked_prep_known):
    """File checks must be deferred for databases mounted at runtime,
       but run exactly once for a normal database path."""
    options = config.update_config({
        "database_dir": "/mounted_at_runtime",
        "executables": {
            "diamond": "/some/path"
        },
    })
    error = RuntimeError(
        "check_clusterblast_files called when it shouldn't have been")
    with patch.object(clusterblast, "check_clusterblast_files", side_effect=error):
        clusterblast.check_prereqs(options)

    options = config.update_config({"database_dir": "/path"})
    # "returns" is not a Mock keyword and merely set a useless attribute on the
    # stub; "return_value" is what actually makes the stub return an empty list
    with patch.object(clusterblast, "check_clusterblast_files",
                      return_value=[]) as check:
        clusterblast.check_prereqs(options)
    check.assert_called_once()
def setUp(self):
    """Prepare smcog_trees options, verify prerequisites, and build a record."""
    self.old_config = get_config().__dict__
    built = build_config(self.get_args(), isolated=True,
                         modules=antismash.get_all_modules())
    self.options = update_config(built)
    # prevent multiprocess testing from taking place, to stop signals
    # being caught awkwardly in the test itself
    update_config({"cpus": 1})

    # as smcogs_trees depends on genefunctions.smcogs' data, ensure that's ready to go
    assert genefunctions.prepare_data() == []

    assert smcog_trees.check_prereqs() == []
    assert smcog_trees.check_options(self.options) == []
    assert smcog_trees.is_enabled(self.options)

    self.record = self.build_record(helpers.get_path_to_nisin_with_detection())
def test_nisin_fasta_only(self):
    """Genefinding must only run when a real tool is selected."""
    config.update_config({"genefinding_tool": "none"})
    filepath = path.get_full_path(__file__, "data", "nisin.fasta")
    recs = record_processing.parse_input_sequence(filepath)
    assert len(recs) == 1
    assert not recs[0].get_cds_features()
    # make sure genefinding wasn't run with default options
    record_processing.pre_process_sequences(recs, self.options, self.genefinding)
    assert not self.genefinding.was_run
    assert not recs[0].get_cds_features()
    # make sure genefinding was run when not 'none'
    recs[0].skip = False
    config.update_config({"genefinding_tool": "not-none"})
    record_processing.pre_process_sequences(recs, self.options, self.genefinding)
    assert self.genefinding.was_run
    # still no features because we used dummy genefinding
    assert not recs[0].get_cds_features()