def setUp(self):
        """Build an isolated clusterfinder config and a dummy record with
        CDS features and PFAM domains spread over two similar regions."""
        self.config = build_config(["--cf-create-clusters",
                                    "--cf-mean-threshold", "0.6",
                                    "--cf-min-cds", "5",
                                    "--cf-min-pfams", "5"],
                                   modules=[clusterfinder], isolated=True)
        update_config({"enabled_cluster_types": []})

        # (start, end, probability, pfam id) for each dummy domain; the second
        # group mirrors the first, offset by 1000, with an isolated domain between
        domains = [(10, 20, 0.1, 'PF77777'),
                   (30, 40, 0.3, 'PF00106'),
                   (50, 60, 0.4, 'PF00107'),
                   (60, 70, 0.7, 'PF00109'),
                   (70, 80, 0.98, 'PF08484'),
                   (90, 100, 0.8, 'PF02401'),
                   (100, 110, 0.32, 'PF04369'),
                   (110, 120, 1.0, 'PF00128'),
                   (130, 140, 0.2, 'PF77776'),
                   (500, 505, None, 'PF77775'),
                   (1010, 1020, 0.1, 'PF77774'),
                   (1030, 1040, 0.3, 'PF00106'),
                   (1050, 1060, 0.4, 'PF00107'),
                   (1060, 1070, 0.7, 'PF00109'),
                   (1070, 1080, 0.98, 'PF08484'),
                   (1090, 1100, 0.8, 'PF02401'),
                   (1100, 1110, 0.32, 'PF04369'),
                   (1110, 1120, 1.0, 'PF00128')]
        self.record = DummyRecord(seq=Seq("A" * 2000))
        for start, end, probability, pfam_id in domains:
            location = FeatureLocation(start, end, strand=1)
            cds = CDSFeature(location, locus_tag=str(start), translation="A")
            self.record.add_cds_feature(cds)
            domain = PFAMDomain(location, "dummy_description",
                                protein_start=start + 1, protein_end=end - 1,
                                identifier=pfam_id, tool="test")
            domain.domain_id = "pfam_%d" % start
            domain.probability = probability
            self.record.add_pfam_domain(domain)
Example #2
0
def prepare_output_directory(name: str, input_file: str) -> None:
    """ Ensure the output directory exists and is usable

        Raises an exception if the directory is unusable,
        or if results not being reused and directory not empty

        Arguments:
            name: the path of the directory
            input_file: the path of the input file

        Returns:
            None
    """
    # if not supplied, set the output directory to be the sequence name
    input_prefix = os.path.splitext(os.path.basename(input_file))[0]
    if not name:
        name = os.path.abspath(input_prefix)
        update_config({"output_dir": name})

    if os.path.exists(name):
        if not os.path.isdir(name):
            raise RuntimeError("Output directory %s exists and is not a directory" % name)
        # not empty (apart from a possible input dir), and not reusing its results
        if not input_file.endswith(".json") and \
                list(filter(_ignore_patterns, glob.glob(os.path.join(name, "*")))):
            raise RuntimeError("Output directory contains other files, aborting for safety")
        else:  # --reuse
            # when reusing a prior run (a .json input), stale per-region genbank
            # files would be out of sync with the regenerated results, so drop them
            # NOTE(review): this branch is also reached when the directory is empty
            # and the input isn't json; the removal is then a harmless no-op
            logging.debug("Removing existing region genbank files")
            for genbank in glob.glob(os.path.join(name, "*.region???.gbk")):
                os.remove(genbank)
        logging.debug("Reusing output directory: %s", name)
    else:
        logging.debug("Creating output directory: %s", name)
        os.mkdir(name)
Example #3
0
    def test_nisin_minimal(self):
        """Failing without input, a normal run, then reusing prior results."""
        out_dir = self.default_options.output_dir
        # the output directory must start out empty
        assert not glob.glob(os.path.join(out_dir, "*"))

        # with no sequence file and no prior results, the run must die...
        with self.assertRaisesRegex(
                ValueError, "No sequence file or prior results to read"):
            run_antismash(None, self.default_options)

        # ...and must not have created any files on the way
        assert not glob.glob(os.path.join(out_dir, "*"))

        # a standard run should produce all the expected outputs
        run_antismash(get_path_to_nisin_genbank(), self.default_options)
        self.check_output_files()

        # removing the html output and reusing the results should recreate it
        os.unlink(os.path.join(out_dir, "index.html"))
        update_config({
            "reuse_results": os.path.join(out_dir, "nisin.json")
        })
        run_antismash(None, self.default_options)
        self.check_output_files()
Example #4
0
    def test_nisin_fasta_only(self):
        """Genefinding is skipped with tool 'none' and invoked otherwise."""
        config.update_config({"genefinding_tool": "none"})
        filepath = path.get_full_path(__file__, "data", "nisin.fasta")
        records = record_processing.parse_input_sequence(filepath)
        assert len(records) == 1
        assert not records[0].get_cds_features()

        # with the tool disabled every record is skipped and genefinding never runs
        with self.assertRaisesRegex(AntismashInputError,
                                    "all records skipped"):
            record_processing.pre_process_sequences(records, self.options,
                                                    self.genefinding)
        assert not self.genefinding.was_run
        assert not records[0].get_cds_features()

        # re-enable the record and select a real tool name
        records[0].skip = False
        config.update_config({"genefinding_tool": "not-none"})
        # the dummy genefinding marks no genes, so the same error is raised...
        with self.assertRaisesRegex(AntismashInputError,
                                    "all records skipped"):
            record_processing.pre_process_sequences(records, self.options,
                                                    self.genefinding)
        # ...but genefinding itself did run this time
        assert self.genefinding.was_run
        # each record has no features and carries the skip reason
        for record in records:
            assert not record.get_cds_features()
            assert record.skip.lower() == "no genes found"
Example #5
0
    def test_nisin_complete(self):
        """A full nisin run, then TTA result regeneration at two thresholds."""
        with TemporaryDirectory() as output_dir:
            input_path = helpers.get_path_to_nisin_genbank()
            args = ["--minimal", "--enable-tta", "--tta-threshold", "0",
                    "--output-dir", output_dir, input_path]
            options = build_config(args, isolated=True,
                                   modules=antismash.get_all_modules())
            antismash.run_antismash(input_path, options)

            # regenerate the TTA results from the stored JSON
            update_config(
                {"reuse_results": os.path.join(output_dir, "nisin.json")})
            prior_results = read_data(None, options)
            record = prior_results.records[0]
            results = prior_results.results[0]
            module_results = results.get("antismash.modules.tta")
            regenerated = tta.regenerate_previous_results(module_results,
                                                          record, options)
            assert isinstance(regenerated, tta.TTAResults)
            assert len(regenerated.features) == 174

            # with the threshold raised above the gc_content, regeneration finds nothing
            update_config({"tta_threshold": 0.65})
            regenerated = tta.regenerate_previous_results(module_results,
                                                          record, options)
            assert isinstance(regenerated, tta.TTAResults)
            assert not regenerated.features
    def setUp(self):
        """Build an isolated clusterfinder config and a dummy record whose
        PFAM domains are attached via db_xref entries."""
        self.config = build_config(["--cf-create-clusters",
                                    "--cf-mean-threshold", "0.6",
                                    "--cf-min-cds", "5",
                                    "--cf-min-pfams", "5"],
                                   modules=[clusterfinder], isolated=True)
        update_config({"enabled_cluster_types": []})

        # (start, end, probability, pfam id) per domain; the second group
        # mirrors the first, offset by 1000, with an isolated domain between
        domains = [(10, 20, 0.1, 'FAKE007'),
                   (30, 40, 0.3, 'PF00106'),
                   (50, 60, 0.4, 'PF00107'),
                   (60, 70, 0.7, 'PF00109'),
                   (70, 80, 0.98, 'PF08484'),
                   (90, 100, 0.8, 'PF02401'),
                   (100, 110, 0.32, 'PF04369'),
                   (110, 120, 1.0, 'PF00128'),
                   (130, 140, 0.2, 'FAKE234'),
                   (500, 505, None, 'FAKE505'),
                   (1010, 1020, 0.1, 'FAKE007'),
                   (1030, 1040, 0.3, 'PF00106'),
                   (1050, 1060, 0.4, 'PF00107'),
                   (1060, 1070, 0.7, 'PF00109'),
                   (1070, 1080, 0.98, 'PF08484'),
                   (1090, 1100, 0.8, 'PF02401'),
                   (1100, 1110, 0.32, 'PF04369'),
                   (1110, 1120, 1.0, 'PF00128')]
        self.record = DummyRecord(seq=Seq("A" * 2000))
        for start, end, probability, pfam_id in domains:
            location = FeatureLocation(start, end)
            self.record.add_cds_feature(
                CDSFeature(location, locus_tag=str(start)))
            domain = PFAMDomain(location, "dummy_description")
            domain.db_xref.append(pfam_id)
            domain.probability = probability
            self.record.add_pfam_domain(domain)
Example #7
0
 def test_limit_to_record_complete(self):
     """A record-ID filter matching nothing must abort processing."""
     records = self.read_double_nisin()
     config.update_config({"limit_to_record": "bad_id"})
     expected = "no sequences matched filter"
     with self.assertRaisesRegex(AntismashInputError, expected):
         record_processing.pre_process_sequences(records, self.options,
                                                 self.genefinding)
Example #8
0
def prepare_output_directory(name: str, input_file: str) -> None:
    """ Ensure the output directory exists and is usable

        Raises an exception if the directory is unusable

        Arguments:
            name: the path of the directory
            input_file: the path of the input file

        Returns:
            None
    """
    # if not supplied, set the output directory to be the sequence name
    if not name:
        name = os.path.abspath(
            os.path.splitext(os.path.basename(input_file))[0])
        update_config({"output_dir": name})

    if os.path.exists(name):
        # an existing non-directory (e.g. a regular file) can't be used
        if not os.path.isdir(name):
            raise RuntimeError(
                "Output directory %s exists and is not a directory" % name)
        logging.debug("Reusing output directory: %s", name)
    else:
        logging.debug("Creating output directory: %s", name)
        os.mkdir(name)
 def test_check_prereqs_missing_executables(self):
     """With no executables configured, prerequisite checks must fail."""
     options = build_config(["--check-prereqs"], isolated=True,
                            modules=get_all_modules())
     # wipe all known executables from the config
     update_config({"executables": Namespace()})
     mock("antismash.config.get_config", returns=options)
     current = get_config()
     assert hasattr(current, "executables")
     assert not current.executables.__dict__
     with self.assertRaisesRegex(RuntimeError, "failing prereq"):
         antismash.main.check_prerequisites(get_all_modules(), options)
 def test_limit_to_record_partial(self):
     """Records failing the ID filter are skipped; matching ones are kept."""
     records = self.read_double_nisin()
     assert all(record.skip is None for record in records)
     config.update_config({"limit_to_record": records[0].id})
     # invalidate the first record's ID so only the second still matches
     records[0].id += "_changed"
     record_processing.pre_process_sequences(records, self.options, self.genefinding)
     assert not records[1].skip
     assert records[0].skip.startswith("did not match filter")
 def test_nisin_fasta_gff(self):
     """A supplied GFF3 file provides the genes without running genefinding."""
     fasta = path.get_full_path(__file__, "data", "nisin.fasta")
     gff = path.get_full_path(__file__, "data", "nisin.gff3")
     config.update_config({"genefinding_gff3": gff})
     records = record_processing.parse_input_sequence(fasta, gff_file=gff)
     record_processing.pre_process_sequences(records, self.options,
                                             self.genefinding)
     # the GFF supplied the features, so the genefinding module stayed idle
     assert not self.genefinding.was_run
     assert len(records[0].get_cds_features()) == 11
 def test_limit(self):
     """A record limit skips everything beyond the first N records."""
     records = self.read_double_nisin()
     assert all(record.skip is None for record in records)
     assert not self.options.triggered_limit
     config.update_config({"limit": 1})
     record_processing.pre_process_sequences(records, self.options,
                                             self.genefinding)
     # only the first record survives, and the limit trigger is recorded
     assert records[0].skip is None
     assert records[1].skip.startswith("skipping all but first 1")
     assert self.options.triggered_limit
Example #13
0
 def run_antismash(self, filename, expected):
     """Run clusterblast over the file, regenerate its results, and check
     the ranking size and SVG outputs against the expected hit count."""
     with TemporaryDirectory() as output_dir:
         update_config({"output_dir": output_dir})
         results = helpers.run_and_regenerate_results_for_module(
             filename, clusterblast, self.options)
         update_config({"output_dir": ""})
         results, global_results = self.get_results(results)
         assert len(results.region_results) == 1
         region = results.region_results[0]
         # the hit count depends on the database contents
         assert len(region.ranking) == expected
         self.check_svgs(global_results, expected, output_dir)
     return results
Example #14
0
def canonical_base_filename(input_file: str, directory: str, options: ConfigType) -> str:
    """ Generate a canonical base filename if one isn't specified in the options.

        For compressed inputs (e.g. "seq.gbk.gz") the compression extension is
        stripped along with the real extension underneath it.

        Arguments:
            input_file: the path of the input file the name is derived from
            directory: the directory the returned path is rooted in
            options: the current config; output_basename is used if set,
                     otherwise it is updated with the derived name

        Returns:
            the joined path of the directory and the base filename
    """
    if options.output_basename:
        base_filename = options.output_basename
    else:
        base_filename, ext = os.path.splitext(os.path.basename(input_file))
        # a compression suffix hides the real extension, so strip once more
        # (".bz2" is the standard bzip2 extension; ".bz" is a legacy variant)
        if ext.lower() in (".gz", ".bz", ".bz2", ".xz"):
            base_filename, _ = os.path.splitext(base_filename)
        update_config({"output_basename": base_filename})

    return os.path.join(directory, base_filename)
Example #15
0
 def test_from_json_higher_bitscore(self):
     """Raising the cutoff between runs drops hits scoring below it."""
     json = self.create_results().to_json()
     assert get_config().rre_cutoff == 25.
     raised = 35.
     # exactly one of the two hits should survive the raised cutoff
     assert self.hits[0].score > raised
     assert self.hits[1].score < raised
     update_config({"rre_cutoff": raised})
     result = RREFinderResults.from_json(json, self.record)
     assert len(result.hits_by_cds) == 1
     assert result.hits_by_cds[self.hits[0].locus_tag] == [self.hits[0]]
     assert len(result.hits_by_protocluster) == 1
     assert result.hits_by_protocluster[1] == [self.hits[0].locus_tag]
Example #16
0
 def test_from_json_higher_min_length(self):
     """Raising the minimum length between runs drops hits shorter than it."""
     json = self.create_results().to_json()
     assert get_config().rre_min_length == 50
     raised = 80
     # exactly one of the two hits should survive the raised minimum
     assert len(self.hits[0]) < raised
     assert len(self.hits[1]) > raised
     update_config({"rre_min_length": raised})
     results = RREFinderResults.from_json(json, self.record)
     assert len(results.hits_by_cds) == 1
     assert results.hits_by_cds[self.hits[1].locus_tag] == [self.hits[1]]
     assert len(results.hits_by_protocluster) == 1
     assert results.hits_by_protocluster[2] == [self.hits[1].locus_tag]
 def setUp(self):
     """Configure a fungal glimmerhmm genefinding run on a single CPU."""
     args = ['--taxon', 'fungi', '--genefinding-tool', 'glimmerhmm',
             '--cpus', '1']
     self.options = update_config(get_simple_options(genefinding, args))
     self.data_location = get_full_path(__file__, "data")
Example #18
0
 def test_valid_args(self):
     """Parsed arguments reach both the Namespace and the Config singleton."""
     options = self.core_parser.parse_args(['--taxon', 'fungi'])
     assert options.taxon == 'fungi'
     # the parsed value must propagate into the Config singleton
     result = update_config(options)
     assert result.taxon == 'fungi'
Example #19
0
 def setUp(self):
     """Build a minimal TTA-enabled config, preserving the previous one."""
     args = ["--minimal", "--enable-tta", "--tta-threshold", "0"]
     options = build_config(args, isolated=True,
                            modules=antismash.get_all_modules())
     self.old_config = get_config().__dict__
     self.options = update_config(options)
    def setUp(self):
        class DummyModule:
            """A stand-in genefinding module that only records whether it ran."""
            def __init__(self):
                self.was_run = False

            def get_arguments(self):
                args = config.args.ModuleArgs("genefinding", "genefinding")
                args.add_option("gff3", default="", type=str, help="dummy",
                                dest="gff3")
                return args

            def run_on_record(self, *_args, **_kwargs):
                self.was_run = True

        self.genefinding = DummyModule()
        self.options = config.build_config(["--cpus", "1"], isolated=True,
                                           modules=[self.genefinding])
        config.update_config({"triggered_limit": False})
Example #21
0
    def setUp(self):
        """Build the clusterblast config and verify the module is runnable."""
        options = build_config(self.get_args(), isolated=True,
                               modules=get_all_modules())
        self.old_config = get_config().__dict__
        self.options = update_config(options)

        # the module must be ready to run under these options
        assert clusterblast.check_prereqs(self.options) == []
        assert clusterblast.check_options(self.options) == []
        assert clusterblast.is_enabled(self.options)
Example #22
0
    def setUp(self):
        """Locate the diamond database fixtures and build an isolated config."""
        def data_file(name):
            return path.get_full_path(__file__, "data", name)

        self.format0_file = data_file("format0.dmnd")
        self.format1_file = data_file("format1.dmnd")
        self.empty = data_file("empty.dmnd")

        options = build_config([], isolated=True, modules=get_all_modules())
        self.old_config = get_config().__dict__
        self.options = update_config(options)
Example #23
0
 def test_namespace_initialisation(self):
     """Config can be initialised from an argparse Namespace."""
     namespace = Namespace()
     namespace.taxon = 'fungi'
     result = update_config(namespace)
     assert result.taxon == 'fungi'
     # the stored value must persist for later singleton accesses
     assert get_config().taxon == 'fungi'
Example #24
0
 def test_assignment_proofing(self):
     """Config values cannot be reassigned after being set."""
     result = update_config({'taxon': 'fungi'})
     assert result.taxon == 'fungi'
     # direct assignment on a Config object must be rejected...
     with self.assertRaises(RuntimeError):
         result.taxon = 'bacteria'
     # ...without having modified the stored value
     assert result.taxon == 'fungi'
Example #25
0
 def test_get(self):
     """Config.get mirrors attribute access and supports defaults."""
     result = update_config({'a': 1, 'b': None})
     # attribute access and get must agree
     assert result.a == result.get('a')
     # an existing None value wins over a supplied default
     assert result.get('b', 3) is None
     # missing keys fall back to the default, or None if none given
     assert result.get('c') is None
     assert result.get('c', 3) == 3
Example #26
0
    def test_long_names(self):
        """Record names over 16 chars are trimmed unless explicitly allowed."""
        record = self.read_nisin()[0]

        def rename(length):
            # keep id and name in lockstep, as the processing expects
            record.id = "A" * length
            record.name = record.id

        # a 16-char name is within the limit and left untouched
        rename(16)
        self.run_on_records([record])
        assert record.id == record.name == "A" * 16

        # a 17-char name exceeds the limit and gets trimmed
        rename(17)
        self.run_on_records([record])
        assert len(record.id) <= 16
        assert len(record.name) <= 16

        # with long headers explicitly allowed, nothing is trimmed
        config.update_config({"allow_long_headers": True})
        rename(17)
        self.run_on_records([record])
        assert record.id == record.name == "A" * 17
Example #27
0
    def test_trees_complete(self):
        """ Full smcog_trees run, then two regeneration attempts: one with all
            tree images intact (succeeds) and one after deleting an image
            (fails, forcing a rerun).
        """
        with TemporaryDirectory() as output_dir:
            args = [
                "--minimal", "--enable-genefunctions", "--smcog-trees",
                "--output-dir", output_dir,
                helpers.get_path_to_nisin_genbank()
            ]
            options = build_config(args,
                                   isolated=True,
                                   modules=antismash.get_all_modules())
            antismash.run_antismash(helpers.get_path_to_nisin_genbank(),
                                    options)

            # the module's results must be present in the stored JSON
            with open(os.path.join(output_dir, "nisin.json")) as res_file:
                assert "antismash.modules.smcog_trees" in res_file.read()

            tree_files = list(
                glob.glob(os.path.join(output_dir, "smcogs", "*.png")))
            assert len(tree_files) == 7
            # keep one image path aside to delete later
            sample_tree = tree_files[0]

            # regen the results
            update_config(
                {"reuse_results": os.path.join(output_dir, "nisin.json")})
            prior_results = read_data(None, options)
            record = prior_results.records[0]
            results = prior_results.results[0]
            tree_results = results["antismash.modules.smcog_trees"]

            # with all images present, regeneration succeeds
            smcogs_results = smcog_trees.regenerate_previous_results(
                tree_results, record, options)
            assert len(smcogs_results.tree_images) == 7
            assert os.path.exists(sample_tree)

            os.unlink(sample_tree)
            assert not os.path.exists(sample_tree)

            # attempt to regen the results, the deleted tree image will prevent it
            prior_results = read_data(None, options)
            record = prior_results.records[0]
            results = prior_results.results[0]
            smcogs_results = smcog_trees.regenerate_previous_results(
                tree_results, record, options)
            assert smcogs_results is None
Example #28
0
    def test_mounted_at_runtime(self, _mocked_prep_known):
        """ check_prereqs must skip database file checks for a directory that
            is only mounted at runtime, and must run them exactly once for a
            concrete path.
        """
        options = config.update_config({
            "database_dir": "/mounted_at_runtime",
            "executables": {
                "diamond": "/some/path"
            },
        })
        error = RuntimeError(
            "check_clusterblast_files called when it shouldn't have been")
        # a runtime-mounted database dir must not be inspected at all
        with patch.object(clusterblast,
                          "check_clusterblast_files",
                          side_effect=error):
            clusterblast.check_prereqs(options)

        options = config.update_config({"database_dir": "/path"})
        # "returns" is not a Mock kwarg; return_value is required for the
        # patched function to actually report no failures instead of a Mock
        with patch.object(clusterblast, "check_clusterblast_files",
                          return_value=[]) as check:
            clusterblast.check_prereqs(options)
            check.assert_called_once()
Example #29
0
    def setUp(self):
        """Prepare an isolated config and input record for smcog_trees runs."""
        self.old_config = get_config().__dict__
        options = build_config(self.get_args(), isolated=True,
                               modules=antismash.get_all_modules())
        self.options = update_config(options)
        # run single-process so signals aren't caught awkwardly in the test itself
        update_config({"cpus": 1})

        # smcogs_trees depends on genefunctions.smcogs' data, so prepare it first
        assert genefunctions.prepare_data() == []

        assert smcog_trees.check_prereqs() == []
        assert smcog_trees.check_options(self.options) == []
        assert smcog_trees.is_enabled(self.options)

        self.record = self.build_record(
            helpers.get_path_to_nisin_with_detection())
    def test_nisin_fasta_only(self):
        """Genefinding runs only when a real tool is configured."""
        config.update_config({"genefinding_tool": "none"})
        filepath = path.get_full_path(__file__, "data", "nisin.fasta")
        records = record_processing.parse_input_sequence(filepath)
        assert len(records) == 1
        assert not records[0].get_cds_features()

        # with the tool set to "none", genefinding must not be invoked
        record_processing.pre_process_sequences(records, self.options,
                                                self.genefinding)
        assert not self.genefinding.was_run
        assert not records[0].get_cds_features()

        # with a real tool name the (dummy) genefinding must be invoked
        records[0].skip = False
        config.update_config({"genefinding_tool": "not-none"})
        record_processing.pre_process_sequences(records, self.options,
                                                self.genefinding)
        assert self.genefinding.was_run
        # the dummy genefinding adds no features
        assert not records[0].get_cds_features()