def import_module_and_submodules(package_name: str) -> None:
    """
    Import `package_name` and, recursively, every submodule found beneath it.

    This mainly exists so that people using AllenNLP as a library can point at their own
    custom packages and have the classes defined there loaded and registered.
    """
    importlib.invalidate_caches()

    # The directory is pushed onto the python path for the duration of the import because python
    # doesn't always put it there by default, yet it is nearly always wanted when using
    # `--include-package`. Adding it again when it is already present is harmless.
    # NOTE(review): `root` is not defined inside this function; presumably it is provided at
    # module level - confirm.
    with push_python_path(root):
        top_module = importlib.import_module(package_name)
        search_paths = getattr(top_module, "__path__", [])
        first_path = search_paths[0] if search_paths else ""

        # Recurse by hand so that each child is imported under its fully qualified name.
        for finder, child_name, _ in pkgutil.walk_packages(search_paths):
            # `pkgutil.walk_packages` sometimes also reports third-party libraries that are on
            # the path; only children found in this package's own directory are followed.
            if not first_path or finder.path == first_path:
                import_module_and_submodules(f"{package_name}.{child_name}")
def test_other_modules(self):
    """
    Check that the `predict` command only knows a predictor defined in an external
    package once that package is passed via `--include-package`.
    """
    # Set up an empty package inside the temporary test directory.
    packagedir = self.TEST_DIR / "testpackage"
    packagedir.mkdir()
    (packagedir / "__init__.py").touch()

    # Everything below runs with the temporary directory on the python path.
    with push_python_path(self.TEST_DIR):
        # Copy the text classifier predictor into the new package, registering the
        # copy under a different name.
        from allennlp.predictors import text_classifier

        with open(text_classifier.__file__) as source_file:
            original_code = source_file.read()
        duplicate_code = original_code.replace(
            """@Predictor.register("text_classifier")""",
            """@Predictor.register("duplicate-test-predictor")""",
        )

        with open(os.path.join(packagedir, "predictor.py"), "w") as target_file:
            target_file.write(duplicate_code)

        self.infile = os.path.join(self.TEST_DIR, "inputs.txt")
        self.outfile = os.path.join(self.TEST_DIR, "outputs.txt")

        input_lines = (
            """{"sentence": "the seahawks won the super bowl in 2016"}\n""",
            """{"sentence": "the mariners won the super bowl in 2037"}\n""",
        )
        with open(self.infile, "w") as input_file:
            for input_line in input_lines:
                input_file.write(input_line)

        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            str(self.classifier_model_path),
            str(self.infile),  # input_file
            "--output-file",
            str(self.outfile),
            "--predictor",
            "duplicate-test-predictor",
            "--silent",
        ]

        # Without the extra package the predictor name is unknown.
        with pytest.raises(ConfigurationError):
            main()

        # With the package included, the same command should succeed.
        sys.argv.extend(["--include-package", "testpackage"])
        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, "r") as output_file:
            predictions = [json.loads(output_line) for output_line in output_file]

        assert len(predictions) == 2
        # Every prediction must carry exactly these fields.
        for prediction in predictions:
            assert set(prediction) == {"label", "logits", "probs"}
def discover_plugins() -> Iterable[str]:
    """
    Yield the names of the plugins found: first the namespace plugins, then the file plugins.

    The current directory (".") is kept on the python path while the results are consumed.
    """
    with push_python_path("."):
        # Namespace plugins come back as module-info objects; only their names are yielded.
        yield from (found.name for found in discover_namespace_plugins())
        # File plugins are yielded as returned by the helper.
        yield from discover_file_plugins()
def push_python_project(path: PathType) -> ContextManagerFunctionReturnType[None]:
    """
    Enter `path` as the working directory and put it on the python path.

    This mimics running a command from the root directory of a Python project, which is part
    of Python's path. It is meant to be used in a `with` statement: once the block finishes,
    both the working directory and the python path changes are undone.
    """
    # NOTE(review): no `@contextmanager` decorator is visible on this generator - confirm that
    # it is applied where this function is defined.
    with pushd(path):
        # After `pushd`, "." refers to `path` itself.
        with push_python_path("."):
            yield
def test_import_submodules(self):
    """
    Check that `util.import_module_and_submodules` imports a package together with its
    nested subpackage and the module inside that subpackage.
    """
    # Lay out mymodule/submodule/subsubmodule.py in the temporary directory.
    package_dir = self.TEST_DIR / "mymodule"
    subpackage_dir = package_dir / "submodule"

    package_dir.mkdir()
    (package_dir / "__init__.py").touch()
    subpackage_dir.mkdir()
    (subpackage_dir / "__init__.py").touch()
    (subpackage_dir / "subsubmodule.py").touch()

    with push_python_path(self.TEST_DIR):
        # Nothing should have been imported yet.
        for module_name in ("mymodule", "mymodule.submodule"):
            assert module_name not in sys.modules

        util.import_module_and_submodules("mymodule")

        # The package, its subpackage and the leaf module must now all be loaded.
        for module_name in (
            "mymodule",
            "mymodule.submodule",
            "mymodule.submodule.subsubmodule",
        ):
            assert module_name in sys.modules
def discover_plugins() -> Iterable[str]:
    """
    Yield the plugins listed in the local and then the global plugins file.

    Each plugin is yielded at most once. A plugins file that does not exist is skipped.
    The current directory (".") is on the python path only while the local file's plugins
    are being yielded.
    """
    seen: Set[str] = set()

    def _new_plugins(filename):
        # Yield the plugins from `filename` that have not been yielded before.
        for candidate in discover_file_plugins(filename):
            if candidate not in seen:
                yield candidate
                seen.add(candidate)

    if os.path.isfile(LOCAL_PLUGINS_FILENAME):
        with push_python_path("."):
            yield from _new_plugins(LOCAL_PLUGINS_FILENAME)

    if os.path.isfile(GLOBAL_PLUGINS_FILENAME):
        yield from _new_plugins(GLOBAL_PLUGINS_FILENAME)
def test_implicit_include_package(self):
    """
    Check that `DatasetReader.by_name` can resolve a fully qualified class path from a
    package on the python path, and that each failure mode reports the expected error.
    """
    # Create a new package in a temporary dir
    packagedir = self.TEST_DIR / "testpackage"
    packagedir.mkdir()
    (packagedir / "__init__.py").touch()

    # And add that directory to the path
    with push_python_path(self.TEST_DIR):
        # Write out a duplicate dataset reader there, but registered under a different name.
        reader = DatasetReader.by_name("text_classification_json")

        with open(inspect.getabsfile(reader)) as f:
            code = f.read().replace(
                """@DatasetReader.register("text_classification_json")""",
                """@DatasetReader.register("text_classification_json-fake")""",
            )

        with open(os.path.join(packagedir, "reader.py"), "w") as f:
            f.write(code)

        # The message assertions below sit *after* each `pytest.raises` block: statements
        # following the raising call inside the block never run, and `exc.value` is only
        # populated once the block has exited.

        # Fails to import by registered name
        with pytest.raises(ConfigurationError) as exc:
            DatasetReader.by_name("text_classification_json-fake")
        assert "is not a registered name" in str(exc.value)

        # Fails to import with wrong module name
        with pytest.raises(ConfigurationError) as exc:
            DatasetReader.by_name(
                "testpackage.text_classification_json.TextClassificationJsonReader"
            )
        assert "unable to import module" in str(exc.value)

        # Fails to import with wrong class name. The exception info is bound here too, so
        # the assertion checks this error rather than the one captured just above.
        with pytest.raises(ConfigurationError) as exc:
            DatasetReader.by_name("testpackage.reader.FakeReader")
        assert "unable to find class" in str(exc.value)

        # Imports successfully with right fully qualified name
        duplicate_reader = DatasetReader.by_name(
            "testpackage.reader.TextClassificationJsonReader"
        )
        assert duplicate_reader.__name__ == "TextClassificationJsonReader"
def discover_plugins() -> Iterable[str]:
    """
    Yield the plugins reported by `discover_file_plugins`.

    The current directory (".") is kept on the python path while the results are consumed.
    """
    with push_python_path("."):
        for plugin in discover_file_plugins():
            yield plugin
def test_other_modules(self):
    """
    Check that the `train` command only knows a model defined in an external package once
    that package is passed via `--include-package`, and that re-running against an existing
    serialization directory with a changed config is rejected.
    """
    # Set up an empty package inside the temporary test directory.
    packagedir = self.TEST_DIR / "testpackage"
    packagedir.mkdir()
    (packagedir / "__init__.py").touch()

    # Everything below runs with the temporary directory on the python path.
    with push_python_path(self.TEST_DIR):
        # Copy the simple tagger model into the new package, registering the copy under a
        # different name.
        from allennlp.models import simple_tagger

        with open(simple_tagger.__file__) as source_file:
            original_code = source_file.read()
        duplicate_code = original_code.replace(
            """@Model.register("simple_tagger")""",
            """@Model.register("duplicate-test-tagger")""",
        )

        with open(packagedir / "model.py", "w") as target_file:
            target_file.write(duplicate_code)

        # The training data fixture is copied next to the package.
        shutil.copy(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv", self.TEST_DIR)
        data_path = str(self.TEST_DIR / "sequence_tagging.tsv")

        # Build the config from a template in which "$$$" stands for the data path.
        config_path = self.TEST_DIR / "config.json"
        config_template = """{ "model": { "type": "duplicate-test-tagger", "text_field_embedder": { "token_embedders": { "tokens": { "type": "embedding", "embedding_dim": 5 } } }, "encoder": { "type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2 } }, "dataset_reader": {"type": "sequence_tagging"}, "train_data_path": "$$$", "validation_data_path": "$$$", "iterator": {"type": "basic", "batch_size": 2}, "trainer": { "num_epochs": 2, "optimizer": "adam" } }"""
        config_json = config_template.replace("$$$", data_path)
        with open(config_path, "w") as config_file:
            config_file.write(config_json)

        serialization_dir = self.TEST_DIR / "serialization"

        # Train using the model that lives outside of allennlp.
        sys.argv = ["allennlp", "train", str(config_path), "-s", str(serialization_dir)]

        # Without the extra package the model type cannot be found.
        with pytest.raises(ConfigurationError):
            main()

        # With --include-package it should work; --recover is needed as well because the
        # output directory already exists by now.
        sys.argv.extend(["--recover", "--include-package", "testpackage"])
        main()

        # Overwrite the config file with one value changed.
        altered_config = config_json.replace('"num_epochs": 2,', '"num_epochs": 4,')
        with open(config_path, "w") as altered_config_file:
            altered_config_file.write(altered_config)

        # The config no longer matches the one in the serialization directory, so this fails.
        with pytest.raises(ConfigurationError):
            main()