def test_works_with_known_model(self):
    """Run the ``predict`` command on two JSON-lines inputs and check the output.

    Relies on ``self.infile``, ``self.outfile``, ``self.bidaf_model_path`` and
    ``self.tempdir`` being prepared by the surrounding fixture (not visible
    from here).
    """
    json_lines = [
        """{"passage": "the seahawks won the super bowl in 2016", """
        """ "question": "when did the seahawks win the super bowl?"}\n""",
        """{"passage": "the mariners won the super bowl in 2037", """
        """ "question": "when did the mariners win the super bowl?"}\n""",
    ]
    with open(self.infile, 'w') as input_file:
        for json_line in json_lines:
            input_file.write(json_line)

    # argv layout: executable, command, model archive, input file, then options.
    sys.argv = [
        "run.py",
        "predict",
        str(self.bidaf_model_path),
        str(self.infile),
        "--output-file", str(self.outfile),
        "--silent",
    ]
    main()

    assert os.path.exists(self.outfile)

    # One JSON document is expected per output line.
    with open(self.outfile, 'r') as output_file:
        predictions = [json.loads(raw_line) for raw_line in output_file]

    expected_keys = {
        "span_start_logits", "span_end_logits",
        "passage_question_attention",
        "question_tokens", "passage_tokens",
        "span_start_probs", "span_end_probs",
        "best_span", "best_span_str",
    }
    assert len(predictions) == 2
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_using_dataset_reader_works_with_known_model(self):
    """Run ``predict`` with ``--use-dataset-reader`` and check the output.

    The input is ``self.bidaf_data_path`` rather than a hand-written file;
    five output records are expected, each carrying a ``loss`` field in
    addition to the usual prediction fields.
    """
    # argv layout: executable, command, model archive, input file, then options.
    sys.argv = [
        "run.py",
        "predict",
        str(self.bidaf_model_path),
        str(self.bidaf_data_path),
        "--output-file", str(self.outfile),
        "--silent",
        "--use-dataset-reader",
    ]
    main()

    assert os.path.exists(self.outfile)

    # One JSON document is expected per output line.
    with open(self.outfile, 'r') as output_file:
        predictions = [json.loads(raw_line) for raw_line in output_file]

    expected_keys = {
        "span_start_logits", "span_end_logits",
        "passage_question_attention",
        "question_tokens", "passage_tokens",
        "span_start_probs", "span_end_probs",
        "best_span", "best_span_str",
        "loss",
    }
    assert len(predictions) == 5
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_other_modules(self):
    """Check that ``predict`` can use a predictor from an ``--include-package``.

    A copy of the bidaf predictor module is written into a temporary package
    under a different registered name.  Without ``--include-package`` the
    command must raise ``ConfigurationError``; with it, prediction succeeds.

    ``sys.path`` is restored in a ``finally`` clause so that a failure in this
    test does not leave ``TEST_DIR`` on the import path for later tests.
    """
    # Create a new package in a temporary dir
    packagedir = self.TEST_DIR / 'testpackage'
    packagedir.mkdir()  # pylint: disable=no-member
    (packagedir / '__init__.py').touch()  # pylint: disable=no-member

    # And add that directory to the path
    sys.path.insert(0, str(self.TEST_DIR))
    try:
        # Write out a duplicate predictor there, but registered under a different name.
        from srl_model.predictors import bidaf
        with open(bidaf.__file__) as f:
            code = f.read().replace("""@Predictor.register('machine-comprehension')""",
                                    """@Predictor.register('duplicate-test-predictor')""")

        with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
            f.write(code)

        self.infile = os.path.join(self.TEST_DIR, "inputs.txt")
        self.outfile = os.path.join(self.TEST_DIR, "outputs.txt")

        with open(self.infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",      # executable
                    "predict",     # command
                    str(self.bidaf_model_path),
                    str(self.infile),     # input_file
                    "--output-file", str(self.outfile),
                    "--predictor", "duplicate-test-predictor",
                    "--silent"]

        # Should raise ConfigurationError, because predictor is unknown
        with pytest.raises(ConfigurationError):
            main()

        # But once we include testpackage, it should be known
        sys.argv.extend(["--include-package", "testpackage"])
        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # The duplicate predictor is a verbatim copy registered under another
        # name, so it should emit exactly the standard fields (no extras).
        for result in results:
            assert set(result.keys()) == {"span_start_logits", "span_end_logits",
                                          "passage_question_attention",
                                          "question_tokens", "passage_tokens",
                                          "span_start_probs", "span_end_probs",
                                          "best_span", "best_span_str"}
    finally:
        # Always undo the sys.path change, even when an assertion above fails.
        sys.path.remove(str(self.TEST_DIR))
def test_fails_without_required_args(self):
    """``predict`` with an archive but no input file must exit with status 2."""
    # argv layout: executable, command, archive -- the input file is left out.
    incomplete_argv = ["run.py", "predict", "/path/to/archive"]
    sys.argv = incomplete_argv

    with self.assertRaises(SystemExit) as exit_context:  # pylint: disable=invalid-name
        main()

    # 2 is the argparse exit code for incorrect usage.
    assert exit_context.exception.code == 2
def test_alternative_file_formats(self):
    """Check that a predictor can read and write CSV instead of JSON lines.

    A ``BidafPredictor`` subclass is registered as ``'bidaf-csv'`` with
    ``load_line`` / ``dump_line`` overridden to parse and emit CSV rows.  The
    ``predict`` command is then run on a two-row CSV input and each output row
    is checked to have five columns: start prob, end prob, span start,
    span end, and the span text.

    The CSV files are opened with ``newline=''`` as the ``csv`` module
    requires, so the writer's own line terminator is not translated a second
    time by the text layer.
    """
    @Predictor.register('bidaf-csv')  # pylint: disable=unused-variable
    class BidafCsvPredictor(BidafPredictor):
        """same as bidaf predictor but using CSV inputs and outputs"""
        def load_line(self, line: str) -> JsonDict:
            # Parse a single CSV record of the form: passage,question
            reader = csv.reader([line])
            passage, question = next(reader)
            return {"passage": passage, "question": question}

        def dump_line(self, outputs: JsonDict) -> str:
            # Serialize one prediction as a single CSV record.
            output = io.StringIO()
            writer = csv.writer(output)
            row = [outputs["span_start_probs"][0],
                   outputs["span_end_probs"][0],
                   *outputs["best_span"],
                   outputs["best_span_str"]]

            writer.writerow(row)
            return output.getvalue()

    # newline='' lets the csv writer control line endings itself.
    with open(self.infile, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["the seahawks won the super bowl in 2016",
                         "when did the seahawks win the super bowl?"])
        writer.writerow(["the mariners won the super bowl in 2037",
                         "when did the mariners win the super bowl?"])

    sys.argv = ["run.py",      # executable
                "predict",     # command
                str(self.bidaf_model_path),
                str(self.infile),     # input_file
                "--output-file", str(self.outfile),
                "--predictor", 'bidaf-csv',
                "--silent"]

    main()
    assert os.path.exists(self.outfile)

    with open(self.outfile, 'r', newline='') as f:
        results = list(csv.reader(f))

    assert len(results) == 2
    for row in results:
        assert len(row) == 5
        start_prob, end_prob, span_start, span_end, span = row
        for prob in (start_prob, end_prob):
            assert 0 <= float(prob) <= 1
        assert 0 <= int(span_start) <= int(span_end) <= 8
        assert span != ''

    shutil.rmtree(self.tempdir)
def test_fails_on_unknown_command(self):
    """An unrecognised command line must make ``main`` exit with status 2."""
    # Labels below follow the original annotations (command, model_name,
    # input_file).  NOTE(review): sys.argv[0] is conventionally the program
    # name, so "unknown_model" may be what actually gets parsed as the
    # command -- confirm against main().
    bogus_argv = [
        "bogus",
        "unknown_model",
        "bogus file",
        "--output-file", "bogus out file",
        "--silent",
    ]
    sys.argv = bogus_argv

    with self.assertRaises(SystemExit) as exit_context:  # pylint: disable=invalid-name
        main()

    # 2 is the argparse exit code for incorrect usage.
    assert exit_context.exception.code == 2
def test_can_specify_predictor(self):
    """Check that ``--predictor`` selects an explicitly named predictor.

    A ``BidafPredictor`` subclass registered as ``'bidaf-explicit'`` adds an
    ``"explicit"`` field to every prediction; its presence in the output shows
    that the named predictor was the one used.
    """
    @Predictor.register('bidaf-explicit')  # pylint: disable=unused-variable
    class Bidaf3Predictor(BidafPredictor):
        """same as bidaf predictor but with an extra field"""
        def predict_json(self, inputs: JsonDict) -> JsonDict:
            prediction = super().predict_json(inputs)
            prediction["explicit"] = True
            return prediction

    json_lines = [
        """{"passage": "the seahawks won the super bowl in 2016", """
        """ "question": "when did the seahawks win the super bowl?"}\n""",
        """{"passage": "the mariners won the super bowl in 2037", """
        """ "question": "when did the mariners win the super bowl?"}\n""",
    ]
    with open(self.infile, 'w') as input_file:
        for json_line in json_lines:
            input_file.write(json_line)

    # argv layout: executable, command, model archive, input file, then options.
    sys.argv = [
        "run.py",
        "predict",
        str(self.bidaf_model_path),
        str(self.infile),
        "--output-file", str(self.outfile),
        "--predictor", "bidaf-explicit",
        "--silent",
    ]
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, 'r') as output_file:
        predictions = [json.loads(raw_line) for raw_line in output_file]

    assert len(predictions) == 2

    # The overriding predictor contributes the extra "explicit" field.
    expected_keys = {
        "span_start_logits", "span_end_logits",
        "passage_question_attention",
        "question_tokens", "passage_tokens",
        "span_start_probs", "span_end_probs",
        "best_span", "best_span_str",
        "explicit",
    }
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_subcommand_overrides(self):
    """Check that ``main`` uses a subcommand passed via ``subcommand_overrides``.

    A stand-in ``Subcommand`` records whether its ``add_subparser`` was
    invoked; after running ``main`` with it mapped to ``"evaluate"``, the
    flag must be set.
    """
    def noop(_):
        # Handler attached to the fake subparser; intentionally does nothing.
        pass

    class FakeEvaluate(Subcommand):
        # Class-level default; flipped on the instance once add_subparser runs.
        add_subparser_called = False

        def add_subparser(self, name, parser):
            fake_parser = parser.add_parser(
                name, description="fake", help="fake help")
            fake_parser.set_defaults(func=noop)
            self.add_subparser_called = True
            return fake_parser

    override = FakeEvaluate()

    sys.argv = ["allennlp.run", "evaluate"]
    main(subcommand_overrides={"evaluate": override})

    assert override.add_subparser_called
def test_other_modules(self):
    """Check that ``configure`` can describe a class from an external package.

    A copy of the bidaf predictor module is written into a temporary package
    under the registered name ``'configure-test-predictor'``.  ``configure``
    is then run with the dotted path of that class, and the captured stdout
    must mention the new registered name.

    Both ``sys.stdout`` and ``sys.path`` are restored in ``finally`` clauses,
    so a failure in ``main()`` or in the assertion cannot leave stdout
    redirected or ``TEST_DIR`` on the import path for later tests.
    """
    # Create a new package in a temporary dir
    packagedir = self.TEST_DIR / 'configuretestpackage'
    packagedir.mkdir()  # pylint: disable=no-member
    (packagedir / '__init__.py').touch()  # pylint: disable=no-member

    # And add that directory to the path
    sys.path.insert(0, str(self.TEST_DIR))
    try:
        # Write out a duplicate predictor there, but registered under a different name.
        from srl_model.predictors import bidaf
        with open(bidaf.__file__) as f:
            code = f.read().replace(
                """@Predictor.register('machine-comprehension')""",
                """@Predictor.register('configure-test-predictor')""")

        with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
            f.write(code)

        # Capture stdout
        stdout_saved = sys.stdout
        stdout_captured = StringIO()
        sys.stdout = stdout_captured
        try:
            sys.argv = [
                "run.py",      # executable
                "configure",   # command
                "configuretestpackage.predictor.BidafPredictor"
            ]
            main()
        finally:
            # Restore stdout before asserting so failure output is visible.
            sys.stdout = stdout_saved

        output = stdout_captured.getvalue()
        assert "configure-test-predictor" in output
    finally:
        # Always undo the sys.path change, even when something above fails.
        sys.path.remove(str(self.TEST_DIR))
def test_other_modules(self):
    """Check that ``train`` can use a model from an ``--include-package``.

    A copy of the simple tagger module is written into a temporary package
    under the registered name ``"duplicate-test-tagger"`` and a config
    referencing it is trained:

    1. without ``--include-package`` -> ``ConfigurationError`` (unknown model);
    2. with ``--recover --include-package testpackage`` -> succeeds;
    3. after changing ``num_epochs`` in the config -> ``ConfigurationError``,
       because the config no longer matches the serialization directory.

    ``sys.path`` is restored in a ``finally`` clause so that a failure in this
    test does not leave ``TEST_DIR`` on the import path for later tests.
    """
    # Create a new package in a temporary dir
    packagedir = self.TEST_DIR / 'testpackage'
    packagedir.mkdir()  # pylint: disable=no-member
    (packagedir / '__init__.py').touch()  # pylint: disable=no-member

    # And add that directory to the path
    sys.path.insert(0, str(self.TEST_DIR))
    try:
        # Write out a duplicate model there, but registered under a different name.
        from srl_model.models import simple_tagger

        with open(simple_tagger.__file__) as model_file:
            code = model_file.read().replace(
                """@Model.register("simple_tagger")""",
                """@Model.register("duplicate-test-tagger")""")

        with open(packagedir / 'model.py', 'w') as new_model_file:
            new_model_file.write(code)

        # Copy fixture there too.
        shutil.copy(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv', self.TEST_DIR)
        data_path = str(self.TEST_DIR / 'sequence_tagging.tsv')

        # Write out config file; '$$$' is a placeholder for the data path.
        config_path = self.TEST_DIR / 'config.json'
        config_json = (
            '{ "model": { "type": "duplicate-test-tagger", '
            '"text_field_embedder": { "tokens": { "type": "embedding", "embedding_dim": 5 } }, '
            '"encoder": { "type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2 } }, '
            '"dataset_reader": {"type": "sequence_tagging"}, '
            '"train_data_path": "$$$", '
            '"validation_data_path": "$$$", '
            '"iterator": {"type": "basic", "batch_size": 2}, '
            '"trainer": { "num_epochs": 2, "optimizer": "adam" } }'
        ).replace('$$$', data_path)
        with open(config_path, 'w') as config_file:
            config_file.write(config_json)

        serialization_dir = self.TEST_DIR / 'serialization'

        # Run train with using the non-allennlp module.
        sys.argv = [
            "bin/allennlp",
            "train", str(config_path),
            "-s", str(serialization_dir)
        ]

        # Shouldn't be able to find the model.
        with pytest.raises(ConfigurationError):
            main()

        # Now add the --include-package flag and it should work.
        # We also need to add --recover since the output directory already exists.
        sys.argv.extend(["--recover", "--include-package", 'testpackage'])

        main()

        # Rewrite out config file, but change a value.
        with open(config_path, 'w') as new_config_file:
            new_config_file.write(
                config_json.replace('"num_epochs": 2,', '"num_epochs": 4,'))

        # This should fail because the config.json does not match that in the
        # serialization directory.
        with pytest.raises(ConfigurationError):
            main()
    finally:
        # Always undo the sys.path change, even when something above fails.
        sys.path.remove(str(self.TEST_DIR))
#!/usr/bin/env python
"""Command-line launcher: set up logging and the import path, then run ``main``."""
import logging
import os
import sys

# Any non-empty ALLENNLP_DEBUG value switches logging from INFO to DEBUG.
LEVEL = logging.DEBUG if os.environ.get("ALLENNLP_DEBUG") else logging.INFO

# Put the parent of this file's directory at the front of the import path.
# This must happen before the project import below.
sys.path.insert(
    0,
    os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir))),
)

logging.basicConfig(
    level=LEVEL,
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
)

from srl_model.commands import main  # pylint: disable=wrong-import-position

if __name__ == "__main__":
    main(prog="allennlp")