def test_works_with_known_model(self):
    """Run ``predict`` on two JSON sentences and check the keys of each output record."""
    input_records = (
        """{"sentence": "the seahawks won the super bowl in 2016"}\n""",
        """{"sentence": "the mariners won the super bowl in 2037"}\n""",
    )
    with open(self.infile, "w") as handle:
        for record in input_records:
            handle.write(record)

    sys.argv = [
        "__main__.py",  # executable
        "predict",  # command
        str(self.classifier_model_path),
        str(self.infile),  # input_file
        "--output-file",
        str(self.outfile),
        "--silent",
    ]
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, "r") as handle:
        predictions = [json.loads(row) for row in handle]

    # One prediction per input line.
    assert len(predictions) == 2
    expected_keys = {"label", "logits", "probs"}
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_other_modules(self):
    """Check that a predictor from an extra package is only usable with ``--include-package``."""
    # Build a throwaway package inside the test directory.
    package_dir = self.TEST_DIR / "testpackage"
    package_dir.mkdir()
    (package_dir / "__init__.py").touch()

    # Make that directory importable for the duration of the test.
    with push_python_path(self.TEST_DIR):
        # Copy the text classifier predictor, registering the copy under a new name.
        from allennlp.predictors import text_classifier

        with open(text_classifier.__file__) as source_file:
            original_code = source_file.read()
        duplicate_code = original_code.replace(
            """@Predictor.register("text_classifier")""",
            """@Predictor.register("duplicate-test-predictor")""",
        )

        with open(os.path.join(package_dir, "predictor.py"), "w") as target_file:
            target_file.write(duplicate_code)

        self.infile = os.path.join(self.TEST_DIR, "inputs.txt")
        self.outfile = os.path.join(self.TEST_DIR, "outputs.txt")

        input_records = (
            """{"sentence": "the seahawks won the super bowl in 2016"}\n""",
            """{"sentence": "the mariners won the super bowl in 2037"}\n""",
        )
        with open(self.infile, "w") as handle:
            for record in input_records:
                handle.write(record)

        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            str(self.classifier_model_path),
            str(self.infile),  # input_file
            "--output-file",
            str(self.outfile),
            "--predictor",
            "duplicate-test-predictor",
            "--silent",
        ]

        # Without the package included, the predictor name is unknown.
        with pytest.raises(ConfigurationError):
            main()

        # Once testpackage is included, the name resolves.
        sys.argv.extend(["--include-package", "testpackage"])
        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, "r") as handle:
            predictions = [json.loads(row) for row in handle]

        assert len(predictions) == 2
        # The duplicate predictor is expected to emit exactly these fields.
        expected_keys = {"label", "logits", "probs"}
        for prediction in predictions:
            assert set(prediction.keys()) == expected_keys
def test_using_dataset_reader_works_with_specified_multitask_head(self):
    """Run ``predict`` with ``--use-dataset-reader --multitask-head vqa`` and check the output."""
    command_line = [
        "__main__.py",  # executable
        "predict",  # command
        str(self.classifier_model_path),
        "unittest",  # "path" of the input data, but it's not really a path for VQA
        "--output-file",
        str(self.outfile),
        "--silent",
        "--use-dataset-reader",
        "--multitask-head",
        "vqa",
    ]
    sys.argv = command_line
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, "r") as handle:
        predictions = [json.loads(row) for row in handle]

    assert len(predictions) == 3
    for prediction in predictions:
        assert "vqa_best_answer" in prediction.keys()

    shutil.rmtree(self.tempdir)
def test_using_dataset_reader_works_with_known_model(self):
    """Run ``predict`` with ``--use-dataset-reader`` on the classifier data and check the output keys."""
    command_line = [
        "__main__.py",  # executable
        "predict",  # command
        str(self.classifier_model_path),
        str(self.classifier_data_path),  # input_file
        "--output-file",
        str(self.outfile),
        "--silent",
        "--use-dataset-reader",
    ]
    sys.argv = command_line
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, "r") as handle:
        predictions = [json.loads(row) for row in handle]

    assert len(predictions) == 3
    expected_keys = {"label", "logits", "loss", "probs"}
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_average_embedding_works(self):
    """Run ``elmo --average`` on one sentence and compare with the mean of the three embedder layers."""
    sentence = "Michael went to the store to buy some eggs ."
    tokens = sentence.split()

    with open(self.sentences_path, 'w') as handle:
        handle.write(sentence)

    sys.argv = [
        "run.py",  # executable
        "elmo",  # command
        self.sentences_path,
        self.output_path,
        "--average",
        "--options-file",
        self.options_file,
        "--weight-file",
        self.weight_file,
    ]
    main()

    assert os.path.exists(self.output_path)

    # Reference value: average of the three layers returned by the embedder.
    embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
    layers = embedder.embed_sentence(tokens)
    expected_embedding = (layers[0] + layers[1] + layers[2]) / 3

    with h5py.File(self.output_path, 'r') as h5py_file:
        assert set(h5py_file.keys()) == {"0", "sentence_to_index"}
        # The vectors in the test configuration are smaller (32 length)
        embedding = h5py_file.get("0")
        assert embedding.shape == (len(tokens), 32)
        numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
        sentence_index = json.loads(h5py_file.get("sentence_to_index")[0])
        assert sentence_index == {sentence: "0"}
def test_other_modules(self):
    """Check that ``configure`` can describe a predictor living in an extra package.

    A copy of the bidaf predictor is written into a temporary package under a new
    registration name, ``configure`` is run on it, and the captured standard output
    must mention that name.

    ``sys.stdout`` and ``sys.path`` are restored in ``finally`` clauses so that a
    failure in ``main()`` or in the assertion does not leak the redirected stream or
    the extra path entry into later tests.
    """
    # Create a new package in a temporary dir
    packagedir = self.TEST_DIR / 'configuretestpackage'
    packagedir.mkdir()  # pylint: disable=no-member
    (packagedir / '__init__.py').touch()  # pylint: disable=no-member

    # And add that directory to the path
    sys.path.insert(0, str(self.TEST_DIR))
    try:
        # Write out a duplicate predictor there, but registered under a different name.
        from allennlp.predictors import bidaf

        with open(bidaf.__file__) as f:
            code = f.read().replace("""@Predictor.register('machine-comprehension')""",
                                    """@Predictor.register('configure-test-predictor')""")

        with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
            f.write(code)

        # Capture stdout
        stdout_saved = sys.stdout
        stdout_captured = StringIO()
        sys.stdout = stdout_captured
        try:
            sys.argv = ["run.py",  # executable
                        "configure",  # command
                        "configuretestpackage.predictor.BidafPredictor"]
            main()
        finally:
            # Put the real stream back before asserting so a failure is reported normally.
            sys.stdout = stdout_saved

        output = stdout_captured.getvalue()
        assert "configure-test-predictor" in output
    finally:
        sys.path.remove(str(self.TEST_DIR))
def test_batch_embedding_works(self):
    u"""Run ``elmo --all`` on four sentences and check the shape stored for each of them."""
    sentences = [
            u"Michael went to the store to buy some eggs .",
            u"Joel rolled down the street on his skateboard .",
            u"test / this is a first sentence",
            u"Take a look , then , at Tuesday 's elections in New York City , New Jersey and Virginia :"
    ]
    sentence_ids = [u"0", u"1", u"2", u"3"]

    with open(self.sentences_path, u'w') as handle:
        for sentence in sentences:
            handle.write(sentence + u'\n')

    sys.argv = [
            u"run.py",  # executable
            u"elmo",  # command
            self.sentences_path,
            self.output_path,
            u"--all",
            u"--options-file",
            self.options_file,
            u"--weight-file",
            self.weight_file,
    ]
    main()

    assert os.path.exists(self.output_path)

    with h5py.File(self.output_path, u'r') as h5py_file:
        assert set(h5py_file.keys()) == set([u"0", u"1", u"2", u"3", u"sentence_to_index"])
        # The vectors in the test configuration are smaller (32 length)
        for sentence_id, sentence in izip(sentence_ids, sentences):
            assert h5py_file.get(sentence_id).shape == (3, len(sentence.split()), 32)
        # The stored index maps each sentence to its position, as a string.
        expected_index = dict((sentence, unicode(position))
                              for position, sentence in enumerate(sentences))
        assert json.loads(h5py_file.get(u"sentence_to_index")[0]) == expected_index
def test_batch_prediction_works_with_known_model(self):
    """Run ``predict --batch-size 2`` on two questions and check the keys of each output record.

    The temporary directory is removed in a ``finally`` clause so it is cleaned up
    even when ``main()`` raises or an assertion fails.
    """
    tempdir = tempfile.mkdtemp()
    try:
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        with open(infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",  # executable
                    "predict",  # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,  # input_file
                    "--output-file", outfile,
                    "--silent",
                    "--batch-size", '2']

        main()

        assert os.path.exists(outfile)
        with open(outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        # One prediction per input line.
        assert len(results) == 2
        expected_keys = {"span_start_logits", "span_end_logits",
                         "passage_question_attention",
                         "question_tokens", "passage_tokens",
                         "span_start_probs", "span_end_probs",
                         "best_span", "best_span_str"}
        for result in results:
            assert set(result.keys()) == expected_keys
    finally:
        shutil.rmtree(tempdir)
def test_empty_sentences_are_filtered(self):
    """Run ``elmo --all`` on input containing blank lines and check only "A" and "B" are stored.

    The scratch directory is a ``tempfile.TemporaryDirectory`` so that it is deleted
    when the test ends; the previous ``mkdtemp`` directory was never removed.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        sentences_path = os.path.join(tempdir, "sentences.txt")
        output_path = os.path.join(tempdir, "output.txt")

        sentences = ["A", "", "", "B"]

        with open(sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = [
                "run.py",  # executable
                "elmo",  # command
                sentences_path,
                output_path,
                "--all",
                "--options-file",
                self.options_file,
                "--weight-file",
                self.weight_file
        ]
        main()

        assert os.path.exists(output_path)

        # The HDF5 file is closed before the temporary directory is cleaned up.
        with h5py.File(output_path, 'r') as h5py_file:
            assert len(h5py_file.keys()) == 2
            assert set(h5py_file.keys()) == set(["A", "B"])
def test_using_dataset_reader_works_with_known_model(self):
    """Run ``predict --use-dataset-reader`` on the bidaf data and check the keys of each output record."""
    command_line = [
            "run.py",  # executable
            "predict",  # command
            str(self.bidaf_model_path),
            str(self.bidaf_data_path),  # input_file
            "--output-file",
            str(self.outfile),
            "--silent",
            "--use-dataset-reader",
    ]
    sys.argv = command_line
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, 'r') as handle:
        predictions = [json.loads(row) for row in handle]

    assert len(predictions) == 5
    expected_keys = {
            "span_start_logits", "span_end_logits",
            "passage_question_attention",
            "question_tokens", "passage_tokens",
            "span_start_probs", "span_end_probs",
            "best_span", "best_span_str", "loss",
    }
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_works_with_known_model(self):
    """Run ``predict`` on two passage/question records and check the keys of each output record."""
    input_records = (
            """{"passage": "the seahawks won the super bowl in 2016", """
            """ "question": "when did the seahawks win the super bowl?"}\n""",
            """{"passage": "the mariners won the super bowl in 2037", """
            """ "question": "when did the mariners win the super bowl?"}\n""",
    )
    with open(self.infile, 'w') as handle:
        for record in input_records:
            handle.write(record)

    sys.argv = [
            "run.py",  # executable
            "predict",  # command
            str(self.bidaf_model_path),
            str(self.infile),  # input_file
            "--output-file",
            str(self.outfile),
            "--silent",
    ]
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, 'r') as handle:
        predictions = [json.loads(row) for row in handle]

    assert len(predictions) == 2
    expected_keys = {
            "span_start_logits", "span_end_logits",
            "passage_question_attention",
            "question_tokens", "passage_tokens",
            "span_start_probs", "span_end_probs",
            "best_span", "best_span_str",
    }
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_other_modules(self):
    u"""Check that ``configure`` can describe a predictor living in an extra package.

    A copy of the bidaf predictor is written into a temporary package under a new
    registration name, ``configure`` is run on it, and the captured standard output
    must mention that name.

    ``sys.stdout`` and ``sys.path`` are restored in ``finally`` clauses so that a
    failure in ``main()`` or in the assertion does not leak the redirected stream or
    the extra path entry into later tests.
    """
    # Create a new package in a temporary dir
    packagedir = self.TEST_DIR / u'configuretestpackage'
    packagedir.mkdir()  # pylint: disable=no-member
    (packagedir / u'__init__.py').touch()  # pylint: disable=no-member

    # And add that directory to the path
    sys.path.insert(0, unicode(self.TEST_DIR))
    try:
        # Write out a duplicate predictor there, but registered under a different name.
        from allennlp.predictors import bidaf

        with open(bidaf.__file__) as f:
            code = f.read().replace(u"""@Predictor.register('machine-comprehension')""",
                                    u"""@Predictor.register('configure-test-predictor')""")

        with open(os.path.join(packagedir, u'predictor.py'), u'w') as f:
            f.write(code)

        # Capture stdout
        stdout_saved = sys.stdout
        stdout_captured = StringIO()
        sys.stdout = stdout_captured
        try:
            sys.argv = [u"run.py",  # executable
                        u"configure",  # command
                        u"configuretestpackage.predictor.BidafPredictor"]
            main()
        finally:
            # Put the real stream back before asserting so a failure is reported normally.
            sys.stdout = stdout_saved

        output = stdout_captured.getvalue()
        assert u"configure-test-predictor" in output
    finally:
        sys.path.remove(unicode(self.TEST_DIR))
def test_base_predictor(self):
    """Run ``predict`` without ``--predictor`` for a model type that has no default predictor.

    The ``"basic_classifier"`` entry is temporarily removed from ``DEFAULT_PREDICTORS``
    so the base class implementation is used. The entry is put back in a ``finally``
    clause, using the value that was actually removed, so a failing test cannot
    leave the shared registry modified for later tests.
    """
    # Tests when no Predictor is found and the base class implementation is used
    model_path = str(self.classifier_model_path)
    archive = load_archive(model_path)
    model_type = archive.config.get("model").get("type")

    # Makes sure that we don't have a DEFAULT_PREDICTOR for it. Otherwise the base class
    # implementation wouldn't be used
    saved_default = DEFAULT_PREDICTORS.pop("basic_classifier")
    try:
        assert model_type not in DEFAULT_PREDICTORS

        # Doesn't use a --predictor
        sys.argv = [
            "__main__.py",  # executable
            "predict",  # command
            model_path,
            str(self.classifier_data_path),  # input_file
            "--output-file",
            str(self.outfile),
            "--silent",
            "--use-dataset-reader",
        ]

        main()

        assert os.path.exists(self.outfile)

        with open(self.outfile, "r") as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 3
        for result in results:
            assert set(result.keys()) == {"logits", "probs", "label", "loss"}
    finally:
        DEFAULT_PREDICTORS["basic_classifier"] = saved_default
def test_top_embedding_works(self):
    """Run ``elmo --top`` on one sentence and compare with layer index 2 of the embedder output."""
    sentence = "Michael went to the store to buy some eggs ."
    tokens = sentence.split()

    with open(self.sentences_path, 'w') as handle:
        handle.write(sentence)

    command_line = [
            "run.py",  # executable
            "elmo",  # command
            self.sentences_path,
            self.output_path,
            "--top",
            "--options-file",
            self.options_file,
            "--weight-file",
            self.weight_file,
    ]
    sys.argv = command_line
    main()

    assert os.path.exists(self.output_path)

    embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
    layers = embedder.embed_sentence(tokens)
    expected_embedding = layers[2]

    with h5py.File(self.output_path, 'r') as h5py_file:
        assert list(h5py_file.keys()) == [sentence]
        # The vectors in the test configuration are smaller (32 length)
        embedding = h5py_file.get(sentence)
        assert embedding.shape == (len(tokens), 32)
        numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
def test_batch_prediction_works_with_known_model(self):
    """Run ``predict --batch-size 2`` on two passage/question records and check the output keys."""
    input_records = (
            """{"passage": "the seahawks won the super bowl in 2016", """
            """ "question": "when did the seahawks win the super bowl?"}\n""",
            """{"passage": "the mariners won the super bowl in 2037", """
            """ "question": "when did the mariners win the super bowl?"}\n""",
    )
    with open(self.infile, 'w') as handle:
        for record in input_records:
            handle.write(record)

    sys.argv = [
            "run.py",  # executable
            "predict",  # command
            str(self.bidaf_model_path),
            str(self.infile),  # input_file
            "--output-file",
            str(self.outfile),
            "--silent",
            "--batch-size",
            '2',
    ]
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, 'r') as handle:
        predictions = [json.loads(row) for row in handle]

    assert len(predictions) == 2
    expected_keys = {
            "span_start_logits", "span_end_logits",
            "passage_question_attention",
            "question_tokens", "passage_tokens",
            "span_start_probs", "span_end_probs",
            "best_span", "best_span_str",
    }
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_duplicate_sentences(self):
    """Run ``elmo --all`` on the same sentence twice and check that a single key is stored.

    The scratch directory is a ``tempfile.TemporaryDirectory`` so that it is deleted
    when the test ends; the previous ``mkdtemp`` directory was never removed.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        sentences_path = os.path.join(tempdir, "sentences.txt")
        output_path = os.path.join(tempdir, "output.txt")

        sentences = [
                "Michael went to the store to buy some eggs .",
                "Michael went to the store to buy some eggs .",
        ]

        with open(sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    sentences_path,
                    output_path,
                    "--all",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file]
        main()

        assert os.path.exists(output_path)

        # The HDF5 file is closed before the temporary directory is cleaned up.
        with h5py.File(output_path, 'r') as h5py_file:
            assert len(h5py_file.keys()) == 1
            assert set(h5py_file.keys()) == set(sentences)
            # The vectors in the test configuration are smaller (32 length)
            for sentence in set(sentences):
                assert h5py_file.get(sentence).shape == (3, len(sentence.split()), 32)
def test_using_dataset_reader_works_with_known_model(self):
    u"""Run ``predict --use-dataset-reader`` on the bidaf data and check the keys of each output record."""
    command_line = [
            u"run.py",  # executable
            u"predict",  # command
            unicode(self.bidaf_model_path),
            unicode(self.bidaf_data_path),  # input_file
            u"--output-file",
            unicode(self.outfile),
            u"--silent",
            u"--use-dataset-reader",
    ]
    sys.argv = command_line
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, u'r') as handle:
        predictions = [json.loads(row) for row in handle]

    assert len(predictions) == 5
    expected_keys = set([
            u"span_start_logits", u"span_end_logits",
            u"passage_question_attention",
            u"question_tokens", u"passage_tokens",
            u"span_start_probs", u"span_end_probs",
            u"best_span", u"best_span_str", u"loss",
    ])
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_empty_sentences_are_filtered(self):
    """Run ``elmo --all`` on input containing blank lines and check only "A" and "B" are stored.

    The scratch directory is a ``tempfile.TemporaryDirectory`` so that it is deleted
    when the test ends; the previous ``mkdtemp`` directory was never removed.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        sentences_path = os.path.join(tempdir, "sentences.txt")
        output_path = os.path.join(tempdir, "output.txt")

        sentences = [
                "A",
                "",
                "",
                "B"
        ]

        with open(sentences_path, 'w') as f:
            for line in sentences:
                f.write(line + '\n')

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    sentences_path,
                    output_path,
                    "--all",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file]
        main()

        assert os.path.exists(output_path)

        # The HDF5 file is closed before the temporary directory is cleaned up.
        with h5py.File(output_path, 'r') as h5py_file:
            assert len(h5py_file.keys()) == 2
            assert set(h5py_file.keys()) == set(["A", "B"])
def test_duplicate_sentences(self):
    """Run ``elmo --all`` on the same sentence twice and check both index keys are stored."""
    repeated = "Michael went to the store to buy some eggs ."
    sentences = [repeated, repeated]
    sentence_ids = ["0", "1"]

    with open(self.sentences_path, 'w') as handle:
        for sentence in sentences:
            handle.write(sentence + '\n')

    sys.argv = [
            "run.py",  # executable
            "elmo",  # command
            self.sentences_path,
            self.output_path,
            "--all",
            "--options-file",
            self.options_file,
            "--weight-file",
            self.weight_file,
    ]
    main()

    assert os.path.exists(self.output_path)

    with h5py.File(self.output_path, 'r') as h5py_file:
        assert len(h5py_file.keys()) == 3
        assert set(h5py_file.keys()) == {"0", "1", "sentence_to_index"}
        # The vectors in the test configuration are smaller (32 length)
        for sentence_id, sentence in zip(sentence_ids, sentences):
            expected_shape = (3, len(sentence.split()), 32)
            assert h5py_file.get(sentence_id).shape == expected_shape
def test_average_embedding_works(self):
    """Run ``elmo --average`` on one sentence and compare with the mean of the three embedder layers.

    The scratch directory is a ``tempfile.TemporaryDirectory`` so that it is deleted
    when the test ends; the previous ``mkdtemp`` directory was never removed.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        sentences_path = os.path.join(tempdir, "sentences.txt")
        output_path = os.path.join(tempdir, "output.txt")

        sentence = "Michael went to the store to buy some eggs ."
        with open(sentences_path, 'w') as f:
            f.write(sentence)

        sys.argv = ["run.py",  # executable
                    "elmo",  # command
                    sentences_path,
                    output_path,
                    "--average",
                    "--options-file",
                    self.options_file,
                    "--weight-file",
                    self.weight_file]
        main()

        assert os.path.exists(output_path)

        # Reference value: average of the three layers returned by the embedder.
        embedder = ElmoEmbedder(options_file=self.options_file, weight_file=self.weight_file)
        expected_embedding = embedder.embed_sentence(sentence.split())
        expected_embedding = (expected_embedding[0] + expected_embedding[1] + expected_embedding[2]) / 3

        # The HDF5 file is closed before the temporary directory is cleaned up.
        with h5py.File(output_path, 'r') as h5py_file:
            assert list(h5py_file.keys()) == [sentence]
            # The vectors in the test configuration are smaller (32 length)
            embedding = h5py_file.get(sentence)
            assert embedding.shape == (len(sentence.split()), 32)
            numpy.testing.assert_allclose(embedding, expected_embedding, rtol=1e-4)
def test_batch_embedding_works_with_sentences_as_keys(self):
    """Run ``elmo --all --use-sentence-keys`` and check that the sentences themselves are the keys."""
    sentences = [
            "Michael went to the store to buy some eggs .",
            "Joel rolled down the street on his skateboard ."
    ]

    with open(self.sentences_path, 'w') as handle:
        for sentence in sentences:
            handle.write(sentence + '\n')

    sys.argv = [
            "run.py",  # executable
            "elmo",  # command
            self.sentences_path,
            self.output_path,
            "--all",
            "--options-file",
            self.options_file,
            "--weight-file",
            self.weight_file,
            "--use-sentence-keys",
    ]
    main()

    assert os.path.exists(self.output_path)

    with h5py.File(self.output_path, 'r') as h5py_file:
        assert set(h5py_file.keys()) == set(sentences)
        # The vectors in the test configuration are smaller (32 length)
        for sentence in sentences:
            expected_shape = (3, len(sentence.split()), 32)
            assert h5py_file.get(sentence).shape == expected_shape
def test_batch_embedding_works_with_forget_sentences(self):
    """Run ``elmo --all --forget-sentences`` and check that only the four index keys are stored."""
    sentences = [
            "Michael went to the store to buy some eggs .",
            "Joel rolled down the street on his skateboard .",
            "test / this is a first sentence",
            "Take a look , then , at Tuesday 's elections in New York City , New Jersey and Virginia :"
    ]
    sentence_ids = ["0", "1", "2", "3"]

    with open(self.sentences_path, 'w') as handle:
        for sentence in sentences:
            handle.write(sentence + '\n')

    sys.argv = [
            "run.py",  # executable
            "elmo",  # command
            self.sentences_path,
            self.output_path,
            "--all",
            "--options-file",
            self.options_file,
            "--weight-file",
            self.weight_file,
            "--forget-sentences",
    ]
    main()

    assert os.path.exists(self.output_path)

    with h5py.File(self.output_path, 'r') as h5py_file:
        assert set(h5py_file.keys()) == {"0", "1", "2", "3"}
        # The vectors in the test configuration are smaller (32 length)
        for sentence_id, sentence in zip(sentence_ids, sentences):
            expected_shape = (3, len(sentence.split()), 32)
            assert h5py_file.get(sentence_id).shape == expected_shape
def run(exp_name, config_file, ):
    """Build an ``allennlp train`` command line for a debug run and hand it to ``main()``.

    The config is read from ``./configs/<exp_name>/<config_file>.json`` and the
    output goes to ``./output/debug/<exp_name>/<config_file up to its first dot>``.
    The command string is split on whitespace to form ``sys.argv``.
    """
    print(os.getcwd())
    # NOTE(review): ``override_dict`` is not defined in this function; presumably a
    # module-level value — confirm. Any whitespace inside it is split into separate
    # arguments below.
    command = " ".join([
        "allennlp",
        "train",
        f"./configs/{exp_name}/{config_file}.json",
        "-s",
        f"./output/debug/{exp_name}/{config_file.split('.')[0]}",
        "-f",
        "-o",
        f"{override_dict}",
        "--include-package",
        "allennlp_plugins",
    ])
    # Show the previous argv and the new command before replacing argv.
    print(sys.argv)
    print(command)
    sys.argv = command.split()
    main()
def test_other_modules(self):
    """Check that a predictor from an extra package is only usable with ``--include-package``.

    A copy of the bidaf predictor is written into a temporary package under a new
    registration name. ``predict`` must fail with ``ConfigurationError`` until the
    package is included, and then produce one record per input line.

    The ``sys.path`` entry is removed in a ``finally`` clause so a failing test
    does not leave the test directory importable for later tests.
    """
    # Create a new package in a temporary dir
    packagedir = os.path.join(self.TEST_DIR, 'testpackage')
    pathlib.Path(packagedir).mkdir()
    pathlib.Path(os.path.join(packagedir, '__init__.py')).touch()

    # And add that directory to the path
    sys.path.insert(0, self.TEST_DIR)
    try:
        # Write out a duplicate predictor there, but registered under a different name.
        from allennlp.service.predictors import bidaf

        with open(bidaf.__file__) as f:
            code = f.read().replace("""@Predictor.register('machine-comprehension')""",
                                    """@Predictor.register('duplicate-test-predictor')""")

        with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
            f.write(code)

        infile = os.path.join(self.TEST_DIR, "inputs.txt")
        outfile = os.path.join(self.TEST_DIR, "outputs.txt")

        with open(infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",  # executable
                    "predict",  # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,  # input_file
                    "--output-file", outfile,
                    "--predictor", "duplicate-test-predictor",
                    "--silent"]

        # Should raise ConfigurationError, because predictor is unknown
        with pytest.raises(ConfigurationError):
            main()

        # But once we include testpackage, it should be known
        sys.argv.extend(["--include-package", "testpackage"])
        main()

        assert os.path.exists(outfile)

        with open(outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # The duplicate predictor is expected to emit exactly these fields.
        expected_keys = {"span_start_logits", "span_end_logits",
                         "passage_question_attention",
                         "question_tokens", "passage_tokens",
                         "span_start_probs", "span_end_probs",
                         "best_span", "best_span_str"}
        for result in results:
            assert set(result.keys()) == expected_keys
    finally:
        sys.path.remove(self.TEST_DIR)
def test_alternative_file_formats(self):
    """Run ``predict`` with a CSV-based predictor installed through ``predictor_overrides``.

    The temporary directory is removed in a ``finally`` clause so it is cleaned up
    even when ``main()`` raises or an assertion fails. The CSV files handled by this
    test are opened with ``newline=''`` as the ``csv`` module documentation asks.
    """
    tempdir = tempfile.mkdtemp()
    try:
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        @Predictor.register('bidaf-csv')  # pylint: disable=unused-variable
        class BidafCsvPredictor(BidafPredictor):
            """same as bidaf predictor but using CSV inputs and outputs"""
            def load_line(self, line: str) -> JsonDict:
                reader = csv.reader([line])
                passage, question = next(reader)
                return {"passage": passage, "question": question}

            def dump_line(self, outputs: JsonDict) -> str:
                output = io.StringIO()
                writer = csv.writer(output)
                row = [outputs["span_start_probs"][0],
                       outputs["span_end_probs"][0],
                       *outputs["best_span"],
                       outputs["best_span_str"]]

                writer.writerow(row)
                return output.getvalue()

        with open(infile, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(["the seahawks won the super bowl in 2016",
                             "when did the seahawks win the super bowl?"])
            writer.writerow(["the mariners won the super bowl in 2037",
                             "when did the mariners win the super bowl?"])

        sys.argv = ["run.py",  # executable
                    "predict",  # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,  # input_file
                    "--output-file", outfile,
                    "--silent"]

        main(predictor_overrides={'bidaf': 'bidaf-csv'})
        assert os.path.exists(outfile)

        with open(outfile, 'r', newline='') as f:
            results = list(csv.reader(f))

        assert len(results) == 2
        for row in results:
            # Each row: start prob, end prob, span start, span end, span text.
            assert len(row) == 5
            start_prob, end_prob, span_start, span_end, span = row
            for prob in (start_prob, end_prob):
                assert 0 <= float(prob) <= 1
            assert 0 <= int(span_start) <= int(span_end) <= 8
            assert span != ''
    finally:
        shutil.rmtree(tempdir)
def test_alternative_file_formats(self):
    """Run ``predict --predictor bidaf-csv`` with a predictor that reads and writes CSV lines.

    The temporary directory is removed in a ``finally`` clause so it is cleaned up
    even when ``main()`` raises or an assertion fails. The CSV files handled by this
    test are opened with ``newline=''`` as the ``csv`` module documentation asks.
    """
    tempdir = tempfile.mkdtemp()
    try:
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        @Predictor.register('bidaf-csv')  # pylint: disable=unused-variable
        class BidafCsvPredictor(BidafPredictor):
            """same as bidaf predictor but using CSV inputs and outputs"""
            def load_line(self, line: str) -> JsonDict:
                reader = csv.reader([line])
                passage, question = next(reader)
                return {"passage": passage, "question": question}

            def dump_line(self, outputs: JsonDict) -> str:
                output = io.StringIO()
                writer = csv.writer(output)
                row = [outputs["span_start_probs"][0],
                       outputs["span_end_probs"][0],
                       *outputs["best_span"],
                       outputs["best_span_str"]]

                writer.writerow(row)
                return output.getvalue()

        with open(infile, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(["the seahawks won the super bowl in 2016",
                             "when did the seahawks win the super bowl?"])
            writer.writerow(["the mariners won the super bowl in 2037",
                             "when did the mariners win the super bowl?"])

        sys.argv = ["run.py",  # executable
                    "predict",  # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,  # input_file
                    "--output-file", outfile,
                    "--predictor", 'bidaf-csv',
                    "--silent"]

        main()
        assert os.path.exists(outfile)

        with open(outfile, 'r', newline='') as f:
            results = list(csv.reader(f))

        assert len(results) == 2
        for row in results:
            # Each row: start prob, end prob, span start, span end, span text.
            assert len(row) == 5
            start_prob, end_prob, span_start, span_end, span = row
            for prob in (start_prob, end_prob):
                assert 0 <= float(prob) <= 1
            assert 0 <= int(span_start) <= int(span_end) <= 8
            assert span != ''
    finally:
        shutil.rmtree(tempdir)
def test_can_specify_extra_args(self):
    """Run ``predict`` with ``--predictor-args`` and check the extra ``tag`` reaches the output."""

    @Predictor.register("classification-extra-args")
    class ExtraArgsPredictor(TextClassifierPredictor):
        """Text classifier predictor that copies a constructor ``tag`` into every result."""

        def __init__(
            self,
            model: Model,
            dataset_reader: DatasetReader,
            frozen: bool = True,
            tag: str = "",
        ) -> None:
            super().__init__(model, dataset_reader, frozen)
            self.tag = tag

        def predict_json(self, inputs: JsonDict) -> JsonDict:
            tagged = super().predict_json(inputs)
            tagged["tag"] = self.tag
            return tagged

    input_records = (
        """{"sentence": "the seahawks won the super bowl in 2016"}\n""",
        """{"sentence": "the mariners won the super bowl in 2037"}\n""",
    )
    with open(self.infile, "w") as handle:
        for record in input_records:
            handle.write(record)

    sys.argv = [
        "__main__.py",  # executable
        "predict",  # command
        str(self.classifier_model_path),
        str(self.infile),  # input_file
        "--output-file",
        str(self.outfile),
        "--predictor",
        "classification-extra-args",
        "--silent",
        "--predictor-args",
        """{"tag": "fish"}""",
    ]
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, "r") as handle:
        predictions = [json.loads(row) for row in handle]

    assert len(predictions) == 2
    # The overriding predictor adds the "tag" field to every record.
    expected_keys = {"label", "logits", "tag", "probs", "tokens", "token_ids"}
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys
        assert prediction["tag"] == "fish"

    shutil.rmtree(self.tempdir)
def test_remove_with_missing_positionals(self, capsys):
    """``cached-path --remove`` without positional arguments must raise ``RuntimeError``."""
    cache_dir = str(self.TEST_DIR)
    sys.argv = ["allennlp", "cached-path", "--cache-dir", cache_dir, "--remove"]
    # The error message is expected to mention the missing positional argument.
    with pytest.raises(RuntimeError, match="Missing positional"):
        main()
def test_fails_without_required_args(self):
    """``predict`` given an archive but no input file must exit with status 2."""
    command_line = [
            "run.py",  # executable
            "predict",  # command
            "/path/to/archive",  # archive, but no input file
    ]
    sys.argv = command_line

    with self.assertRaises(SystemExit) as cm:  # pylint: disable=invalid-name
        main()

    exit_code = cm.exception.code
    assert exit_code == 2  # argparse code for incorrect usage
def test_can_specify_predictor(self):
    """Run ``predict --predictor bidaf-explicit`` and check the extra ``explicit`` field is emitted."""

    @Predictor.register("bidaf-explicit")
    class Bidaf3Predictor(BidafPredictor):
        """same as bidaf predictor but with an extra field"""

        def predict_json(self, inputs: JsonDict) -> JsonDict:
            flagged = super().predict_json(inputs)
            flagged["explicit"] = True
            return flagged

    input_records = (
        """{"passage": "the seahawks won the super bowl in 2016", """
        """ "question": "when did the seahawks win the super bowl?"}\n""",
        """{"passage": "the mariners won the super bowl in 2037", """
        """ "question": "when did the mariners win the super bowl?"}\n""",
    )
    with open(self.infile, "w") as handle:
        for record in input_records:
            handle.write(record)

    sys.argv = [
        "run.py",  # executable
        "predict",  # command
        str(self.bidaf_model_path),
        str(self.infile),  # input_file
        "--output-file",
        str(self.outfile),
        "--predictor",
        "bidaf-explicit",
        "--silent",
    ]
    main()

    assert os.path.exists(self.outfile)

    with open(self.outfile, "r") as handle:
        predictions = [json.loads(row) for row in handle]

    assert len(predictions) == 2
    # Overridden predictor should output extra field
    expected_keys = {
        "span_start_logits",
        "span_end_logits",
        "passage_question_attention",
        "question_tokens",
        "passage_tokens",
        "span_start_probs",
        "span_end_probs",
        "best_span",
        "best_span_str",
        "token_offsets",
        "explicit",
    }
    for prediction in predictions:
        assert set(prediction.keys()) == expected_keys

    shutil.rmtree(self.tempdir)
def test_fails_without_required_args(self):
    """``predict`` given an archive but no input file must exit with status 2."""
    command_line = [
        "run.py",  # executable
        "predict",  # command
        "/path/to/archive",  # archive, but no input file
    ]
    sys.argv = command_line

    with self.assertRaises(SystemExit) as cm:
        main()

    exit_code = cm.exception.code
    assert exit_code == 2  # argparse code for incorrect usage
def test_can_specify_predictor(self):
    """Run ``predict --predictor bidaf-explicit`` and check the extra ``explicit`` field is emitted.

    The temporary directory is removed in a ``finally`` clause so it is cleaned up
    even when ``main()`` raises or an assertion fails.
    """

    @Predictor.register('bidaf-explicit')  # pylint: disable=unused-variable
    class Bidaf3Predictor(BidafPredictor):
        """same as bidaf predictor but with an extra field"""
        def predict_json(self, inputs: JsonDict, cuda_device: int = -1) -> JsonDict:
            # NOTE(review): cuda_device is accepted but not forwarded to the parent
            # call — confirm whether that is intended.
            result = super().predict_json(inputs)
            result["explicit"] = True
            return result

    tempdir = tempfile.mkdtemp()
    try:
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        with open(infile, 'w') as f:
            f.write(
                    """{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n"""
            )
            f.write(
                    """{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n"""
            )

        sys.argv = [
                "run.py",  # executable
                "predict",  # command
                "tests/fixtures/bidaf/serialization/model.tar.gz",
                infile,  # input_file
                "--output-file",
                outfile,
                "--predictor",
                "bidaf-explicit",
                "--silent"
        ]

        main()
        assert os.path.exists(outfile)

        with open(outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # Overridden predictor should output extra field
        expected_keys = {
                "span_start_logits", "span_end_logits", "passage_question_attention",
                "question_tokens", "passage_tokens", "span_start_probs",
                "span_end_probs", "best_span", "best_span_str", "explicit"
        }
        for result in results:
            assert set(result.keys()) == expected_keys
    finally:
        shutil.rmtree(tempdir)
def test_subcommand_plugin_is_available(self):
    """Run ``allennlp`` with no arguments and check the captured output lists ``serve``."""
    # Test originally copied from
    # `allennlp.tests.commands.main_test.TestMain.test_subcommand_plugin_is_available`.
    sys.argv = ["allennlp"]

    with io.StringIO() as captured:
        with redirect_stdout(captured):
            main()
            # Read the buffer while it is still open.
            output = captured.getvalue()

    self.assertIn(" serve", output)
def test_fails_without_required_args(self):
    """``predict`` given an archive but no input file must exit with status 2."""
    command_line = [
        "__main__.py",  # executable
        "predict",  # command
        "/path/to/archive",  # archive, but no input file
    ]
    sys.argv = command_line

    with pytest.raises(SystemExit) as cm:
        main()

    exit_code = cm.value.code
    assert exit_code == 2  # argparse code for incorrect usage
def test_fails_on_unknown_command(self):
    """An unrecognised command line must make ``main()`` exit with status 2."""
    command_line = [
            "bogus",  # command
            "unknown_model",  # model_name
            "bogus file",  # input_file
            "--output-file",
            "bogus out file",
            "--silent",
    ]
    sys.argv = command_line

    with self.assertRaises(SystemExit) as cm:  # pylint: disable=invalid-name
        main()

    exit_code = cm.exception.code
    assert exit_code == 2  # argparse code for incorrect usage
def test_inspect_with_bad_options(self, capsys):
    """``cached-path --inspect --extract-archive`` must raise ``RuntimeError``."""
    cache_dir = str(self.TEST_DIR)
    sys.argv = [
        "allennlp",
        "cached-path",
        "--cache-dir",
        cache_dir,
        "--inspect",
        "--extract-archive",
    ]
    # The error message is expected to name the offending option.
    with pytest.raises(RuntimeError, match="--extract-archive"):
        main()
def run(exp_name, config_file, qianru = '--recover'):
    """Build an ``allennlp train`` command line and hand it to ``main()``.

    The config is read from ``./configs/<exp_name>/<config_file>.json`` and the
    output goes to ``./output/<exp_name>/<config_file up to its first dot>``.
    ``qianru`` is inserted verbatim into the command (default ``--recover``). The
    command string is split on whitespace to form ``sys.argv``.
    """
    print(os.getcwd())
    command = " ".join([
        "allennlp",
        "train",
        f"./configs/{exp_name}/{config_file}.json",
        "-s",
        f"./output/{exp_name}/{config_file.split('.')[0]}",
        f"{qianru}",
        "--include-package",
        "my_library",
    ])
    sys.argv = command.split()
    print(sys.argv)
    main()
def test_can_specify_predictor(self):
    """Run ``predict --predictor bidaf-explicit`` and check the extra ``explicit`` field is emitted.

    The temporary directory is removed in a ``finally`` clause so it is cleaned up
    even when ``main()`` raises or an assertion fails.
    """

    @Predictor.register('bidaf-explicit')  # pylint: disable=unused-variable
    class Bidaf3Predictor(BidafPredictor):
        """same as bidaf predictor but with an extra field"""
        def predict_json(self, inputs: JsonDict, cuda_device: int = -1) -> JsonDict:
            # NOTE(review): cuda_device is accepted but not forwarded to the parent
            # call — confirm whether that is intended.
            result = super().predict_json(inputs)
            result["explicit"] = True
            return result

    tempdir = tempfile.mkdtemp()
    try:
        infile = os.path.join(tempdir, "inputs.txt")
        outfile = os.path.join(tempdir, "outputs.txt")

        with open(infile, 'w') as f:
            f.write("""{"passage": "the seahawks won the super bowl in 2016", """
                    """ "question": "when did the seahawks win the super bowl?"}\n""")
            f.write("""{"passage": "the mariners won the super bowl in 2037", """
                    """ "question": "when did the mariners win the super bowl?"}\n""")

        sys.argv = ["run.py",  # executable
                    "predict",  # command
                    "tests/fixtures/bidaf/serialization/model.tar.gz",
                    infile,  # input_file
                    "--output-file", outfile,
                    "--predictor", "bidaf-explicit",
                    "--silent"]

        main()
        assert os.path.exists(outfile)

        with open(outfile, 'r') as f:
            results = [json.loads(line) for line in f]

        assert len(results) == 2
        # Overridden predictor should output extra field
        expected_keys = {"span_start_logits", "span_end_logits",
                         "passage_question_attention",
                         "question_tokens", "passage_tokens",
                         "span_start_probs", "span_end_probs",
                         "best_span", "best_span_str",
                         "explicit"}
        for result in results:
            assert set(result.keys()) == expected_keys
    finally:
        shutil.rmtree(tempdir)
def test_other_modules(self):
    """Check that ``configure --include-package`` exposes classes from an extra package.

    A copy of the bidaf predictor module is written into a throwaway package
    under ``self.TEST_DIR`` with a different registered name. The wizard entry
    point is replaced so the app object can be captured and queried with its
    test client.
    """
    # Create a new package in a temporary dir
    packagedir = self.TEST_DIR / 'configuretestpackage'
    packagedir.mkdir()  # pylint: disable=no-member
    (packagedir / '__init__.py').touch()  # pylint: disable=no-member

    app = None

    # Monkeypatch the run function
    def run_wizard(args: argparse.Namespace) -> None:
        nonlocal app
        app = make_app(args.include_package)
        app.testing = True

    test_dir = str(self.TEST_DIR)
    original_run_wizard = configure_command._run_wizard

    # And add that directory to the path
    sys.path.insert(0, test_dir)
    configure_command._run_wizard = run_wizard
    try:
        # Write out a duplicate predictor there, but registered under a different name.
        from allennlp.predictors import bidaf
        with open(bidaf.__file__) as f:
            code = f.read().replace("""@Predictor.register('machine-comprehension')""",
                                    """@Predictor.register('configure-test-predictor')""")

        with open(os.path.join(packagedir, 'predictor.py'), 'w') as f:
            f.write(code)

        sys.argv = ["run.py",      # executable
                    "configure",     # command
                    "--include-package", "configuretestpackage.predictor"]

        main()

        client = app.test_client()

        response = client.get('/api/config/?class=allennlp.predictors.predictor.Predictor&get_choices=true')
        data = json.loads(response.get_data())
        choices = data.get('choices', ())
        assert 'configuretestpackage.predictor.BidafPredictor' in choices
    finally:
        # Undo the global changes even on failure so later tests do not see
        # the patched wizard or the extra sys.path entry.
        configure_command._run_wizard = original_run_wizard
        sys.path.remove(test_dir)
def test_subcommand_overrides(self):
    """Check that ``main`` uses a subcommand supplied via ``subcommand_overrides``.

    A stand-in ``evaluate`` subcommand records whether its ``add_subparser``
    was invoked; the test passes when that flag is set after ``main`` runs.
    """

    def noop_handler(_):
        return None

    class FakeEvaluate(Subcommand):
        # Becomes True on the instance once add_subparser has been called.
        add_subparser_called = False

        def add_subparser(self, name, parser):
            fake_parser = parser.add_parser(name, description="fake", help="fake help")
            fake_parser.set_defaults(func=noop_handler)
            self.add_subparser_called = True
            return fake_parser

    override = FakeEvaluate()
    overrides = {"evaluate": override}

    sys.argv = ["allennlp.run", "evaluate"]
    main(subcommand_overrides=overrides)

    assert override.add_subparser_called
#!/usr/bin/env python
# Launcher script: sets up the import path and logging, then delegates to
# allennlp.commands.main.
import logging
import os
import sys

# Any non-empty ALLENNLP_DEBUG value selects DEBUG logging; otherwise INFO.
LEVEL = logging.DEBUG if os.environ.get("ALLENNLP_DEBUG") else logging.INFO

# Prepend the parent of the directory containing this file to sys.path. This
# has to happen before the allennlp import below.
sys.path.insert(
    0,
    os.path.dirname(
        os.path.abspath(os.path.join(__file__, os.pardir))
    ),
)
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    level=LEVEL,
)

from allennlp.commands import main  # pylint: disable=wrong-import-position

if __name__ == "__main__":
    main(prog="python -m allennlp.run")
#!/usr/bin/env python
# Entry-point script: adjusts sys.path, configures logging, and then calls
# allennlp.commands.main with the program name "allennlp".
import logging
import os
import sys

# DEBUG logging when ALLENNLP_DEBUG is set to a non-empty value, else INFO.
LEVEL = logging.DEBUG if os.environ.get("ALLENNLP_DEBUG") else logging.INFO

# The parent of this file's directory goes first on sys.path; this must run
# before allennlp is imported below.
sys.path.insert(
    0,
    os.path.dirname(
        os.path.abspath(os.path.join(__file__, os.pardir))
    ),
)
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    level=LEVEL,
)

from allennlp.commands import main  # pylint: disable=wrong-import-position

if __name__ == "__main__":
    main(prog="allennlp")
def test_other_modules(self):
    """Train with a model that is only registered in an externally included package.

    The simple tagger module is copied into a throwaway package under
    ``self.TEST_DIR`` with a different registered name. Training is expected
    to fail with ``ConfigurationError`` without ``--include-package``, succeed
    with it, and fail again once the config no longer matches the one already
    stored in the serialization directory.
    """
    # Create a new package in a temporary dir
    packagedir = self.TEST_DIR / 'testpackage'
    packagedir.mkdir()  # pylint: disable=no-member
    (packagedir / '__init__.py').touch()  # pylint: disable=no-member

    # And add that directory to the path
    test_dir = str(self.TEST_DIR)
    sys.path.insert(0, test_dir)
    try:
        # Write out a duplicate model there, but registered under a different name.
        from allennlp.models import simple_tagger
        with open(simple_tagger.__file__) as model_file:
            code = model_file.read().replace("""@Model.register("simple_tagger")""",
                                             """@Model.register("duplicate-test-tagger")""")

        with open(packagedir / 'model.py', 'w') as new_model_file:
            new_model_file.write(code)

        # Copy fixture there too.
        shutil.copy(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv', self.TEST_DIR)
        data_path = str(self.TEST_DIR / 'sequence_tagging.tsv')

        # Write out config file
        config_path = self.TEST_DIR / 'config.json'
        config_json = (
            '{ "model": { "type": "duplicate-test-tagger", '
            '"text_field_embedder": { "tokens": { "type": "embedding", "embedding_dim": 5 } }, '
            '"encoder": { "type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2 } }, '
            '"dataset_reader": {"type": "sequence_tagging"}, '
            '"train_data_path": "$$$", '
            '"validation_data_path": "$$$", '
            '"iterator": {"type": "basic", "batch_size": 2}, '
            '"trainer": { "num_epochs": 2, "optimizer": "adam" } }'
        ).replace('$$$', data_path)
        with open(config_path, 'w') as config_file:
            config_file.write(config_json)

        serialization_dir = self.TEST_DIR / 'serialization'

        # Run train with using the non-allennlp module.
        sys.argv = ["bin/allennlp",
                    "train", str(config_path),
                    "-s", str(serialization_dir)]

        # Shouldn't be able to find the model.
        with pytest.raises(ConfigurationError):
            main()

        # Now add the --include-package flag and it should work.
        # We also need to add --recover since the output directory already exists.
        sys.argv.extend(["--recover", "--include-package", 'testpackage'])

        main()

        # Rewrite out config file, but change a value.
        with open(config_path, 'w') as new_config_file:
            new_config_file.write(config_json.replace('"num_epochs": 2,', '"num_epochs": 4,'))

        # This should fail because the config.json does not match that in the serialization directory.
        with pytest.raises(ConfigurationError):
            main()
    finally:
        # Always drop the extra path entry, even when a step above fails, so
        # it cannot affect later tests.
        sys.path.remove(test_dir)