def test_from_archive_does_not_consume_params(self):
    """Build a predictor twice from the same loaded archive.

    The test has no explicit assertion: it passes as long as the second
    ``Predictor.from_archive`` call does not raise.
    """
    model_path = self.FIXTURES_ROOT / 'bidaf' / 'serialization' / 'model.tar.gz'
    archive = load_archive(model_path)

    # If the first call consumed the params, the second call would raise.
    for _ in range(2):
        Predictor.from_archive(archive, 'machine-comprehension')
def test_prediction_with_no_verbs(self):
    """Check SRL predictions for a verbless sentence, alone and in a batch.

    The verbless sentence must produce an empty ``verbs`` list both from
    ``predict_json`` and as the first element of a ``predict_batch_json``
    call whose second element is a sentence containing a verb.
    """
    verbless = {"sentence": "Blah no verb sentence."}
    with_verb = {"sentence": "This sentence has a verb."}

    expected_verbless = {
        'words': ['Blah', 'no', 'verb', 'sentence', '.'],
        'verbs': [],
    }
    expected_with_verb = {
        'words': ['This', 'sentence', 'has', 'a', 'verb', '.'],
        'verbs': [{
            'verb': 'has',
            'description': 'This sentence has a verb .',
            'tags': ['O', 'O', 'O', 'O', 'O', 'O'],
        }],
    }

    model_path = self.FIXTURES_ROOT / 'srl' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'semantic-role-labeling')

    # Single-instance prediction.
    single = predictor.predict_json(verbless)
    assert single == expected_verbless

    # Batched prediction mixing the verbless sentence with one that has a verb.
    batch = predictor.predict_batch_json([verbless, with_verb])
    assert batch[0] == expected_verbless
    assert batch[1] == expected_with_verb
def test_uses_named_inputs(self):
    """Check the SRL predictor's output for a JSON input keyed by "sentence".

    Verifies the returned word list, that the verbs "wrote", "make" and
    "worked" are all reported, and that every verb carries one string tag
    per word.
    """
    payload = {
        "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
    }
    model_path = self.FIXTURES_ROOT / 'srl' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'semantic-role-labeling')

    result = predictor.predict_json(payload)
    print(result)

    expected_words = [
        "The", "squirrel", "wrote", "a", "unit", "test", "to", "make",
        "sure", "its", "nuts", "worked", "as", "designed", ".",
    ]
    words = result.get("words")
    assert words == expected_words
    num_words = len(words)

    verbs = result.get("verbs")
    assert verbs is not None
    assert isinstance(verbs, list)

    # Each of these verbs must appear somewhere in the predicted verb list.
    for expected_verb in ("wrote", "make", "worked"):
        assert any(entry["verb"] == expected_verb for entry in verbs)

    # Every verb entry needs exactly one string tag per word.
    for entry in verbs:
        tags = entry.get("tags")
        assert tags is not None
        assert isinstance(tags, list)
        assert all(isinstance(tag, str) for tag in tags)
        assert len(tags) == num_words
def test_uses_named_inputs(self):
    """Check the textual-entailment predictor's output for premise/hypothesis JSON.

    ``label_probs`` must be three non-negative floats summing to one, and
    ``label_logits`` must be three floats whose softmax equals ``label_probs``.
    """
    def check_three_floats(values):
        # Shared shape check: a list of exactly three floats.
        assert values is not None
        assert isinstance(values, list)
        assert len(values) == 3
        assert all(isinstance(x, float) for x in values)

    payload = {
        "premise": "I always write unit tests for my code.",
        "hypothesis": "One time I didn't write any unit tests for my code."
    }
    model_path = self.FIXTURES_ROOT / 'decomposable_attention' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'textual-entailment')
    result = predictor.predict_json(payload)

    # Label probs should be 3 floats that sum to one.
    probabilities = result.get("label_probs")
    check_three_floats(probabilities)
    assert all(x >= 0 for x in probabilities)
    assert sum(probabilities) == approx(1.0)

    # Logits should be 3 floats that softmax to label_probs.
    logits = result.get("label_logits")
    check_three_floats(logits)

    exponentials = [math.exp(logit) for logit in logits]
    normalizer = sum(exponentials)
    for exponential, probability in zip(exponentials, probabilities):
        assert exponential / normalizer == approx(probability)
def test_uses_named_inputs(self):
    """Check the machine-comprehension predictor's output for question/passage JSON.

    Verifies that ``best_span`` is an ordered pair of ints, that
    ``best_span_str`` is a non-empty string, and that the start and end
    span probabilities are floats summing to one.
    """
    payload = {
        "question": "What kind of test succeeded on its first attempt?",
        "passage": "One time I was writing a unit test, and it succeeded on the first attempt."
    }
    model_path = self.FIXTURES_ROOT / 'bidaf' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'machine-comprehension')
    result = predictor.predict_json(payload)

    span = result.get("best_span")
    assert span is not None
    assert isinstance(span, list)
    assert len(span) == 2
    assert all(isinstance(index, int) for index in span)
    span_start, span_end = span
    assert span_start <= span_end

    span_text = result.get("best_span_str")
    assert isinstance(span_text, str)
    assert span_text != ""

    # Both distributions must be made of floats and sum to one.
    for key in ("span_start_probs", "span_end_probs"):
        distribution = result.get(key)
        assert distribution is not None
        assert all(isinstance(p, float) for p in distribution)
        assert sum(distribution) == approx(1.0)
def setUp(self):
    """Run the parent ``setUp`` and store a bidaf predictor on the test case.

    The predictor is built from the bidaf fixture archive and saved as
    ``self.bidaf_predictor``.
    """
    super().setUp()
    fixture_path = self.FIXTURES_ROOT / 'bidaf' / 'serialization' / 'model.tar.gz'
    bidaf_archive = load_archive(fixture_path)
    self.bidaf_predictor = Predictor.from_archive(bidaf_archive, 'machine-comprehension')
def test_batch_prediction(self):
    """Check batched dependency-parser predictions for two sentences.

    Each result must have one predicted head and one string dependency
    label per word.
    """
    batch = [
        {"sentence": "What kind of test succeeded on its first attempt?"},
        {"sentence": "What kind of test succeeded on its first attempt at batch processing?"},
    ]
    model_path = self.FIXTURES_ROOT / 'biaffine_dependency_parser' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'biaffine-dependency-parser')

    results = predictor.predict_batch_json(batch)
    assert len(results) == 2

    for prediction in results:
        num_words = len(prediction.get("words"))

        heads = prediction.get("predicted_heads")
        assert len(heads) == num_words

        dependencies = prediction.get("predicted_dependencies")
        assert len(dependencies) == num_words
        assert isinstance(dependencies, list)
        assert all(isinstance(label, str) for label in dependencies)
def test_uses_named_inputs(self):
    """Check the coreference predictor's output for a JSON input keyed by "document".

    Verifies the tokenized document that comes back and that ``clusters``
    is a list of lists of mentions, where each mention's first two entries
    are integer indices that fall inside the tokenized document.
    """
    inputs = {
        "document": "This is a single string document about a test. Sometimes it "
                    "contains coreferent parts."
    }
    archive = load_archive(self.FIXTURES_ROOT / 'coref' / 'serialization' / 'model.tar.gz')
    predictor = Predictor.from_archive(archive, 'coreference-resolution')

    result = predictor.predict_json(inputs)

    document = result["document"]
    assert document == [
        'This', 'is', 'a', 'single', 'string', 'document', 'about', 'a',
        'test', '.', 'Sometimes', 'it', 'contains', 'coreferent', 'parts', '.'
    ]

    clusters = result["clusters"]
    assert isinstance(clusters, list)
    for cluster in clusters:
        assert isinstance(cluster, list)
        for mention in cluster:
            # Spans should be integer indices.
            assert isinstance(mention[0], int)
            assert isinstance(mention[1], int)
            # Spans should be inside document.  The previous bounds
            # (0 < index <= len(document)) rejected a mention at the first
            # token and accepted an index one past the last token.
            # NOTE(review): assumes span indices are 0-based with an
            # inclusive end, as the coref predictor documents - confirm.
            assert 0 <= mention[0] < len(document)
            assert 0 <= mention[1] < len(document)
def main(args):
    """Parse command-line options, load a predictor, and serve a demo app.

    Both ``--archive-path`` and ``--predictor`` are required.  After any
    ``--include-package`` packages are imported, the archive is loaded, a
    predictor is built from it, and the app returned by ``make_app`` is
    served on ``0.0.0.0`` at ``--port`` until the server stops.

    Parameters
    ----------
    args :
        The argument list handed to ``ArgumentParser.parse_args``.
    """
    # (flag, add_argument keyword options), registered in this order.
    option_specs = [
        ('--archive-path', dict(type=str, required=True, help='path to trained archive file')),
        ('--predictor', dict(type=str, required=True, help='name of predictor')),
        ('--static-dir', dict(type=str, help='serve index.html from this directory')),
        ('--title', dict(type=str, help='change the default page title', default="AllenNLP Demo")),
        ('--field-name', dict(type=str, action='append', help='field names to include in the demo')),
        ('--port', dict(type=int, default=8000, help='port to serve the demo on')),
        ('--include-package', dict(type=str, action='append', default=[],
                                   help='additional packages to include')),
    ]
    parser = argparse.ArgumentParser(description='Serve up a simple model')
    for flag, keyword_options in option_specs:
        parser.add_argument(flag, **keyword_options)

    options = parser.parse_args(args)

    # Load modules
    for package_name in options.include_package:
        import_submodules(package_name)

    predictor = Predictor.from_archive(load_archive(options.archive_path), options.predictor)

    app = make_app(predictor=predictor,
                   field_names=options.field_name,
                   static_dir=options.static_dir,
                   title=options.title)
    CORS(app)

    http_server = WSGIServer(('0.0.0.0', options.port), app)
    print(f"Model loaded, serving demo on port {options.port}")
    http_server.serve_forever()
def test_batch_prediction(self):
    """Check that a batch of two identical SRL inputs yields two equal results."""
    sentence = {
        "sentence": "The squirrel wrote a unit test to make sure its nuts worked as designed."
    }
    model_path = self.FIXTURES_ROOT / 'srl' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'semantic-role-labeling')

    # The same input twice in one batch must give the same prediction twice.
    predictions = predictor.predict_batch_json([sentence, sentence])
    assert predictions[0] == predictions[1]
def test_predictor_with_direct_parser(self):
    """Check the nlvr-parser predictor's output for ``self.inputs``.

    The result must contain ``logical_form`` and ``denotations``, and the
    first entry of ``denotations`` must have length two.
    """
    serialization_dir = (self.FIXTURES_ROOT / 'semantic_parsing' /
                         'nlvr_direct_semantic_parser' / 'serialization')
    model_path = os.path.join(serialization_dir, 'model.tar.gz')
    predictor = Predictor.from_archive(load_archive(model_path), 'nlvr-parser')

    result = predictor.predict_json(self.inputs)
    for required_key in ('logical_form', 'denotations'):
        assert required_key in result

    # result['denotations'] is a list corresponding to k-best logical forms, where k is 1 by
    # default.
    best_denotations = result['denotations'][0]
    # Because there are two worlds in the input.
    assert len(best_denotations) == 2
def test_answer_present_with_batch_predict(self):
    """Check that a one-element wikitables batch returns a non-None ``answer``."""
    batch = [{
        "question": "Who is 18 years old?",
        "table": "Name\tAge\nShallan\t16\nKaladin\t18"
    }]
    model_path = (self.FIXTURES_ROOT / 'semantic_parsing' / 'wikitables' /
                  'serialization' / 'model.tar.gz')
    predictor = Predictor.from_archive(load_archive(model_path), 'wikitables-parser')

    predictions = predictor.predict_batch_json(batch)
    first_answer = predictions[0].get("answer")
    assert first_answer is not None
def test_uses_named_inputs(self):
    """Check the simple_seq2seq predictor's output for a JSON input keyed by "source".

    ``predicted_tokens`` must be present and be a list of strings.
    """
    payload = {
        "source": "What kind of test succeeded on its first attempt?",
    }
    model_path = (self.FIXTURES_ROOT / 'encoder_decoder' / 'simple_seq2seq' /
                  'serialization' / 'model.tar.gz')
    predictor = Predictor.from_archive(load_archive(model_path), 'simple_seq2seq')

    result = predictor.predict_json(payload)

    tokens = result.get("predicted_tokens")
    assert tokens is not None
    assert isinstance(tokens, list)
    assert all(isinstance(token, str) for token in tokens)
def test_predictor_uses_dataset_reader_to_determine_pos_set(self):
    """Check that the reader's ``use_language_specific_pos`` flag changes the POS tags.

    The same input is converted to an instance twice: once with the
    predictor as loaded, and once after setting
    ``use_language_specific_pos = True`` on its dataset reader.
    """
    # pylint: disable=protected-access
    model_path = self.FIXTURES_ROOT / 'biaffine_dependency_parser' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'biaffine-dependency-parser')
    payload = {
        "sentence": "Dogs eat cats.",
    }

    # First pass: the predictor exactly as loaded from the archive.
    default_instance = predictor._json_to_instance(payload)
    default_tags = default_instance.fields["pos_tags"].labels
    assert default_tags == ['NOUN', 'VERB', 'NOUN', 'PUNCT']

    # Second pass: flip the flag on the reader, then convert the same input again.
    predictor._dataset_reader.use_language_specific_pos = True
    specific_instance = predictor._json_to_instance(payload)
    specific_tags = specific_instance.fields["pos_tags"].labels
    assert specific_tags == ['NNS', 'VBP', 'NNS', '.']
def test_uses_named_inputs(self):
    """Check the constituency-parser predictor's output for a JSON input keyed by "sentence".

    Verifies the number of spans and class distributions, the returned
    tokens, that ``trees`` is a string, and that every class distribution
    sums to one to four decimal places.
    """
    payload = {
        "sentence": "What a great test sentence.",
    }
    model_path = self.FIXTURES_ROOT / 'constituency_parser' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'constituency-parser')

    result = predictor.predict_json(payload)

    # 21 is the number of possible substrings of the sentence.
    assert len(result["spans"]) == 21
    assert len(result["class_probabilities"]) == 21

    expected_tokens = ["What", "a", "great", "test", "sentence", "."]
    assert result["tokens"] == expected_tokens
    assert isinstance(result["trees"], str)

    for distribution in result["class_probabilities"]:
        self.assertAlmostEqual(sum(distribution), 1.0, places=4)
def test_batch_prediction(self):
    """Check batched textual-entailment predictions for two premise/hypothesis pairs.

    For each result, ``label_probs`` must be three non-negative floats
    summing to one and ``label_logits`` must be three floats whose softmax
    equals ``label_probs``.
    """
    def check_three_floats(values):
        # Shared shape check: a list of exactly three floats.
        assert values is not None
        assert isinstance(values, list)
        assert len(values) == 3
        assert all(isinstance(x, float) for x in values)

    pairs = [
        {
            "premise": "I always write unit tests for my code.",
            "hypothesis": "One time I didn't write any unit tests for my code."
        },
        {
            "premise": "I also write batched unit tests for throughput!",
            "hypothesis": "Batch tests are slower."
        },
    ]
    model_path = self.FIXTURES_ROOT / 'decomposable_attention' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'textual-entailment')

    results = predictor.predict_batch_json(pairs)
    print(results)
    assert len(results) == 2

    for prediction in results:
        logits = prediction.get("label_logits")
        probabilities = prediction.get("label_probs")

        # Label probs should be 3 floats that sum to one.
        check_three_floats(probabilities)
        assert all(x >= 0 for x in probabilities)
        assert sum(probabilities) == approx(1.0)

        # Logits should be 3 floats that softmax to label_probs.
        check_three_floats(logits)
        exponentials = [math.exp(logit) for logit in logits]
        normalizer = sum(exponentials)
        for exponential, probability in zip(exponentials, probabilities):
            assert exponential / normalizer == approx(probability)
def test_batch_prediction(self):
    """Check batched machine-comprehension predictions for two question/passage pairs.

    For each result, ``best_span`` must be an ordered pair of ints,
    ``best_span_str`` a non-empty string, and the start and end span
    probabilities floats summing to one.
    """
    batch = [
        {
            "question": "What kind of test succeeded on its first attempt?",
            "passage": "One time I was writing a unit test, and it succeeded on the first attempt."
        },
        {
            "question": "What kind of test succeeded on its first attempt at batch processing?",
            "passage": "One time I was writing a unit test, and it always failed!"
        },
    ]
    model_path = self.FIXTURES_ROOT / 'bidaf' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'machine-comprehension')

    results = predictor.predict_batch_json(batch)
    assert len(results) == 2

    for prediction in results:
        span = prediction.get("best_span")
        span_text = prediction.get("best_span_str")

        assert span is not None
        assert isinstance(span, list)
        assert len(span) == 2
        assert all(isinstance(index, int) for index in span)
        span_start, span_end = span
        assert span_start <= span_end

        assert isinstance(span_text, str)
        assert span_text != ""

        # Both distributions must be made of floats and sum to one.
        for key in ("span_start_probs", "span_end_probs"):
            distribution = prediction.get(key)
            assert distribution is not None
            assert all(isinstance(p, float) for p in distribution)
            assert sum(distribution) == approx(1.0)
def test_uses_named_inputs(self):
    """Check the wikitables-parser predictor's output for question/table JSON.

    The assertions only run when ``best_action_sequence`` is non-empty;
    otherwise the test just confirms that prediction completes.
    """
    payload = {
        "question": "names",
        "table": "name\tdate\nmatt\t2017\npradeep\t2018"
    }
    model_path = (self.FIXTURES_ROOT / 'semantic_parsing' / 'wikitables' /
                  'serialization' / 'model.tar.gz')
    predictor = Predictor.from_archive(load_archive(model_path), 'wikitables-parser')

    result = predictor.predict_json(payload)

    actions = result.get("best_action_sequence")
    if not actions:
        # We don't currently disallow endless loops in the decoder, and an untrained seq2seq
        # model will easily get itself into a loop.  An endless loop isn't a finished logical
        # form, so decoding doesn't return any finished states, which means no actions.  So,
        # sadly, we don't have a great test here.  This is just testing that the predictor
        # runs, basically.
        return

    assert len(actions) > 1
    assert all(isinstance(action, str) for action in actions)

    # Checked only when decoding produced actions, like the assertions above.
    logical_form = result.get("logical_form")
    assert logical_form is not None
def test_batch_prediction(self):
    """Check batched constituency-parser predictions for two sentences.

    For each sentence, verifies the number of spans and class
    distributions, the returned tokens, that ``trees`` is a string, and
    that every class distribution sums to one to four decimal places.
    """
    batch = [
        {"sentence": "What a great test sentence."},
        {"sentence": "Here's another good, interesting one."},
    ]
    # Per sentence: (number of possible substrings, expected tokens).
    expectations = [
        (21, ["What", "a", "great", "test", "sentence", "."]),
        (36, ["Here", "'s", "another", "good", ",", "interesting", "one", "."]),
    ]

    model_path = self.FIXTURES_ROOT / 'constituency_parser' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'constituency-parser')
    results = predictor.predict_batch_json(batch)

    for position, (num_spans, expected_tokens) in enumerate(expectations):
        prediction = results[position]
        assert len(prediction["spans"]) == num_spans
        assert len(prediction["class_probabilities"]) == num_spans
        assert prediction["tokens"] == expected_tokens
        assert isinstance(prediction["trees"], str)

        for distribution in prediction["class_probabilities"]:
            self.assertAlmostEqual(sum(distribution), 1.0, places=4)
def test_build_hierplane_tree(self):
    """Check the hierplane dict built from a small hand-written parse tree.

    The tree for "the dog chased the cat" is passed to the constituency
    predictor's ``_build_hierplane_tree`` and the result is compared with
    the expected nested dict.
    """
    def node(word, label, children=None):
        # One expected hierplane node; leaves carry no 'children' key.
        entry = {
            'word': word,
            'nodeType': label,
            'attributes': [label],
            'link': label,
        }
        if children is not None:
            entry['children'] = children
        return entry

    tree = Tree.fromstring(
        "(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
    model_path = self.FIXTURES_ROOT / 'constituency_parser' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'constituency-parser')

    hierplane_tree = predictor._build_hierplane_tree(tree, 0, is_root=True)

    subject = node('the dog', 'NP', [node('the', 'D'), node('dog', 'N')])
    direct_object = node('the cat', 'NP', [node('the', 'D'), node('cat', 'N')])
    verb_phrase = node('chased the cat', 'VP', [node('chased', 'V'), direct_object])

    correct_tree = {
        'text': 'the dog chased the cat',
        "linkNameToLabel": LINK_TO_LABEL,
        "nodeTypeToStyle": NODE_TYPE_TO_STYLE,
        'root': node('the dog chased the cat', 'S', [subject, verb_phrase]),
    }

    assert correct_tree == hierplane_tree
def test_uses_named_inputs(self):
    """Check the dependency-parser predictor's output for a JSON input keyed by "sentence".

    Verifies that heads and dependency labels line up with the words, that
    the three loss entries are present, and that the hierplane tree (with
    its ``nodeTypeToStyle`` and ``linkToPosition`` entries removed) equals
    the expected nested dict.
    """
    def node(word, pos, link, start, end, children=None):
        # One expected hierplane node; only the root carries 'children'.
        entry = {
            'word': word,
            'nodeType': link,
            'attributes': [pos],
            'link': link,
            'spans': [{'start': start, 'end': end}],
        }
        if children is not None:
            entry['children'] = children
        return entry

    payload = {
        "sentence": "Please could you parse this sentence?",
    }
    model_path = self.FIXTURES_ROOT / 'biaffine_dependency_parser' / 'serialization' / 'model.tar.gz'
    predictor = Predictor.from_archive(load_archive(model_path), 'biaffine-dependency-parser')

    result = predictor.predict_json(payload)

    words = result.get("words")
    heads = result.get("predicted_heads")
    assert len(heads) == len(words)

    dependencies = result.get("predicted_dependencies")
    assert len(dependencies) == len(words)
    assert isinstance(dependencies, list)
    assert all(isinstance(label, str) for label in dependencies)

    for loss_key in ("loss", "arc_loss", "tag_loss"):
        assert result.get(loss_key) is not None

    # Remove the two entries in place before comparing the rest of the tree;
    # the later result.get("hierplane_tree") sees the same mutated dict.
    rendered_tree = result.get("hierplane_tree")
    rendered_tree.pop("nodeTypeToStyle")
    rendered_tree.pop("linkToPosition")

    expected_children = [
        node('could', 'VERB', 'nummod', 7, 13),
        node('you', 'PRON', 'nummod', 13, 17),
        node('parse', 'VERB', 'nummod', 17, 23),
        node('this', 'DET', 'nummod', 23, 28),
        node('sentence', 'NOUN', 'nummod', 28, 37),
        node('?', 'PUNCT', 'nummod', 37, 39),
    ]
    expected_tree = {
        'text': 'Please could you parse this sentence ?',
        'root': node('Please', 'INTJ', 'det', 0, 7, expected_children),
    }

    assert result.get("hierplane_tree") == expected_tree
def demo_model(archive_file: str, predictor_name: str) -> Predictor:
    """Load an archive and build the named predictor from it.

    Parameters
    ----------
    archive_file : ``str``
        Passed to ``load_archive``.
    predictor_name : ``str``
        Passed to ``Predictor.from_archive`` as the predictor name.

    Returns
    -------
    The ``Predictor`` returned by ``Predictor.from_archive``.
    """
    return Predictor.from_archive(load_archive(archive_file), predictor_name)