class NlvrParserPredictor(Predictor):
    u"""
    Predictor wrapper for the NLVR semantic parser.  Converts incoming JSON into
    ``Instance`` objects, and dumps model outputs either as CSV lines (for the
    official NLVR evaluation) or as JSON lines.
    """
    #overrides
    def _json_to_instance(self, json_dict):
        u"""
        Expects JSON like ``{"sentence": "...", "worlds": [...]}`` (grouped data)
        or ``{"sentence": "...", "structured_rep": {...}}`` (a single world).
        An optional ``"identifier"`` key is passed through to the instance.
        """
        sentence = json_dict[u'sentence']
        if u'worlds' in json_dict:
            # This is grouped data
            worlds = json_dict[u'worlds']
        else:
            # Single-example data: wrap the lone structured representation in a list
            # so the dataset reader always sees a list of worlds.
            worlds = [json_dict[u'structured_rep']]
        identifier = json_dict[u'identifier'] if u'identifier' in json_dict else None
        instance = self._dataset_reader.text_to_instance(sentence=sentence,  # type: ignore
                                                         structured_representations=worlds,
                                                         identifier=identifier)
        return instance

    #overrides
    def dump_line(self, outputs):  # pylint: disable=no-self-use
        u"""
        Serialize one prediction.  If the output carries an ``"identifier"`` we
        emit a ``identifier,denotation`` CSV line for the official evaluation;
        otherwise we emit a JSON line.
        """
        if u"identifier" in outputs:
            # Returning CSV lines for official evaluation
            identifier = outputs[u"identifier"]
            denotation = outputs[u"denotations"][0][0]
            # BUG FIX: the original returned the literal template string
            # "{identifier},{denotation}\n" without substituting the values.
            return u"{0},{1}\n".format(identifier, denotation)
        else:
            return json.dumps(outputs) + u"\n"

NlvrParserPredictor = Predictor.register(u'nlvr-parser')(NlvrParserPredictor)
        ----------
        premise : ``str``
            A passage representing what is assumed to be true.
        hypothesis : ``str``
            A sentence that may be entailed by the premise.

        Returns
        -------
        A dictionary where the key "label_probs" determines the probabilities of
        each of [entailment, contradiction, neutral].
        """
        return self.predict_json({u"premise": premise, u"hypothesis": hypothesis})

    #overrides
    def _json_to_instance(self, json_dict):
        u"""
        Expects JSON that looks like ``{"premise": "...", "hypothesis": "..."}``
        and converts it into an ``Instance`` via the dataset reader.
        """
        premise_text = json_dict[u"premise"]
        hypothesis_text = json_dict[u"hypothesis"]
        return self._dataset_reader.text_to_instance(premise_text, hypothesis_text)

# Register under the name used by ``allennlp predict --predictor textual-entailment``.
DecomposableAttentionPredictor = Predictor.register(u'textual-entailment')(DecomposableAttentionPredictor)
        # Write the example record to a temp file so the SEMPRE jar can read it.
        test_data_filename = os.path.join(SEMPRE_DIR, u'data.examples')
        with open(test_data_filename, u'w') as temp_file:
            temp_file.write(test_record)

        # TODO(matt): The jar that we have isn't optimal for this use case - we're using a
        # script designed for computing accuracy, and just pulling out a piece of it. Writing
        # a new entry point to the jar that's tailored for this use would be cleaner.
        command = u' '.join([u'java',
                             u'-jar',
                             cached_path(DEFAULT_EXECUTOR_JAR),
                             test_data_filename,
                             logical_form_filename,
                             table_dir])
        # NOTE(review): shell=True with a space-joined command string is acceptable
        # here because all arguments are locally generated paths, but it would be
        # unsafe on untrusted input — confirm no user-controlled paths reach this.
        run(command, shell=True)

        # The jar writes its result to a fixed TSV file; the denotation is the
        # second column when present, otherwise the first.
        denotations_file = os.path.join(SEMPRE_DIR, u'logical_forms_denotations.tsv')
        with open(denotations_file) as temp_file:
            line = temp_file.readline().split(u'\t')

        # Clean up all the temp files generated from this function.
        # Take care to not remove the auxiliary sempre files
        os.remove(logical_form_filename)
        shutil.rmtree(table_dir)
        os.remove(denotations_file)
        os.remove(test_data_filename)
        return line[1] if len(line) > 1 else line[0]

WikiTablesParserPredictor = Predictor.register(u'wikitables-parser')(WikiTablesParserPredictor)
# corner of the node. attributes = [pos[index]] start, end = word_index_to_cumulative_indices[index] hierplane_node = { u"word": words[index], # The type of the node - all nodes with the same # type have a unified colour. u"nodeType": tags[index], # Attributes of the node. u"attributes": attributes, # The link between the node and it's parent. u"link": tags[index], u"spans": [{u"start": start, u"end": end}] } if children: hierplane_node[u"children"] = children return hierplane_node # We are guaranteed that there is a single word pointing to # the root index, so we can find it just by searching for 0 in the list. root_index = heads.index(0) hierplane_tree = { u"text": u" ".join(words), u"root": node_constuctor(root_index), u"nodeTypeToStyle": NODE_TYPE_TO_STYLE, u"linkToPosition": LINK_TO_POSITION } return hierplane_tree BiaffineDependencyParserPredictor = Predictor.register(u'biaffine-dependency-parser')(BiaffineDependencyParserPredictor)
        .. code-block:: js

            {"words": [...],
             "verbs": [
                {"verb": "...", "description": "...", "tags": [...]},
                ...
                {"verb": "...", "description": "...", "tags": [...]},
            ]}
        """
        instances = self._sentence_to_srl_instances(inputs)

        if not instances:
            # No verbs were found in the sentence, so there is nothing to run
            # through the model; return the tokenized words with an empty verb list.
            return sanitize({u"verbs": [], u"words": self._tokenizer.split_words(inputs[u"sentence"])})

        outputs = self._model.forward_on_instances(instances)

        # One model output per verb; each carries its own tag sequence for the
        # same underlying word sequence.
        results = {u"verbs": [], u"words": outputs[0][u"words"]}
        for output in outputs:
            tags = output[u'tags']
            description = self.make_srl_string(output[u"words"], tags)
            results[u"verbs"].append({
                    u"verb": output[u"verb"],
                    u"description": description,
                    u"tags": tags,
            })

        return sanitize(results)

SemanticRoleLabelerPredictor = Predictor.register(u"semantic-role-labeling")(SemanticRoleLabelerPredictor)
from __future__ import absolute_import
#overrides
from allennlp.common.util import JsonDict
from allennlp.data import Instance
from allennlp.predictors.predictor import Predictor


class SimpleSeq2SeqPredictor(Predictor):
    u"""
    Predictor for the :class:`~allennlp.models.encoder_decoder.simple_seq2seq` model.
    """

    def predict(self, source):
        u"""Run the model on a single source string and return its JSON output."""
        return self.predict_json({u"source": source})

    #overrides
    def _json_to_instance(self, json_dict):
        u"""
        Expects JSON that looks like ``{"source": "..."}`` and hands the source
        string to the dataset reader to build an ``Instance``.
        """
        return self._dataset_reader.text_to_instance(json_dict[u"source"])

SimpleSeq2SeqPredictor = Predictor.register(u'simple_seq2seq')(SimpleSeq2SeqPredictor)
        Make a machine comprehension prediction on the supplied input.
        See https://rajpurkar.github.io/SQuAD-explorer/ for more information about the machine comprehension task.

        Parameters
        ----------
        question : ``str``
            A question about the content in the supplied paragraph.  The question
            must be answerable by a span in the paragraph.
        passage : ``str``
            A paragraph of information relevant to the question.

        Returns
        -------
        A dictionary that represents the prediction made by the system.  The
        answer string will be under the "best_span_str" key.
        """
        return self.predict_json({u"passage": passage, u"question": question})

    #overrides
    def _json_to_instance(self, json_dict):
        u"""
        Expects JSON that looks like ``{"question": "...", "passage": "..."}``
        and converts it into an ``Instance`` via the dataset reader.
        """
        question_text = json_dict[u"question"]
        passage_text = json_dict[u"passage"]
        return self._dataset_reader.text_to_instance(question_text, passage_text)

BidafPredictor = Predictor.register(u'machine-comprehension')(BidafPredictor)
class SentenceTaggerPredictor(Predictor):
    u"""
    Predictor for sentence-tagging models: takes in a sentence and returns a
    single set of tags for it.  In particular, it works with both the
    :class:`~allennlp.models.crf_tagger.CrfTagger` model and the
    :class:`~allennlp.models.simple_tagger.SimpleTagger` model.
    """

    def __init__(self, model, dataset_reader):
        super(SentenceTaggerPredictor, self).__init__(model, dataset_reader)
        # SpaCy-backed tokenizer with POS tagging enabled, used to pre-tokenize
        # raw sentences before they reach the dataset reader.
        self._tokenizer = SpacyWordSplitter(language=u'en_core_web_sm', pos_tags=True)

    def predict(self, sentence):
        u"""Tag a single raw sentence and return the model's JSON output."""
        return self.predict_json({u"sentence": sentence})

    #overrides
    def _json_to_instance(self, json_dict):
        u"""
        Expects JSON that looks like ``{"sentence": "..."}``; tokenizes the
        sentence and converts it into an ``Instance`` via the dataset reader.
        """
        tokenized_sentence = self._tokenizer.split_words(json_dict[u"sentence"])
        return self._dataset_reader.text_to_instance(tokenized_sentence)

SentenceTaggerPredictor = Predictor.register(u'sentence-tagger')(SentenceTaggerPredictor)
                children.append(self._build_hierplane_tree(child, index, is_root=False))
            else:
                # We're at a leaf, so add the length of
                # the word to the character index.
                index += len(child)

        label = tree.label()
        # The span text for this node is the concatenation of all leaf words below it.
        span = u" ".join(tree.leaves())
        hierplane_node = {
                u"word": span,
                u"nodeType": label,
                u"attributes": [label],
                u"link": label
        }
        if children:
            hierplane_node[u"children"] = children
        # TODO(Mark): Figure out how to span highlighting to the leaves.
        if is_root:
            # The root node additionally carries the global style/label maps and
            # the full sentence text that Hierplane needs to render the tree.
            hierplane_node = {
                    u"linkNameToLabel": LINK_TO_LABEL,
                    u"nodeTypeToStyle": NODE_TYPE_TO_STYLE,
                    u"text": span,
                    u"root": hierplane_node
            }
        return hierplane_node

ConstituencyParserPredictor = Predictor.register(u'constituency-parser')(ConstituencyParserPredictor)