def _make_instance(example):
            """Build one AllenNLP ``Instance`` for a span-selection example.

            When passage and question are packed into a single sequence, the
            gold answer span is shifted by the offset reported by the
            boundary-token function; otherwise the offset is zero.
            """
            d = {}

            # Keep joined token text around for human inspection.
            d["raw_passage"] = MetadataField(" ".join(example["passage"]))
            d["raw_question"] = MetadataField(" ".join(example["question"]))

            pair_input = model_preprocessing_interface.model_flags["uses_pair_embedding"]
            if pair_input:
                inp, start_offset, _ = model_preprocessing_interface.boundary_token_fn(
                    example["passage"], example["question"], get_offset=True
                )
                d["inputs"] = sentence_to_text_field(inp, indexers)
            else:
                passage_toks = model_preprocessing_interface.boundary_token_fn(
                    example["passage"]
                )
                question_toks = model_preprocessing_interface.boundary_token_fn(
                    example["question"]
                )
                d["passage"] = sentence_to_text_field(passage_toks, indexers)
                d["question"] = sentence_to_text_field(question_toks, indexers)
                start_offset = 0

            # Gold span endpoints, shifted into the (possibly joined) input.
            d["span_start"] = NumericField(
                example["answer_span"][0] + start_offset,
                label_namespace="span_start_labels",
            )
            d["span_end"] = NumericField(
                example["answer_span"][1] + start_offset,
                label_namespace="span_end_labels",
            )
            d["start_offset"] = MetadataField(start_offset)
            d["passage_str"] = MetadataField(example["passage_str"])
            d["answer_str"] = MetadataField(example["answer_str"])
            d["space_processed_token_map"] = MetadataField(
                example["space_processed_token_map"]
            )
            return Instance(d)
# Example #2
        def _make_instance(sentence_tokens, question_tokens, answer_span, idx):
            """Assemble a span-answer ``Instance`` from pre-tokenized inputs."""
            d = {}

            # Drop boundary tokens for the human-readable copies.
            d["raw_sentence"] = MetadataField(" ".join(sentence_tokens[1:-1]))
            d["raw_question"] = MetadataField(" ".join(question_tokens[1:-1]))

            if model_preprocessing_interface.model_flags["uses_pair_embedding"]:
                joined = model_preprocessing_interface.boundary_token_fn(
                    sentence_tokens, question_tokens
                )
                d["inputs"] = sentence_to_text_field(joined, indexers)
            else:
                sent_field = sentence_to_text_field(
                    model_preprocessing_interface.boundary_token_fn(sentence_tokens),
                    indexers,
                )
                qst_field = sentence_to_text_field(
                    model_preprocessing_interface.boundary_token_fn(question_tokens),
                    indexers,
                )
                d["sentence"] = sent_field
                d["question"] = qst_field

            d["span_start"] = NumericField(
                answer_span[0], label_namespace="span_start_labels"
            )
            d["span_end"] = NumericField(
                answer_span[1], label_namespace="span_end_labels"
            )
            d["idx"] = LabelField(idx, label_namespace="idxs", skip_indexing=True)
            return Instance(d)
        def _make_instance(psg, qst, ans_str, label, psg_idx, qst_idx,
                           ans_idx):
            """Build an Instance for one (passage, question, answer) triple.

            Args:
                psg: passage tokens (joined with spaces for the metadata copy).
                qst: question tokens.
                ans_str: answer text, already a string.
                label: integer class label (skip_indexing=True).
                psg_idx, qst_idx, ans_idx: bookkeeping indices surfaced as
                    metadata; ``idx`` mirrors ``ans_idx`` for evaluate().
            """
            d = {}
            d["psg_str"] = MetadataField(" ".join(psg))
            d["qst_str"] = MetadataField(" ".join(qst))
            d["ans_str"] = MetadataField(ans_str)
            # BUG FIX: original referenced undefined name `par_idx`; the
            # parameter is `psg_idx` (compare the qst_idx/ans_idx lines below).
            d["psg_idx"] = MetadataField(psg_idx)
            d["qst_idx"] = MetadataField(qst_idx)
            d["ans_idx"] = MetadataField(ans_idx)
            d["idx"] = MetadataField(ans_idx)  # required by evaluate()
            if model_preprocessing_interface.model_flags[
                    "uses_pair_embedding"]:
                # Pack passage + question into one input sequence.
                inp = model_preprocessing_interface.boundary_token_fn(psg, qst)
                d["psg_qst_ans"] = sentence_to_text_field(inp, indexers)
            else:
                d["psg"] = sentence_to_text_field(
                    model_preprocessing_interface.boundary_token_fn(psg),
                    indexers)
                d["qst"] = sentence_to_text_field(
                    model_preprocessing_interface.boundary_token_fn(qst),
                    indexers)
            d["label"] = LabelField(label,
                                    label_namespace="labels",
                                    skip_indexing=True)

            return Instance(d)
# Example #4
        def _make_instance(passage, question, answer, label, par_idx, qst_idx,
                           ans_idx):
            """Build an Instance for one (passage, question, answer) triple.

            Args:
                passage, question, answer: token lists (joined with spaces
                    for the metadata copies).
                label: integer class label (skip_indexing=True).
                par_idx, qst_idx, ans_idx: bookkeeping indices surfaced as
                    metadata; ``idx`` mirrors ``ans_idx`` for evaluate().
            """
            d = {}
            d["psg_str"] = MetadataField(" ".join(passage))
            d["qst_str"] = MetadataField(" ".join(question))
            d["ans_str"] = MetadataField(" ".join(answer))
            d["psg_idx"] = MetadataField(par_idx)
            d["qst_idx"] = MetadataField(qst_idx)
            d["ans_idx"] = MetadataField(ans_idx)
            d["idx"] = MetadataField(ans_idx)  # required by evaluate()
            if is_using_pytorch_transformers:
                # BUG FIX: original called boundary_token_fn(para, ...) with
                # undefined name `para`; the parameter is `passage`.
                inp = boundary_token_fn(passage, question + answer)
                d["psg_qst_ans"] = sentence_to_text_field(inp, indexers)
            else:
                d["psg"] = sentence_to_text_field(boundary_token_fn(passage),
                                                  indexers)
                d["qst"] = sentence_to_text_field(boundary_token_fn(question),
                                                  indexers)
                d["ans"] = sentence_to_text_field(boundary_token_fn(answer),
                                                  indexers)
            d["label"] = LabelField(label,
                                    label_namespace="labels",
                                    skip_indexing=True)

            return Instance(d)
# Example #5
 def _make_instance(sent):
     """Build a bidirectional-LM instance from one token sequence.

     Forward targets add <s> as a target for input </s>, and backward
     targets add </s> as a target for input <s>, so neither direction
     needs extra tokens stripped from its input.
     """
     fields = {
         "input": sentence_to_text_field(sent[:-1], indexers),
         "targs": sentence_to_text_field(sent[1:], self.target_indexer),
         "targs_b": sentence_to_text_field([sent[-1]] + sent[:-2],
                                           self.target_indexer),
     }
     return Instance(fields)
    def make_instance(self, record, idx, indexers, model_preprocessing_interface) -> Type[Instance]:
        """Convert a single record to an AllenNLP Instance."""
        toks = record["text"].split()  # already space-tokenized by Moses
        # Apply model-appropriate variants of [cls] and [sep].
        toks = model_preprocessing_interface.boundary_token_fn(toks)
        text_field = sentence_to_text_field(toks, indexers)

        targets = record["targets"]
        d = {
            "idx": MetadataField(idx),
            "input1": text_field,
            "span1s": ListField(
                [self._make_span_field(t["span1"], text_field, 1) for t in targets]
            ),
        }
        if not self.single_sided:
            d["span2s"] = ListField(
                [self._make_span_field(t["span2"], text_field, 1) for t in targets]
            )

        # Targets are always multilabel, so wrap any bare string label in a list.
        label_sets = [utils.wrap_singleton_string(t["label"]) for t in targets]
        d["labels"] = ListField(
            [
                MultiLabelField(
                    ls, label_namespace=self._label_namespace, skip_indexing=False
                )
                for ls in label_sets
            ]
        )
        return Instance(d)
 def _make_instance(sent_):
     """Build a bidirectional-LM instance with rotated targets.

     Forward targs adds <s> as a target for input </s> and bwd targs adds
     </s> as a target for input <s>, avoiding the need to strip extra
     tokens from the input for either direction.
     """
     d = {}
     d["input"] = sentence_to_text_field(sent_, indexers)
     # Rotate left by one for the forward direction.
     d["targs"] = sentence_to_text_field(sent_[1:] + [sent_[0]],
                                         self.target_indexer)
     # Rotate right by one for the backward direction.
     d["targs_b"] = sentence_to_text_field([sent_[-1]] + sent_[:-1],
                                           self.target_indexer)
     return Instance(d)
def run_repl(model, vocab, indexers, task, args):
    """ Run REPL """
    print("Input CTRL-C or enter 'QUIT' to terminate.")
    while True:
        try:
            print()
            line = input(" INPUT: ")
            if line == "QUIT":
                break

            # Tokenize with the task's tokenizer, truncated to max_seq_len.
            tokens = process_sentence(
                tokenizer_name=task.tokenizer_name, sent=line, max_seq_len=args.max_seq_len
            )
            print("TOKENS:", " ".join("[{}]".format(tok) for tok in tokens))
            field = sentence_to_text_field(tokens, indexers)
            field.index(vocab)
            batch = move_to_device(
                Batch([Instance({"input1": field})]).as_tensor_dict(), args.cuda
            )
            with torch.no_grad():
                out = model.forward(task, batch, predict=True)
            # Binary classification is assumed below.
            assert out["logits"].shape[1] == 2

            logits = out["logits"][0]
            msg = "  PRED: "
            msg += "TRUE " if out["preds"][0].item() else "FALSE"
            msg += "  ({:.1f}%, logits: {:.3f} vs {:.3f})".format(
                torch.softmax(logits, dim=0)[1].item() * 100,
                logits[0].item(),
                logits[1].item(),
            )
            print(msg)
        except KeyboardInterrupt:
            print("\nTerminating.")
            break
# Example #9
 def _make_instance(question, choices, label, id_str):
     """Build a multiple-choice Instance with one text field per choice."""
     d = {}
     d["question_str"] = MetadataField(" ".join(question))
     pair_input = model_preprocessing_interface.model_flags["uses_pair_embedding"]
     if not pair_input:
         d["question"] = sentence_to_text_field(
             model_preprocessing_interface.boundary_token_fn(question), indexers
         )
     for i, choice in enumerate(choices):
         # Pair models see (question, choice) together; others see the
         # choice alone (the question got its own field above).
         if pair_input:
             inp = model_preprocessing_interface.boundary_token_fn(question, choice)
         else:
             inp = model_preprocessing_interface.boundary_token_fn(choice)
         d["choice%d" % i] = sentence_to_text_field(inp, indexers)
         d["choice%d_str" % i] = MetadataField(" ".join(choice))
     d["label"] = LabelField(label, label_namespace="labels", skip_indexing=True)
     d["id_str"] = MetadataField(id_str)
     return Instance(d)
# Example #10
 def _make_instance(sent_):
     """Build an instance whose targets label each input token with itself."""
     toks = model_preprocessing_interface.boundary_token_fn(sent_)
     inp_field = sentence_to_text_field(toks, indexers)
     targs = SequenceLabelField(toks,
                                inp_field,
                                label_namespace=self._label_namespace)
     return Instance({"input": inp_field, "targs": targs})
def prepare_batch(tokens_batch, vocab, indexers, args):
    """ Do preprocessing for batch """
    instances, kept_tokens = [], []
    for toks in tokens_batch:
        field = sentence_to_text_field(toks, indexers)
        field.index(vocab)
        instances.append(Instance({"input1": field}))
        kept_tokens.append(toks)
    tensor_batch = move_to_device(Batch(instances).as_tensor_dict(), args.cuda)
    return tensor_batch, kept_tokens
        def _make_instance(psg, qst, ans_str, label, psg_idx, qst_idx,
                           ans_idx):
            """Build an Instance for one (passage, question, answer) triple.

            Args:
                psg: passage tokens (joined with spaces for the metadata copy).
                qst: question tokens; for BERT-style models the leading token
                    is dropped when concatenating (``qst[1:]``).
                ans_str: answer text, already a string.
                label: integer class label (skip_indexing=True).
                psg_idx, qst_idx, ans_idx: bookkeeping indices surfaced as
                    metadata; ``idx`` mirrors ``ans_idx`` for evaluate().
            """
            d = {}
            d["psg_str"] = MetadataField(" ".join(psg))
            d["qst_str"] = MetadataField(" ".join(qst))
            d["ans_str"] = MetadataField(ans_str)
            # BUG FIX: original referenced undefined name `par_idx`; the
            # parameter is `psg_idx` (compare the qst_idx/ans_idx lines below).
            d["psg_idx"] = MetadataField(psg_idx)
            d["qst_idx"] = MetadataField(qst_idx)
            d["ans_idx"] = MetadataField(ans_idx)
            d["idx"] = MetadataField(ans_idx)  # required by evaluate()
            if is_using_bert:
                # Concatenate, dropping qst's leading boundary token.
                inp = psg + qst[1:]
                d["psg_qst_ans"] = sentence_to_text_field(inp, indexers)
            else:
                d["psg"] = sentence_to_text_field(psg, indexers)
                d["qst"] = sentence_to_text_field(qst, indexers)
            d["label"] = LabelField(label,
                                    label_namespace="labels",
                                    skip_indexing=True)

            return Instance(d)