import collections

import numpy as np

# _get_best_indexes, _compute_softmax, and get_final_text are assumed to be
# defined elsewhere in the module (as in BERT's run_squad.py); a sketch of
# the first two follows this example.


def make_predictions(all_examples, all_features, all_results, n_best_size,
                     max_answer_length, do_lower_case,
                     verbose_logging, validate_flag=True):
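    """Map per-example model outputs to final answer strings.

    When validate_flag is True, returns a dict keyed by
    (paragraph_id, turn_id) holding only the best answer per example;
    otherwise returns (all_predictions, all_nbest_json) with the top
    answer and the full n-best list for each example.
    """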

    assert len(all_examples) == len(all_features)

    example_index_to_results = collections.defaultdict(list)
    for result in all_results:
        example_index_to_results[result.example_index].append(result)

    _PrelimPrediction = collections.namedtuple(
        "PrelimPrediction",
        ["result_index", "start_index", "end_index", "text", "logprob"])

    validate_predictions = dict()
    all_predictions = []
    all_nbest_json = []
    for (example_index, feature) in enumerate(all_features):
        example = all_examples[example_index]
        results = example_index_to_results[example_index]
        prelim_predictions = []
        for result_index, result in enumerate(results):
            #stop_logprob = np.log(result.stop_prob)
            #yes_no_flag_logprobs = np.log(_compute_softmax(result.yes_no_flag_logits)) # (2,)
            #yes_no_ans_logprobs = np.log(_compute_softmax(result.yes_no_ans_logits)) # (2,)

            # Yes/no question: the score is a sum of raw logits, used as an
            # unnormalized ranking score (the commented-out lines above show
            # the log-probability variant).
            if np.argmax(result.yes_no_flag_logits) == 1:
                if np.argmax(result.yes_no_ans_logits) == 1:
                    text = 'yes'
                    #logprob = stop_logprob + yes_no_flag_logprobs[1] + yes_no_ans_logprobs[1]
                    logprob = (result.stop_logits[1] + result.yes_no_flag_logits[1]
                               + result.yes_no_ans_logits[1])
                else:
                    text = 'no'
                    #logprob = stop_logprob + yes_no_flag_logprobs[1] + yes_no_ans_logprobs[0]
                    logprob = (result.stop_logits[1] + result.yes_no_flag_logits[1]
                               + result.yes_no_ans_logits[0])
                prelim_predictions.append(
                    _PrelimPrediction(result_index=result_index,
                                      start_index=-1,
                                      end_index=-1,
                                      text=text,
                                      logprob=logprob))
                continue
            start_indexes = _get_best_indexes(result.start_logits, n_best_size)
            end_indexes = _get_best_indexes(result.end_logits, n_best_size)
            #start_logprobs = np.log(_compute_softmax(result.start_logits))
            #end_logprobs = np.log(_compute_softmax(result.end_logits))

            for start_index in start_indexes:
                for end_index in end_indexes:
                    if start_index not in result.id_to_tok_map:
                        continue
                    if end_index not in result.id_to_tok_map:
                        continue
                    if end_index < start_index:
                        continue
                    length = end_index - start_index + 1
                    if length > max_answer_length:
                        continue
                    #logprob = stop_logprob + yes_no_flag_logprobs[0] + \
                    #          start_logprobs[start_index] + end_logprobs[end_index]
                    logprob = (result.stop_logits[1] + result.yes_no_flag_logits[0]
                               + result.start_logits[start_index]
                               + result.end_logits[end_index])
                    prelim_predictions.append(
                        _PrelimPrediction(result_index=result_index,
                                          start_index=start_index,
                                          end_index=end_index,
                                          text=None,
                                          logprob=logprob))
        prelim_predictions = sorted(prelim_predictions,
                                    key=lambda x: x.logprob,
                                    reverse=True)

        _NbestPrediction = collections.namedtuple("NbestPrediction",
                                                  ["text", "logprob"])

        seen_predictions = {}
        nbest = []
        for pred in prelim_predictions:
            if len(nbest) >= n_best_size:
                break
            result = results[pred.result_index]
            if (pred.start_index == -1 or pred.end_index == -1):
                final_text = pred.text
            else:
                # answer_tokens: tokenized answers
                doc_start = result.id_to_tok_map[pred.start_index]
                doc_end = result.id_to_tok_map[pred.end_index]
                answer_tokens = feature.doc_tokens[doc_start:doc_end + 1]
                answer_text = " ".join(answer_tokens)
                # De-tokenize WordPieces that have been split off.
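                # e.g. ["play", "##ing", "outside"] -> "play ##ing outside" -> "playing outside"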
                answer_text = answer_text.replace(" ##", "")
                answer_text = answer_text.replace("##", "")
                # Clean whitespace
                answer_text = answer_text.strip()
                answer_text = " ".join(answer_text.split())

                # orig_answer_tokens: original answers
                orig_doc_start = feature.tok_to_orig_map[doc_start]
                orig_doc_end = feature.tok_to_orig_map[doc_end]
                orig_answer_tokens = example.doc_tokens[
                    orig_doc_start:orig_doc_end + 1]
                orig_answer_text = " ".join(orig_answer_tokens)

                # combine tokenized answer text and original text
                final_text = get_final_text(answer_text, orig_answer_text,
                                            do_lower_case, verbose_logging)

            if final_text in seen_predictions:
                continue
            seen_predictions[final_text] = True
            nbest.append(
                _NbestPrediction(text=final_text, logprob=pred.logprob))
            if validate_flag:
                break

        if not nbest:
            nbest.append(_NbestPrediction(text="empty", logprob=0.0))

        assert len(nbest) >= 1

        if validate_flag:
            validate_predictions[(example.paragraph_id,
                                  example.turn_id)] = nbest[0].text
        else:
            total_scores = []
            for entry in nbest:
                total_scores.append(entry.logprob)

            nbest_json = []
            for (i, entry) in enumerate(nbest):
                output = collections.OrderedDict()
                output["text"] = entry.text
                output["logprob"] = entry.logprob
                nbest_json.append(output)

            assert len(nbest_json) >= 1

            cur_prediction = collections.OrderedDict()
            cur_prediction["id"] = example.paragraph_id
            cur_prediction["turn_id"] = example.turn_id
            cur_prediction["answer"] = nbest_json[0]["text"]
            all_predictions.append(cur_prediction)

            cur_nbest_json = collections.OrderedDict()
            cur_nbest_json["id"] = example.paragraph_id
            cur_nbest_json["turn_id"] = example.turn_id
            cur_nbest_json["answers"] = nbest_json
            all_nbest_json.append(cur_nbest_json)

    if validate_flag:
        return validate_predictions
    else:
        return all_predictions, all_nbest_json
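These make_predictions variants rely on the helper functions _get_best_indexes,
_compute_softmax, and get_final_text from BERT's run_squad.py-style utilities.
Below is a minimal sketch of the first two, matching that reference behavior
(get_final_text is longer and omitted here):

import math


def _get_best_indexes(logits, n_best_size):
    """Return the indexes of the n_best_size largest logits, best first."""
    index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
    return [index for index, _ in index_and_score[:n_best_size]]


def _compute_softmax(scores):
    """Compute softmax probabilities over raw logits (max-shifted for stability)."""
    if not scores:
        return []
    max_score = max(scores)
    exp_scores = [math.exp(score - max_score) for score in scores]
    total_sum = sum(exp_scores)
    return [x / total_sum for x in exp_scores]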
Example #2
def make_predictions(all_examples, all_features, all_results, n_best_size,
                     max_answer_length, do_lower_case, verbose_logging,
                     validate_flag=True):
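    """Variant keyed by QuAC-style example ids ("<dialog-id>_q#<turn>").

    Predictions with no valid span fall back to the module-level UNK
    constant, which is assumed to be defined elsewhere; the n-best output
    also carries softmax probabilities alongside the raw logits.
    """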
    assert len(all_examples) == len(all_features)
    example_index_to_results = collections.defaultdict(list)
    for result in all_results:
        example_index_to_results[result.example_index].append(result)

    _PrelimPrediction = collections.namedtuple(
        "PrelimPrediction",
        ["result_index", "start_index", "end_index", "text", "logit"])

    validate_predictions = collections.defaultdict(dict)
    all_predictions = []
    all_nbest_json = []
    for (example_index, feature) in enumerate(all_features):
        example = all_examples[example_index]
        results = example_index_to_results[example_index]
        prelim_predictions = []
        for result_index, result in enumerate(results):
            # yesno
            #yes_no_flag_logits = result.yes_no_flag_logits
            #yes_no_pred_flag = np.argmax(yes_no_flag_logits)

            # followup
            #followup_logits = result.followup_logits
            #followup = np.argmax(followup_logits)

            # answer span
            start_indexes = _get_best_indexes(result.start_logits, n_best_size)
            end_indexes = _get_best_indexes(result.end_logits, n_best_size)
            for start_index in start_indexes:
                for end_index in end_indexes:
                    if start_index not in result.id_to_tok_map:
                        continue
                    if end_index not in result.id_to_tok_map:
                        continue
                    if end_index < start_index:
                        continue
                    length = end_index - start_index + 1
                    if length > max_answer_length:
                        continue
                    """
                    logit = result.stop_logits[1] + yes_no_flag_logits[yes_no_pred_flag] + \
                            followup_logits[followup] + result.start_logits[start_index] + \
                            result.end_logits[end_index]
                    logit = result.stop_logits[1] + result.start_logits[start_index] + result.end_logits[end_index]
                    """
                    logit = result.stop_logits[1] + result.start_logits[start_index] + \
                        result.end_logits[end_index]
                    prelim_predictions.append(
                        _PrelimPrediction(
                            result_index=result_index,
                            start_index=start_index,
                            end_index=end_index,
                            text=None,
                            logit=logit))
        prelim_predictions = sorted(
            prelim_predictions,
            key=lambda x: x.logit,
            reverse=True)

        _NbestPrediction = collections.namedtuple("NbestPrediction",
                                                  ["text", "logit"])

        seen_predictions = {}
        nbest = []
        for pred in prelim_predictions:
            if len(nbest) >= n_best_size:
                break
            result = results[pred.result_index]
            if pred.start_index < 0 or pred.end_index < 0:
                final_text = UNK
            else:
                # answer_tokens: tokenized answers
                doc_start = result.id_to_tok_map[pred.start_index]
                doc_end = result.id_to_tok_map[pred.end_index]
                answer_tokens = feature.doc_tokens[doc_start:doc_end+1]
                answer_text = " ".join(answer_tokens)
                # De-tokenize WordPieces that have been split off.
                answer_text = answer_text.replace(" ##", "")
                answer_text = answer_text.replace("##", "")
                # Clean whitespace
                answer_text = answer_text.strip()
                answer_text = " ".join(answer_text.split())

                # orig_answer_tokens: original answers
                orig_doc_start = feature.tok_to_orig_map[doc_start]
                orig_doc_end = feature.tok_to_orig_map[doc_end]
                orig_answer_tokens = example.doc_tokens[orig_doc_start:orig_doc_end+1]
                orig_answer_text =  " ".join(orig_answer_tokens)
                
                # combine tokenized answer text and original text
                final_text = get_final_text(answer_text, orig_answer_text, do_lower_case, verbose_logging)
            
            if final_text in seen_predictions:
                continue
            seen_predictions[final_text] = True
            nbest.append(
                _NbestPrediction(
                    text=final_text,
                    logit=pred.logit))
            if validate_flag:
                break

        if not nbest:
            nbest.append(
                _NbestPrediction(
                    text=UNK,
                    logit=0.0))

        assert len(nbest) >= 1
        if validate_flag:
            qid = example.example_id
            dia_id = qid.split("_q#")[0]
            validate_predictions[dia_id][qid] = nbest[0].text
        else:
            total_scores = []
            for entry in nbest:
                total_scores.append(entry.logit)
            probs = _compute_softmax(total_scores)

            nbest_json = []
            for (i, entry) in enumerate(nbest):
                output = collections.OrderedDict()
                output["text"] = entry.text
                output["probability"] = probs[i]
                output["logit"] = entry.logit
                nbest_json.append(output)

            assert len(nbest_json) >= 1
            
            cur_prediction = collections.OrderedDict()
            cur_prediction['example_id'] = example.example_id
            cur_prediction['answer'] = nbest_json[0]["text"]
            all_predictions.append(cur_prediction)

            cur_nbest_json = collections.OrderedDict()
            cur_nbest_json["example_id"] = example.example_id
            cur_nbest_json["answers"] = nbest_json
            all_nbest_json.append(cur_nbest_json)
            
    if validate_flag:
        return validate_predictions
    else:
        return all_predictions, all_nbest_json
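A brief usage sketch for the validate path of this variant; the inputs are
assumed to come from the surrounding evaluation pipeline, and the 20/30
values below are placeholders rather than settings from the original code:

preds = make_predictions(all_examples, all_features, all_results,
                         n_best_size=20, max_answer_length=30,  # placeholders
                         do_lower_case=True, verbose_logging=False,
                         validate_flag=True)
# preds maps dialog id -> {question id -> best answer text}
for dia_id in preds:
    for qid, answer in preds[dia_id].items():
        print(dia_id, qid, answer)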
Example #3
def make_predictions(all_examples,
                     all_features,
                     all_results,
                     n_best_size,
                     max_answer_length,
                     do_lower_case,
                     verbose_logging,
                     validate_flag=True):
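    """Variant that supports multiple sliding-window features per example.

    Features are grouped by example_index and matched to results via
    unique_id; token_is_max_context drops spans whose start token does not
    sit in its maximal context window.
    """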
    example_index_to_features = collections.defaultdict(list)
    for feature in all_features:
        example_index_to_features[feature.example_index].append(feature)

    unique_id_to_result = {}
    for result in all_results:
        unique_id_to_result[result.unique_id] = result

    _PrelimPrediction = collections.namedtuple(
        "PrelimPrediction",
        ["feature_index", "start_index", "end_index", "text", "logit"])

    validate_predictions = collections.defaultdict(dict)
    all_predictions = []
    all_nbest_json = []
    for (example_index, example) in enumerate(all_examples):
        features = example_index_to_features[example_index]

        prelim_predictions = []
        for (feature_index, feature) in enumerate(features):
            result = unique_id_to_result[feature.unique_id]
            start_indexes = _get_best_indexes(result.start_logits, n_best_size)
            end_indexes = _get_best_indexes(result.end_logits, n_best_size)
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # We could hypothetically create invalid predictions, e.g., predict
                    # that the start of the span is in the question. We throw out all
                    # invalid predictions.
                    if start_index >= len(feature.tokens):
                        continue
                    if end_index >= len(feature.tokens):
                        continue
                    if start_index not in feature.token_to_orig_map:
                        continue
                    if end_index not in feature.token_to_orig_map:
                        continue
                    if not feature.token_is_max_context.get(
                            start_index, False):
                        continue
                    if end_index < start_index:
                        continue
                    length = end_index - start_index + 1
                    if length > max_answer_length:
                        continue
                    prelim_predictions.append(
                        _PrelimPrediction(
                            feature_index=feature_index,
                            start_index=start_index,
                            end_index=end_index,
                            text=None,
                            logit=result.start_logits[start_index] +
                            result.end_logits[end_index]))

        prelim_predictions = sorted(prelim_predictions,
                                    key=lambda x: x.logit,
                                    reverse=True)

        _NbestPrediction = collections.namedtuple("NbestPrediction",
                                                  ["text", "logit"])

        seen_predictions = {}
        nbest = []
        for pred in prelim_predictions:
            if len(nbest) >= n_best_size:
                break
            feature = features[pred.feature_index]

            if (pred.start_index == -1 or pred.end_index == -1):
                final_text = pred.text
            else:
                tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
                orig_doc_start = feature.token_to_orig_map[pred.start_index]
                orig_doc_end = feature.token_to_orig_map[pred.end_index]
                orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
                tok_text = " ".join(tok_tokens)

                # De-tokenize WordPieces that have been split off.
                tok_text = tok_text.replace(" ##", "")
                tok_text = tok_text.replace("##", "")

                # Clean whitespace
                tok_text = tok_text.strip()
                tok_text = " ".join(tok_text.split())
                orig_text = " ".join(orig_tokens)

                final_text = get_final_text(tok_text, orig_text, do_lower_case,
                                            verbose_logging)

            if final_text in seen_predictions:
                continue
            seen_predictions[final_text] = True
            nbest.append(_NbestPrediction(text=final_text, logit=pred.logit))

            # for validation, only the best one prediction is needed
            if validate_flag:
                break

        # In very rare edge cases we could have no valid predictions. So we
        # just create a nonce prediction in this case to avoid failure.
        if not nbest:
            nbest.append(_NbestPrediction(text="empty", logit=0.0))

        assert len(nbest) >= 1

        if validate_flag:
            qid = example.example_id
            dia_id = qid.split("_q#")[0]
            validate_predictions[dia_id][qid] = nbest[0].text
        else:
            total_scores = []
            for entry in nbest:
                total_scores.append(entry.logit)

            probs = _compute_softmax(total_scores)

            nbest_json = []
            for (i, entry) in enumerate(nbest):
                output = collections.OrderedDict()
                output["text"] = entry.text
                output["probability"] = probs[i]
                output["logit"] = entry.logit
                nbest_json.append(output)

            assert len(nbest_json) >= 1

            cur_prediction = collections.OrderedDict()
            cur_prediction["example_id"] = example.example_id
            cur_prediction["answer"] = nbest_json[0]["text"]
            all_predictions.append(cur_prediction)

            cur_nbest_json = collections.OrderedDict()
            cur_nbest_json["example_id"] = example.example_id
            cur_nbest_json["answers"] = nbest_json
            all_nbest_json.append(cur_nbest_json)

    if validate_flag:
        return validate_predictions
    else:
        return all_predictions, all_nbest_json
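A brief usage sketch for the prediction path, writing the results out as
JSON; inputs are again assumed to come from the surrounding pipeline, and
the 20/30 values are placeholders:

import json

all_predictions, all_nbest_json = make_predictions(
    all_examples, all_features, all_results,
    n_best_size=20, max_answer_length=30,  # placeholder values
    do_lower_case=True, verbose_logging=False, validate_flag=False)

with open("predictions.json", "w") as writer:
    writer.write(json.dumps(all_predictions, indent=2) + "\n")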