def predict_final_word_models(models, vocabulary, filename, weights=None):
    """Score the final word of each sentence with a weighted model ensemble.

    The first line of ``filename`` is skipped as a header.  Every remaining
    line must consist of exactly three comma-separated fields,
    ``id,previous_sentence,last_token``; the third field is not used here.
    The sentence is split on whitespace, encoded with ``build_input_data``
    and passed to every model.  The weighted model outputs are summed and,
    for each row, the slice at position ``len(tokens) - 1`` is kept.

    Args:
        models: Iterable of objects exposing ``predict(X, batch_size=...)``.
            The result is indexed as ``[row, position, :]``, so it is
            presumably shaped (rows, positions, scores) -- confirm against
            the models actually used.
        vocabulary: Passed through unchanged to ``build_input_data``.
        filename: Path of the comma-separated input file.
        weights: Optional sequence holding one weight per model.  When
            omitted, the legacy weighting is applied: 0.6 for the first
            model and 0.4 for every following one.  Note that these legacy
            weights only sum to 1 when exactly two models are given.

    Returns:
        dict mapping each id string from the file to the combined output
        slice for that row.  If an id occurs more than once, the last
        occurrence wins.

    Raises:
        ValueError: If ``models`` is empty, if ``weights`` does not have one
            entry per model, or if a data line does not contain exactly
            three comma-separated fields.
    """
    models = list(models)
    if not models:
        raise ValueError("models must contain at least one model")

    if weights is None:
        # Legacy behaviour: the first model gets 0.6, all others 0.4.
        weights = [0.6] + [0.4] * (len(models) - 1)
    else:
        weights = list(weights)
        if len(weights) != len(models):
            raise ValueError(
                "expected %d weights (one per model), got %d"
                % (len(models), len(weights))
            )

    id_list = []
    prev_tokens_list = []
    with open(filename, "r") as fin:
        fin.readline()  # skip the header line
        for line in fin:
            # The third field (the ground-truth last token) is not needed.
            id_, prev_sent, _ = line.strip().split(",")
            id_list.append(id_)
            prev_tokens_list.append(prev_sent.split())

    X = np.array([
        build_input_data(tokens, vocabulary)[0][0].tolist()
        for tokens in prev_tokens_list
    ])

    # Weighted sum of the model outputs; starting from 0 lets the first
    # addition broadcast to the shape of the first prediction.
    y_prob = 0
    for weight, model in zip(weights, models):
        y_prob = y_prob + weight * model.predict(X, batch_size=32)

    # Keep, for each row, the output at the position of its last input token.
    last_token_probs = np.array([
        row_probs[len(tokens) - 1, :]
        for row_probs, tokens in zip(y_prob, prev_tokens_list)
    ])

    return dict(zip(id_list, last_token_probs))
# Example #2
# 0
def predict_final_word(model_list, vocabulary, filename):
    """Score the final word of each sentence by averaging several models.

    The first line of ``filename`` is skipped as a header.  Every remaining
    line must consist of exactly three comma-separated fields,
    ``id,previous_sentence,last_token``; the third field is not used here.
    The sentence is split on whitespace and encoded with
    ``build_input_data``.  The file is parsed and encoded once, then every
    model predicts on the same input.  For each row the slice at position
    ``len(tokens) - 1`` is kept, and these slices are averaged with equal
    weight across the models.

    Args:
        model_list: Iterable of objects exposing
            ``predict(X, batch_size=...)``.  The result is indexed as
            ``[row, position, :]``, so it is presumably shaped
            (rows, positions, scores) -- confirm against the models used.
        vocabulary: Passed through unchanged to ``build_input_data``.
        filename: Path of the comma-separated input file.

    Returns:
        dict mapping each id string from the file to the averaged output
        slice for that row.  If an id occurs more than once, the last
        occurrence wins.

    Raises:
        ValueError: If ``model_list`` is empty, or if a data line does not
            contain exactly three comma-separated fields.
    """
    model_list = list(model_list)
    if not model_list:
        raise ValueError("model_list must contain at least one model")

    # Parsing and encoding do not depend on the model, so do them only once
    # instead of once per model.
    id_list = []
    prev_tokens_list = []
    with open(filename, "r") as fin:
        fin.readline()  # skip the header line
        for line in fin:
            # The third field (the ground-truth last token) is not needed.
            id_, prev_sent, _ = line.strip().split(",")
            id_list.append(id_)
            prev_tokens_list.append(prev_sent.split())

    X = np.array([
        build_input_data(tokens, vocabulary)[0][0].tolist()
        for tokens in prev_tokens_list
    ])

    # Running sum of the per-model last-token slices; starting from 0 lets
    # the first addition broadcast to the shape of the first result.
    prob_sum = 0
    for model in model_list:
        y_prob = model.predict(X, batch_size=32)
        # Keep, for each row, the output at the position of its last token.
        prob_sum = prob_sum + np.array([
            row_probs[len(tokens) - 1, :]
            for row_probs, tokens in zip(y_prob, prev_tokens_list)
        ])

    last_token_probs = prob_sum / len(model_list)
    return dict(zip(id_list, last_token_probs))