def search(decode, state, en_sent, cand):
    """Beam-search the decoder and return the best finished hypothesis.

    Decoding starts from a single ``bos`` token. ``cand`` hypotheses are kept
    alive; each round every live hypothesis is extended by one token, the
    ``cand`` best continuations are selected, and a hypothesis is retired
    once it emits ``eos`` or the round counter reaches ``max_len``.

    Args:
        decode: Callable invoked as ``decode(zh_sent, state, en_sent)``; its
            result is indexed ``[0][step]`` and soft-maxed to obtain the
            distribution for one position.
        state: Passed through to ``decode`` unchanged.
        en_sent: Passed through to ``decode`` unchanged.
        cand: Beam width, i.e. number of hypotheses kept alive.

    Returns:
        The finished hypothesis with the highest ``log / count`` score, with
        its first element sliced off (presumably the ``bos`` marker, assuming
        it is a single character -- TODO confirm).
    """
    zh_pad_bos = sent2ind([bos], zh_word_inds, seq_len, keep_oov=True)
    zh_word = torch.LongTensor([zh_pad_bos]).to(device)
    prods = decode(zh_word, state, en_sent)[0][0]
    # Tensor.numpy() only works for CPU tensors that do not require grad, so
    # detach and move to host memory first (both are no-ops when not needed).
    probs = F.softmax(prods, dim=0).detach().cpu().numpy()
    max_probs, max_inds = check(probs, cand, keep_eos=False)
    zh_texts, log_sums = [bos] * cand, np.log(max_probs)
    fin_zh_texts, fin_logs = list(), list()
    next_words, count = [zh_ind_words[ind] for ind in max_inds], 1
    while cand > 0:
        log_mat, ind_mat = list(), list()
        count = count + 1
        for i in range(cand):
            # Commit the token chosen for this beam in the previous round.
            zh_texts[i] = zh_texts[i] + next_words[i]
            zh_pad_seq = sent2ind(zh_texts[i],
                                  zh_word_inds,
                                  seq_len,
                                  keep_oov=True)
            zh_sent = torch.LongTensor([zh_pad_seq]).to(device)
            # Clamp so the position index never runs past the padded length.
            step = min(count - 1, seq_len - 1)
            prods = decode(zh_sent, state, en_sent)[0][step]
            probs = F.softmax(prods, dim=0).detach().cpu().numpy()
            max_probs, max_inds = check(probs, cand, keep_eos=True)
            max_logs = np.log(max_probs) + log_sums[i]
            log_mat.append(max_logs)
            ind_mat.append(max_inds)
        log_arr = np.array(log_mat)
        # Rank every (beam, continuation) cell by score, best first. Working
        # with cell positions instead of looking scores up by value keeps tied
        # scores from selecting the same cell twice; the stable sort keeps
        # row-major order among ties.
        top_args = np.argsort(-log_arr, axis=None, kind='stable')[:cand]
        next_zh_texts, next_words, log_sums = list(), list(), list()
        for flat_arg in top_args:
            sent_arg, ind_arg = (
                int(arg) for arg in np.unravel_index(flat_arg, log_arr.shape)
            )
            log = log_arr[sent_arg, ind_arg]
            next_word = zh_ind_words[ind_mat[sent_arg][ind_arg]]
            if next_word != eos and count < max_len:
                next_words.append(next_word)
                next_zh_texts.append(zh_texts[sent_arg])
                log_sums.append(log)
            else:
                # Hypothesis is finished: shrink the beam and store its score
                # divided by the round counter.
                cand = cand - 1
                fin_zh_texts.append(zh_texts[sent_arg])
                fin_logs.append(log / count)
        zh_texts = next_zh_texts
    max_arg = np.argmax(np.array(fin_logs))
    return fin_zh_texts[max_arg][1:]
def predict(text, name):
    """Encode a cleaned input text and return the beam-search result.

    Args:
        text: Raw input string; it is normalised with ``clean(text, 'en')``
            and split on whitespace.
        name: Model prefix; ``name + '_encode'`` and ``name + '_decode'`` are
            looked up in ``models`` via ``map_item``.

    Returns:
        Whatever ``search`` returns for a beam width of 3.
    """
    tokens = clean(text, 'en').split()
    padded = sent2ind(tokens, en_word_inds, seq_len, keep_oov=True)
    source = torch.LongTensor([padded]).to(device)
    encoder = map_item(name + '_encode', models)
    decoder = map_item(name + '_decode', models)
    # Inference only: no gradients, both modules switched to eval mode.
    with torch.no_grad():
        encoder.eval()
        enc_state = encoder(source)
        decoder.eval()
        best = search(decoder, enc_state, source, cand=3)
    return best
Exemple #3
0
def predict(text, name):
    """Encode a cleaned input text, beam-search a prediction, optionally plot.

    Args:
        text: Raw input string; it is normalised with ``clean(text, 'en')``
            and ``eos`` is appended before splitting on whitespace.
        name: Model prefix; ``name + '_encode'`` / ``name + '_decode'`` (and
            ``name + '_core'`` when plotting) are looked up in ``models``.

    Returns:
        The value returned by ``search`` for a beam width of 3.
    """
    en_words = ' '.join([clean(text, 'en'), eos]).split()
    padded_src = sent2ind(en_words, en_word_inds, seq_len, 'pre', keep_oov=True)
    en_sent = torch.LongTensor([padded_src]).to(device)
    encoder = map_item(name + '_encode', models)
    decoder = map_item(name + '_decode', models)
    with torch.no_grad():
        encoder.eval()
        state = encoder(en_sent)
        decoder.eval()
        zh_pred = search(decoder, state, cand=3)
        # The plot is only drawn for the 'att' model when run as a script.
        want_plot = name == 'att' and __name__ == '__main__'
        if want_plot:
            zh_text = bos + zh_pred
            padded_tgt = sent2ind(zh_text, zh_word_inds, seq_len, 'post', keep_oov=True)
            zh_sent = torch.LongTensor([padded_tgt]).to(device)
            att_core = map_item(name + '_core', models)
            atts = att_core(zh_sent, state)[0]
            # Source axis drops the trailing eos; target axis swaps the
            # leading element for a trailing eos.
            plot_att(en_words[:-1], zh_text[1:] + eos, atts)
    return zh_pred
def predict(text, name):
    """Encode a cleaned input text, beam-search a prediction, optionally plot.

    Args:
        text: Raw input string; it is normalised with ``clean`` and ``eos``
            is appended before splitting on whitespace.
        name: Model prefix; ``name + '_encode'`` / ``name + '_decode'`` (and
            ``name + '_core'`` when plotting) are looked up in ``models``.

    Returns:
        The value returned by ``search`` for a beam width of 3.
    """
    text1 = ' '.join([clean(text), eos])
    src_words = text1.split()
    src_pad = sent2ind(src_words, word_inds, seq_len1, 'pre', keep_oov=True)
    src_sent = torch.LongTensor([src_pad]).to(device)
    encoder = map_item(name + '_encode', models)
    decoder = map_item(name + '_decode', models)
    with torch.no_grad():
        encoder.eval()
        state = encoder(src_sent)
        decoder.eval()
        # search receives the eos-terminated source string itself.
        pred = search(decoder, state, text1, cand=3)
        # The plot is only drawn when the module is run as a script.
        if __name__ == '__main__':
            tgt_words = ' '.join([bos, pred]).split()
            tgt_pad = sent2ind(tgt_words, word_inds, seq_len2, 'post', keep_oov=True)
            tgt_sent = torch.LongTensor([tgt_pad]).to(device)
            ptr_core = map_item(name + '_core', models)
            ptrs = ptr_core(tgt_sent, state)[0]
            # Source axis drops the trailing eos; target axis drops the
            # leading bos and gains a trailing eos.
            plot_ptr(src_words[:-1], tgt_words[1:] + [eos], ptrs)
    return pred
def search(decode, state, text1, cand):
    """Beam-search the decoder and return the best finished hypothesis.

    Decoding starts from a single ``bos`` token. ``cand`` hypotheses are kept
    alive; each round every live hypothesis is extended by one word, the
    ``cand`` best continuations are selected, and a hypothesis is retired
    once it emits ``eos`` or the round counter reaches ``max_len``.

    Args:
        decode: Callable invoked as ``decode(sent2, state)``; its result is
            indexed ``[0][step]`` and used directly as the distribution for
            one position (no softmax is applied here).
        state: Passed through to ``decode`` unchanged.
        text1: Source text, forwarded to ``switch`` when an index is mapped
            back to a word.
        cand: Beam width, i.e. number of hypotheses kept alive.

    Returns:
        The finished hypothesis with the highest ``log / count`` score, with
        its first character sliced off (presumably the ``bos`` marker,
        assuming it is a single character -- TODO confirm).
    """
    pad_bos = sent2ind([bos], word_inds, seq_len2, 'post', keep_oov=True)
    word2 = torch.LongTensor([pad_bos]).to(device)
    # Tensor.numpy() only works for CPU tensors that do not require grad, so
    # detach and move to host memory first (both are no-ops when not needed).
    probs = decode(word2, state)[0][0].detach().cpu().numpy()
    max_probs, max_inds = check(probs, cand, keep_eos=False)
    text2s, log_sums = [bos] * cand, np.log(max_probs)
    fin_text2s, fin_logs = list(), list()
    next_words, count = [switch(ind, text1, vocab_num) for ind in max_inds], 1
    while cand > 0:
        log_mat, ind_mat = list(), list()
        count = count + 1
        for i in range(cand):
            # Commit the word chosen for this beam in the previous round.
            text2s[i] = ' '.join([text2s[i], next_words[i]])
            # NOTE(review): sent2ind receives a space-joined string here, not
            # a list of words -- confirm it tokenises this as intended.
            pad_seq2 = sent2ind(text2s[i], word_inds, seq_len2, 'post', keep_oov=True)
            sent2 = torch.LongTensor([pad_seq2]).to(device)
            # Clamp so the position index never runs past the padded length.
            step = min(count - 1, seq_len2 - 1)
            probs = decode(sent2, state)[0][step].detach().cpu().numpy()
            max_probs, max_inds = check(probs, cand, keep_eos=True)
            max_logs = np.log(max_probs) + log_sums[i]
            log_mat.append(max_logs)
            ind_mat.append(max_inds)
        log_arr = np.array(log_mat)
        # Rank every (beam, continuation) cell by score, best first. Working
        # with cell positions instead of looking scores up by value keeps tied
        # scores from selecting the same cell twice; the stable sort keeps
        # row-major order among ties.
        top_args = np.argsort(-log_arr, axis=None, kind='stable')[:cand]
        next_text2s, next_words, log_sums = list(), list(), list()
        for flat_arg in top_args:
            sent_arg, ind_arg = (
                int(arg) for arg in np.unravel_index(flat_arg, log_arr.shape)
            )
            log = log_arr[sent_arg, ind_arg]
            next_word = switch(ind_mat[sent_arg][ind_arg], text1, vocab_num)
            if next_word != eos and count < max_len:
                next_words.append(next_word)
                next_text2s.append(text2s[sent_arg])
                log_sums.append(log)
            else:
                # Hypothesis is finished: shrink the beam and store its score
                # divided by the round counter.
                cand = cand - 1
                fin_text2s.append(text2s[sent_arg])
                fin_logs.append(log / count)
        text2s = next_text2s
    max_arg = np.argmax(np.array(fin_logs))
    return fin_text2s[max_arg][1:]
Exemple #6
0
def predict(text, name):
    """Classify a text and return every label with its probability.

    Args:
        text: Raw input string; it is normalised with ``clean`` before being
            converted to indices.
        name: Key used to look the model up in ``models`` via ``map_item``.

    Returns:
        A string of ``'<label> <prob>'`` entries (probability with three
        decimals) joined by ``', '``, ordered from most to least probable.
    """
    text = clean(text)
    pad_seq = sent2ind(text, word_inds, seq_len, keep_oov=True)
    sent = torch.LongTensor([pad_seq]).to(device)
    model = map_item(name, models)
    with torch.no_grad():
        model.eval()
        probs = F.softmax(model(sent), dim=1)
    # Tensor.numpy() requires a CPU tensor; .cpu() is a no-op when the
    # tensor is already on the host.
    probs = probs.cpu().numpy()[0]
    # A single descending index order drives both the label and the value,
    # so each label is always paired with its own probability.
    sort_inds = np.argsort(-probs)
    return ', '.join(
        '{} {:.3f}'.format(ind_labels[ind], probs[ind]) for ind in sort_inds
    )
def predict(text, name, thre):
    """Insert a space after every item of ``text`` the model flags.

    Args:
        text: Input sequence (iterated item by item, e.g. a string).
        name: Key used to look the model up in ``models`` via ``map_item``.
        thre: Threshold; an item is flagged when its sigmoid output is
            strictly greater than this value.

    Returns:
        The items of ``text`` concatenated, with ``' '`` appended after each
        flagged item.
    """
    pad_seq = sent2ind(text, word_inds, seq_len, keep_oov=True)
    sent = torch.LongTensor([pad_seq]).to(device)
    model = map_item(name, models)
    with torch.no_grad():
        model.eval()
        probs = torch.sigmoid(model(sent))
    # Tensor.numpy() requires a CPU tensor; .cpu() is a no-op when the
    # tensor is already on the host.
    probs = probs.cpu().numpy()[0]
    probs = np.squeeze(probs, axis=-1)
    preds = probs > thre
    # Only the last `bound` predictions are kept -- presumably because
    # sent2ind pads on the left by default (TODO confirm).
    # NOTE(review): when len(text) > seq_len the zip below stops after
    # seq_len items, so the remainder of `text` is dropped from the output.
    bound = min(len(text), seq_len)
    # For empty text bound is 0 and the slice is the whole array, but the
    # zip below then yields nothing, so the result is still ''.
    mask_preds = preds[-bound:]
    cands = list()
    for word, pred in zip(text, mask_preds):
        cands.append(word)
        if pred:
            cands.append(' ')
    return ''.join(cands)