def search(decode, state, cand):
    """Generate a sentence with beam search and return the best finished one.

    Args:
        decode: decoder model; ``decode.predict([padded_seq, state])[0][t]``
            is read as the score vector for position ``t``.
        state: encoder output, passed unchanged to every ``decode.predict``.
        cand: beam width, i.e. how many hypotheses are kept alive per step.

    Returns:
        The finished hypothesis whose accumulated log-probability divided by
        the step count is largest, minus its first two characters
        (presumably the ``bos`` symbol and the space after it -- assumes
        ``bos`` is a single character; confirm against its definition).
    """
    # First step: feed only the bos token and read the scores at position 0.
    bos_ind = [word_inds[bos]]
    pad_bos = pad_sequences([bos_ind],
                            maxlen=seq_len,
                            padding='post',
                            truncating='post')
    pad_bos = add_buf(pad_bos, sym=False)
    probs = decode.predict([pad_bos, state])[0][0]
    # NOTE(review): `check` presumably returns the top `cand` probabilities
    # and their vocabulary indices; keep_eos=False is used on the first step.
    max_probs, max_inds = check(probs, cand, keep_eos=False)
    sent2s, log_sums = [bos] * cand, np.log(max_probs)
    fin_sent2s, fin_logs = [], []
    next_words, count = [ind_words[ind] for ind in max_inds], 1
    # `cand` shrinks by one each time a hypothesis finishes, so the loop ends
    # once every beam slot has produced a finished sentence.
    while cand > 0:
        log_mat, ind_mat = [], []
        count = count + 1
        for i in range(cand):
            # Extend hypothesis i with its pending word, then score the
            # possible continuations of the extended prefix.
            sent2s[i] = ' '.join([sent2s[i], next_words[i]])
            seq2 = word2ind.texts_to_sequences([sent2s[i]])[0]
            pad_seq2 = pad_sequences([seq2],
                                     maxlen=seq_len,
                                     padding='post',
                                     truncating='post')
            pad_seq2 = add_buf(pad_seq2, sym=False)
            step = min(count - 1, seq_len - 1)
            probs = decode.predict([pad_seq2, state])[0][step]
            max_probs, max_inds = check(probs, cand, keep_eos=True)
            log_mat.append(np.log(max_probs) + log_sums[i])
            ind_mat.append(max_inds)
        log_mat = np.array(log_mat)
        # Rank the (hypothesis, continuation) cells by position instead of
        # looking each score up again by value: a value lookup returns the
        # first matching cell for every tied score, which would put the same
        # expansion into the beam more than once. The stable sort keeps the
        # original first-occurrence order among ties.
        order = np.argsort(-log_mat, axis=None, kind='stable')[:cand]
        next_sent2s, next_words, log_sums = [], [], []
        for flat_arg in order:
            sent_arg, ind_arg = (
                int(arg) for arg in np.unravel_index(flat_arg, log_mat.shape)
            )
            log = log_mat[sent_arg, ind_arg]
            next_word = ind_words[ind_mat[sent_arg][ind_arg]]
            if next_word != eos and count < max_len:
                next_words.append(next_word)
                next_sent2s.append(sent2s[sent_arg])
                log_sums.append(log)
            else:
                # Hypothesis ended (eos or length limit): free its beam slot
                # and store its score normalised by the step count.
                cand = cand - 1
                fin_sent2s.append(sent2s[sent_arg])
                fin_logs.append(log / count)
        sent2s = next_sent2s
    max_arg = np.argmax(np.array(fin_logs))
    return fin_sent2s[max_arg][2:]
def predict(text, name, mode):
    """Encode ``text`` with the named model pair and decode a reply.

    Args:
        text: raw input text; it is passed through ``clean`` before
            tokenisation.
        name: model prefix; ``name + '_encode'`` and ``name + '_decode'`` are
            looked up in ``models`` via ``map_item``.
        mode: key looked up in ``funcs`` via ``map_item`` to pick the
            decoding function.

    Returns:
        Whatever the selected decoding function returns when called with the
        decoder, the encoder output and ``cand=3``.
    """
    cleaned = clean(text)
    token_ids = word2ind.texts_to_sequences([cleaned])[0]
    # The source side is padded and truncated at the front.
    encoder_input = add_buf(
        pad_sequences([token_ids],
                      maxlen=seq_len,
                      padding='pre',
                      truncating='pre'),
        sym=True,
    )
    encoder = map_item(name + '_encode', models)
    state = encoder.predict(encoder_input)
    decoder = map_item(name + '_decode', models)
    decode_fn = map_item(mode, funcs)
    return decode_fn(decoder, state, cand=3)
def predict(text, name):
    """Predict one label per item of ``text`` with the model called ``name``.

    Args:
        text: sequence of tokens (e.g. a string of characters); the code
            assumes one model position per item of ``text`` -- confirm
            against the tokenizer configuration.
        name: key of the model in ``models``; for ``'cnn'`` the padded input
            is additionally passed through ``add_buf``.

    Returns:
        When the module runs as a script, a list of ``(item, label)`` pairs;
        otherwise the array of predicted label indices for the last
        ``min(len(text), seq_len)`` positions.
    """
    seq = word2ind.texts_to_sequences([text])[0]
    pad_seq = pad_sequences([seq], maxlen=seq_len)
    if name == 'cnn':
        pad_seq = add_buf(pad_seq)
    model = map_item(name, models)
    probs = model.predict(pad_seq)[0]
    bound = min(len(text), seq_len)
    labels = np.argmax(probs, axis=1)
    # Slice from an explicit start index: `labels[-bound:]` with bound == 0
    # is `labels[0:]` and would return every position for an empty text.
    preds = labels[max(len(labels) - bound, 0):]
    if __name__ == '__main__':
        # pad_sequences pads and truncates at the front by default, so the
        # predictions belong to the LAST `bound` items of `text`; pair them
        # with that tail rather than with the beginning of a long text.
        tail = text[len(text) - bound:]
        return [(word, ind_labels[pred]) for word, pred in zip(tail, preds)]
    return preds
def sample(decode, state, cand):
    """Generate a sentence by sampling each next word from the top choices.

    Args:
        decode: decoder model; ``decode.predict([padded_seq, state])[0][t]``
            is read as the score vector for position ``t``.
        state: encoder output, passed unchanged to every ``decode.predict``.
        cand: forwarded to ``check`` (presumably the number of top-scoring
            words to sample from -- confirm against ``check``).

    Returns:
        The generated text without its first three characters (presumably
        the ``bos`` symbol plus the two separating spaces -- assumes ``bos``
        is a single character).
    """
    generated = bos
    word = ''
    n_steps = 0
    while True:
        if word == eos or n_steps >= max_len:
            break
        n_steps += 1
        # The pending word is appended at the start of the iteration, so the
        # word chosen in the final iteration never reaches the output.
        generated = ' '.join((generated, word))
        token_ids = word2ind.texts_to_sequences([generated])[0]
        decoder_input = add_buf(
            pad_sequences([token_ids],
                          maxlen=seq_len,
                          padding='post',
                          truncating='post'),
            sym=False,
        )
        # Read the scores at the position of the newest token, clamped to
        # the last position once the text is longer than seq_len.
        position = min(n_steps, seq_len) - 1
        scores = decode.predict([decoder_input, state])[0][position]
        top_probs, top_inds = check(scores, cand, keep_eos=True)
        if top_inds[0] == word_inds[eos]:
            # eos is the first returned candidate: stop instead of sampling.
            word = eos
            continue
        # Renormalise so the kept probabilities sum to one before sampling.
        weights = top_probs / np.sum(top_probs)
        word = ind_words[choice(top_inds, p=weights)]
    return generated[3:]