import numpy as np
from numpy.random import choice
from keras.preprocessing.sequence import pad_sequences

# Globals such as word2ind, word_inds, ind_words, bos, eos, seq_len, max_len,
# models, funcs, ind_labels and the helpers clean(), add_buf(), check(),
# map_item() are assumed to be defined elsewhere in the project.


def search(decode, state, cand):
    # Beam search: keep `cand` partial sentences and repeatedly expand the best ones.
    bos_ind = [word_inds[bos]]
    pad_bos = pad_sequences([bos_ind], maxlen=seq_len, padding='post', truncating='post')
    pad_bos = add_buf(pad_bos, sym=False)
    probs = decode.predict([pad_bos, state])[0][0]
    max_probs, max_inds = check(probs, cand, keep_eos=False)
    sent2s, log_sums = [bos] * cand, np.log(max_probs)
    fin_sent2s, fin_logs = list(), list()
    next_words, count = [ind_words[ind] for ind in max_inds], 1
    while cand > 0:
        log_mat, ind_mat = list(), list()
        count = count + 1
        for i in range(cand):
            # Extend the i-th candidate with its chosen word and re-decode.
            sent2s[i] = ' '.join([sent2s[i], next_words[i]])
            seq2 = word2ind.texts_to_sequences([sent2s[i]])[0]
            pad_seq2 = pad_sequences([seq2], maxlen=seq_len, padding='post', truncating='post')
            pad_seq2 = add_buf(pad_seq2, sym=False)
            step = min(count - 1, seq_len - 1)
            probs = decode.predict([pad_seq2, state])[0][step]
            max_probs, max_inds = check(probs, cand, keep_eos=True)
            max_logs = np.log(max_probs) + log_sums[i]
            log_mat.append(max_logs)
            ind_mat.append(max_inds)
        # Keep the `cand` best cumulative log-probabilities across all branches.
        max_logs = -np.sort(-np.array(log_mat), axis=None)[:cand]
        next_sent2s, next_words, log_sums = list(), list(), list()
        for log in max_logs:
            args = np.where(log_mat == log)
            sent_arg, ind_arg = int(args[0][0]), int(args[1][0])
            next_word = ind_words[ind_mat[sent_arg][ind_arg]]
            if next_word != eos and count < max_len:
                next_words.append(next_word)
                next_sent2s.append(sent2s[sent_arg])
                log_sums.append(log)
            else:
                # Finished branch: retire it and score by length-normalized log-prob.
                cand = cand - 1
                fin_sent2s.append(sent2s[sent_arg])
                fin_logs.append(log / count)
        sent2s = next_sent2s
    max_arg = np.argmax(np.array(fin_logs))
    return fin_sent2s[max_arg][2:]
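# check() is referenced above but not shown. A minimal sketch, assuming it
# returns the top-`cand` probabilities together with their vocabulary indices
# and, when keep_eos=False, masks the eos token so decoding cannot stop on the
# very first step; the project's actual helper may differ.
def check_sketch(probs, cand, keep_eos):
    probs = np.array(probs, dtype=float)
    if not keep_eos:
        probs[word_inds[eos]] = 0.0  # assumption: eos is simply zeroed out
    max_inds = np.argsort(-probs)[:cand]  # indices of the `cand` largest probabilities
    return probs[max_inds], max_inds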
def predict(text, name, mode):
    # Encode the cleaned source sentence once, then hand the encoder state
    # to the chosen decoding strategy (beam search or sampling).
    sent1 = clean(text)
    seq1 = word2ind.texts_to_sequences([sent1])[0]
    pad_seq1 = pad_sequences([seq1], maxlen=seq_len, padding='pre', truncating='pre')
    pad_seq1 = add_buf(pad_seq1, sym=True)
    encode = map_item(name + '_encode', models)
    state = encode.predict(pad_seq1)
    decode = map_item(name + '_decode', models)
    func = map_item(mode, funcs)
    return func(decode, state, cand=3)
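# map_item() is assumed to be a plain keyed lookup into a dict of loaded models
# or registered decoding functions; a minimal sketch under that assumption
# (the project's own helper may behave differently):
def map_item_sketch(name, items):
    if name in items:
        return items[name]
    raise KeyError('unknown item: %s' % name)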
def predict(text, name):
    # Sequence labelling: predict one label per character of the input text.
    seq = word2ind.texts_to_sequences([text])[0]
    pad_seq = pad_sequences([seq], maxlen=seq_len)
    if name == 'cnn':
        pad_seq = add_buf(pad_seq)
    model = map_item(name, models)
    probs = model.predict(pad_seq)[0]
    bound = min(len(text), seq_len)
    preds = np.argmax(probs, axis=1)[-bound:]
    if __name__ == '__main__':
        # When run as a script, pair each character with its label name.
        pairs = list()
        for word, pred in zip(text, preds):
            pairs.append((word, ind_labels[pred]))
        return pairs
    else:
        return preds
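# Usage sketch for the labelling predict(); the model key 'rnn' and the sample
# text are illustrative assumptions, not taken from the project.
if __name__ == '__main__':
    for word, label in predict('hello world', 'rnn'):
        print(word, label)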
def sample(decode, state, cand):
    # Stochastic decoding: at each step, sample the next word from the
    # renormalized top-`cand` probabilities until eos or max_len is reached.
    sent2 = bos
    next_word, count = '', 0
    while next_word != eos and count < max_len:
        count = count + 1
        sent2 = ' '.join([sent2, next_word])
        seq2 = word2ind.texts_to_sequences([sent2])[0]
        pad_seq2 = pad_sequences([seq2], maxlen=seq_len, padding='post', truncating='post')
        pad_seq2 = add_buf(pad_seq2, sym=False)
        step = min(count - 1, seq_len - 1)
        probs = decode.predict([pad_seq2, state])[0][step]
        max_probs, max_inds = check(probs, cand, keep_eos=True)
        if max_inds[0] == word_inds[eos]:
            next_word = eos
        else:
            max_probs = max_probs / np.sum(max_probs)
            next_word = ind_words[choice(max_inds, p=max_probs)]
    return sent2[3:]
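# The seq2seq predict() above selects a decoder through map_item(mode, funcs).
# A minimal sketch of that registry, assuming the mode names simply mirror the
# decoder function names (the actual keys in the project may differ):
funcs_sketch = {'search': search,
                'sample': sample}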