import random

import numpy as np

import preprocess
import train


def generate_sonnet_rhyme(num_states):
    """Generate a 14-line sonnet with an ABAB ABAB ABAB CC rhyme scheme,
    using an HMM trained on reversed lines so each line can be seeded
    with its rhyme word and generated backwards."""
    hmm = train.load(num_states, is_reversed=True)
    text = open('data/shakespeare.txt').read()
    obs, vocab, inv_vocab = preprocess.get_observations(text)
    rhyme_dict = preprocess.build_rhyme_dict(text, vocab)
    lengths = preprocess.get_lengths(obs)
    punctuation = preprocess.get_punctuation(text)

    samples = [None] * 14
    # Three quatrains: lines (0, 2) and (1, 3) of each quatrain share a rhyme.
    for quatrain in range(3):
        for line in range(2):
            couplet = random.sample(random.choice(rhyme_dict), 2)
            length = np.random.choice(list(lengths.keys()),
                                      p=list(lengths.values()))
            samples[quatrain * 4 + line] = generate_emission_seeded(
                hmm, length, couplet[0])
            samples[quatrain * 4 + line + 2] = generate_emission_seeded(
                hmm, length, couplet[1])
    # Closing couplet (sonnet lines 13 and 14).
    couplet = random.sample(random.choice(rhyme_dict), 2)
    samples[12] = generate_emission_seeded(hmm, length, couplet[0])
    samples[13] = generate_emission_seeded(hmm, length, couplet[1])

    # Lines were generated rhyme-word-first, so flip them back into reading order.
    for i in range(len(samples)):
        samples[i].reverse()
    return format_sonnet(samples, inv_vocab, punctuation)
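# `generate_emission_seeded` is used above but not defined in this file. The
# sketch below is a hypothetical implementation, assuming the trained HMM
# exposes numpy arrays `hmm.A_start` (initial state distribution), `hmm.A`
# (transitions) and `hmm.O` (emissions); because the model was trained on
# reversed lines, the rhyme word is emitted first and the caller reverses
# the line afterwards.
def generate_emission_seeded(hmm, length, seed_obs):
    A, O = np.asarray(hmm.A), np.asarray(hmm.O)
    # Choose the first hidden state in proportion to how likely each state
    # is to emit the seed (rhyme) word.
    start = np.asarray(hmm.A_start) * O[:, seed_obs]
    start = start / start.sum()
    state = np.random.choice(len(start), p=start)
    emission = [seed_obs]
    for _ in range(length - 1):
        state = np.random.choice(A.shape[1], p=A[state])
        emission.append(np.random.choice(O.shape[1], p=O[state]))
    return emission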
import numpy

import ModelLib

# The vocabularies (tags, rad2id, radical2id, pinyin2id, word2id), the test
# tensors (x1_test, x2_test, test_x, radical_*, pinyin_*, y_test) and the
# helpers creat_bool_x / get_lengths / img_embed are expected to be loaded
# at module level before this function is called.


def predict_BERT(para, feature="radical", use_bert=True):
    """Run the FGN tagger on the test set and return (predicted, gold)
    tag-name sequences, each trimmed to the sentence's unpadded length."""
    para['tag_num'] = len(tags)
    para['rad_vocab_size'] = len(rad2id.keys()) + 1
    para['radical_vocab_size'] = len(radical2id.keys()) + 1
    para['pinyin_vocab_size'] = len(pinyin2id.keys()) + 1
    para["word_num"] = len(word2id.keys()) + 1
    para["fea_embed"] = None
    bool_x_test = creat_bool_x(test_x)
    if "img" in feature:
        para["img_embed_weight"] = img_embed
        para["is_trainable"] = False
    model = ModelLib.FGN(para, feature=feature, use_bert=use_bert)
    # else:
    #     model = ModelLib.NORMAL_MODEL(para, feature=feature)
    model.load_weights(filepath=para["model_path"])

    # Select the model inputs that match the requested feature combination.
    if feature == "":
        pred_y = model.predict([x1_test, x2_test], batch_size=64, verbose=1)
    elif feature == "radical":
        # Keeps the module-level `radical_train` input of the original code;
        # `radical_test` is only used in the "img&radical" branch.
        pred_y = model.predict([x1_test, x2_test, radical_train],
                               batch_size=64, verbose=1)
    elif feature == "pinyin":
        pred_y = model.predict([x1_test, x2_test, pinyin_train],
                               batch_size=64, verbose=1)
    elif feature == "img":
        pred_y = model.predict([x1_test, x2_test, test_x, bool_x_test],
                               batch_size=64, verbose=1)
    elif feature == "img&radical":
        pred_y = model.predict(
            [x1_test, x2_test, test_x, bool_x_test, radical_test],
            batch_size=64, verbose=1)

    # Decode per-token tag distributions into tag names, dropping padding
    # positions (same post-processing as result_proess below).
    lengths = get_lengths(x1_test)
    tag_pred_y = []
    tag_val_y = []
    for i, y in enumerate(pred_y):
        y = [numpy.argmax(dim) for dim in y]
        # print(lengths[i])
        p_y = y[:lengths[i]]
        # print(p_y)
        v_y = y_test[i][:lengths[i]].flatten()
        # print(v_y)
        p_y = [tags[dim] for dim in p_y]
        v_y = [tags[dim] for dim in v_y]
        tag_pred_y.append(p_y)
        tag_val_y.append(v_y)
    return tag_pred_y, tag_val_y
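# `get_lengths` (used above and in `result_proess` / `predict_bert` below) is
# assumed to recover each sentence's unpadded length. A minimal sketch,
# assuming the input is a zero-padded matrix of token ids with 0 as the
# padding id:
def get_lengths(x):
    return [int(numpy.count_nonzero(row)) for row in numpy.asarray(x)]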
def generate_sonnet(num_states):
    """Generate a 14-line sonnet (no rhyme constraint) from a forward HMM."""
    hmm = train.load(num_states)
    text = open('data/shakespeare.txt').read()
    obs, vocab, inv_vocab = preprocess.get_observations(text)
    lengths = preprocess.get_lengths(obs)
    punctuation = preprocess.get_punctuation(text)

    samples = []
    for _ in range(14):
        # Draw each line's word count from the empirical length distribution.
        length = np.random.choice(list(lengths.keys()),
                                  p=list(lengths.values()))
        samples.append(generate_emission(hmm, length))
    return format_sonnet(samples, inv_vocab, punctuation)
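# Like the seeded variant above, `generate_emission` is not defined in this
# file; a minimal sketch under the same assumed HMM interface (numpy arrays
# `hmm.A_start`, `hmm.A`, `hmm.O`):
def generate_emission(hmm, length):
    A, O = np.asarray(hmm.A), np.asarray(hmm.O)
    # Start from the initial state distribution and walk the chain forward,
    # emitting one word per step.
    state = np.random.choice(len(hmm.A_start), p=np.asarray(hmm.A_start))
    emission = []
    for _ in range(length):
        emission.append(np.random.choice(O.shape[1], p=O[state]))
        state = np.random.choice(A.shape[1], p=A[state])
    return emission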
def result_proess(pred_y, x1_test, tags):
    """Convert raw per-token tag probabilities into (predicted, gold)
    tag-name sequences, trimmed to each sentence's unpadded length.

    Reads the gold labels from the module-level `y_test`."""
    lengths = get_lengths(x1_test)
    tag_pred_y = []
    tag_val_y = []
    for i, y in enumerate(pred_y):
        # argmax over the tag distribution at each token position
        y = [numpy.argmax(dim) for dim in y]
        # print(lengths[i])
        p_y = y[:lengths[i]]
        # print(p_y)
        v_y = y_test[i][:lengths[i]].flatten()
        # print(v_y)
        p_y = [tags[dim] for dim in p_y]
        v_y = [tags[dim] for dim in v_y]
        tag_pred_y.append(p_y)
        tag_val_y.append(v_y)
    return tag_pred_y, tag_val_y
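# The (predicted, gold) tag sequences returned above can be scored with any
# sequence-labelling metric. `evaluate_tags` is a hypothetical helper shown
# only as a usage sketch; it relies on the third-party `seqeval` package,
# which is not imported elsewhere in this repo.
def evaluate_tags(tag_val_y, tag_pred_y):
    from seqeval.metrics import classification_report, f1_score
    print("entity-level F1:", f1_score(tag_val_y, tag_pred_y))
    print(classification_report(tag_val_y, tag_pred_y))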
def predict_bert(para):
    """Run the plain BERT tagging model on the test file named in `para`
    and return (predicted, gold) tag-name sequences.

    Reads `val_x` / `val_y` and `tags` from module level."""
    para['tag_num'] = len(tags)
    model = ModelLib.BERT_MODEL(para)
    model.load_weights(filepath=para["model_path"])
    bert_val = load_path_bert(para["test_path"], sep=para["sep"])
    lengths = get_lengths(val_x)
    pred_y = model.predict(bert_val)

    tag_pred_y = []
    tag_val_y = []
    for i, y in enumerate(pred_y):
        y = [numpy.argmax(dim) for dim in y]
        # print(lengths[i])
        p_y = y[:lengths[i]]
        # print(p_y)
        v_y = val_y[i][:lengths[i]].flatten()
        # print(v_y)
        p_y = [tags[dim] for dim in p_y]
        v_y = [tags[dim] for dim in v_y]
        tag_pred_y.append(p_y)
        tag_val_y.append(v_y)
    return tag_pred_y, tag_val_y
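# Hypothetical invocation; the key names follow how `para` is read above,
# while the paths and separator are placeholder values.
if __name__ == "__main__":
    para = {
        "model_path": "checkpoints/bert_tagger.h5",  # assumed checkpoint path
        "test_path": "data/test.txt",                # assumed test file
        "sep": "\t",
    }
    tag_pred_y, tag_val_y = predict_bert(para)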