def get_training_stats(model_id, model, one_hot_words, one_hot_dictionary):
    """Evaluate ``model`` on the training sentences and return loss and METEOR.

    Reads ``sents_train_lc_nopunc.txt`` from ``sentence_path``. Each non-blank
    line is expected to be ``<id>\\t<sentence>``. For every id the matching
    feature array is loaded from
    ``feature_path/<vis_model_id>/<vis_layer_id>/npy/<id>.npy``.

    Args:
        model_id: Identifier passed to ``rnn_models`` to resolve the visual
            model/layer ids and the maximum feature/sentence lengths.
        model: Model object exposing ``evaluate`` and ``predict_classes``.
        one_hot_words: Mapping from a word to its target encoding; indexed
            with ``[]``, so an unknown word raises. Must contain ``'<eos>'``.
        one_hot_dictionary: Mapping from a predicted class to its word.

    Returns:
        A ``(loss, meteor)`` tuple, where ``loss`` is whatever
        ``model.evaluate`` returns and ``meteor`` is
        ``score(ref, hypo)["METEOR"]``.
    """
    # The third returned value is not needed here.
    vis_model_id, vis_layer_id, _fs = rnn_models.get_vis_model_from_id(model_id)
    max_feat_len, max_sent_len = rnn_models.get_max_lengths(model_id)
    feature_dir = feature_path + vis_model_id + '/' + vis_layer_id + '/npy/'

    ref = {}
    hypo = {}

    # Transform sentences into one hot vector sentences
    with open(sentence_path + 'sents_train_lc_nopunc.txt') as f:
        lines = f.readlines()
    # IDS, X and Y are all built from the same shuffled list, so the
    # id/feature/target pairing is unaffected by the shuffle.
    random.shuffle(lines)

    eos_code = one_hot_words['<eos>']
    IDS = []
    X = []
    Y = []
    for line in lines:
        stripped = line.rstrip()
        if not stripped:
            # Blank lines (e.g. a trailing newline) carry no sample.
            continue
        # Parse the record once instead of re-splitting for every field.
        fields = stripped.split('\t')
        sample_id, sentence = fields[0], fields[1]

        IDS.append(sample_id)
        ref[sample_id] = [sentence, sentence]  # need at least two ref for bleu
        X.append(np.load(feature_dir + sample_id + '.npy'))

        target = [one_hot_words[word] for word in sentence.split()]
        target.append(eos_code)
        Y.append(target)

    X = pad_sequences(X, maxlen=max_feat_len)
    Y = pad_sequences(Y, maxlen=max_sent_len)

    loss = model.evaluate(X, Y, batch_size=64, verbose=1)
    # NOTE(review): predict_classes exists only on older Keras Sequential
    # models (removed in TensorFlow 2.6) - confirm the pinned Keras version.
    preds = model.predict_classes(X, batch_size=64, verbose=1)
    print(loss)

    for i, sample_id in enumerate(IDS):
        words = []
        for code in preds[i]:
            token = one_hot_dictionary[code]
            if token == '<eos>':
                break
            words.append(token)
        # Every word is followed by a single space (including the last one)
        # to keep the hypothesis strings identical to the previous format.
        hypo[sample_id] = [''.join(token + ' ' for token in words)]

    return loss, score(ref, hypo)["METEOR"]
def test(type, model, one_hot_words, one_hot_dictionary):
    """Evaluate ``model`` on one sentence split and return loss and scores.

    Reads ``sents_<type>_lc_nopunc.txt`` from ``sentence_path``. Each
    non-blank line is expected to be ``<id>\\t<sentence>``. For every id the
    matching feature array is loaded from
    ``feature_path/<vis_model_id>/<vis_layer_id>/npy/<id>.npy``.

    NOTE(review): ``vis_model_id``, ``vis_layer_id``, ``max_feat_len`` and
    ``max_sent_len`` are read here but not defined in this function; they
    must exist at module scope before this is called - confirm.

    Args:
        type: Split name inserted into the sentence file name. The name
            shadows the builtin but is kept for caller compatibility.
        model: Model object exposing ``evaluate`` and ``predict_classes``.
        one_hot_words: Mapping from a word to its target encoding; indexed
            with ``[]``, so an unknown word raises. Must contain ``'<eos>'``.
        one_hot_dictionary: Mapping from a predicted class to its word.

    Returns:
        A ``(loss, scores, ref, hypo)`` tuple: ``loss`` is whatever
        ``model.evaluate`` returns, ``scores`` is ``score(ref, hypo)``,
        ``ref`` maps each id to its reference sentence (listed twice) and
        ``hypo`` maps each id to a one-element list with the decoded
        prediction.
    """
    feature_dir = feature_path + vis_model_id + '/' + vis_layer_id + '/npy/'

    ref = {}
    hypo = {}

    # Transform sentences into one hot vector sentences
    with open(sentence_path + 'sents_'+type+'_lc_nopunc.txt') as f:
        lines = f.readlines()

    eos_code = one_hot_words['<eos>']
    IDS = []
    X = []
    Y = []
    for line in lines:
        stripped = line.rstrip()
        if not stripped:
            # Blank lines (e.g. a trailing newline) carry no sample.
            continue
        # Parse the record once instead of re-splitting for every field.
        fields = stripped.split('\t')
        sample_id, sentence = fields[0], fields[1]

        IDS.append(sample_id)
        # The reference is duplicated so each id has two reference entries.
        ref[sample_id] = [sentence, sentence]
        X.append(np.load(feature_dir + sample_id + '.npy'))

        target = [one_hot_words[word] for word in sentence.split()]
        target.append(eos_code)
        Y.append(target)

    X = pad_sequences(X, maxlen=max_feat_len)
    Y = pad_sequences(Y, maxlen=max_sent_len)

    loss = model.evaluate(X, Y, batch_size=64, verbose=1)
    # NOTE(review): predict_classes exists only on older Keras Sequential
    # models (removed in TensorFlow 2.6) - confirm the pinned Keras version.
    preds = model.predict_classes(X, batch_size=64, verbose=1)

    for i, sample_id in enumerate(IDS):
        words = []
        for code in preds[i]:
            token = one_hot_dictionary[code]
            if token == '<eos>':
                break
            words.append(token)
        # Every word is followed by a single space (including the last one)
        # to keep the hypothesis strings identical to the previous format.
        hypo[sample_id] = [''.join(token + ' ' for token in words)]

    return loss, score(ref, hypo), ref, hypo