def evaluate_LSTM(line_tensor):
    # rebuild the model and restore the trained weights from disk:
    model = LSTM_Model()
    model.load_state_dict(torch.load('./model/LSTM_model.pkl'))
    hidden = model.init_hidden()
    # feed the name one letter at a time, carrying the hidden state forward:
    for i in range(line_tensor.size(0)):
        output, hidden = model(line_tensor[i], hidden)
    # the output after the last letter is the prediction for the whole name:
    return output
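# --- Hedged usage sketch (not from the original source). Assumes the standard
# PyTorch name-classification setup: `all_letters` is the alphabet string,
# `n_letters = len(all_letters)`, and names are encoded one-hot per letter. ---
import string
import torch

all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)

def line_to_tensor(line):
    # encode a name as a [len(line), 1, n_letters] one-hot tensor,
    # one time step per character:
    tensor = torch.zeros(len(line), 1, n_letters)
    for i, letter in enumerate(line):
        tensor[i][0][all_letters.find(letter)] = 1
    return tensor

# classify a single name with the restored LSTM; the model ends in
# log_softmax, so the predicted category is the argmax of the output:
output = evaluate_LSTM(line_to_tensor("Satoshi"))
pred_idx = output.topk(1)[1].item()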
def main():
    # create a config object:
    config = LSTM_Config()

    # get the pretrained embeddings matrix:
    GloVe_embeddings = cPickle.load(open("coco/data/embeddings_matrix", "rb"))
    GloVe_embeddings = GloVe_embeddings.astype(np.float32)

    # create an LSTM model object:
    model = LSTM_Model(config, GloVe_embeddings)

    # create the saver:
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # restore all model variables:
        params_dir = "weights/model-41"
        saver.restore(sess,
                "models/LSTMs/model_keep=0.75_batch=256_hidden_dim=400_embed_dim=300_layers=1/%s"
                % params_dir)

        # get the restored W_img and b_img:
        with tf.variable_scope("img_transform", reuse=True):
            W_img = tf.get_variable("W_img")
            b_img = tf.get_variable("b_img")
            W_img = sess.run(W_img)
            b_img = sess.run(b_img)

        transform_params = {}
        transform_params["W_img"] = W_img
        transform_params["b_img"] = b_img
        cPickle.dump(transform_params,
                open("coco/data/img_features_attention/transform_params/numpy_params",
                "wb"))
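# --- Hedged usage sketch (not from the original source). Shows how the dumped
# numpy params might later be applied to raw image features; the affine form
# features.dot(W_img) + b_img is an assumption based on the variable names. ---
import pickle  # cPickle under Python 2, as in the code above
import numpy as np

with open("coco/data/img_features_attention/transform_params/numpy_params",
        "rb") as f:
    transform_params = pickle.load(f)

def transform_img_features(features):
    # project raw image feature vectors into the model's embedding space:
    return features.dot(transform_params["W_img"]) + transform_params["b_img"]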
elif model_type in ["LSTM_attention", "GRU_attention"]: extract_img_features_attention(["coco/images/test/%s" % img_file_name], demo=True) img_features = cPickle.load( open("coco/data/img_features_attention/%d" % -1)) # initialize the model: if model_type == "GRU": config = GRU_Config() dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim), dtype=np.float32) model = GRU_Model(config, dummy_embeddings, mode="demo") elif model_type == "LSTM": config = LSTM_Config() dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim), dtype=np.float32) model = LSTM_Model(config, dummy_embeddings, mode="demo") elif model_type == "LSTM_attention": config = LSTM_attention_Config() dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim), dtype=np.float32) model = LSTM_attention_Model(config, dummy_embeddings, mode="demo") elif model_type == "GRU_attention": config = GRU_attention_Config() dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim), dtype=np.float32) model = GRU_attention_Model(config, dummy_embeddings, mode="demo") # create the saver: saver = tf.train.Saver() with tf.Session() as sess:
def evaluate_best_model(model_type, test_img_ids, test_img_id_2_feature_vector,
            vocabulary, train_captions):
    # initialize the model:
    if model_type == "GRU":
        config = GRU_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                dtype=np.float32)
        model = GRU_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "LSTM":
        config = LSTM_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                dtype=np.float32)
        model = LSTM_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "LSTM_attention":
        config = LSTM_attention_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                dtype=np.float32)
        model = LSTM_attention_Model(config, dummy_embeddings, mode="demo")
    elif model_type == "GRU_attention":
        config = GRU_attention_Config()
        dummy_embeddings = np.zeros((config.vocab_size, config.embed_dim),
                dtype=np.float32)
        model = GRU_attention_Model(config, dummy_embeddings, mode="demo")

    # create the saver:
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # restore the best model:
        if model_type == "GRU":
            saver.restore(sess, "models/GRUs/best_model/model")
        elif model_type == "LSTM":
            saver.restore(sess, "models/LSTMs/best_model/model")
        elif model_type == "LSTM_attention":
            saver.restore(sess, "models/LSTMs_attention/best_model/model")
        elif model_type == "GRU_attention":
            saver.restore(sess, "models/GRUs_attention/best_model/model")

        captions = []
        no_of_new_captions = 0
        no_of_old_captions = 0
        unique_words = []
        for img_id in test_img_ids:
            # generate a caption for the img:
            if model_type in ["LSTM", "GRU"]:
                img_id = int(img_id)
                img_features = test_img_id_2_feature_vector[img_id]
                img_caption = model.generate_img_caption(sess, img_features,
                        vocabulary)
            elif model_type in ["LSTM_attention", "GRU_attention"]:
                # get the img features from disk:
                img_features = cPickle.load(
                        open("coco/data/img_features_attention/%d" % img_id, "rb"))
                # generate a caption:
                img_caption, attention_maps = model.generate_img_caption(sess,
                        img_features, vocabulary)

            # save the generated caption together with the img id in the format
            # expected by the COCO evaluation script:
            caption_obj = {}
            caption_obj["image_id"] = img_id
            caption_obj["caption"] = img_caption
            captions.append(caption_obj)

            # check if the generated caption is new or appears in train:
            if img_caption in train_captions:
                no_of_old_captions += 1
            else:
                no_of_new_captions += 1

            # check if there are any words in the caption that the model hasn't
            # generated before:
            for word in img_caption.split(" "):
                if word not in unique_words:
                    unique_words.append(word)

        # save the captions as a json file (will be used by the eval script):
        captions_file = "coco/data/test_captions.json"
        with open(captions_file, "w") as file:
            json.dump(captions, file, sort_keys=True, indent=4)

        # evaluate the generated captions:
        results_dict = evaluate_captions(captions_file)

        # compute the ratio of new captions:
        new_captions_ratio = float(no_of_new_captions)/float(no_of_new_captions
                + no_of_old_captions)
        # get the number of unique words that the model generated:
        vocab_size = len(unique_words)
        print("vocab size is %d" % vocab_size)

        results_dict["new_captions_ratio"] = new_captions_ratio
        results_dict["vocab_size"] = vocab_size

        return results_dict
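# --- Hedged usage sketch (not from the original source); the pickle file names
# below are hypothetical stand-ins for wherever the test ids, feature vectors,
# vocabulary and train captions are actually stored. ---
test_img_ids = cPickle.load(open("coco/data/test_img_ids", "rb"))
test_img_id_2_feature_vector = cPickle.load(
        open("coco/data/test_img_id_2_feature_vector", "rb"))
vocabulary = cPickle.load(open("coco/data/vocabulary", "rb"))
train_captions = cPickle.load(open("coco/data/train_captions", "rb"))

results = evaluate_best_model("LSTM", test_img_ids,
        test_img_id_2_feature_vector, vocabulary, train_captions)
print("new captions ratio: %f" % results["new_captions_ratio"])
print("vocab size: %d" % results["vocab_size"])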
        type=str, required=False, help='Choose a model: RNN, LSTM or GRU')
args = parser.parse_args()

model_path = './model/' + args.model + '_model.pkl'
print(model_path)

# the network ends with log_softmax, so NLLLoss is used as the criterion:
criterion = nn.NLLLoss()
# set the learning rate:
learning_rate = 0.005

if args.model == 'RNN':
    model = RNN_Model()
elif args.model == 'LSTM':
    model = LSTM_Model()
else:
    model = GRU_Model()

if os.path.exists(model_path):
    model.load_state_dict(torch.load(model_path))

def train_rnn(category_tensor, line_tensor):
    """
    :param category_tensor: the category converted to a tensor, i.e. the target
        (one randomly sampled category)
    :param line_tensor: one name wrapped as a one-hot tensor (a name randomly
        sampled from the randomly chosen category)
    :return:
    """
    # each training batch is one complete name:
    #     line_tensor has shape [len(name), 1, n_letters]
    # the letters of the name are fed into the network one at a time:
    #     line_tensor[i] has shape [1, n_letters]
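# --- Hedged sketch (not from the original source) of how the training step
# could continue; assumes the model ends in log_softmax (hence the NLLLoss
# above) and that a plain manual SGD update with `learning_rate` is intended.
# `train_step` is a hypothetical name to avoid clashing with train_rnn. ---
def train_step(category_tensor, line_tensor):
    hidden = model.init_hidden()
    model.zero_grad()
    # feed the name one letter at a time; keep only the final output:
    for i in range(line_tensor.size(0)):
        output, hidden = model(line_tensor[i], hidden)
    loss = criterion(output, category_tensor)
    loss.backward()
    # manual SGD update on every parameter:
    for p in model.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)
    return output, loss.item()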