def train(epochs=20):
    """Train the caption model, saving a checkpoint after every epoch.

    Loads the training ids, their cleaned captions and photo features, and
    the pickled tokenizer, builds the model with ``caption_model`` and then
    fits it one epoch at a time. After epoch ``i`` (0-based) the model is
    written to ``model_<i>.h5`` in the current working directory.

    :param epochs: number of epochs to run (and checkpoints to write).
        Defaults to 20, the value that was previously hard-coded.
    """
    # load training dataset (6K)
    train_ids = util.load_ids('Flickr_8k.trainImages.txt')
    print('Dataset: %d' % len(train_ids))
    train_captions = util.load_clean_captions('descriptions.txt', train_ids)
    print('Captions: train number=%d' % len(train_captions))

    # photo features
    train_features = util.load_photo_features('features.pkl', train_ids)
    print('Photos: train=%d' % len(train_features))

    # prepare tokenizer; the context manager closes the file handle.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    # +1 because index 0 is not assigned to any word (presumably reserved
    # for padding -- TODO confirm against the tokenizer in use)
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)

    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)

    # define the model
    model = caption_model(vocab_size, max_len)

    # train the model, run epochs manually and save after each epoch
    steps = len(train_captions)
    for epoch in range(epochs):
        # a fresh data generator is created for every epoch
        generator = data_generator(train_captions, train_features, tokenizer, max_len)
        # fit for one epoch
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
        # save model
        model.save('model_' + str(epoch) + '.h5')
def evaluate_model_run():
    """Evaluate the saved model ``model_19.h5`` on the test split.

    Loads the test ids, their captions and photo features, and the pickled
    tokenizer, passes them to ``evaluate_model`` and prints the four BLEU
    scores it returns.
    """
    model = load_model('model_19.h5')

    # The test ids act as an index: they select which entries are loaded
    # from descriptions.txt and features.pkl.
    test_ids = util.load_ids('Flickr_8k.testImages.txt')
    test_caption = util.load_clean_captions('descriptions.txt', test_ids)
    test_features = util.load_photo_features('features.pkl', test_ids)

    # The context manager closes the tokenizer file handle.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    bleu1, bleu2, bleu3, bleu4 = evaluate_model(model, test_caption, test_features, tokenizer)
    print('BLEU-1: %f' % bleu1)
    print('BLEU-2: %f' % bleu2)
    print('BLEU-3: %f' % bleu3)
    print('BLEU-4: %f' % bleu4)
def evaluate_check():
    """Run ``evaluate_model`` for ``model_1.h5`` on the test split.

    Photo features are read from ``features3.pkl`` and captions from
    ``descriptions.txt``. Whatever ``evaluate_model`` returns is not used
    here; the function itself returns ``None``.
    """
    test_ids = util.load_ids('Flickr_8k.testImages.txt')
    test_features = util.load_photo_features('features3.pkl', test_ids)
    print("Photos: test=%d" % len(test_features))

    # load the model
    model_name = 'model_1.h5'
    model = load_model(model_name)

    # The context manager closes the tokenizer file handle.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    captions = util.load_clean_captions('descriptions.txt', test_ids)
    evaluate_model(model, captions, test_features, tokenizer)
def train(epochs=5):
    """Train the caption model, saving a checkpoint after every epoch.

    Loads the training ids, their cleaned captions and photo features, and
    the pickled tokenizer, builds the model with ``caption_model`` and then
    fits it one epoch at a time. After epoch ``i`` (0-based) the model is
    written to ``model/model_my<i>.h5``; the ``model`` directory is created
    if it does not exist yet.

    :param epochs: number of epochs to run (and checkpoints to write).
        Defaults to 5, the value that was previously hard-coded.
    """
    # load training dataset (6K)
    train_ids = util.load_ids('Flickr_8k.trainImages.txt')
    print('Dataset: %d' % len(train_ids))
    train_captions = util.load_clean_captions('descriptions.txt', train_ids)
    print('Captions: train number=%d' % len(train_captions))

    # photo features
    train_features = util.load_photo_features('features.pkl', train_ids)
    print('Photos: train=%d' % len(train_features))

    # prepare tokenizer; the context manager closes the file handle.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)

    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)

    # define the model
    model = caption_model(vocab_size, max_len)

    # Checkpoints go into ./model; make sure the directory exists so that
    # saving does not fail on a fresh checkout.
    checkpoint_dir = 'model'
    os.makedirs(checkpoint_dir, exist_ok=True)

    # train the model, run epochs manually and save after each epoch
    steps = len(train_captions)
    for epoch in range(epochs):
        # create the data generator
        generator = data_generator(train_captions, train_features, tokenizer, max_len)
        # Fit for one epoch.
        # Per the original author's notes, the generator yields (X, y) where
        # X holds two inputs -- the photo feature and the caption prefix --
        # and y is the next word to predict; this is why a model with two
        # inputs can consume it. The generator may be endless: the number of
        # batches drawn is bounded by epochs * steps_per_epoch.
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
        # save model
        model.save(checkpoint_dir + os.sep + 'model_my' + str(epoch) + '.h5')
def create_tokenizer():
    """
    Build a tokenizer from the training-set image names and their captions.

    The LSTM's inputs and outputs must be numbers, so a dictionary-like
    mapping between words and integers is needed; the tokenizer stores it.

    :return: the fitted tokenizer
    https://keras-cn.readthedocs.io/en/latest/legacy/preprocessing/text/#tokenizer
    """
    def _beside_current_path(file_name):
        # Same '<current_path><sep><file_name>' layout for both input files.
        return '{}{}{}'.format(current_path, os.sep, file_name)

    names_file = _beside_current_path('Flickr_8k.trainImages.txt')
    captions_file = _beside_current_path('descriptions.txt')

    image_names = util.load_image_names(names_file)
    captions_by_image = util.load_clean_captions(captions_file, image_names)

    # Flatten the captions into a list of texts and fit the vocabulary on it.
    fitted = Tokenizer()
    fitted.fit_on_texts(util.to_list(captions_by_image))
    return fitted
def evaluate_model_run(model_name):
    """Evaluate a saved model on the test split and print the result.

    :param model_name: path of the saved model file passed to ``load_model``.
    """
    # load test set
    filename = '../Flickr8k_text/Flickr_8k.testImages.txt'
    test_names = task3.load_image_names(filename)
    test_captions = util.load_clean_captions('../task3/descriptions.txt', test_names)

    # photo features
    test_features = util.load_photo_features('../task2/features.pkl', test_names)
    print('Photos: test=%d' % len(test_features))

    # load the model
    model = load_model(model_name)

    # The context manager closes the tokenizer file handle.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('../task3/tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    print(evaluate_model(model, test_captions, test_features, tokenizer))
def evaluate_model_run():
    """Evaluate the saved model ``model_19.h5`` on the test split.

    Loads the test ids, their captions and photo features, and the pickled
    tokenizer, passes them to ``evaluate_model`` and prints the four BLEU
    scores it returns.
    """
    # load test set
    test_ids_file = 'Flickr_8k.testImages.txt'
    test_ids = util.load_ids(test_ids_file)
    print('number of test images: %d' % len(test_ids))
    test_captions = util.load_clean_captions('descriptions.txt', test_ids)

    # photo features
    test_features = util.load_photo_features('features.pkl', test_ids)

    # load the model
    model_file = 'model_19.h5'
    model = load_model(model_file)

    # The context manager closes the tokenizer file handle.
    # NOTE(review): `load` is presumably pickle.load -- only load trusted files.
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    # evaluate model
    # NOTE(review): 40 is presumably the maximum caption length -- confirm
    # against evaluate_model's signature.
    bleu1, bleu2, bleu3, bleu4 = evaluate_model(model, test_captions, test_features, tokenizer, 40)
    print('BLEU-1: %f' % bleu1)
    print('BLEU-2: %f' % bleu2)
    print('BLEU-3: %f' % bleu3)
    print('BLEU-4: %f' % bleu4)
# # # adding embeddings to model # predictive_model.layers[2] # predictive_model.layers[2].set_weights([embedding_matrix]) # predictive_model.layers[2].trainable = False pass if __name__ == '__main__': # add_weights() filename = 'Flickr_8k.trainImages.txt' train = util.load_ids(filename) # 返回了一个{},包含了文件名(去除.jpg) des_path = r'E:/AI资源计算机视觉/JM07 - TXXY - CV2期/02.资料/homework-master-7fc833414b95225130c323c278230bc388af5c6b/homework1/task5/descriptions.txt' train_captions = util.load_clean_captions(des_path, train) # print('Captions: train number=%d' % len(train_captions)) # print(train_captions["3227594168_3351722aae"]) # ['startseq Two blonde ladies wearing sunglasses lounge on the grass with a dacshund . endseq', # 'startseq Two blonde young women hang out in the grass with a brown dog . endseq', # 'startseq Two blond women sit in grass with a small dog . endseq', # 'startseq Two women laying on grass with a dog . endseq', # 'startseq Two women on a grassy hill lit by the sun ; one looks at a dachshund , the other looks to the side . endseq'] # photo features feature_path = r"E:/AI资源计算机视觉/JM07 - TXXY - CV2期/02.资料/homework-master-7fc833414b95225130c323c278230bc388af5c6b/homework1/features.pkl" train_features = util.load_photo_features(feature_path, train) # print('Photos: train=%d' % len(train_features)) # print(len(train_features["3585117340_73e96b6173"][0])) # 4096 # prepare tokenizer tokenizer = load(open('tokenizer.pkl', 'rb'))