def train():
    """Train the caption model on the training split, checkpointing each epoch.

    Reads the image ids from ``Flickr_8k.trainImages.txt``, the matching
    captions from ``descriptions.txt``, the photo features from
    ``features.pkl`` and the tokenizer from ``tokenizer.pkl``. The model is
    fitted for 20 epochs, one ``fit_generator`` call per epoch, and written
    to ``model_<epoch>.h5`` after every epoch.
    """
    # load training dataset (6K)
    train_ids = util.load_ids('Flickr_8k.trainImages.txt')
    print('Dataset: %d' % len(train_ids))

    train_captions = util.load_clean_captions('descriptions.txt', train_ids)
    print('Captions: train number=%d' % len(train_captions))

    # photo features
    train_features = util.load_photo_features('features.pkl', train_ids)
    print('Photos: train=%d' % len(train_features))

    # prepare tokenizer; the context manager closes the file handle, which
    # the previous bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    # +1 on top of the word index size
    # (presumably because index 0 is reserved for padding -- TODO confirm)
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)

    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)

    # define the model
    model = caption_model(vocab_size, max_len)

    # train the model, run epochs manually and save after each epoch
    epochs = 20
    steps = len(train_captions)
    for epoch in range(epochs):
        # a fresh data generator is created for every epoch
        generator = data_generator(train_captions, train_features,
                                   tokenizer, max_len)
        # fit for one epoch
        # NOTE(review): fit_generator is deprecated in newer Keras releases
        # in favour of fit() -- confirm the installed version before upgrading.
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps,
                            verbose=1)
        # save model
        model.save('model_' + str(epoch) + '.h5')
def generate_caption_run():
    """Generate and print a caption for one hard-coded test image.

    Loads the test image ids, their photo features, the ``model_19.h5``
    model and the tokenizer, then captions image
    ``3596131692_91b8a05606`` with a maximum length argument of 40.
    """
    ids = util.load_ids('Flickr_8k.testImages.txt')
    features = util.load_photo_features('features.pkl', ids)
    nlp_model = load_model('model_19.h5')

    # the context manager closes the tokenizer file, which the previous
    # bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    caption = generate_caption(nlp_model, tokenizer,
                               features['3596131692_91b8a05606'], 40)
    print('+++++++++++ caption is: ', caption, '+++++++++++++++++++')
def evaluate_check():
    """Run ``evaluate_model`` on the test split using ``model_1.h5``.

    Uses the ``features3.pkl`` feature file. The value returned by
    ``evaluate_model`` is discarded, as in the original code.
    """
    test_ids = util.load_ids('Flickr_8k.testImages.txt')
    test_features = util.load_photo_features('features3.pkl', test_ids)
    print("Photos: test=%d" % len(test_features))

    # load the model
    model = load_model('model_1.h5')

    # the context manager closes the tokenizer file, which the previous
    # bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    captions = util.load_clean_captions('descriptions.txt', test_ids)
    evaluate_model(model, captions, test_features, tokenizer)
def evaluate_model_run():
    """Evaluate ``model_19.h5`` on the test split and print BLEU-1..4."""
    model = load_model('model_19.h5')

    # The test ids act as an index: they select which entries are loaded
    # from descriptions.txt and features.pkl.
    test_ids = util.load_ids('Flickr_8k.testImages.txt')
    test_caption = util.load_clean_captions('descriptions.txt', test_ids)
    test_features = util.load_photo_features('features.pkl', test_ids)

    # the context manager closes the tokenizer file, which the previous
    # bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    bleu1, bleu2, bleu3, bleu4 = evaluate_model(
        model, test_caption, test_features, tokenizer)
    print('BLEU-1: %f' % bleu1)
    print('BLEU-2: %f' % bleu2)
    print('BLEU-3: %f' % bleu3)
    print('BLEU-4: %f' % bleu4)
def train():
    """Train the caption model for 5 epochs, checkpointing into ``model/``.

    Reads the image ids from ``Flickr_8k.trainImages.txt``, the matching
    captions from ``descriptions.txt``, the photo features from
    ``features.pkl`` and the tokenizer from ``tokenizer.pkl``. After each
    epoch the model is saved as ``model/model_my<epoch>.h5``.
    """
    # load training dataset (6K)
    train_ids = util.load_ids('Flickr_8k.trainImages.txt')
    print('Dataset: %d' % len(train_ids))

    train_captions = util.load_clean_captions('descriptions.txt', train_ids)
    print('Captions: train number=%d' % len(train_captions))

    # photo features
    train_features = util.load_photo_features('features.pkl', train_ids)
    print('Photos: train=%d' % len(train_features))

    # prepare tokenizer; the context manager closes the file handle, which
    # the previous bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)

    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)

    # define the model
    model = caption_model(vocab_size, max_len)

    # Create the checkpoint directory up front so a missing folder does not
    # make model.save() fail after a whole epoch of training.
    checkpoint_dir = 'model'
    os.makedirs(checkpoint_dir, exist_ok=True)

    # train the model, run epochs manually and save after each epoch
    epochs = 5
    steps = len(train_captions)
    for epoch in range(epochs):
        # create the data generator
        generator = data_generator(train_captions, train_features,
                                   tokenizer, max_len)
        # Fit for one epoch.
        # Author's notes on the generator: each item it yields has two
        # parts. The first is X, itself a pair (photo feature, caption
        # prefix); the second is Y, the next word to predict. That is why
        # three values (feature, prefix, next word) feed a model with only
        # two inputs: the first two go into the model, the third is the
        # response variable.
        # The generator is not consumed forever: epochs and steps_per_epoch
        # bound the number of batches drawn to epochs * steps_per_epoch.
        # NOTE(review): fit_generator is deprecated in newer Keras releases
        # in favour of fit() -- confirm the installed version before upgrading.
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps,
                            verbose=1)
        # save model
        model.save(checkpoint_dir + os.sep + 'model_my' + str(epoch) + '.h5')
def generate_caption_run():
    """Generate, print and return a caption for one hard-coded test image.

    Returns:
        The value produced by ``generate_caption`` for image
        ``3596131692_91b8a05606`` using ``model_19.h5``.
    """
    # load test set
    test_ids = util.load_ids("Flickr_8k.testImages.txt")

    # photo features
    test_features = util.load_photo_features('features.pkl', test_ids)
    print('Photos: test=%d' % len(test_features))

    # load the model
    model = load_model('model_19.h5')

    # the context manager closes the tokenizer file, which the previous
    # bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    caption = generate_caption(model, tokenizer,
                               test_features['3596131692_91b8a05606'], 40)
    print('Generated caption is:' + caption)
    return caption
def evalueateModel(img_path):
    """Caption the test image at *img_path* and display it with the caption.

    The feature key is the file name of *img_path* (text after the last
    ``/``) up to its first dot. The image is shown with matplotlib, using
    the generated caption minus its first 9 and last 6 characters as the
    x-axis label.

    Args:
        img_path: ``/``-separated path to an image whose id is present in
            the test features.

    Returns:
        The untrimmed caption returned by ``generate_caption``.
    """
    # load the test image ids
    test_ids = util.load_ids('Flickr_8k.testImages.txt')

    # photo features
    test_features = util.load_photo_features('features.pkl', test_ids)
    print("Photos :test =%d" % len(test_features))

    model = load_model('model_8.h5')

    # the context manager closes the tokenizer file, which the previous
    # bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    image_id = img_path.split("/")[-1].split(".")[0]
    expected = generate_caption(model, tokenizer, test_features[image_id], 40)

    img = Image.open(img_path)
    plt.imshow(img)
    # drop the first 9 and last 6 characters
    # (presumably the 'startseq ' / 'endseq' markers -- TODO confirm)
    plt.xlabel(expected[9:-6], fontsize=15, color='red')
    plt.show()
    return expected
def evaluate_model_run():
    """Evaluate ``model_19.h5`` on the test split and print BLEU-1..4.

    ``evaluate_model`` is called with 40 as its final argument.
    """
    # load test set
    test_ids = util.load_ids('Flickr_8k.testImages.txt')
    print('number of test images: %d' % len(test_ids))
    test_captions = util.load_clean_captions('descriptions.txt', test_ids)

    # photo features
    test_features = util.load_photo_features('features.pkl', test_ids)

    # load the model
    model = load_model('model_19.h5')

    # the context manager closes the tokenizer file, which the previous
    # bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    # evaluate model
    bleu1, bleu2, bleu3, bleu4 = evaluate_model(
        model, test_captions, test_features, tokenizer, 40)
    print('BLEU-1: %f' % bleu1)
    print('BLEU-2: %f' % bleu2)
    print('BLEU-3: %f' % bleu3)
    print('BLEU-4: %f' % bleu4)
def generate_caption_check(img_path):
    """Caption the test image at *img_path* with ``model_1.h5`` and show it.

    Uses the ``features3.pkl`` feature file. The feature key is the file
    name of *img_path* (text after the last ``/``) up to its first dot.

    Args:
        img_path: ``/``-separated path to an image whose id is present in
            the test features.

    Returns:
        The untrimmed caption returned by ``generate_caption``.
    """
    test_ids = util.load_ids('Flickr_8k.testImages.txt')
    test_features = util.load_photo_features('features3.pkl', test_ids)
    print("Photos: test=%d" % len(test_features))

    # load the model
    model = load_model('model_1.h5')

    # the context manager closes the tokenizer file, which the previous
    # bare open() call leaked
    with open('tokenizer.pkl', 'rb') as tokenizer_file:
        tokenizer = load(tokenizer_file)

    # Take the LAST path component: the previous index [1] only worked for
    # paths with exactly one directory ("dir/file.jpg") and picked a
    # directory name for anything deeper.
    image_id = img_path.split("/")[-1].split(".")[0]
    expected = generate_caption(model, tokenizer, test_features[image_id], 40)

    img = Image.open(img_path)
    plt.imshow(img)
    # drop the first 9 and last 6 characters
    # (presumably the 'startseq ' / 'endseq' markers -- TODO confirm)
    plt.xlabel(expected[9:-6], fontsize=15, color="red")
    plt.show()
    return expected
# doc = nlp(word) # embedding_vector = np.array(doc.vector) # embedding_matrix[index] = embedding_vector # # # adding embeddings to model # predictive_model.layers[2] # predictive_model.layers[2].set_weights([embedding_matrix]) # predictive_model.layers[2].trainable = False pass if __name__ == '__main__': # add_weights() filename = 'Flickr_8k.trainImages.txt' train = util.load_ids(filename) # 返回了一个{},包含了文件名(去除.jpg) des_path = r'E:/AI资源计算机视觉/JM07 - TXXY - CV2期/02.资料/homework-master-7fc833414b95225130c323c278230bc388af5c6b/homework1/task5/descriptions.txt' train_captions = util.load_clean_captions(des_path, train) # print('Captions: train number=%d' % len(train_captions)) # print(train_captions["3227594168_3351722aae"]) # ['startseq Two blonde ladies wearing sunglasses lounge on the grass with a dacshund . endseq', # 'startseq Two blonde young women hang out in the grass with a brown dog . endseq', # 'startseq Two blond women sit in grass with a small dog . endseq', # 'startseq Two women laying on grass with a dog . endseq', # 'startseq Two women on a grassy hill lit by the sun ; one looks at a dachshund , the other looks to the side . endseq'] # photo features feature_path = r"E:/AI资源计算机视觉/JM07 - TXXY - CV2期/02.资料/homework-master-7fc833414b95225130c323c278230bc388af5c6b/homework1/features.pkl" train_features = util.load_photo_features(feature_path, train) # print('Photos: train=%d' % len(train_features)) # print(len(train_features["3585117340_73e96b6173"][0])) # 4096