class Generator:
    """Endless batch generator pairing training images with one-hot text labels.

    Reads a label file mapping image filenames to transcriptions, then yields
    (images, targets) numpy batches forever via `examples_generator`.

    Note: BATCH_SIZE, max_txt_length, image_height, image_width, ImageUtil and
    Vocabulary are module-level names defined elsewhere in this file.
    """

    def __init__(self, folder_image, folder_label):
        self.folder_image = folder_image   # directory containing the images
        self.folder_label = folder_label   # path to the "<file>;<text>" label file
        self.batch_size = BATCH_SIZE
        self.max_txt_length = max_txt_length
        self.examples = []                 # list of (text, absolute image path)
        self.cur_index = 0                 # cursor into self.examples
        self.load_data()
        self.image_util = ImageUtil(image_height=image_height, image_width=image_width)
        self.vocab = Vocabulary()

    def load_data(self):
        """Parse the label file into self.examples as (text, image_path) tuples.

        Each usable line has the form "<image_file>;<transcription>". Lines
        without a ';' separator, or whose referenced image file does not exist
        on disk, are silently skipped.
        """
        with open(self.folder_label, 'r') as f:
            # Iterate the file lazily instead of readlines(): same lines,
            # no whole-file list in memory.
            for line in f:
                if ';' not in line:
                    continue
                image_file, txt = line.split(sep=';', maxsplit=1)
                image_file = os.path.abspath(os.path.join(self.folder_image, image_file))
                txt = txt.strip()
                if os.path.isfile(image_file):
                    self.examples.append((txt, image_file))

    def examples_generator(self):
        """Yield (images, targets) batches forever as numpy arrays.

        Shuffles the example list once up front, then cycles through it,
        wrapping the cursor back to 0 at the end of the list.
        """
        random.shuffle(self.examples)
        while True:
            images, target = [], []
            for _ in range(self.batch_size):
                # Fix: read the current example BEFORE advancing the cursor.
                # The original incremented first, which skipped examples[0]
                # on the initial pass. (Also dropped an unused encode_hidden
                # list the original allocated per batch.)
                txt, img_path = self.examples[self.cur_index]
                self.cur_index += 1
                if self.cur_index >= len(self.examples):
                    self.cur_index = 0
                images.append(self.image_util.load(img_path))
                target.append(self.vocab.one_hot_encode(txt))
            yield np.array(images), np.array(target)
def visual_attention(result, attention_plot):
    """Display each per-word attention map next to its predicted word.

    For every predicted word in `result`, reshapes the corresponding flat
    attention vector in `attention_plot` to (height, width) and shows it in
    an OpenCV window titled with the word; blocks on a key press per window.

    NOTE(review): `show_origin_image`, `height` and `width` are module-level
    names defined elsewhere in this file — presumably `height * width` equals
    the length of each attention row; confirm against the model's output.
    """
    len_result = len(result)
    for i in range(len_result):
        show_origin_image()
        # One flat attention vector per predicted word, reshaped to the
        # feature-map grid for display.
        temp_att = np.reshape(attention_plot[i], (height, width))
        cv2.imshow(f'predict word: {result[i]}', temp_att)
        cv2.waitKey()  # wait for a key press before showing the next word
    cv2.destroyAllWindows()


if __name__ == '__main__':
    # Inference script: load one image, then greedily decode up to
    # max_txt_length words while recording the attention map at each step.
    image_util = ImageUtil(image_height=image_height, image_width=image_width)
    img_tensor = image_util.load(args.image)
    # Add a leading batch dimension of 1 for the model.
    img_tensor = np.expand_dims(img_tensor, 0)
    result = ''
    # Initial decoder hidden state: zeros of size (1, decode_units).
    hidden = tf.zeros((1, decode_units))
    # One-hot vector for the first input token; index 1 is presumably the
    # start-of-sequence token in Vocabulary — TODO confirm.
    word_one_hot = np.zeros((1, vocab_size))
    word_one_hot[0][1] = 1.
    # One flattened (height * width) attention row per decoding step.
    attention_plot = np.zeros((max_txt_length, height * width))
    for i in range(max_txt_length):
        predict, hidden, attention_weights = model(word_one_hot, hidden, img_tensor)
        predict_id = tf.argmax(predict, axis=-1)
        attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()
        # NOTE(review): predict_id is not consumed within this visible chunk;
        # the loop body (appending to `result`, feeding the next one-hot input,
        # stop condition) appears to continue past the end of this view.