Esempio n. 1
0
class Generator:
    """Endless batch source built from an image folder and a label file.

    Every usable line of the label file has the form
    ``<image file name>;<text>``. The line is split at the first ``;`` only,
    so the text itself may contain further semicolons. Entries whose image
    file does not exist on disk are dropped while loading.
    """

    def __init__(self, folder_image, folder_label):
        """Store the paths, load the examples and build the helpers.

        Args:
            folder_image: Directory the image file names are joined onto.
            folder_label: Path that is opened and read as the label file
                (despite the name it is used as a file, not a folder).
        """
        self.folder_image = folder_image
        self.folder_label = folder_label
        self.batch_size = BATCH_SIZE
        self.max_txt_length = max_txt_length
        self.examples = []
        self.cur_index = 0
        self.load_data()
        self.image_util = ImageUtil(image_height=image_height, image_width=image_width)
        self.vocab = Vocabulary()

    def load_data(self):
        """Append ``(text, absolute image path)`` pairs to ``self.examples``.

        Lines without a ``;`` separator and lines whose image file is missing
        are skipped silently.
        """
        # Iterate the file object directly so the label file is streamed
        # instead of being read into memory as a whole.
        with open(self.folder_label, 'r') as f:
            for line in f:
                image_name, separator, txt = line.partition(';')
                if not separator:
                    continue
                image_file = os.path.abspath(
                    os.path.join(self.folder_image, image_name))
                if os.path.isfile(image_file):
                    self.examples.append((txt.strip(), image_file))

    def examples_generator(self):
        """Yield ``(images, targets)`` batches forever.

        The examples are shuffled once when iteration starts and are then
        walked cyclically; ``self.cur_index`` holds the position of the next
        example to serve and wraps to 0 at the end of the list.

        Yields:
            A tuple ``(np.array(images), np.array(targets))`` containing
            ``self.batch_size`` loaded images and their encoded texts.

        Raises:
            ValueError: If no examples were loaded.
        """
        if not self.examples:
            raise ValueError(
                'no examples loaded: check the label file and image folder')

        random.shuffle(self.examples)
        while True:
            images, target = [], []
            for _ in range(self.batch_size):
                # Wrap before reading and advance afterwards, so that the
                # example at index 0 is served on the very first pass too.
                if self.cur_index >= len(self.examples):
                    self.cur_index = 0
                txt, img_path = self.examples[self.cur_index]
                self.cur_index += 1

                images.append(self.image_util.load(img_path))
                target.append(self.vocab.one_hot_encode(txt))
            yield np.array(images), np.array(target)
Esempio n. 2
0

def visual_attention(result, attention_plot):
    """Display one attention map per entry of ``result``, one at a time.

    For every index ``i`` of ``result`` the original image is shown through
    ``show_origin_image()``, ``attention_plot[i]`` is reshaped to
    ``(height, width)`` and displayed in a window titled with ``result[i]``.
    The function then waits for a key press and closes all windows before
    moving on to the next entry.
    """
    i = 0
    while i < len(result):
        show_origin_image()
        attention_map = np.reshape(attention_plot[i], (height, width))

        window_title = f'predict word: {result[i]}'
        cv2.imshow(window_title, attention_map)
        # Block until a key is pressed, then clear the screen for the next map.
        cv2.waitKey()
        cv2.destroyAllWindows()
        i += 1


if __name__ == '__main__':
    image_util = ImageUtil(image_height=image_height, image_width=image_width)
    img_tensor = image_util.load(args.image)
    img_tensor = np.expand_dims(img_tensor, 0)

    result = ''
    hidden = tf.zeros((1, decode_units))
    word_one_hot = np.zeros((1, vocab_size))
    word_one_hot[0][1] = 1.

    attention_plot = np.zeros((max_txt_length, height * width))

    for i in range(max_txt_length):
        predict, hidden, attention_weights = model(word_one_hot, hidden,
                                                   img_tensor)
        predict_id = tf.argmax(predict, axis=-1)

        attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()