Python ImageLoader.load_images Exemples, utils.misc.ImageLoader.load_images Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : baseModel.py Projet : audreycui/Show_and_Tell

class BaseModel(object):
    def __init__(self, config):
        """Store the configuration, create the helpers and build the model.

        Args:
            config: Configuration object. ``phase`` and ``train_cnn`` are
                read here; the whole object is also passed on to ``NN``.
        """
        self.config = config

        # Training mode is decided solely by the configured phase.
        self.is_train = bool(config.phase == 'train')
        self.train_cnn = self.is_train and config.train_cnn

        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        # Subclass-specific graph construction happens last, once every
        # attribute above is available.
        self.build()

    def get_imagefeatures(self, image_files, batch_size):
        return self.image_loader.extract_features_vgg19(self.trained_model, image_files, batch_size)

    def build(self):
        """Create the pretrained networks used by this model.

        Sets ``self.trained_model`` (Keras VGG19 with ImageNet weights, cut
        at its ``fc2`` layer) and three MXNet modules, ``object_model``,
        ``sentiment_model`` and ``scene_model``, whose parameters are read
        from ``./img2poem/model``.
        """
        # Pretrained VGG19, exposed up to the fc2 layer for image features.
        vgg19 = VGG19(weights='imagenet')
        vgg_input = vgg19.input
        fc2_output = vgg19.get_layer('fc2').output
        self.trained_model = Model(input=vgg_input, output=fc2_output)

        # Object model: default VGG symbol from get_mod().
        self.object_model = self.get_mod()
        self.object_model.load_params('./img2poem/model/object.params')

        # Sentiment model: its own symbol, requested with img_len=227.
        sentiment_sym = img2poem.symbol_sentiment.get_sym()
        self.sentiment_model = self.get_mod(sym=sentiment_sym, img_len=227)
        self.sentiment_model.load_params('./img2poem/model/Sentiment.params')

        # Scene model: same default symbol as the object model.
        self.scene_model = self.get_mod()
        self.scene_model.load_params('./img2poem/model/scene.params')

    def get_mod(self, output_name = 'relu7_output', sym = None, img_len = 224):
        """Create an MXNet module bound for inference on one square image.

        Args:
            output_name: Name of the internal VGG output to expose. Only
                used when ``sym`` is None.
            sym: Symbol to wrap. When None, a VGG symbol with the block
                layout below is built and cut at ``output_name``.
            img_len: Side length of the square input the module is bound to.

        Returns:
            An ``mx.module.Module`` on the CPU context, bound with
            ``for_training=False`` and no label inputs.
        """
        if sym is None:
            vgg = VGG()
            sym = vgg.get_symbol(num_classes = 1000,
                                 blocks = [(2, 64),
                                           (2, 128),
                                           (3, 256),
                                           (3, 512),
                                           (3, 512)])
            # Keep only the sub-graph that ends at the requested output.
            sym = sym.get_internals()[output_name]

        mod = mx.module.Module(
                context = mx.cpu(),
                symbol = sym,
                data_names = ("data", ),
                label_names = ()
        )

        # Fix: the input shape previously hard-coded 224x224 and silently
        # ignored img_len (the sentiment model asks for 227).
        mod.bind(data_shapes = [("data", (1, 3, img_len, img_len))],
                 for_training = False)

        return mod

    def train(self, sess, train_data):
        raise NotImplementedError()

    def eval(self, sess, eval_gt_coco, eval_data, vocabulary):
        """ Evaluate the model using the COCO val2014 data.

        Captions every batch of ``eval_data``, writes the captions as JSON to
        ``config.eval_result_file`` and scores them with ``COCOEvalCap``.

        Args:
            sess: Session used to run ``self.predictions`` and ``self.probs``.
            eval_gt_coco: Ground-truth COCO object providing ``loadRes``.
            eval_data: Dataset exposing ``num_batches``, ``batch_size``,
                ``fake_count``, ``image_ids`` and ``next_batch()``.
            vocabulary: Object whose ``get_sentence`` maps word indexes to a
                caption.
        """
        print("Evaluating the model ...")
        config = self.config

        # makedirs(exist_ok=True) also creates missing parents and does not
        # fail when the directory already exists.
        os.makedirs(config.eval_result_dir, exist_ok=True)

        results = []
        idx = 0  # running position in eval_data.image_ids across batches
        for k in tqdm(list(range(eval_data.num_batches)), desc='batch'):
            batch = eval_data.next_batch()
            images = self.image_loader.load_images(batch)
            # The probabilities are fetched as before but not used here.
            caption_data, _ = sess.run([self.predictions, self.probs],
                                       feed_dict={self.images: images})

            # The last batch skips its trailing fake_count entries
            # (presumably padding added to fill the batch).
            is_last_batch = k >= eval_data.num_batches - 1
            fake_cnt = eval_data.fake_count if is_last_batch else 0
            for i in range(eval_data.batch_size - fake_cnt):
                # self.predictions holds word indexes; turn them into words.
                word_idxs = caption_data[i]
                caption = str(vocabulary.get_sentence(word_idxs))
                results.append({'image_id': int(eval_data.image_ids[idx]),
                                'caption': caption})
                idx += 1

                # Save the result in an image file, if requested
                if config.save_eval_result_as_image:
                    image_file = batch[i]
                    image_name = os.path.splitext(
                        os.path.basename(image_file))[0]
                    img = mpimg.imread(image_file)
                    plt.imshow(img)
                    plt.axis('off')
                    plt.title(caption)
                    plt.savefig(os.path.join(config.eval_result_dir,
                                             image_name+'_result.jpg'))
                    # Fix: close the figure so images do not stack on the
                    # same axes and figure memory is released.
                    plt.close()

        # Fix: the context manager closes the file even if dumping fails.
        with open(config.eval_result_file, 'w') as fp:
            json.dump(results, fp)

        # Evaluate these captions
        eval_result_coco = eval_gt_coco.loadRes(config.eval_result_file)
        scorer = COCOEvalCap(eval_gt_coco, eval_result_coco)
        scorer.evaluate()
        print("Evaluation complete.")

    def test(self, sess, test_data, vocabulary):
        """ Test the model using any given images.

        Captions every batch of ``test_data``, saves each image with its
        caption as the title into ``config.test_result_dir`` and writes a CSV
        of files, captions and probabilities to ``config.test_result_file``.

        Args:
            sess: Session used to run ``self.predictions`` and ``self.probs``.
            test_data: Dataset exposing ``num_batches``, ``batch_size``,
                ``fake_count``, ``image_files`` and ``next_batch()``.
            vocabulary: Object whose ``get_sentence`` maps word indexes to a
                caption.
        """
        print("Testing the model ...")
        config = self.config

        # makedirs(exist_ok=True) also creates missing parents and does not
        # fail when the directory already exists.
        os.makedirs(config.test_result_dir, exist_ok=True)

        captions = []
        scores = []

        # Generate the captions for the images
        for k in tqdm(list(range(test_data.num_batches)), desc='path'):
            batch = test_data.next_batch()
            images = self.image_loader.load_images(batch)
            caption_data, scores_data = sess.run(
                [self.predictions, self.probs],
                feed_dict={self.images: images})

            # The last batch skips its trailing fake_count entries
            # (presumably padding added to fill the batch).
            is_last_batch = k >= test_data.num_batches - 1
            fake_cnt = test_data.fake_count if is_last_batch else 0
            for i in range(test_data.batch_size - fake_cnt):
                # self.predictions holds word indexes; turn them into words.
                caption = vocabulary.get_sentence(caption_data[i])
                print(caption)
                captions.append(caption)
                scores.append(scores_data[i])

                # Save the result in an image file
                image_file = batch[i]
                image_name = os.path.splitext(
                    os.path.basename(image_file))[0]
                img = mpimg.imread(image_file)
                plt.imshow(img)
                plt.axis('off')
                plt.title(caption)
                plt.savefig(os.path.join(config.test_result_dir,
                                         image_name+'_result.jpg'))
                # Fix: close the figure so images do not stack on the same
                # axes and figure memory is released.
                plt.close()

        # Save the captions to a file
        results = pd.DataFrame({'image_files':test_data.image_files,
                                'caption':captions,
                                'prob':scores})
        results.to_csv(config.test_result_file)
        print("Testing complete.")

    def save(self):
        """ Save the model.

        Writes every global variable, keyed by variable name, to
        ``<save_dir>/<global_step>.npy`` and pickles a copy of the config
        (with ``global_step`` added) to ``<save_dir>/config.pickle``.
        Variables are read with ``eval()``, so a default session is needed.
        """
        config = self.config
        # Evaluate the step once so the file name and the pickled config
        # always carry the same value.
        global_step = self.global_step.eval()
        data = {v.name: v.eval() for v in tf.global_variables()}
        save_path = os.path.join(config.save_dir, str(global_step))

        print((" Saving the model to %s..." % (save_path+".npy")))
        np.save(save_path, data)

        config_ = copy.copy(config)
        config_.global_step = global_step
        # Fix: the context manager closes the file even if pickling fails.
        with open(os.path.join(config.save_dir, "config.pickle"), "wb") as info_file:
            pickle.dump(config_, info_file)
        print("Model saved.")

    def load(self, sess, model_file=None):
        """ Load the model.

        Args:
            sess: Session in which the assign ops are run.
            model_file: Path of the ``.npy`` checkpoint. When None, the path
                is rebuilt from ``<save_dir>/config.pickle``.

        Variables whose name is not in the checkpoint are left untouched.
        """
        config = self.config
        if model_file is not None:
            save_path = model_file
        else:
            info_path = os.path.join(config.save_dir, "config.pickle")
            # NOTE: unpickling can execute arbitrary code; only load
            # checkpoints from a trusted source.
            with open(info_path, "rb") as info_file:
                config = pickle.load(info_file)
            save_path = os.path.join(config.save_dir,
                                     str(config.global_step)+".npy")

        print("Loading the model from %s..." %save_path)
        # Fix: the checkpoint is a pickled dict inside a 0-d object array.
        # NumPy >= 1.16.3 refuses to load it unless allow_pickle=True.
        data_dict = np.load(save_path, allow_pickle=True).item()
        count = 0
        for v in tqdm(tf.global_variables()):
            if v.name in data_dict:
                sess.run(v.assign(data_dict[v.name]))
                count += 1
        print("%d tensors loaded." %count)

    def load_cnn(self, session, data_path, ignore_missing=True):
        """ Load a pretrained CNN model.

        Args:
            session: Session in which the assign ops are run.
            data_path: File read with ``np.load``; the result must provide
                ``keys()`` and item access by parameter name.
            ignore_missing: When True (default), parameters without a
                matching variable are reported and skipped. When False, the
                ``ValueError`` from ``tf.get_variable`` is re-raised.

        Raises:
            ValueError: If ``conv1_1/conv1_1_W`` does not exist, or a
                parameter has no variable and ``ignore_missing`` is False.
        """
        print("All variables present...")
        # tf.global_variables is the non-deprecated name of
        # tf.all_variables and matches save()/load().
        for var in tf.global_variables():
            print(var)
        # Sanity check kept from the original: with reuse=True this raises
        # if the first convolution kernel has not been created.
        with tf.variable_scope('conv1_1',reuse = True):
            tf.get_variable('conv1_1_W')

        print("Loading the CNN from %s..." %data_path)
        data_dict = np.load(data_path,encoding='latin1')
        count = 0
        for param_name in tqdm(data_dict.keys()):
            # The scope name is the parameter name minus its last two
            # characters (presumably a "_W" / "_b" suffix).
            op_name = param_name[:-2]
            print(param_name)
            with tf.variable_scope(op_name, reuse = True):
                try:
                    var = tf.get_variable(param_name)
                    session.run(var.assign(data_dict[param_name]))
                    count += 1
                except ValueError:
                    print("No such variable")
                    # Fix: ignore_missing was accepted but never honoured.
                    if not ignore_missing:
                        raise

        print("%d tensors loaded." %count)

Exemple #2

0

Afficher le fichier

class BaseModel(object):
    def __init__(self, config):
        """Store the configuration, create the helpers and build the model.

        Args:
            config: Configuration object. ``phase`` and ``train_cnn`` are
                read here; the whole object is also passed on to ``NN``.
        """
        self.config = config

        # Training mode is decided solely by the configured phase.
        self.is_train = bool(config.phase == 'train')
        self.train_cnn = self.is_train and config.train_cnn

        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)

        # Subclasses provide the graph; it is built once all attributes
        # above exist.
        self.build()

    def build(self):
        raise NotImplementedError()

    def train(self, sess, train_data):
        """ Train the model using the COCO train2014 data.

        Runs ``config.num_epochs`` passes over ``train_data``, writes a
        summary per step to ``config.summary_dir`` and saves the model every
        ``config.save_period`` steps and once more at the end.

        Args:
            sess: Session used to run the optimisation op.
            train_data: Dataset exposing ``num_batches``, ``next_batch()``
                (returning image files, sentences and masks) and ``reset()``.
        """
        print("Training the model...")
        config = self.config

        # makedirs(exist_ok=True) also creates missing parents and does not
        # fail when the directory already exists.
        os.makedirs(config.summary_dir, exist_ok=True)
        train_writer = tf.summary.FileWriter(config.summary_dir, sess.graph)

        # Fix: close the writer even when training is interrupted.
        try:
            for _ in tqdm(list(range(config.num_epochs)), desc='epoch'):
                for _ in tqdm(list(range(train_data.num_batches)), desc='batch'):
                    image_files, sentences, masks = train_data.next_batch()
                    images = self.image_loader.load_images(image_files)
                    feed_dict = {
                        self.images: images,
                        self.sentences: sentences,
                        self.masks: masks
                    }
                    _, summary, global_step = sess.run(
                        [self.opt_op, self.summary, self.global_step],
                        feed_dict=feed_dict)
                    if (global_step + 1) % config.save_period == 0:
                        self.save()
                    train_writer.add_summary(summary, global_step)
                train_data.reset()

            self.save()
        finally:
            train_writer.close()
        print("Training complete.")

    def eval(self, sess, eval_gt_coco, eval_data, vocabulary):
        """ Evaluate the model using the COCO val2014 data.

        Captions every batch of ``eval_data`` with beam search, writes the
        best caption per image as JSON to ``config.eval_result_file`` and
        scores the file with ``COCOEvalCap``.

        Args:
            sess: Session passed on to ``beam_search``.
            eval_gt_coco: Ground-truth COCO object providing ``loadRes``.
            eval_data: Dataset exposing ``num_batches``, ``batch_size``,
                ``fake_count``, ``image_ids`` and ``next_batch()``.
            vocabulary: Object whose ``get_sentence`` maps word indexes to a
                caption.
        """
        print("Evaluating the model ...")
        config = self.config

        # makedirs(exist_ok=True) also creates missing parents and does not
        # fail when the directory already exists.
        os.makedirs(config.eval_result_dir, exist_ok=True)

        # Generate the captions for the images
        results = []
        idx = 0  # running position in eval_data.image_ids across batches
        for k in tqdm(list(range(eval_data.num_batches)), desc='batch'):
            batch = eval_data.next_batch()
            caption_data = self.beam_search(sess, batch, vocabulary)

            # The last batch skips its trailing fake_count entries
            # (presumably padding added to fill the batch).
            is_last_batch = k >= eval_data.num_batches - 1
            fake_cnt = eval_data.fake_count if is_last_batch else 0
            for i in range(eval_data.batch_size - fake_cnt):
                # Entry 0 is the first beam returned by beam_search, which
                # extracts its results with sort=True.
                word_idxs = caption_data[i][0].sentence
                caption = vocabulary.get_sentence(word_idxs)
                results.append({
                    'image_id': int(eval_data.image_ids[idx]),
                    'caption': caption
                })
                idx += 1

                # Save the result in an image file, if requested
                if config.save_eval_result_as_image:
                    image_file = batch[i]
                    image_name = os.path.splitext(
                        os.path.basename(image_file))[0]
                    img = plt.imread(image_file)
                    plt.imshow(img)
                    plt.axis('off')
                    plt.title(caption)
                    plt.savefig(
                        os.path.join(config.eval_result_dir,
                                     image_name + '_result.jpg'))
                    # Fix: close the figure so images do not stack on the
                    # same axes and figure memory is released.
                    plt.close()

        # Fix: the context manager closes the file even if dumping fails.
        with open(config.eval_result_file, 'w') as fp:
            json.dump(results, fp)

        # Evaluate these captions
        eval_result_coco = eval_gt_coco.loadRes(config.eval_result_file)
        scorer = COCOEvalCap(eval_gt_coco, eval_result_coco)
        scorer.evaluate()
        print("Evaluation complete.")

    def test(self, sess, test_data, vocabulary):
        """ Test the model using any given images.

        Captions every batch of ``test_data`` with beam search, saves each
        image with its caption as the title into ``config.test_result_dir``
        and writes a CSV of files, captions and scores to
        ``config.test_result_file``.

        Args:
            sess: Session passed on to ``beam_search``.
            test_data: Dataset exposing ``num_batches``, ``batch_size``,
                ``fake_count``, ``image_files`` and ``next_batch()``.
            vocabulary: Object whose ``get_sentence`` maps word indexes to a
                caption.
        """
        print("Testing the model ...")
        config = self.config

        # makedirs(exist_ok=True) also creates missing parents and does not
        # fail when the directory already exists.
        os.makedirs(config.test_result_dir, exist_ok=True)

        captions = []
        scores = []

        # Generate the captions for the images
        for k in tqdm(list(range(test_data.num_batches)), desc='path'):
            batch = test_data.next_batch()
            caption_data = self.beam_search(sess, batch, vocabulary)

            # The last batch skips its trailing fake_count entries
            # (presumably padding added to fill the batch).
            is_last_batch = k >= test_data.num_batches - 1
            fake_cnt = test_data.fake_count if is_last_batch else 0
            for i in range(test_data.batch_size - fake_cnt):
                # Entry 0 is the first beam returned by beam_search, which
                # extracts its results with sort=True.
                best = caption_data[i][0]
                caption = vocabulary.get_sentence(best.sentence)
                captions.append(caption)
                scores.append(best.score)

                # Save the result in an image file
                image_file = batch[i]
                image_name = os.path.basename(image_file)
                image_name = os.path.splitext(image_name)[0]

                img = plt.imread(image_file)
                plt.imshow(img)
                plt.axis('off')
                plt.title(caption)
                plt.savefig(
                    os.path.join(config.test_result_dir,
                                 image_name + '_result.jpg'))
                # Fix: close the figure so images do not stack on the same
                # axes and figure memory is released.
                plt.close()

        # Save the captions to a file
        results = pd.DataFrame({
            'image_files': test_data.image_files,
            'caption': captions,
            'prob': scores
        })
        results.to_csv(config.test_result_file)
        print("Testing complete.")

    def beam_search(self, sess, image_files, vocabulary):
        """Generate captions for a batch of images with beam search.

        Args:
            sess: Session used to run the encoder and the decoder step.
            image_files: Image files of one batch (``config.batch_size``
                entries are processed).
            vocabulary: Object whose ``words`` list maps a word index to its
                string; ``'.'`` marks the end of a caption.

        Returns:
            One list per image: the completed captions (or, if none was
            completed, the partial ones) from ``extract(sort=True)``.
        """
        config = self.config
        batch_size = config.batch_size
        beam_size = config.beam_size

        # Encode the images: attention contexts plus the initial LSTM state.
        images = self.image_loader.load_images(image_files)
        contexts, initial_memory, initial_output = sess.run(
            [self.conv_feats, self.initial_memory, self.initial_output],
            feed_dict={self.images: images})

        # One heap of unfinished and one of finished captions per image.
        partial_beams = []
        finished_beams = []
        for i in range(batch_size):
            seed = CaptionData(sentence=[],
                               memory=initial_memory[i],
                               output=initial_output[i],
                               score=1.0)
            heap = TopN(beam_size)
            heap.push(seed)
            partial_beams.append(heap)
            finished_beams.append(TopN(beam_size))

        for step in range(config.max_caption_length):
            # Take the current candidates out of every heap; the heaps are
            # refilled with the extended captions below.
            candidates = []
            for i in range(batch_size):
                candidates.append(partial_beams[i].extract())
                partial_beams[i].reset()

            first_step = step == 0
            # Only the single seed beam exists on the first step.
            beams_to_expand = 1 if first_step else beam_size
            for b in range(beams_to_expand):
                if first_step:
                    last_word = np.zeros((batch_size), np.int32)
                else:
                    last_word = np.array(
                        [cands[b].sentence[-1] for cands in candidates],
                        np.int32)

                last_memory = np.array(
                    [cands[b].memory for cands in candidates], np.float32)
                last_output = np.array(
                    [cands[b].output for cands in candidates], np.float32)

                # One decoder step for beam b of every image in the batch.
                memory, output, scores = sess.run(
                    [self.memory, self.output, self.probs],
                    feed_dict={
                        self.contexts: contexts,
                        self.last_word: last_word,
                        self.last_memory: last_memory,
                        self.last_output: last_output
                    })

                for i in range(batch_size):
                    parent = candidates[i][b]
                    # Rank all words by descending probability and keep the
                    # best beam_size + 1 of them.
                    ranked = sorted(enumerate(scores[i]),
                                    key=lambda pair: -pair[1])
                    for word, prob in ranked[0:beam_size + 1]:
                        child = CaptionData(parent.sentence + [word],
                                            memory[i], output[i],
                                            parent.score * prob)
                        # A caption ending in '.' is complete.
                        if vocabulary.words[word] == '.':
                            finished_beams[i].push(child)
                        else:
                            partial_beams[i].push(child)

        results = []
        for i in range(batch_size):
            # Fall back to the unfinished captions when none was completed.
            chosen = finished_beams[i]
            if chosen.size() == 0:
                chosen = partial_beams[i]
            results.append(chosen.extract(sort=True))

        return results

    def save(self):
        """ Save the model.

        Writes every global variable, keyed by variable name, to
        ``<save_dir>/<global_step>.npy`` and pickles a copy of the config
        (with ``global_step`` added) to ``<save_dir>/config.pickle``.
        Variables are read with ``eval()``, so a default session is needed.
        """
        config = self.config
        # Evaluate the step once so the file name and the pickled config
        # always carry the same value.
        global_step = self.global_step.eval()
        data = {v.name: v.eval() for v in tf.global_variables()}
        save_path = os.path.join(config.save_dir, str(global_step))

        print((" Saving the model to %s..." % (save_path + ".npy")))
        np.save(save_path, data)

        config_ = copy.copy(config)
        config_.global_step = global_step
        # Fix: the context manager closes the file even if pickling fails.
        info_path = os.path.join(config.save_dir, "config.pickle")
        with open(info_path, "wb") as info_file:
            pickle.dump(config_, info_file)
        print("Model saved.")

    def load(self, sess, model_file=None):
        """ Load the model.

        Args:
            sess: Session in which the assign ops are run.
            model_file: Path of the ``.npy`` checkpoint. When None, the path
                is rebuilt from ``<save_dir>/config.pickle``.

        Variables whose name is not in the checkpoint are left untouched.
        """
        config = self.config
        if model_file is not None:
            save_path = model_file
        else:
            info_path = os.path.join(config.save_dir, "config.pickle")
            # NOTE: unpickling can execute arbitrary code; only load
            # checkpoints from a trusted source.
            with open(info_path, "rb") as info_file:
                config = pickle.load(info_file)
            save_path = os.path.join(config.save_dir,
                                     str(config.global_step) + ".npy")

        print("Loading the model from %s..." % save_path)
        # Fix: the checkpoint is a pickled dict inside a 0-d object array.
        # NumPy >= 1.16.3 refuses to load it unless allow_pickle=True.
        data_dict = np.load(save_path, allow_pickle=True).item()
        count = 0
        for v in tqdm(tf.global_variables()):
            if v.name in data_dict:
                sess.run(v.assign(data_dict[v.name]))
                count += 1
        print("%d tensors loaded." % count)

    def load_cnn(self, session, data_path, ignore_missing=True):
        """ Load a pretrained CNN model.

        Args:
            session: Session in which the assign ops are run.
            data_path: ``.npy`` file holding a dict that maps an op (scope)
                name to a dict of parameter name -> value.
            ignore_missing: When True (default), parameters without a
                matching variable are skipped silently. When False, the
                ``ValueError`` from ``tf.get_variable`` is re-raised.

        Raises:
            ValueError: If a parameter has no matching variable and
                ``ignore_missing`` is False.
        """
        print("Loading the CNN from %s..." % data_path)
        # Fix: the file stores a pickled dict; NumPy >= 1.16.3 needs
        # allow_pickle=True to load it.
        data_dict = np.load(data_path, allow_pickle=True).item()
        count = 0
        for op_name in tqdm(data_dict):
            with tf.variable_scope(op_name, reuse=True):
                # Fix: dict.iteritems() does not exist in Python 3;
                # items() works in both Python 2 and 3.
                for param_name, data in data_dict[op_name].items():
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                        count += 1
                    except ValueError:
                        # Fix: ignore_missing was accepted but never used.
                        if not ignore_missing:
                            raise
        print("%d tensors loaded." % count)

Exemple #3

0

Afficher le fichier

class BaseModel(object):
    def __init__(self, config):
        """Store the configuration, create the helpers and build the model.

        This variant is inference-only: ``is_train`` is fixed to False, so
        ``train_cnn`` is always False as well.

        Args:
            config: Configuration object, also passed on to ``NN``.
        """
        self.config = config
        self.is_train = False
        # Short-circuits to False; config.train_cnn is never read.
        self.train_cnn = self.is_train and config.train_cnn

        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)

        # Subclasses provide the graph; it is built once all attributes
        # above exist.
        self.build()

    def build(self):
        raise NotImplementedError()

    def beam_search(self, sess, image_files, vocabulary):
        """Generate captions for a batch of images with beam search.

        Args:
            sess: Session used to run the encoder and the decoder step.
            image_files: Image files of one batch (``config.batch_size``
                entries are processed).
            vocabulary: Object whose ``words`` list maps a word index to its
                string; ``'.'`` marks the end of a caption.

        Returns:
            One list per image: the completed captions (or, if none was
            completed, the partial ones) from ``extract(sort=True)``.
        """
        config = self.config
        batch_size = config.batch_size
        beam_size = config.beam_size

        # Encode the images: attention contexts plus the initial LSTM state.
        images = self.image_loader.load_images(image_files)
        contexts, initial_memory, initial_output = sess.run(
            [self.conv_feats, self.initial_memory, self.initial_output],
            feed_dict={self.images: images})

        # One heap of unfinished and one of finished captions per image.
        partial_beams = []
        finished_beams = []
        for i in range(batch_size):
            seed = CaptionData(sentence=[],
                               memory=initial_memory[i],
                               output=initial_output[i],
                               score=1.0)
            heap = TopN(beam_size)
            heap.push(seed)
            partial_beams.append(heap)
            finished_beams.append(TopN(beam_size))

        for step in range(config.max_caption_length):
            # Take the current candidates out of every heap; the heaps are
            # refilled with the extended captions below.
            candidates = []
            for i in range(batch_size):
                candidates.append(partial_beams[i].extract())
                partial_beams[i].reset()

            first_step = step == 0
            # Only the single seed beam exists on the first step.
            beams_to_expand = 1 if first_step else beam_size
            for b in range(beams_to_expand):
                if first_step:
                    last_word = np.zeros((batch_size), np.int32)
                else:
                    last_word = np.array(
                        [cands[b].sentence[-1] for cands in candidates],
                        np.int32)

                last_memory = np.array(
                    [cands[b].memory for cands in candidates], np.float32)
                last_output = np.array(
                    [cands[b].output for cands in candidates], np.float32)

                # One decoder step for beam b of every image in the batch.
                memory, output, scores = sess.run(
                    [self.memory, self.output, self.probs],
                    feed_dict={
                        self.contexts: contexts,
                        self.last_word: last_word,
                        self.last_memory: last_memory,
                        self.last_output: last_output
                    })

                for i in range(batch_size):
                    parent = candidates[i][b]
                    # Rank all words by descending probability and keep the
                    # best beam_size + 1 of them.
                    ranked = sorted(enumerate(scores[i]),
                                    key=lambda pair: -pair[1])
                    for word, prob in ranked[0:beam_size + 1]:
                        child = CaptionData(parent.sentence + [word],
                                            memory[i], output[i],
                                            parent.score * prob)
                        # A caption ending in '.' is complete.
                        if vocabulary.words[word] == '.':
                            finished_beams[i].push(child)
                        else:
                            partial_beams[i].push(child)

        results = []
        for i in range(batch_size):
            # Fall back to the unfinished captions when none was completed.
            chosen = finished_beams[i]
            if chosen.size() == 0:
                chosen = partial_beams[i]
            results.append(chosen.extract(sort=True))

        return results

    def load(self, sess, model_file=None):
        """ Load the model.

        Args:
            sess: Session in which the assign ops are run.
            model_file: Path of the ``.npy`` checkpoint. When None, the path
                is rebuilt from ``<save_dir>/config.pickle``.

        Variables whose name is not in the checkpoint are left untouched.
        """
        config = self.config
        if model_file is not None:
            save_path = model_file
        else:
            info_path = os.path.join(config.save_dir, "config.pickle")
            # Fix: the context manager closes the file even if unpickling
            # fails.
            # NOTE: unpickling can execute arbitrary code; only load
            # checkpoints from a trusted source.
            with open(info_path, "rb") as info_file:
                config = pickle.load(info_file)
            save_path = os.path.join(config.save_dir,
                                     str(config.global_step) + ".npy")

        print("Loading the model from %s..." % save_path)
        # The checkpoint is a pickled dict inside a 0-d object array, hence
        # allow_pickle=True and .item().
        data_dict = np.load(save_path, allow_pickle=True,
                            encoding="bytes").item()
        count = 0
        for v in tqdm(tf.compat.v1.global_variables()):
            if v.name in data_dict:
                sess.run(v.assign(data_dict[v.name]))
                count += 1
        print("%d tensors loaded." % count)

Exemple #4

0

Afficher le fichier

class ATT_NIC(GraphLoader):
    def __init__(self, graph_path, vocab, max_caption_length):
        """Load a graph and look up the tensors needed for captioning.

        Args:
            graph_path: Path handed to ``self.load_graph``.
            vocab: Vocabulary object, stored as ``self._vocab``.
            max_caption_length: Stored and later passed to ``BeamSearch``.
        """
        self.image_loader = ImageLoader('utils/ilsvrc_2012_mean.npy')
        self._vocab = vocab
        self._max_caption_length = max_caption_length
        self._graph = self.load_graph(graph_path)

        # (attribute name, tensor name) pairs: graph inputs first, then
        # graph outputs.
        tensor_names = (
            ('_images', 'images:0'),
            ('_contexts', 'contexts:0'),
            ('_last_word', 'last_word:0'),
            ('_last_memory', 'last_memory:0'),
            ('_last_output', 'last_output:0'),
            ('_conv_feats', 'conv_feats:0'),
            ('_initial_memory', 'initial_memory:0'),
            ('_initial_output', 'initial_output:0'),
            ('_memory', 'memory:0'),
            ('_output', 'output:0'),
            ('_probs', 'probs:0'),
            ('_alpha', 'alpha:0'),
        )

        # NOTE(review): the session is bound to self._sess by the with
        # statement; if open_session closes it on exit, self._sess is no
        # longer usable in decode() - confirm.
        with self.open_session(self._graph) as self._sess:
            for attr_name, tensor_name in tensor_names:
                tensor = self._graph.get_tensor_by_name(tensor_name)
                setattr(self, attr_name, tensor)

    def get_sentence(self, result):
        """Turn a beam-search result into a caption and a normalised score.

        Args:
            result: Object with ``sentence`` (word indexes) and ``score``.

        Returns:
            Dict with ``'caption'`` (from ``self._vocab.get_sentence``) and
            ``'score'`` (``result.score`` divided by ``len(sentence) - 1``,
            with the divisor clamped to at least 1).
        """
        word_idxs = result.sentence
        # Fix: a one-word sentence used to raise ZeroDivisionError and an
        # empty one flipped the sign of the score (division by -1).
        divisor = max(len(word_idxs) - 1, 1)
        score = result.score / divisor

        caption = self._vocab.get_sentence(word_idxs)
        return {'caption': caption, 'score': score}

    def get_attention(self, result):
        """Return the ``alphas`` attribute of a beam-search result."""
        alphas = result.alphas
        return alphas

    def show_attention(self, caption, alphas, image_np, save_path):
        """Save a grid with the image and one attention overlay per word.

        Args:
            caption: Dict whose ``'caption'`` string is split into words.
            alphas: Per-word attention weights; ``alphas[i]`` must reshape
                to 14x14.
            image_np: Image array; its first two dimensions are used as
                height and width.
            save_path: Destination passed to ``plt.savefig``.
        """
        words = caption['caption'].split()
        n_cols = 4
        # One extra cell for the plain image shown first.
        n_rows = math.ceil((len(words) + 1) / n_cols)
        # Arrays are indexed (row, column), i.e. (height, width). The
        # original names were swapped, although the values passed to
        # cv2.resize were in the right order.
        im_height, im_width = image_np.shape[0:2]

        fig = plt.figure(figsize=(10, 8))
        try:
            plt.subplot(n_rows, n_cols, 1)
            plt.imshow(image_np)
            plt.axis('off')

            # generate attention map for each word
            for idx, word in enumerate(words):
                # cv2.resize expects the target size as (width, height).
                alpha_image = cv2.resize(alphas[idx].reshape(14, 14),
                                         (im_width, im_height))
                plt.subplot(n_rows, n_cols, idx + 2)
                plt.text(0,
                         1,
                         word,
                         backgroundcolor='white',
                         color='black',
                         fontsize=8)
                plt.imshow(image_np)
                plt.imshow(alpha_image, alpha=0.8)
                plt.set_cmap(cm.Greys_r)
                plt.axis('off')

            # Layout is figure-wide, so adjusting once after all subplots
            # exist is enough.
            plt.subplots_adjust(left=0.02,
                                bottom=0.02,
                                right=0.98,
                                top=0.98,
                                hspace=0.05,
                                wspace=0.05)

            plt.savefig(save_path)
        finally:
            # Fix: every call created a figure that was never closed.
            plt.close(fig)

    def decode(self, filename):
        """Caption a single image file using beam search.

        Args:
            filename: Path of the image; it is loaded as a one-element batch.

        Returns:
            Tuple ``(caption, attention)``: the dict from ``get_sentence``
            and the value from ``get_attention`` for the first search result.
        """
        # Encode the image: attention contexts plus the initial LSTM state.
        images = self.image_loader.load_images([filename])
        encoder_fetches = [
            self._conv_feats, self._initial_memory, self._initial_output
        ]
        contexts, initial_memory, initial_output = self._sess.run(
            encoder_fetches, feed_dict={self._images: images})

        def _inference_step_fn(last_word, last_memory, last_output):
            # One decoder step; contexts is captured from the enclosing
            # scope.
            feed = {
                self._contexts: contexts,
                self._last_word: last_word,
                self._last_memory: last_memory,
                self._last_output: last_output
            }
            step_fetches = [
                self._memory, self._output, self._probs, self._alpha
            ]
            return self._sess.run(step_fetches, feed_dict=feed)

        # NOTE(review): 3 and 1 are presumably the beam size and the batch
        # size - confirm against BeamSearch.
        searcher = BeamSearch(3, self._max_caption_length,
                              self._vocab.start_id, self._vocab.end_id, 1)
        found = searcher.search(_inference_step_fn, initial_memory,
                                initial_output)

        first = found[0]
        caption = self.get_sentence(first)
        attention = self.get_attention(first)
        return caption, attention

Exemple #5

0

Afficher le fichier

class BaseModel(object):
    def __init__(self, config):
        """Store the config, copy the `cfg` detection settings and build.

        Args:
            config: run configuration; `phase` and `train_cnn` are read here
                and the object is also handed to `NN`.
        """
        self.config = config

        # Training flags.
        training = config.phase == 'train'
        self.is_train = training
        self.train_cnn = training and config.train_cnn

        # Image loading and network helper.
        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)

        # Detection settings copied from the module-level `cfg`.
        self.classes = cfg.CLASSES
        self.num_class = 80  # hard-coded instead of len(self.classes)
        self.num_anchor = cfg.NUM_ANCHOR
        self.anchors = cfg.ANCHORS
        self.image_size = cfg.IMAGE_SIZE
        self.cell_size = cfg.CELL_SIZE
        self.boxes_per_cell = cfg.BOXES_PER_CELL

        # Output layout: (classes + 5) values per anchor, plus the two
        # offsets that split a flat cell_size x cell_size prediction.
        cells = self.cell_size * self.cell_size
        self.output_size = (self.num_class + 5) * self.num_anchor
        self.scale = 1.0 * self.image_size / self.cell_size
        self.boundary1 = cells * self.num_class
        self.use_pretrain = True
        self.boundary2 = self.boundary1 + cells * self.boxes_per_cell

        # Loss weights and optimisation settings.
        self.object_scale = cfg.OBJECT_SCALE
        self.noobject_scale = cfg.NOOBJECT_SCALE
        self.class_scale = cfg.CLASS_SCALE
        self.coord_scale = cfg.COORD_SCALE
        self.ckpt_file = './yolo2_data/yolo2_coco.ckpt'  # not cfg.CKPT_FILE
        self.learning_rate = cfg.LEARNING_RATE
        self.batch_size = cfg.BATCH_SIZE
        self.alpha = cfg.ALPHA
        self.lamda = cfg.LAMDA

        # offset[i, j, b] == j: the column index of every cell, repeated for
        # each box, laid out as (cell_size, cell_size, boxes_per_cell).
        column_rows = ([np.arange(self.cell_size)] * self.cell_size *
                       self.boxes_per_cell)
        per_box_grid = np.reshape(
            np.array(column_rows),
            (self.boxes_per_cell, self.cell_size, self.cell_size))
        self.offset = np.transpose(per_box_grid, (1, 2, 0))

        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.build()

    def build(self):
        """Build the model; this base version only raises.

        Raises:
            NotImplementedError: always. `__init__` calls `build()`, so a
                usable model has to provide its own implementation.
        """
        raise NotImplementedError

    def train(self, sess, train_data):
        """Run the optimisation loop over `train_data`.

        For every batch the images are loaded from disk, one optimiser step
        is executed and its summary is written. The model is saved whenever
        `(global_step + 1)` is a multiple of `config.save_period`, and once
        more after the last epoch.

        Args:
            sess: TensorFlow session used to run the graph.
            train_data: batch source providing `num_batches`, `next_batch()`
                (yielding image files, sentences and masks) and `reset()`.
        """
        print("Training the model...")
        settings = self.config

        summary_dir = settings.summary_dir
        if not os.path.exists(summary_dir):
            os.mkdir(summary_dir)
        writer = tf.summary.FileWriter(summary_dir, sess.graph)

        fetches = [self.opt_op, self.summary, self.global_step]
        for _epoch in tqdm(range(settings.num_epochs), desc='epoch'):
            for _step in tqdm(range(train_data.num_batches), desc='batch'):
                image_files, sentences, masks = train_data.next_batch()
                inputs = {
                    self.images: self.image_loader.load_images(image_files),
                    self.sentences: sentences,
                    self.masks: masks,
                }
                _, summary, step = sess.run(fetches, feed_dict=inputs)
                if (step + 1) % settings.save_period == 0:
                    self.save()
                writer.add_summary(summary, step)
            # Rewind the data source before the next epoch.
            train_data.reset()

        self.save()
        writer.close()
        print("Training complete.")

    def test(self, sess, images, vocabulary):
        """Caption the given image and return the top caption.

        Args:
            sess: TensorFlow session used to run the graph.
            images: forwarded unchanged to `beam_search` as its `image`
                argument.
            vocabulary: object whose `get_sentence` turns word indices into
                a caption string.

        Returns:
            The caption built from the first beam of the first result.
        """
        print("Testing the model ...")

        # makedirs (rather than mkdir) also creates missing parents.
        result_dir = self.config.test_result_dir
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

        top_beam = self.beam_search(sess, images)[0][0]
        caption = vocabulary.get_sentence(top_beam.sentence)
        print(caption)
        return caption

    def beam_search(self, sess, image):
        """Use beam search to generate captions for one in-memory image.

        The image is resized to 224x224 with OpenCV, wrapped into a batch of
        one and encoded; the decoder is then stepped for
        `config.max_caption_length` iterations while a `TopN` container per
        batch slot keeps the current partial captions.

        Args:
            sess: TensorFlow session used to run the graph.
            image: image array accepted by `cv2.resize`.

        Returns:
            A list with one entry per `k in range(config.batch_size)`, each
            being the `extract(sort=True)` output of that slot's container.
        """
        # Feed in the images to get the contexts and the initial LSTM states
        config = self.config
        scale_shape = np.array([224, 224], np.int32)
        crop_shape = np.array([224, 224], np.int32)
        image = cv2.resize(image, (scale_shape[0], scale_shape[1]))
        # Scale and crop sizes are equal, so the crop offset is zero and the
        # slice below keeps the whole resized image.
        offset = (scale_shape - crop_shape) / 2
        offset = offset.astype(np.int32)
        image = image[offset[0]:offset[0] + crop_shape[0],
                      offset[1]:offset[1] + crop_shape[1]]
        # Add a leading batch axis: a single image is fed to the encoder.
        image = np.expand_dims(image, 0)
        contexts, initial_memory, initial_output = sess.run(
            [self.conv_feats, self.initial_memory, self.initial_output],
            feed_dict={self.images: image})

        partial_caption_data = []
        complete_caption_data = []
        # NOTE(review): only one image is fed above, yet the loops below index
        # initial_memory[k] for every k < config.batch_size -- presumably this
        # requires config.batch_size == 1; confirm.
        for k in range(config.batch_size):
            initial_beam = CaptionData(sentence=[],
                                       memory=initial_memory[k],
                                       output=initial_output[k],
                                       score=1.0)
            partial_caption_data.append(TopN(config.beam_size))
            partial_caption_data[-1].push(initial_beam)
            complete_caption_data.append(TopN(config.beam_size))

        # Run beam search
        for idx in range(config.max_caption_length):
            # Take the current beams out of every container and empty it so
            # that this step can refill it with the extended captions.
            partial_caption_data_lists = []
            for k in range(config.batch_size):
                data = partial_caption_data[k].extract()
                partial_caption_data_lists.append(data)
                partial_caption_data[k].reset()

            # Step 0 has only the single initial beam per image.
            num_steps = 1 if idx == 0 else config.beam_size
            for b in range(num_steps):
                if idx == 0:
                    # No previous word yet: index 0 is fed as the first input.
                    last_word = np.zeros((config.batch_size), np.int32)
                else:
                    last_word = np.array([
                        pcl[b].sentence[-1]
                        for pcl in partial_caption_data_lists
                    ], np.int32)

                last_memory = np.array(
                    [pcl[b].memory for pcl in partial_caption_data_lists],
                    np.float32)
                last_output = np.array(
                    [pcl[b].output for pcl in partial_caption_data_lists],
                    np.float32)

                memory, output, scores = sess.run(
                    [self.memory, self.output, self.probs],
                    feed_dict={
                        self.contexts: contexts,
                        self.last_word: last_word,
                        self.last_memory: last_memory,
                        self.last_output: last_output
                    })

                # Find the beam_size most probable next words
                for k in range(config.batch_size):
                    caption_data = partial_caption_data_lists[k][b]
                    words_and_scores = list(enumerate(scores[k]))
                    words_and_scores.sort(key=lambda x: -x[1])
                    # Keeps beam_size + 1 candidates, one more than the beam.
                    words_and_scores = words_and_scores[0:config.beam_size + 1]

                    # Append each of these words to the current partial caption
                    for w, s in words_and_scores:
                        sentence = caption_data.sentence + [w]
                        # Caption score is the product of the word scores.
                        score = caption_data.score * s
                        beam = CaptionData(sentence, memory[k], output[k],
                                           score)
                        partial_caption_data[k].push(beam)

        # Nothing in this method pushes onto complete_caption_data, so the
        # fallback to the partial captions below is what supplies the results.
        results = []
        for k in range(config.batch_size):
            if complete_caption_data[k].size() == 0:
                complete_caption_data[k] = partial_caption_data[k]
            results.append(complete_caption_data[k].extract(sort=True))

        return results

    def save(self):
        """Save all global variables and the config to `config.save_dir`.

        Writes `<save_dir>/<global_step>.npy` holding a dict that maps each
        variable name to its current value, and `<save_dir>/config.pickle`
        holding a copy of the config with `global_step` attached. The tensor
        `eval()` calls rely on a default session being active.
        """
        config = self.config
        data = {v.name: v.eval() for v in tf.global_variables()}
        # Evaluate once so the file name and the pickled step always agree.
        global_step = self.global_step.eval()
        save_path = os.path.join(config.save_dir, str(global_step))

        print(" Saving the model to %s..." % (save_path + ".npy"))
        if config.save_dir and not os.path.exists(config.save_dir):
            os.makedirs(config.save_dir)
        np.save(save_path, data)

        config_ = copy.copy(config)
        config_.global_step = global_step
        # Context manager closes the handle even if pickling fails.
        info_path = os.path.join(config.save_dir, "config.pickle")
        with open(info_path, "wb") as info_file:
            pickle.dump(config_, info_file)
        print("Model saved.")

    def load(self, sess, model_file=None):
        """Load variable values from a `.npy` checkpoint written by `save`.

        Args:
            sess: TensorFlow session in which the assignments are run.
            model_file: explicit checkpoint path. When None, the path is
                derived from `<config.save_dir>/config.pickle`, using the
                `save_dir` and `global_step` stored in that pickle.

        Only variables whose name appears in the checkpoint are assigned.
        """
        config = self.config
        if model_file is not None:
            save_path = model_file
        else:
            info_path = os.path.join(config.save_dir, "config.pickle")
            # NOTE: unpickling runs arbitrary code; only load trusted files.
            with open(info_path, "rb") as info_file:
                config = pickle.load(info_file)
            save_path = os.path.join(config.save_dir,
                                     str(config.global_step) + ".npy")

        print("Loading the model from %s..." % save_path)
        # The checkpoint is a pickled dict inside a 0-d object array; recent
        # NumPy versions refuse to load it unless allow_pickle is set.
        data_dict = np.load(save_path, allow_pickle=True).item()
        count = 0
        for v in tqdm(tf.global_variables()):
            if v.name in data_dict:
                sess.run(v.assign(data_dict[v.name]))
                count += 1
        print("%d tensors loaded." % count)

    def load_cnn(self, session, data_path, ignore_missing=True):
        """Load pretrained CNN weights from a `.npy` dictionary.

        The file is expected to hold `{op_name: {param_name: value}}`; each
        value is assigned to the existing variable `op_name/param_name`.

        Args:
            session: TensorFlow session in which the assignments are run.
            data_path: path of the `.npy` weights file.
            ignore_missing: when True (default) a `ValueError` raised while
                looking up or assigning a variable is skipped; when False it
                propagates to the caller.
        """
        print("Loading the CNN from %s..." % data_path)
        # allow_pickle is required for a pickled dict on recent NumPy, and
        # encoding='latin1' lets Python 3 read files pickled by Python 2.
        data_dict = np.load(data_path, allow_pickle=True,
                            encoding='latin1').item()
        count = 0
        for op_name in tqdm(data_dict):
            with tf.variable_scope(op_name, reuse=True):
                # .items() works on Python 2 and 3, unlike .iteritems().
                for param_name, data in data_dict[op_name].items():
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                        count += 1
                    except ValueError:
                        if not ignore_missing:
                            raise
        print("%d tensors loaded." % count)

Exemple #6

0

Afficher le fichier

class BaseModel(object):
    def __init__(self, config):
        """Keep the config, create the helpers and build the model.

        Args:
            config: run configuration; `phase` and `train_cnn` are read here
                and the object is also handed to `NN`.
        """
        self.config = config

        in_training = config.phase == 'train'
        self.is_train = in_training
        self.train_cnn = in_training and config.train_cnn

        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)

        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        # Counter that `save` uses to number its checkpoint files.
        self.global_epoch = 0

        self.build()

    def build(self):
        """Build the model; this base version only raises.

        Raises:
            NotImplementedError: always. `__init__` calls `build()`, so a
                usable model has to provide its own implementation.
        """
        raise NotImplementedError

    def train(self, sess, train_data):
        """Train the model and record the per-batch values of `opt_op`.

        A summary is written every 10th global step, the model is saved
        after every epoch and once more at the end, and the collected values
        (one list per epoch) are pickled to `ResultProcess/Loss/Loss.pickle`.

        Args:
            sess: TensorFlow session used to run the graph.
            train_data: batch source providing `num_batches`, `next_batch()`
                (yielding image files, sentences and masks) and `reset()`.
        """
        print("Training the model...")
        config = self.config

        if not os.path.exists(config.summary_dir):
            os.mkdir(config.summary_dir)
        train_writer = tf.summary.FileWriter(config.summary_dir, sess.graph)

        loss_path = 'ResultProcess/Loss/Loss.pickle'
        loss_table = []
        try:
            for _ in tqdm(range(config.num_epochs), desc='epoch'):
                loss_line = []
                for _ in tqdm(range(train_data.num_batches), desc='batch'):
                    image_files, sentences, masks = train_data.next_batch()
                    images = self.image_loader.load_images(image_files)
                    feed_dict = {
                        self.images: images,
                        self.sentences: sentences,
                        self.masks: masks
                    }
                    # NOTE(review): the recorded value is the fetch of
                    # self.opt_op; if that is a plain optimizer op, sess.run
                    # returns None for it -- confirm a loss is intended here.
                    loss, summary, global_step = sess.run(
                        [self.opt_op, self.summary, self.global_step],
                        feed_dict=feed_dict)
                    loss_line.append(loss)
                    if (global_step + 1) % 10 == 0:
                        train_writer.add_summary(summary, global_step)
                loss_table.append(loss_line)
                self.save()
                train_data.reset()

            # Create the output directory so a finished run cannot fail on
            # the very last write.
            loss_dir = os.path.dirname(loss_path)
            if not os.path.exists(loss_dir):
                os.makedirs(loss_dir)
            with open(loss_path, 'wb') as loss_file:
                pickle.dump(loss_table, loss_file)

            self.save()
        finally:
            # Close the summary writer even when training is interrupted.
            train_writer.close()
        print("Training complete.")

    def eval(self, sess, eval_gt_coco, eval_data, vocabulary):
        """Caption the evaluation set and score it with `COCOEvalCap`.

        The top beam of every image is turned into a caption and collected
        as `{'image_id', 'caption'}` records, which are dumped as JSON to
        `config.eval_result_file` and then evaluated against `eval_gt_coco`.
        When `config.save_eval_result_as_image` is set, each image is also
        saved with its caption as title inside `config.eval_result_dir`.

        Args:
            sess: TensorFlow session used to run the graph.
            eval_gt_coco: ground-truth object providing `loadRes`.
            eval_data: batch source providing `num_batches`, `batch_size`,
                `fake_count`, `image_ids` and `next_batch()`.
            vocabulary: object whose `get_sentence` turns word indices into
                a caption string.
        """
        print("Evaluating the model ...")
        config = self.config

        if not os.path.exists(config.eval_result_dir):
            os.mkdir(config.eval_result_dir)

        # Generate the captions for the images
        results = []
        idx = 0  # running index into eval_data.image_ids across batches
        last_batch = eval_data.num_batches - 1
        for k in tqdm(range(eval_data.num_batches), desc='batch'):
            batch = eval_data.next_batch()
            caption_data = self.beam_search(sess, batch, vocabulary)

            # The last batch skips its final `fake_count` entries
            # (presumably padding -- confirm against the dataset class).
            fake_cnt = eval_data.fake_count if k >= last_batch else 0
            for i in range(eval_data.batch_size - fake_cnt):
                caption = vocabulary.get_sentence(caption_data[i][0].sentence)
                results.append({
                    'image_id': int(eval_data.image_ids[idx]),
                    'caption': caption
                })
                idx += 1

                # Save the result in an image file, if requested
                if config.save_eval_result_as_image:
                    image_file = batch[i]
                    image_name = os.path.splitext(
                        os.path.basename(image_file))[0]
                    img = plt.imread(image_file)
                    plt.imshow(img)
                    plt.axis('off')
                    plt.title(caption)
                    # Write into the result directory created above instead
                    # of the current working directory.
                    plt.savefig(
                        os.path.join(config.eval_result_dir,
                                     image_name + '_result.jpg'))

        with open(config.eval_result_file, 'w') as fp:
            json.dump(results, fp)
        print('json done')

        # Evaluate these captions
        eval_result_coco = eval_gt_coco.loadRes(config.eval_result_file)
        scorer = COCOEvalCap(eval_gt_coco, eval_result_coco)
        scorer.evaluate()
        print("Evaluation complete.")

    def test(self, sess, test_data, vocabulary):
        """Caption the test images and save the results.

        For every image the top beam is turned into a caption, the image is
        saved with that caption as title to
        `<config.test_result_dir>/<image name>_result.jpg`, and finally all
        captions and scores are written as CSV to `config.test_result_file`.

        Args:
            sess: TensorFlow session used to run the graph.
            test_data: batch source providing `num_batches`, `batch_size`,
                `fake_count`, `image_files` and `next_batch()`.
            vocabulary: object whose `get_sentence` turns word indices into
                a caption string.
        """
        print("Testing the model ...")
        config = self.config

        if not os.path.exists(config.test_result_dir):
            os.mkdir(config.test_result_dir)

        captions = []
        scores = []

        # Generate the captions for the images
        last_batch = test_data.num_batches - 1
        for k in tqdm(range(test_data.num_batches), desc='path'):
            batch = test_data.next_batch()
            caption_data = self.beam_search(sess, batch, vocabulary)

            # The last batch skips its final `fake_count` entries
            # (presumably padding -- confirm against the dataset class).
            fake_cnt = test_data.fake_count if k >= last_batch else 0
            for i in range(test_data.batch_size - fake_cnt):
                top_beam = caption_data[i][0]
                caption = vocabulary.get_sentence(top_beam.sentence)
                captions.append(caption)
                scores.append(top_beam.score)

                # Save the result in an image file
                image_file = batch[i]
                image_name = os.path.splitext(os.path.basename(image_file))[0]
                img = plt.imread(image_file)
                plt.imshow(img)
                plt.axis('off')
                plt.title(caption)
                # Use the whole base name: indexing it with [-1] kept only
                # its last character, so different images overwrote each
                # other's result file.
                plt.savefig(
                    os.path.join(config.test_result_dir,
                                 image_name + '_result.jpg'))

        # Save the captions to a file
        results = pd.DataFrame({
            'image_files': test_data.image_files,
            'caption': captions,
            'prob': scores
        })
        results.to_csv(config.test_result_file)
        print("Testing complete.")

    def beam_search(self, sess, image_files, vocabulary):
        """Use beam search to generate the captions for a batch of images.

        Besides the captions, the value of `self.alpha` is evaluated once
        per decoding step and the collected list is pickled to
        `ResultProcess/alpha.pickle`. The file is rewritten on every call,
        so it only holds the values of the most recent batch.

        Args:
            sess: TensorFlow session used to run the graph.
            image_files: image paths handed to `image_loader.load_images`.
            vocabulary: object whose `words[w]` gives the word for index
                `w`; a beam ending in '.' is treated as complete.

        Returns:
            A list with one entry per `k in range(config.batch_size)`, each
            being the `extract(sort=True)` output of the complete captions,
            or of the partial ones when no caption was completed.
        """
        # Feed in the images to get the contexts and the initial LSTM states
        config = self.config
        images = self.image_loader.load_images(image_files)
        contexts, initial_memory, initial_output = sess.run(
            [self.conv_feats, self.initial_memory, self.initial_output],
            feed_dict={self.images: images})

        partial_caption_data = []
        complete_caption_data = []

        alpha_all = []

        for k in range(config.batch_size):
            initial_beam = CaptionData(sentence=[],
                                       memory=initial_memory[k],
                                       output=initial_output[k],
                                       score=1.0)
            partial_caption_data.append(TopN(config.beam_size))
            partial_caption_data[-1].push(initial_beam)
            complete_caption_data.append(TopN(config.beam_size))

        # Run beam search
        for idx in range(config.max_caption_length):
            # Take the current beams out of every container and empty it so
            # that this step can refill it with the extended captions.
            partial_caption_data_lists = []
            for k in range(config.batch_size):
                data = partial_caption_data[k].extract()
                partial_caption_data_lists.append(data)
                partial_caption_data[k].reset()

            # Step 0 has only the single initial beam per image.
            num_steps = 1 if idx == 0 else config.beam_size
            for b in range(num_steps):
                if idx == 0:
                    last_word = np.zeros((config.batch_size), np.int32)
                else:
                    last_word = np.array([
                        pcl[b].sentence[-1]
                        for pcl in partial_caption_data_lists
                    ], np.int32)

                last_memory = np.array(
                    [pcl[b].memory for pcl in partial_caption_data_lists],
                    np.float32)
                last_output = np.array(
                    [pcl[b].output for pcl in partial_caption_data_lists],
                    np.float32)

                memory, output, scores = sess.run(
                    [self.memory, self.output, self.probs],
                    feed_dict={
                        self.contexts: contexts,
                        self.last_word: last_word,
                        self.last_memory: last_memory,
                        self.last_output: last_output
                    })

                # Find the beam_size most probable next words
                for k in range(config.batch_size):
                    caption_data = partial_caption_data_lists[k][b]
                    words_and_scores = list(enumerate(scores[k]))
                    words_and_scores.sort(key=lambda x: -x[1])
                    words_and_scores = words_and_scores[0:config.beam_size + 1]

                    # Append each of these words to the current partial caption
                    for w, s in words_and_scores:
                        sentence = caption_data.sentence + [w]
                        score = caption_data.score * s
                        beam = CaptionData(sentence, memory[k], output[k],
                                           score)
                        if vocabulary.words[w] == '.':
                            complete_caption_data[k].push(beam)
                        else:
                            partial_caption_data[k].push(beam)

            # Record alpha for this step, fed with the state of the last
            # beam processed above.
            alpha_status = sess.run(self.alpha,
                                    feed_dict={
                                        self.contexts: contexts,
                                        self.last_memory: last_memory,
                                        self.last_output: last_output
                                    })
            alpha_all.append(alpha_status)

        alpha_path = 'ResultProcess/alpha.pickle'
        alpha_dir = os.path.dirname(alpha_path)
        if not os.path.exists(alpha_dir):
            os.makedirs(alpha_dir)
        with open(alpha_path, 'wb') as alpha_file:
            pickle.dump(alpha_all, alpha_file)

        # Initialised here rather than inside the step loop, so it exists
        # even when max_caption_length is 0.
        results = []
        for k in range(config.batch_size):
            if complete_caption_data[k].size() == 0:
                complete_caption_data[k] = partial_caption_data[k]
            results.append(complete_caption_data[k].extract(sort=True))

        return results

    def save(self):
        """Save all global variables and the config to `config.save_dir`.

        Writes `<save_dir>/<global_epoch>.npy` holding a dict that maps each
        variable name to its current value, then increments
        `self.global_epoch`. `<save_dir>/config.pickle` receives a copy of
        the config carrying `global_step` and the `global_epoch` just used
        for the file name, so that `load` can find the checkpoint again.
        The tensor `eval()` calls rely on a default session being active.
        """
        config = self.config
        data = {v.name: v.eval() for v in tf.global_variables()}
        epoch = self.global_epoch
        save_path = os.path.join(config.save_dir, str(epoch))
        self.global_epoch += 1

        print(" Saving the model to %s..." % (save_path + ".npy"))
        np.save(save_path, data)

        config_ = copy.copy(config)
        config_.global_step = self.global_step.eval()
        config_.global_epoch = epoch
        # Context manager closes the handle even if pickling fails.
        info_path = os.path.join(config.save_dir, "config.pickle")
        with open(info_path, "wb") as info_file:
            pickle.dump(config_, info_file)
        print("Model saved.")

    def load(self, sess, model_file=None):
        """Load variable values from a `.npy` checkpoint written by `save`.

        Args:
            sess: TensorFlow session in which the assignments are run.
            model_file: explicit checkpoint path. When None, the path is
                derived from `<config.save_dir>/config.pickle`.

        Only variables whose name appears in the checkpoint are assigned.
        """
        config = self.config
        if model_file is not None:
            save_path = model_file
        else:
            info_path = os.path.join(config.save_dir, "config.pickle")
            # NOTE: unpickling runs arbitrary code; only load trusted files.
            with open(info_path, "rb") as info_file:
                config = pickle.load(info_file)
            # `save` names checkpoints by epoch counter, not by global step.
            # Pickles written before the counter was stored fall back to
            # the previous global_step-based name.
            checkpoint_id = getattr(config, 'global_epoch',
                                    config.global_step)
            save_path = os.path.join(config.save_dir,
                                     str(checkpoint_id) + ".npy")

        print("Loading the model from %s..." % save_path)
        # The checkpoint is a pickled dict inside a 0-d object array; recent
        # NumPy versions refuse to load it unless allow_pickle is set.
        data_dict = np.load(save_path, allow_pickle=True).item()
        count = 0
        for v in tqdm(tf.global_variables()):
            if v.name in data_dict:
                sess.run(v.assign(data_dict[v.name]))
                count += 1
        print("%d tensors loaded." % count)

    def load_cnn(self, session, data_path, ignore_missing=True):
        """Load pretrained CNN weights from a `.npy` dictionary.

        The file is expected to hold `{op_name: data}` where `data[0]` is
        assigned to the variable `op_name/kernel` and `data[1]` to
        `op_name/bias`. The entries 'fc6', 'fc7' and 'fc8' are dropped
        before loading.

        Args:
            session: TensorFlow session in which the assignments are run.
            data_path: path of the `.npy` weights file.
            ignore_missing: when True (default) a `ValueError` raised while
                looking up or assigning a variable skips the rest of that
                layer; when False it propagates to the caller.
        """
        print("Loading the CNN from %s..." % data_path)
        # allow_pickle is required for a pickled dict on recent NumPy.
        data_dict = np.load(data_path, allow_pickle=True,
                            encoding='latin1').item()
        # pop() instead of del: a file without these layers is fine too.
        for skipped_layer in ('fc6', 'fc7', 'fc8'):
            data_dict.pop(skipped_layer, None)

        count = 0
        for op_name in tqdm(data_dict):
            with tf.variable_scope(op_name, reuse=True):
                data = data_dict[op_name]
                try:
                    for position, var_name in enumerate(('kernel', 'bias')):
                        var = tf.get_variable(var_name)
                        session.run(var.assign(data[position]))
                        # Count every assigned tensor so the number matches
                        # the "tensors loaded" message below.
                        count += 1
                except ValueError:
                    if not ignore_missing:
                        raise
        print("%d tensors loaded." % count)

    def load_cnn_ckpt(self, session, data_path):
        """Load CNN weights from a TensorFlow checkpoint.

        Every checkpoint entry except four excluded ones is renamed with
        `get_tensor_name` to `<scope>/<variable>` and assigned to that
        existing variable. Afterwards `conv1/bias` is set to zeros.

        Args:
            session: TensorFlow session in which the assignments are run.
            data_path: path of the checkpoint to read.
        """
        print("Loading the CNN from %s..." % data_path)
        reader = pywrap_tensorflow.NewCheckpointReader(data_path)
        shapes_by_key = reader.get_variable_to_shape_map()

        # Entries that are not copied into the model.
        excluded_keys = (
            'global_step',
            'resnet_v1_50/mean_rgb',
            'resnet_v1_50/logits/weights',
            'resnet_v1_50/logits/biases',
        )
        for excluded in excluded_keys:
            del shapes_by_key[excluded]

        count = 0
        for key in tqdm(shapes_by_key):
            value = reader.get_tensor(key)
            target = self.get_tensor_name(key.split('/')).split('/')
            with tf.variable_scope(target[0], reuse=True):
                session.run(tf.get_variable(target[1]).assign(value))
                count += 1

        # conv1/bias is filled with zeros instead of a checkpoint value.
        with tf.variable_scope('conv1', reuse=True):
            session.run(tf.get_variable('bias').assign(np.zeros([64])))
            count += 1
        print("%d tensors loaded." % count)

    def get_tensor_name(self, keys):
        """Translate the parts of a checkpoint key into a model tensor name.

        The name is assembled from up to four pieces, each chosen by the
        first matching entry of a fixed candidate list:

        * block: 'blockN' gives 'res<N+1>', or 'bn<N+1>' when 'BatchNorm'
          is present; without any block it is 'conv1/' or 'bn_conv1/'.
        * unit: 'unit_N' appends the N-th lowercase letter and '_'.
        * branch: 'shortcut' appends 'branch1/', 'conv1'..'conv3' append
          'branch2a/'..'branch2c/'; skipped when the name so far ends in
          'conv1/'.
        * parameter: 'weights' appends 'kernel', the others append
          themselves.

        Args:
            keys: the checkpoint key split on '/'.

        Returns:
            The assembled name, e.g. 'res2a_branch2a/kernel'.
        """

        def first_match(candidates):
            # Position of the first candidate found in `keys`, else None.
            for position, candidate in enumerate(candidates):
                if candidate in keys:
                    return position
            return None

        batch_norm = 'BatchNorm' in keys

        block = first_match(('block1', 'block2', 'block3', 'block4'))
        if block is None:
            name = 'bn_conv1/' if batch_norm else 'conv1/'
        else:
            name = ('bn' if batch_norm else 'res') + str(block + 2)

        unit = first_match(
            ('unit_1', 'unit_2', 'unit_3', 'unit_4', 'unit_5', 'unit_6'))
        if unit is not None:
            name += chr(97 + unit) + '_'

        branch = first_match(('shortcut', 'conv1', 'conv2', 'conv3'))
        # name[-6:-1] looks at the five characters before the last one.
        if branch is not None and name[-6:-1] != 'conv1':
            if branch == 0:
                name += 'branch1/'
            else:
                name += 'branch2' + chr(96 + branch) + '/'

        params = ('weights', 'gamma', 'beta', 'moving_mean',
                  'moving_variance')
        param = first_match(params)
        if param is not None:
            name += 'kernel' if param == 0 else params[param]

        return name

Exemple #7

0

Afficher le fichier

class BaseModel(object):
    def __init__(self, config):
        """Keep the config, create the helpers and build the model.

        Args:
            config: run configuration; `phase` and `train_cnn` are read here
                and the object is also handed to `NN`.
        """
        self.config = config

        # The CNN is only trained when the phase is 'train'.
        self.is_train = (config.phase == 'train')
        self.train_cnn = self.is_train and config.train_cnn

        mean_file = './utils/ilsvrc_2012_mean.npy'
        self.image_loader = ImageLoader(mean_file)
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)

        self.global_step = tf.Variable(
            0, name='global_step', trainable=False)

        self.build()

    def build(self):
        """Build the model; this base version only raises.

        Raises:
            NotImplementedError: always. `__init__` calls `build()`, so a
                usable model has to provide its own implementation.
        """
        raise NotImplementedError

    def train(self, sess, train_data):
        """Run the optimisation loop over `train_data`.

        For every batch the images are loaded from disk and one optimiser
        step is executed. The model is saved whenever `(global_step + 1)`
        is a multiple of `config.save_period`, and once more after the last
        epoch. A summary writer is opened on `config.summary_dir`, but no
        summaries are added to it here.

        Args:
            sess: TensorFlow session used to run the graph.
            train_data: batch source providing `num_batches`, `next_batch()`
                (yielding image files, sentences and masks) and `reset()`.
        """
        print("Training the model...")
        settings = self.config

        summary_dir = settings.summary_dir
        if not os.path.exists(summary_dir):
            os.mkdir(summary_dir)
        writer = tf.summary.FileWriter(summary_dir, sess.graph)

        step_fetches = [self.opt_op, self.global_step]
        for _epoch in tqdm(range(settings.num_epochs), desc='epoch'):
            for _step in tqdm(range(train_data.num_batches), desc='batch'):
                image_files, sentences, masks = train_data.next_batch()
                inputs = {
                    self.images: self.image_loader.load_images(image_files),
                    self.sentences: sentences,
                    self.masks: masks,
                }
                step = sess.run(step_fetches, feed_dict=inputs)[1]
                if (step + 1) % settings.save_period == 0:
                    self.save()
            # Rewind the data source before the next epoch.
            train_data.reset()

        self.save()
        writer.close()
        print("Training complete.")

    def eval(self, sess, eval_gt_coco, eval_data, vocabulary):
        """Caption the evaluation set and score it with `COCOEvalCap`.

        The predicted word indices of every image are turned into a caption
        and collected as `{'image_id', 'caption'}` records, which are dumped
        as JSON to `config.eval_result_file` and then evaluated against
        `eval_gt_coco`. When `config.save_eval_result_as_image` is set, each
        image is also saved with its caption as title inside
        `config.eval_result_dir`.

        Args:
            sess: TensorFlow session used to run the graph.
            eval_gt_coco: ground-truth object providing `loadRes`.
            eval_data: batch source providing `num_batches`, `batch_size`,
                `fake_count`, `image_ids` and `next_batch()`.
            vocabulary: object whose `get_sentence` turns word indices into
                a caption.
        """
        print("Evaluating the model ...")
        config = self.config

        # makedirs also creates missing parents, as `test` already does.
        if not os.path.exists(config.eval_result_dir):
            os.makedirs(config.eval_result_dir)

        # Generate the captions for the images
        results = []
        idx = 0  # running index into eval_data.image_ids across batches
        last_batch = eval_data.num_batches - 1
        for k in tqdm(range(eval_data.num_batches), desc='batch'):
            batch = eval_data.next_batch()
            images = self.image_loader.load_images(batch)
            # Only the predictions are needed here, so the probabilities
            # are no longer fetched.
            caption_data = sess.run(self.predictions,
                                    feed_dict={self.images: images})

            # The last batch skips its final `fake_count` entries
            # (presumably padding -- confirm against the dataset class).
            fake_cnt = eval_data.fake_count if k >= last_batch else 0
            for i in range(eval_data.batch_size - fake_cnt):
                # caption_data[i] holds word indices for image i.
                caption = str(vocabulary.get_sentence(caption_data[i]))
                results.append({
                    'image_id': int(eval_data.image_ids[idx]),
                    'caption': caption
                })
                idx += 1

                # Save the result in an image file, if requested
                if config.save_eval_result_as_image:
                    image_file = batch[i]
                    image_name = os.path.splitext(
                        os.path.basename(image_file))[0]
                    img = mpimg.imread(image_file)
                    plt.imshow(img)
                    plt.axis('off')
                    plt.title(caption)
                    plt.savefig(
                        os.path.join(config.eval_result_dir,
                                     image_name + '_result.jpg'))

        with open(config.eval_result_file, 'w') as fp:
            json.dump(results, fp)

        # Evaluate these captions
        eval_result_coco = eval_gt_coco.loadRes(config.eval_result_file)
        scorer = COCOEvalCap(eval_gt_coco, eval_result_coco)
        scorer.evaluate()
        print("Evaluation complete.")

    def test(self, sess, test_data, vocabulary):
        """Caption the test images and save the results.

        For every image the predicted word indices are turned into a
        caption, the image is saved with that caption as title to
        `<config.test_result_dir>/<image name>_result.jpg`, and finally all
        captions and probabilities are written as CSV to
        `config.test_result_file`.

        Args:
            sess: TensorFlow session used to run the graph.
            test_data: batch source providing `num_batches`, `batch_size`,
                `fake_count`, `image_files` and `next_batch()`.
            vocabulary: object whose `get_sentence` turns word indices into
                a caption.
        """
        print("Testing the model ...")
        config = self.config

        if not os.path.exists(config.test_result_dir):
            os.makedirs(config.test_result_dir)

        captions = []
        scores = []

        # Generate the captions for the images
        last_batch = test_data.num_batches - 1
        for k in tqdm(range(test_data.num_batches), desc='path'):
            batch = test_data.next_batch()
            images = self.image_loader.load_images(batch)
            caption_data, scores_data = sess.run(
                [self.predictions, self.probs],
                feed_dict={self.images: images})

            # The last batch skips its final `fake_count` entries
            # (presumably padding -- confirm against the dataset class).
            fake_cnt = test_data.fake_count if k >= last_batch else 0
            for i in range(test_data.batch_size - fake_cnt):
                # caption_data[i] holds word indices for image i.
                caption = vocabulary.get_sentence(caption_data[i])
                print(caption)
                captions.append(caption)
                scores.append(scores_data[i])

                # Save the result in an image file
                image_file = batch[i]
                # basename handles the platform's path separators, unlike
                # splitting on a literal "/".
                image_name = os.path.splitext(os.path.basename(image_file))[0]
                img = mpimg.imread(image_file)
                plt.imshow(img)
                plt.axis('off')
                plt.title(caption)
                plt.savefig(
                    os.path.join(config.test_result_dir,
                                 image_name + '_result.jpg'))

        # Save the captions to a file
        results = pd.DataFrame({
            'image_files': test_data.image_files,
            'caption': captions,
            'prob': scores
        })
        results.to_csv(config.test_result_file)
        print("Testing complete.")

    def save(self):
        """Save all global variables and the config to `config.save_dir`.

        Writes `<save_dir>/<global_step>.npy` holding a dict that maps each
        variable name to its current value, creating the directory first if
        needed, and `<save_dir>/config.pickle` holding a copy of the config
        with `global_step` attached. The tensor `eval()` calls rely on a
        default session being active.
        """
        config = self.config
        data = {v.name: v.eval() for v in tf.global_variables()}
        # Evaluate once so the file name and the pickled step always agree.
        global_step = self.global_step.eval()
        save_path = os.path.join(config.save_dir, str(global_step))

        print(" Saving the model to %s..." % (save_path + ".npy"))
        target_dir = os.path.dirname(save_path)
        # An empty dirname means the current directory: nothing to create.
        if target_dir and not os.path.exists(target_dir):
            try:
                os.makedirs(target_dir)
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise
        np.save(save_path, data)

        config_ = copy.copy(config)
        config_.global_step = global_step
        # Context manager closes the handle even if pickling fails.
        info_path = os.path.join(config.save_dir, "config.pickle")
        with open(info_path, "wb") as info_file:
            pickle.dump(config_, info_file)
        print("Model saved.")

    def load(self, sess, model_file=None):
        """Load variable values from a `.npy` checkpoint written by `save`.

        Args:
            sess: TensorFlow session in which the assignments are run.
            model_file: explicit checkpoint path. When None, the path is
                derived from `<config.save_dir>/config.pickle`, using the
                `save_dir` and `global_step` stored in that pickle.

        Only variables whose name appears in the checkpoint are assigned.
        """
        config = self.config
        if model_file is not None:
            save_path = model_file
        else:
            info_path = os.path.join(config.save_dir, "config.pickle")
            # NOTE: unpickling runs arbitrary code; only load trusted files.
            with open(info_path, "rb") as info_file:
                config = pickle.load(info_file)
            save_path = os.path.join(config.save_dir,
                                     str(config.global_step) + ".npy")

        print("Loading the model from %s..." % save_path)
        data_dict = np.load(save_path, allow_pickle=True).item()
        count = 0
        for v in tqdm(tf.global_variables()):
            if v.name in data_dict:
                sess.run(v.assign(data_dict[v.name]))
                count += 1
        print("%d tensors loaded." % count)

    def load_cnn(self, session, data_path, ignore_missing=True):
        """Load pretrained CNN weights from a NumPy archive.

        All global variables are printed first. Each archive entry is then
        assigned to the variable of the same name inside the scope obtained
        by dropping the entry name's last two characters.

        Args:
            session: TensorFlow session in which the assignments are run.
            data_path: path of the weights archive.
            ignore_missing: when True (default) a `ValueError` raised while
                looking up or assigning a variable is reported and skipped;
                when False it is reported and then propagates.
        """
        print("All variables present...")
        # tf.global_variables() is what the rest of this class uses; it
        # replaces the deprecated tf.all_variables().
        for var in tf.global_variables():
            print(var)
        # Sanity check: with reuse=True this lookup raises ValueError when
        # 'conv1_1/conv1_1_W' has not been created yet.
        with tf.variable_scope('conv1_1', reuse=True):
            tf.get_variable('conv1_1_W')

        print("Loading the CNN from %s..." % data_path)
        count = 0
        # The archive object is closed once all entries have been read.
        with np.load(data_path, encoding='latin1') as data_dict:
            for param_name in tqdm(data_dict.keys()):
                # Presumably entries are named '<op>_W' / '<op>_b'; the
                # scope is the name without that two-character suffix.
                op_name = param_name[:-2]
                print(param_name)
                with tf.variable_scope(op_name, reuse=True):
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data_dict[param_name]))
                        count += 1
                    except ValueError:
                        print("No such variable")
                        if not ignore_missing:
                            raise

        print("%d tensors loaded." % count)

Exemple #8

0

Afficher le fichier

Fichier : base_model.py Projet : shubham1172/VQA

class BaseModel(object):
    """Base class with the shared captioning and weight-loading logic.

    Subclasses must override `build`. `beam_search` reads several tensors
    from `self` (`images`, `conv_feats`, `initial_memory`, `initial_output`,
    `contexts`, `last_word`, `last_memory`, `last_output`, `memory`,
    `output`, `probs`) that are not created in this class - presumably
    `build` is expected to define them.
    """

    def __init__(self, config):
        """Store `config`, create the helper objects and call `build`."""
        self.config = config
        self.is_train = config.phase == 'train'
        self.train_cnn = self.is_train and config.train_cnn
        self.image_loader = ImageLoader('./DeepRNN/utils/ilsvrc_2012_mean.npy')
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.build()

    def build(self):
        """Hook invoked by `__init__`; the base version always raises."""
        raise NotImplementedError()

    def test(self, sess, test_data, vocabulary):
        """ Test the model using any given images.

        Runs beam search on every batch of `test_data` and prints the first
        caption returned for each image, wrapped in '**'.
        """
        # Generate the captions for the images
        for k in tqdm(list(range(test_data.num_batches)), desc='path'):
            batch = test_data.next_batch()
            caption_data = self.beam_search(sess, batch, vocabulary)

            # Only the last batch skips its trailing `fake_count` entries
            # (presumably padding added to fill the batch).
            fake_cnt = 0 if k < test_data.num_batches - 1 \
                         else test_data.fake_count
            for i in range(test_data.batch_size - fake_cnt):
                word_idxs = caption_data[i][0].sentence
                caption = vocabulary.get_sentence(word_idxs)
                print('**' + caption + '**')

    def beam_search(self, sess, image_files, vocabulary):
        """Use beam search to generate the captions for a batch of images.

        Args:
            sess: Session used to run the graph.
            image_files: Batch handed to `self.image_loader.load_images`.
            vocabulary: Object whose `words[w]` gives the token for index w;
                the token '.' marks a finished caption.

        Returns:
            A list with `config.batch_size` entries, each the result of
            `TopN.extract(sort=True)` for that image.
        """
        # Feed in the images to get the contexts and the initial LSTM states
        config = self.config
        images = self.image_loader.load_images(image_files)
        contexts, initial_memory, initial_output = sess.run(
            [self.conv_feats, self.initial_memory, self.initial_output],
            feed_dict={self.images: images})

        # Per image: one container for unfinished beams (seeded with an
        # empty sentence of score 1.0) and one for finished captions.
        partial_caption_data = []
        complete_caption_data = []
        for k in range(config.batch_size):
            initial_beam = CaptionData(sentence=[],
                                       memory=initial_memory[k],
                                       output=initial_output[k],
                                       score=1.0)
            partial_caption_data.append(TopN(config.beam_size))
            partial_caption_data[-1].push(initial_beam)
            complete_caption_data.append(TopN(config.beam_size))

        # Run beam search
        for idx in range(config.max_caption_length):
            # Take the current beams out and empty the containers so they
            # can collect this step's extensions.
            partial_caption_data_lists = []
            for k in range(config.batch_size):
                data = partial_caption_data[k].extract()
                partial_caption_data_lists.append(data)
                partial_caption_data[k].reset()

            # Step 0 has a single (empty) beam per image; later steps
            # process `beam_size` beams per image.
            # NOTE(review): for idx > 0 this assumes every extracted list
            # holds at least `beam_size` beams - confirm against TopN.
            num_steps = 1 if idx == 0 else config.beam_size
            for b in range(num_steps):
                if idx == 0:
                    last_word = np.zeros((config.batch_size), np.int32)
                else:
                    last_word = np.array([
                        pcl[b].sentence[-1]
                        for pcl in partial_caption_data_lists
                    ], np.int32)

                last_memory = np.array(
                    [pcl[b].memory for pcl in partial_caption_data_lists],
                    np.float32)
                last_output = np.array(
                    [pcl[b].output for pcl in partial_caption_data_lists],
                    np.float32)

                memory, output, scores = sess.run(
                    [self.memory, self.output, self.probs],
                    feed_dict={
                        self.contexts: contexts,
                        self.last_word: last_word,
                        self.last_memory: last_memory,
                        self.last_output: last_output
                    })

                # Find the beam_size most probable next words
                for k in range(config.batch_size):
                    caption_data = partial_caption_data_lists[k][b]
                    words_and_scores = list(enumerate(scores[k]))
                    words_and_scores.sort(key=lambda x: -x[1])
                    # One extra candidate is kept beyond beam_size.
                    words_and_scores = words_and_scores[0:config.beam_size + 1]

                    # Append each of these words to the current partial caption
                    for w, s in words_and_scores:
                        sentence = caption_data.sentence + [w]
                        score = caption_data.score * s
                        beam = CaptionData(sentence, memory[k], output[k],
                                           score)
                        if vocabulary.words[w] == '.':
                            complete_caption_data[k].push(beam)
                        else:
                            partial_caption_data[k].push(beam)

        results = []
        for k in range(config.batch_size):
            # Fall back to the unfinished beams when no caption ended in '.'.
            if complete_caption_data[k].size() == 0:
                complete_caption_data[k] = partial_caption_data[k]
            results.append(complete_caption_data[k].extract(sort=True))

        return results

    def load(self, sess, model_file=None):
        """ Load the model.

        Assigns to every global variable whose name is a key of the
        dictionary stored in a ``.npy`` checkpoint.

        Args:
            sess: Session used to run the assign ops.
            model_file: Path of the ``.npy`` checkpoint. When None, the path
                is ``<save_dir>/<global_step>.npy`` taken from the config
                unpickled from ``<self.config.save_dir>/config.pickle``.
        """
        config = self.config
        if model_file is not None:
            save_path = model_file
        else:
            info_path = os.path.join(config.save_dir, "config.pickle")
            # NOTE(review): unpickling runs arbitrary code; only load
            # checkpoints from a trusted save_dir.
            # The context manager closes the file even if unpickling fails.
            with open(info_path, "rb") as info_file:
                config = pickle.load(info_file)
            global_step = config.global_step
            save_path = os.path.join(config.save_dir,
                                     str(global_step) + ".npy")

        # The checkpoint is a pickled dict, which NumPy >= 1.16.3 refuses to
        # load unless allow_pickle=True is passed explicitly.
        data_dict = np.load(save_path, allow_pickle=True).item()
        for v in tqdm(tf.global_variables()):
            if v.name in data_dict:
                sess.run(v.assign(data_dict[v.name]))

    def load_cnn(self, session, data_path, ignore_missing=True):
        """ Load a pretrained CNN model.

        The file at `data_path` must hold a pickled mapping of
        ``{op_name: {param_name: value}}``; each value is assigned to the
        variable `param_name` inside variable scope `op_name`.

        Args:
            session: Session used to run the assign ops.
            data_path: Path of the ``.npy`` weights file.
            ignore_missing: When True (the default), a ValueError raised
                while looking up or assigning a variable is skipped
                silently. When False it is re-raised.
        """
        # allow_pickle is required by NumPy >= 1.16.3 for pickled dicts;
        # encoding='latin1' lets Python 3 read files pickled by Python 2.
        data_dict = np.load(data_path, allow_pickle=True,
                            encoding='latin1').item()
        for op_name in tqdm(data_dict):
            with tf.variable_scope(op_name, reuse=True):
                # .items() works on Python 2 and 3 (.iteritems() is 2-only).
                for param_name, data in data_dict[op_name].items():
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                    except ValueError:
                        if not ignore_missing:
                            raise

Exemple #9

0

Afficher le fichier

class BaseModel(object):
    def __init__(self, config):
        """Keep `config`, create the helper objects and trigger `build`."""
        self.config = config

        # Training mode is on only for the 'train' phase; CNN fine-tuning
        # additionally requires config.train_cnn.
        training = config.phase == 'train'
        self.is_train = training
        self.train_cnn = training and config.train_cnn

        self.image_loader = ImageLoader('./utils/ilsvrc_2012_mean.npy')
        self.image_shape = [224, 224, 3]
        self.nn = NN(config)
        self.global_step = tf.train.get_or_create_global_step()

        self.build()

    def build(self):
        """Hook invoked by `__init__`; the base version always raises."""
        raise NotImplementedError

    def train(self, sess, train_data):
        """ Train the model using the COCO train2014 data.

        Runs `config.num_epochs` passes over `train_data`. The model is
        saved whenever ``(global_step + 1)`` is a multiple of
        `config.save_period`, and once more after the last epoch. When
        `config.save_timeline` is set, each periodic save also writes a
        Chrome trace to ``<summary_dir>/timeline-<global_step>.json``.
        """
        print("Training the model...")
        config = self.config

        # Tracing objects are only created when a timeline is requested.
        run_metadata = tf.RunMetadata() if config.save_timeline else None
        run_options = (
            tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            if config.save_timeline else None)

        for _epoch in tqdm(list(range(config.num_epochs)), desc='epoch'):
            for _step in tqdm(list(range(train_data.num_batches)),
                              desc='batch'):
                image_files, sentences, masks = train_data.next_batch()
                images = self.image_loader.load_images(image_files)
                fetches = [self.opt_op, self.summary, self.global_step]
                _, summary, step = sess.run(
                    fetches,
                    feed_dict={
                        self.images: images,
                        self.sentences: sentences,
                        self.masks: masks
                    },
                    options=run_options,
                    run_metadata=run_metadata)

                if (step + 1) % config.save_period != 0:
                    continue
                self.save(sess._tf_sess())

                if not config.save_timeline:
                    continue
                trace = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = trace.generate_chrome_trace_format()
                trace_path = os.path.join(config.summary_dir,
                                          'timeline-%d.json' % step)
                with open(trace_path, 'w') as trace_file:
                    trace_file.write(chrome_trace)
            train_data.reset()

        self.save(sess._tf_sess())
        print("Training complete.")

    def eval(self, sess, eval_gt_coco, eval_data, vocabulary):
        """ Evaluate the model using the COCO val2014 data.

        Generates one caption per image with beam search, writes the list
        of ``{'image_id', 'caption'}`` records to `config.eval_result_file`
        as JSON and scores that file with `COCOEvalCap`.

        Args:
            sess: Session handed to `beam_search`.
            eval_gt_coco: Ground-truth object providing `loadRes`.
            eval_data: Dataset providing `num_batches`, `batch_size`,
                `fake_count`, `image_ids` and `next_batch()`.
            vocabulary: Object providing `get_sentence(word_idxs)`.
        """
        print("Evaluating the model ...")
        config = self.config

        results = []
        if not os.path.exists(config.eval_result_dir):
            # makedirs also creates missing parent directories.
            os.makedirs(config.eval_result_dir)

        # Generate the captions for the images
        idx = 0
        for k in tqdm(list(range(eval_data.num_batches)), desc='batch'):
            batch = eval_data.next_batch()
            caption_data = self.beam_search(sess, batch, vocabulary)

            # Only the last batch skips its trailing `fake_count` entries
            # (presumably padding added to fill the batch).
            fake_cnt = 0 if k < eval_data.num_batches - 1 \
                         else eval_data.fake_count
            for i in range(eval_data.batch_size - fake_cnt):
                word_idxs = caption_data[i][0].sentence
                caption = vocabulary.get_sentence(word_idxs)
                results.append({
                    'image_id': eval_data.image_ids[idx],
                    'caption': caption
                })
                idx += 1

                # Save the result in an image file, if requested
                if config.save_eval_result_as_image:
                    image_file = batch[i]
                    image_name = os.path.splitext(
                        os.path.basename(image_file))[0]
                    img = plt.imread(image_file)
                    plt.imshow(img)
                    plt.axis('off')
                    plt.title(caption)
                    plt.savefig(
                        os.path.join(config.eval_result_dir,
                                     image_name + '_result.jpg'))
                    # Close the figure so images do not pile up on the same
                    # axes across iterations.
                    plt.close()

        # json.dump emits text, so the file must be opened in text mode
        # ('wb' raises TypeError on Python 3).
        with open(config.eval_result_file, 'w') as fp:
            json.dump(results, fp)

        # Evaluate these captions
        eval_result_coco = eval_gt_coco.loadRes(config.eval_result_file)
        scorer = COCOEvalCap(eval_gt_coco, eval_result_coco)
        scorer.evaluate()
        print("Evaluation complete.")

    def test(self, sess, test_data, vocabulary):
        """ Test the model using any given images.

        Captions every image of `test_data` with beam search, saves each
        image with its caption as the title to
        ``<test_result_dir>/<image name>_result.jpg`` and writes all
        captions and their scores to `config.test_result_file` as CSV.

        Args:
            sess: Session handed to `beam_search`.
            test_data: Dataset providing `num_batches`, `batch_size`,
                `fake_count`, `image_files` and `next_batch()`.
            vocabulary: Object providing `get_sentence(word_idxs)`.
        """
        print("Testing the model ...")
        config = self.config

        if not os.path.exists(config.test_result_dir):
            # makedirs also creates missing parent directories.
            os.makedirs(config.test_result_dir)

        captions = []
        scores = []

        # Generate the captions for the images
        for k in tqdm(list(range(test_data.num_batches)), desc='path'):
            batch = test_data.next_batch()
            caption_data = self.beam_search(sess, batch, vocabulary)

            # Only the last batch skips its trailing `fake_count` entries
            # (presumably padding added to fill the batch).
            fake_cnt = 0 if k < test_data.num_batches - 1 \
                         else test_data.fake_count
            for i in range(test_data.batch_size - fake_cnt):
                word_idxs = caption_data[i][0].sentence
                score = caption_data[i][0].score
                caption = vocabulary.get_sentence(word_idxs)
                captions.append(caption)
                scores.append(score)

                # Save the result in an image file
                image_file = batch[i]
                image_name = os.path.splitext(
                    os.path.basename(image_file))[0]
                img = plt.imread(image_file)
                plt.imshow(img)
                plt.axis('off')
                plt.title(caption)
                plt.savefig(
                    os.path.join(config.test_result_dir,
                                 image_name + '_result.jpg'))
                # Close the figure so images do not pile up on the same
                # axes across iterations.
                plt.close()

        # Save the captions to a file
        results = pd.DataFrame({
            'image_files': test_data.image_files,
            'caption': captions,
            'prob': scores
        })
        results.to_csv(config.test_result_file)
        print("Testing complete.")

    def beam_search(self, sess, image_files, vocabulary):
        """Use beam search to generate the captions for a batch of images.

        Args:
            sess: Session used to run the graph.
            image_files: Batch handed to `self.image_loader.load_images`.
            vocabulary: Object whose `words[w]` gives the token for index w;
                the token '.' marks a finished caption.

        Returns:
            A list with `config.batch_size` entries, each the result of
            `TopN.extract(sort=True)` for that image.
        """
        # Feed in the images to get the contexts and the initial LSTM states
        config = self.config
        images = self.image_loader.load_images(image_files)
        contexts, initial_memory, initial_output = sess.run(
            [self.conv_feats, self.initial_memory, self.initial_output],
            feed_dict={self.images: images})

        # Per image: one container for unfinished beams (seeded with an
        # empty sentence of score 1.0) and one for finished captions.
        partial_caption_data = []
        complete_caption_data = []
        for k in range(config.batch_size):
            initial_beam = CaptionData(sentence=[],
                                       memory=initial_memory[k],
                                       output=initial_output[k],
                                       score=1.0)
            partial_caption_data.append(TopN(config.beam_size))
            partial_caption_data[-1].push(initial_beam)
            complete_caption_data.append(TopN(config.beam_size))

        # Run beam search
        for idx in range(config.max_caption_length):
            # Take the current beams out and empty the containers so they
            # can collect this step's extensions.
            partial_caption_data_lists = []
            for k in range(config.batch_size):
                data = partial_caption_data[k].extract()
                partial_caption_data_lists.append(data)
                partial_caption_data[k].reset()

            # Step 0 has a single (empty) beam per image; later steps
            # process `beam_size` beams per image.
            # NOTE(review): for idx > 0 this assumes every extracted list
            # holds at least `beam_size` beams - confirm against TopN.
            num_steps = 1 if idx == 0 else config.beam_size
            for b in range(num_steps):
                if idx == 0:
                    # No previous word yet: feed index 0 for every image.
                    last_word = np.zeros((config.batch_size), np.int32)
                else:
                    last_word = np.array([
                        pcl[b].sentence[-1]
                        for pcl in partial_caption_data_lists
                    ], np.int32)

                # State carried by the b-th beam of every image.
                last_memory = np.array(
                    [pcl[b].memory for pcl in partial_caption_data_lists],
                    np.float32)
                last_output = np.array(
                    [pcl[b].output for pcl in partial_caption_data_lists],
                    np.float32)

                memory, output, scores = sess.run(
                    [self.memory, self.output, self.probs],
                    feed_dict={
                        self.contexts: contexts,
                        self.last_word: last_word,
                        self.last_memory: last_memory,
                        self.last_output: last_output
                    })

                # Find the beam_size most probable next words
                for k in range(config.batch_size):
                    caption_data = partial_caption_data_lists[k][b]
                    words_and_scores = list(enumerate(scores[k]))
                    # Highest score first.
                    words_and_scores.sort(key=lambda x: -x[1])
                    # One extra candidate is kept beyond beam_size.
                    words_and_scores = words_and_scores[0:config.beam_size + 1]

                    # Append each of these words to the current partial caption
                    for w, s in words_and_scores:
                        sentence = caption_data.sentence + [w]
                        # A beam's score is the product of its word scores.
                        score = caption_data.score * s
                        beam = CaptionData(sentence, memory[k], output[k],
                                           score)
                        if vocabulary.words[w] == '.':
                            complete_caption_data[k].push(beam)
                        else:
                            partial_caption_data[k].push(beam)

        results = []
        for k in range(config.batch_size):
            # Fall back to the unfinished beams when no caption ended in '.'.
            if complete_caption_data[k].size() == 0:
                complete_caption_data[k] = partial_caption_data[k]
            results.append(complete_caption_data[k].extract(sort=True))

        return results

    def save(self, sess):
        """ Save the model.

        Exports the session's graph and variables as a SavedModel tagged
        `tf.saved_model.SERVING` into `config.save_dir`. Any existing
        directory at that path is deleted first.

        Args:
            sess: Session whose graph and variables are exported.
        """
        config = self.config
        save_dir = config.save_dir
        # NOTE(review): the old export is removed presumably because
        # SavedModelBuilder requires a fresh directory - confirm.
        if tf.gfile.Exists(save_dir):
            tf.gfile.DeleteRecursively(save_dir)
        # print() does not apply %-formatting to extra arguments, so the
        # path has to be interpolated explicitly.
        print('target exporting save_dir: %s' % save_dir)

        saved_model = tf.saved_model.builder.SavedModelBuilder(save_dir)
        # NOTE(review): private API; presumably needed because the graph is
        # finalized while the builder still has to add ops - confirm.
        sess.graph._unsafe_unfinalize()
        saved_model.add_meta_graph_and_variables(sess,
                                                 [tf.saved_model.SERVING],
                                                 clear_devices=True)
        saved_model.save()
        print("Model saved.")

    def load(self, sess, model_file=None):
        """ Load the model.

        Restores the variables from the latest checkpoint found by
        `tf.train.latest_checkpoint`.

        Args:
            sess: Session to restore the variables into.
            model_file: Location passed to `tf.train.latest_checkpoint`.
                When None, `config.checkpoint_dir` is used.

        Raises:
            ValueError: If no checkpoint is found at that location.
        """
        saver = tf.train.Saver()
        if model_file is not None:
            save_path = model_file
        else:
            save_path = self.config.checkpoint_dir

        ckpt = tf.train.latest_checkpoint(save_path)
        if ckpt is None:
            # Fail here with the searched location instead of letting
            # saver.restore() choke on a None path.
            raise ValueError("No checkpoint found in %s" % save_path)
        saver.restore(sess, ckpt)

        print("The model from %s loaded" % ckpt)

    def load_cnn(self, session, data_path, ignore_missing=True):
        """ Load a pretrained CNN model.

        The file at `data_path` must hold a pickled mapping of
        ``{op_name: {param_name: value}}``; each value is assigned to the
        variable `param_name` inside variable scope `op_name`.

        Args:
            session: Session used to run the assign ops.
            data_path: Path of the ``.npy`` weights file.
            ignore_missing: When True (the default), a ValueError raised
                while looking up or assigning a variable is skipped
                silently. When False it is re-raised.
        """
        print("Loading the CNN from %s..." % data_path)
        # allow_pickle is required by NumPy >= 1.16.3 for pickled dicts;
        # encoding='latin1' lets Python 3 read files pickled by Python 2.
        data_dict = np.load(data_path, allow_pickle=True,
                            encoding='latin1').item()
        count = 0
        for op_name in tqdm(data_dict):
            with tf.variable_scope(op_name, reuse=True):
                # .items() works on Python 2 and 3 (.iteritems() is 2-only).
                for param_name, data in data_dict[op_name].items():
                    try:
                        var = tf.get_variable(param_name)
                        session.run(var.assign(data))
                        count += 1
                    except ValueError:
                        if not ignore_missing:
                            raise
        print("%d tensors loaded." % count)