コード例 #1
0
    def evaluate(self, queue, dataset, global_step=None, generate_results=False, tag=''):
        """Run one multiple-choice (MC) evaluation pass and log total accuracy.

        Resets the streaming-accuracy op, iterates over the whole dataset in
        batches, then reads the accumulated accuracy from the session.
        """
        log.info("Evaluate Phase")
        self.sess.run(self.eval_reset)

        num_steps = int(len(dataset) / self.model.batch_size + 1)
        for step in range(num_steps):
            loss, pred, logit, sim, input_gate = self.eval_single_step(queue)
            # Light progress logging every 50 batches.
            if step % 50 == 0:
                log.infov("{}/{}".format(step, num_steps))
        total_acc = self.sess.run([self.accuracy])
        log.infov("[MC] total accuracy: {} ".format(total_acc) + tag)
コード例 #2
0
    def evaluate(self,
                 queue,
                 dataset,
                 global_step=None,
                 generate_results=False,
                 tag=''):
        """Evaluate retrieval metrics over the full dataset.

        Builds an all-pairs margin matrix by scoring every (row-batch,
        column-batch) tile, then computes R@1/R@5/R@10 and the median rank
        of each item's ground-truth match.

        Args:
            queue: batch queue consumed by eval_single_step.
            dataset: evaluation dataset; len(dataset) sets the matrix size.
            global_step: training step written into the results log.
            generate_results: if True, append metrics to a TSV file on Drive.
            tag: unused in this body; kept for interface compatibility with
                the other trainers' evaluate() methods.
        """
        log.info("Evaluate Phase")
        batch_size = self.model.batch_size
        dataset_length = len(dataset)
        iter_num = int(dataset_length / batch_size)
        margin_mat = np.zeros([dataset_length, dataset_length])

        print('Iteration number  ', iter_num)

        for i in range(iter_num):
            for j in range(iter_num):
                loss, logit, output_score = self.eval_single_step(queue)
                # Each step fills one (row-batch i, column-batch j) tile.
                margin_mat[i * batch_size:(i + 1) * batch_size,
                           j * batch_size:(j + 1) * batch_size] = output_score
            if i % 10 == 0:
                # BUG FIX: progress was logged against an unrelated
                # iter_length; iter_num is the actual loop bound.
                log.infov("{}/{}, margin-loss: {}".format(
                    i, iter_num, loss))
        # Diagonal entries are each item's score against its own pair.
        acc = np.mean(np.diagonal(margin_mat))
        rank_list = []
        for i in range(dataset_length):
            col = -margin_mat[i, :]  # negate so argsort ranks descending
            order = col.argsort()
            ranks = order.argsort()
            rank_list.append(ranks[i])  # rank of the ground-truth match
        c = [x for x in rank_list if x < 1]
        c5 = [x for x in rank_list if x < 5]
        c10 = [x for x in rank_list if x < 10]
        medr = np.median(rank_list)
        log.infov("[RET] R@1: {}, R@5: {}, R@10: {}, medr: {medr:.4f}".format(
            len(c), len(c5), len(c10), medr=medr))
        log.infov("[RET] total accuracy: {acc:.5f}".format(acc=np.sum(acc)))

        if generate_results:
            drive_dir = '/content/drive/My Drive/Graduation Project/Output/checkpoint_lsmdc_InceptionResNet_squeeze_dropout_1024/'
            with open(drive_dir + 'evaluate_log.tsv', 'a') as f:
                f.write(
                    '[RET] Step [{}]\t, R@1: {}\t, R@5: {}\t, R@10: {}\t, medr: {}\t, acc: {}\n'
                    .format(global_step, len(c), len(c5), len(c10), medr,
                            np.sum(acc)))
コード例 #3
0
    def read_feat_from_hdf5(self):
        """Open the configured video-feature HDF5 file and return the handle.

        Returns:
            h5py.File: read-only handle to the feature file.

        Raises:
            ValueError: for any net/layer configuration other than
                resnet/pool5. (The original code left `feature_file`
                unbound in that case and crashed with an UnboundLocalError
                at the log call below.)

        Side effects:
            When self.more_data is truthy, also opens MSR_RESNET_pool5.hdf5
            and keeps the handle on self.msr_h5.
        """
        if self.image_feature_net.lower() == 'resnet' and self.layer.lower() == 'pool5':
            # Audio-augmented features are stored as "<NET>_<layer>wav.hdf5"
            # (no separator before "wav", matching the original naming).
            suffix = "wav.hdf5" if self.wav_data else ".hdf5"
            feature_file = os.path.join(
                VIDEO_FEATURE_DIR,
                self.image_feature_net.upper() + "_" + self.layer.lower() + suffix)
            assert_exists(feature_file)
        else:
            # BUG FIX: fail loudly for unsupported configs instead of
            # falling through with feature_file undefined.
            raise ValueError(
                "Unsupported feature configuration: net={}, layer={}".format(
                    self.image_feature_net, self.layer))

        log.info("Load %s hdf5 file : %s", self.image_feature_net.upper(), feature_file)

        if self.more_data:
            # Open read-only explicitly; h5py's historical default mode
            # ('a') could create or modify the file.
            self.msr_h5 = h5py.File(
                os.path.join(VIDEO_FEATURE_DIR, "MSR_RESNET_pool5.hdf5"), 'r')

        return h5py.File(feature_file, 'r')
コード例 #4
0
    def evaluate(self, queue, dataset, generate_results, tag):
        """Evaluate fill-in-the-blank (FIB) accuracy over the dataset.

        Runs one pass over the dataset, collecting per-prediction input-gate
        values, and logs the streaming accuracy accumulated in the session.

        Args:
            queue: batch queue consumed by eval_single_step.
            dataset: provides idx2word for decoding prediction/target indices.
            generate_results: unused in this body; kept for interface
                compatibility with the other trainers' evaluate() methods.
            tag: unused in this body; kept for interface compatibility.
        """
        log.info("Evaluate Phase")
        iter_length = int(len(dataset) / self.model.batch_size + 1)
        self.sess.run(self.eval_reset)

        total_input_gate = []
        for i in range(iter_length):
            loss, predictions, target_indices, logits, ids, input_gate = self.eval_single_step(queue)
            # Record each predicted word together with its four gate values.
            for j, key in enumerate(ids):
                total_input_gate.append((str(dataset.idx2word[predictions[j]]),
                                         input_gate[0][j], input_gate[1][j],
                                         input_gate[2][j], input_gate[3][j]))

            if i % 100 == 0:
                # Show one sample target/prediction pair for a progress check.
                target_word = dataset.idx2word[target_indices[0]]
                output_word = dataset.idx2word[predictions[0]]
                log.infov("[FIB {step:3d}/{total_length:3d}] target: {target}, prediction: {prediction}".format(
                    step=i, total_length=iter_length, target=target_word, prediction=output_word))
        total_acc = self.sess.run(self.accuracy)
        # BUG FIX: corrected "accurycy" -> "accuracy" in the log message.
        log.infov("[FIB] total accuracy: {acc:.5f}".format(acc=total_acc))
コード例 #5
0
    def evaluate(self,
                 queue,
                 dataset,
                 global_step=None,
                 generate_results=False,
                 tag=''):
        """Evaluate retrieval metrics on a fixed 1000-item subset.

        Fills a 1000x1000 all-pairs margin matrix batch-tile by batch-tile,
        then computes R@1/R@5/R@10 and the median rank of each item's
        ground-truth match.

        Args:
            queue: batch queue consumed by eval_single_step.
            dataset: evaluation dataset (only its length was previously used,
                and only for a mislabelled progress log).
            global_step: training step written into the results log.
            generate_results: if True, append metrics to ./checkpoint/evaluate_log.tsv.
            tag: unused in this body; kept for interface compatibility with
                the other trainers' evaluate() methods.
        """
        log.info("Evaluate Phase")
        batch_size = self.model.batch_size
        # The evaluation protocol here scores a fixed 1000x1000 pair matrix.
        iter_num = int(1000 / batch_size)

        margin_mat = np.zeros([1000, 1000])
        for i in range(iter_num):
            for j in range(iter_num):
                loss, logit, output_score = self.eval_single_step(queue)
                # Each step fills one (row-batch i, column-batch j) tile.
                margin_mat[i * batch_size:(i + 1) * batch_size,
                           j * batch_size:(j + 1) * batch_size] = output_score
            if i % 10 == 0:
                # BUG FIX: progress was logged against an unrelated
                # iter_length; iter_num is the actual loop bound.
                log.infov("{}/{}, margin-loss: {}".format(
                    i, iter_num, loss))
        # Diagonal entries are each item's score against its own pair.
        acc = np.mean(np.diagonal(margin_mat))
        rank_list = []
        for i in range(1000):
            col = -margin_mat[i, :]  # negate so argsort ranks descending
            order = col.argsort()
            ranks = order.argsort()
            rank_list.append(ranks[i])  # rank of the ground-truth match
        c = [x for x in rank_list if x < 1]
        c5 = [x for x in rank_list if x < 5]
        c10 = [x for x in rank_list if x < 10]
        medr = np.median(rank_list)
        log.infov("[RET] R@1: {}, R@5: {}, R@10: {}, medr : {medr:.4f}".format(
            len(c), len(c5), len(c10), medr=medr))
        log.infov("[RET] total accuracy: {acc:.5f}".format(acc=np.sum(acc)))

        if generate_results:
            with open('./checkpoint/evaluate_log.tsv', 'a') as f:
                f.write('[{}]\t{}\t{}\t{}\t{}\t{}\n'.format(
                    global_step, len(c), len(c5), len(c10), medr, np.sum(acc)))
コード例 #6
0
    def test(self, queue, dataset):
        """Score every batch pair and return items ranked against column 0.

        Fills the all-pairs margin matrix, then argsorts the scores in the
        first column (ascending) and returns the resulting index order.

        Args:
            queue: batch queue consumed by test_single_step.
            dataset: test dataset; len(dataset) sets the matrix size.

        Returns:
            np.ndarray of indices ordering all items by their score against
            the first item (ascending).
        """
        log.info("Testing Phase")
        batch_size = self.model.batch_size
        print('batch size = ', batch_size)

        dataset_length = len(dataset)
        iter_num = int(dataset_length / batch_size)
        margin_mat = np.zeros([dataset_length, dataset_length])

        print('Testing on {} videos'.format(dataset_length))
        for i in range(iter_num):
            for j in range(iter_num):
                loss, logit, output_score = self.test_single_step(queue)
                # Each step fills one (row-batch i, column-batch j) tile.
                margin_mat[i * batch_size:(i + 1) * batch_size,
                           j * batch_size:(j + 1) * batch_size] = output_score

        print('Scores = ', margin_mat[:, 0])
        scores = margin_mat[:, 0]
        ranks = scores.argsort()
        print('Ranks = ', ranks)
        print('Finished Testing on {} videos'.format(dataset_length))
        return ranks
コード例 #7
0
    def build_word_vocabulary(self):
        """Load the shared vocabulary (word_matrix, word2idx, idx2word) from hkl files.

        All three files are existence-checked up front (so a missing file
        fails before any attribute is set), then loaded and assigned to the
        corresponding instance attributes.
        """
        specs = [('common_word_matrix.hkl', 'word_matrix'),
                 ('common_word_to_index.hkl', 'word2idx'),
                 ('common_index_to_word.hkl', 'idx2word')]

        # Resolve and verify all paths first, matching the original's
        # assert-everything-before-loading behavior.
        paths = []
        for filename, attr in specs:
            path = os.path.join(VOCABULARY_DIR, filename)
            assert_exists(path)
            paths.append((path, attr))

        for path, attr in paths:
            # NOTE(review): files are opened in text mode ('r') as in the
            # original; if these hkl files are HDF5-backed this may need
            # 'rb' under Python 3 — confirm against the hickle version in use.
            with open(path, 'r') as f:
                setattr(self, attr, hkl.load(f))
            log.info("Load %s from hkl file : %s", attr, path)
コード例 #8
0
def main(argv):
    """Build datasets, model, and trainer, then run the training loop.

    Creates train/validation LSMDC datasets, wires up batch queues, builds
    the model graph for the configured task tag, optionally restores from a
    checkpoint, and trains with periodic logging, evaluation, and saving.

    Args:
        argv: command-line arguments (unused; required by tf.app.run-style
            entry points).
    """
    model_config = ModelConfig()
    train_config = TrainConfig()

    # Output root lives on a mounted Google Drive (Colab-style setup).
    drive_dir = '/content/drive/My Drive/Graduation Project/Output/checkpoint_lsmdc_InceptionResNet_squeeze_dropout/'
    base_dir = os.path.join(drive_dir,
                            train_config.train_tag + "_" + FLAGS.tag)
    checkpoint_dir = os.path.join(base_dir, "model.ckpt")
    logits_dir = os.path.join(base_dir, "logits_")
    if not os.path.exists(base_dir):
        os.mkdir(base_dir)
    # Train and validation splits share the same feature/config settings.
    train_dataset = DatasetLSMDC(
        dataset_name='train',
        image_feature_net=model_config.image_feature_net,
        layer=model_config.layer,
        max_length=model_config.caption_length,
        max_vid_length=model_config.video_steps,
        max_n_videos=None,
        data_type=train_config.train_tag,
        attr_length=model_config.attr_length,
        wav_data=model_config.wav_data)
    validation_dataset = DatasetLSMDC(
        dataset_name='validation',
        image_feature_net=model_config.image_feature_net,
        layer=model_config.layer,
        max_length=model_config.caption_length,
        max_vid_length=model_config.video_steps,
        max_n_videos=None,
        data_type=train_config.train_tag,
        attr_length=model_config.attr_length,
        wav_data=model_config.wav_data)
    # Vocabulary is built once on train and shared with validation so both
    # splits use identical word indices.
    train_dataset.build_word_vocabulary()
    validation_dataset.share_word_vocabulary_from(train_dataset)
    # Retrieval uses its own (typically different) batch size.
    if train_config.train_tag == 'RET':
        model_config.batch_size = model_config.ret_batch_size
    train_iter = train_dataset.batch_iter(train_config.num_epochs,
                                          model_config.batch_size)
    train_queue = BatchQueue(train_iter, name='train')
    if train_config.train_tag == 'RET':
        # batch_tile presumably produces the all-pairs batches the RET
        # evaluate() expects — confirm against BatchQueue/dataset code.
        val_queue = BatchQueue(validation_dataset.batch_tile(
            20 * train_config.num_epochs, model_config.batch_size),
                               name='validation')
    else:
        val_iter = validation_dataset.batch_iter(20 * train_config.num_epochs,
                                                 model_config.batch_size,
                                                 shuffle=False)
        val_queue = BatchQueue(val_iter, name='validation')
    train_queue.start_threads()
    val_queue.start_threads()

    g = tf.Graph()
    with g.as_default():
        # Exposed as globals (e.g. for interactive/Colab inspection).
        global session, model, trainer
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        session = tf.Session(graph=g, config=tf_config)

        # Model and trainer classes are selected by the task tag.
        model = MODELS[train_config.train_tag](model_config,
                                               train_dataset.word_matrix)

        log.info("Build the model...")
        model.build_model(**model.get_placeholder())
        trainer = MODEL_TRAINERS[train_config.train_tag](train_config, model,
                                                         session)

        steps_in_epoch = int(np.ceil(len(train_dataset) / model.batch_size))

        saver = tf.train.Saver(max_to_keep=10)

        if train_config.load_from_ckpt is not None:
            log.info("Restoring parameter from {}".format(
                train_config.load_from_ckpt))
            # Initialize first so variables absent from the checkpoint
            # still get values before restore overwrites the rest.
            session.run(tf.global_variables_initializer())
            saver.restore(session, train_config.load_from_ckpt)
        else:
            session.run(tf.global_variables_initializer())
        for step in range(train_config.max_steps):
            # Steps up to last_step_taken are replayed (to advance the data
            # queues) but their logging/eval/save side effects are skipped —
            # this is how a resumed run fast-forwards.
            skip = False
            if step <= train_config.last_step_taken:
                skip = True

            if step % 1000 == 0:
                print("Before Run single step = ", step)
            step_result = trainer.run_single_step(skip,
                                                  queue=train_queue,
                                                  is_train=True)
            if step % 1000 == 0:
                print("After Run single step = ", step)

            if not skip:
                if step % train_config.steps_per_logging == 0:
                    step_result['steps_in_epoch'] = steps_in_epoch
                    trainer.log_step_message(**step_result)

                if step % train_config.steps_per_evaluate == 0 or train_config.print_evaluate:
                    trainer.evaluate(queue=val_queue,
                                     dataset=validation_dataset,
                                     global_step=step,
                                     generate_results=True,
                                     tag=FLAGS.tag)
                    # Checkpoints are saved at half the evaluation frequency.
                    if step % (2 * train_config.steps_per_evaluate) == 0:
                        print("SAVE MODEL" + FLAGS.tag)
                        saver.save(session, checkpoint_dir, global_step=step)

                # Final step: one last evaluation and checkpoint.
                if step == train_config.max_steps - 1:
                    trainer.evaluate(queue=val_queue,
                                     dataset=validation_dataset,
                                     global_step=step,
                                     generate_results=True,
                                     tag=FLAGS.tag)
                    print("SAVE MODEL" + FLAGS.tag)
                    saver.save(session, checkpoint_dir, global_step=step)
                    print("Finished Epoch")
コード例 #9
0
 def start_threads(self):
     """Spawn the daemon worker thread that feeds this queue.

     The thread runs self.thread_main and is marked daemon so it never
     blocks interpreter shutdown.
     """
     log.info("Start {} dataset Queue".format(self.name))
     worker = threading.Thread(target=self.thread_main, daemon=True)
     self.thread = worker
     worker.start()