Code example #1
    # Shared imports for these listings: datetime, glob, os, shutil,
    # numpy as np, tensorflow as tf, tqdm, plus the project's `config`
    # module and `get_batch` input pipeline (not shown here).
    def eval_on_dataset(self, ckpt_path, train_dataset):
        """Evaluate a saved checkpoint on either the train or the test split."""
        x = tf.placeholder(shape=[None, self.input_dims[0], self.input_dims[1], 3], dtype=tf.float32, name='input')
        label = tf.placeholder(shape=[None, self.num_classes], dtype=tf.float32, name='label')

        # Build the model in inference mode (is_training=False).
        _, output = self.model(x, False)

        # Per-batch counts of correct top-1 and top-5 predictions.
        top_1_correct = tf.reduce_sum(tf.cast(tf.equal(tf.argmax(output, axis=-1), tf.argmax(label, axis=-1)), tf.float32))
        top_5_correct = tf.reduce_sum(tf.cast(tf.nn.in_top_k(output, tf.argmax(label, axis=-1), 5), tf.float32))
        
        ### TF Records reading ###
        if train_dataset:
            data_initializer, img_data, label_data = get_batch(config.train_tfrecord_list, config.batch_size, augment=False, is_validation_set=False)
            steps_per_epoch = int(np.ceil(config.train_img_cnt / config.batch_size))
            total_imgs = config.train_img_cnt
        else:
            data_initializer, img_data, label_data = get_batch(config.test_tfrecord_list, config.batch_size, augment=False, is_validation_set=False)
            steps_per_epoch = int(np.ceil(config.test_img_cnt / config.batch_size))
            total_imgs = config.test_img_cnt

        restorer = tf.train.Saver()                    
        
        ### Restrict GPU memory usage if possible. ###
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4
        
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            restorer.restore(sess, ckpt_path)
            top_1_acc = 0
            top_5_acc = 0

            sess.run(data_initializer.initializer)

            for _ in tqdm(range(steps_per_epoch)):
                img_ip, label_ip = sess.run([img_data, label_data])
                t_1, t_5 = sess.run([top_1_correct, top_5_correct], feed_dict={x: img_ip, label: label_ip})
                top_1_acc += t_1
                top_5_acc += t_5

            # Convert correct-prediction counts to percentages.
            top_1_acc = top_1_acc * 100 / total_imgs
            top_5_acc = top_5_acc * 100 / total_imgs

            split_name = "train" if train_dataset else "test"
            print("Top-1 Accuracy on {} set: {:.2f}".format(split_name, top_1_acc))
            print("Top-5 Accuracy on {} set: {:.2f}".format(split_name, top_5_acc))
Code example #2
    def dry_run_clr(self):
        """Short training dry run that logs loss and learning rate for inspection."""
        # Timestamp used to name the summary directory; replace path-unfriendly characters.
        time = str(datetime.datetime.now())
        time = time.replace(":", "_").replace(" ", "_").replace("-", "_").replace(".", "_")
        path = config.summary_path + "/" + self.model_name
        os.makedirs(path, exist_ok=True)
        summaries_path = config.summary_path + "/" + self.model_name + "/" + time + "_dry_run_clr"
        os.makedirs(summaries_path, exist_ok=True)
        
        # Snapshot the current source files next to the summaries for reproducibility.
        current_files = glob.glob("*")
        for current_file in current_files:
            if os.path.isfile(current_file):
                shutil.copy2(current_file, summaries_path)
        

        model_files = glob.glob("./models/*")
        os.makedirs(summaries_path + "/models", exist_ok=True)
        for model_file in model_files:
            if self.model_name in model_file.lower():
                shutil.copy2(model_file, summaries_path + "/models/")
        
        x = tf.placeholder(shape=[None, self.input_dims[0], self.input_dims[1], 3], dtype=tf.float32, name='input')
        label = tf.placeholder(shape=[None, self.num_classes], dtype=tf.float32, name='label')
        is_training = tf.placeholder(dtype=tf.bool, name='is_training')
        
        logits, output = self.model(x, is_training)
        
        total_loss, _, _, _, _, _, _ = self.get_loss_and_accuracy(label, logits, output)
        
        global_step = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False, name='global_step')
        lr = tf.train.exponential_decay(config.start_lr, global_step, config.decay_steps, config.decay_rate)
        
        # Run the UPDATE_OPS collection (e.g. batch-norm moving averages) before each step.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            opt = tf.train.AdamOptimizer(lr, beta1=0.9).minimize(total_loss, global_step)

        train_initializer, img_train_data, label_train_data = get_batch(config.train_tfrecord_list, config.batch_size, augment=True)

        ### Restrict the GPU usage for training if possible. ###
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4
        
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            writer_train = tf.summary.FileWriter(summaries_path)
            #writer_train.add_graph(sess.graph)
            # Initialize the dataset iterator once, then re-initialize whenever it is exhausted.
            sess.run(train_initializer.initializer)
            for i in range(config.total_steps):
                try:
                    imgs, labels = sess.run([img_train_data, label_train_data])
                    _, l = sess.run([opt, total_loss], feed_dict={x: imgs, label: labels, is_training: True})

                    self.custom_summary(writer_train, tf.train.global_step(sess, global_step), None, l, None, None, None, None, None, sess.run(lr))

                    print("Step: {}, Loss: {:.2f}".format(tf.train.global_step(sess, global_step), l))

                except tf.errors.OutOfRangeError:
                    # Dataset exhausted before total_steps was reached: restart it.
                    sess.run(train_initializer.initializer)
            print("Iteration Completed.")
Code example #3
    def train(self,
              resume=False,
              resume_from_eps=0,
              resume_from_gstep=0,
              restore_ckpt=None):
        ### Preprocessing stuff ###
        time = str(datetime.datetime.now())
        time = time.replace(":", "_").replace(" ", "_").replace("-", "_").replace(".", "_")
        path = config.summary_path + "/" + self.model_name
        os.makedirs(path, exist_ok=True)
        summaries_path = config.summary_path + "/" + self.model_name + "/" + time + "_training_summary"
        os.makedirs(summaries_path, exist_ok=True)
        ckpt_path = summaries_path + "/best_checkpoint/"
        os.makedirs(ckpt_path, exist_ok=True)

        current_files = glob.glob("*")
        for current_file in current_files:
            if os.path.isfile(current_file):
                shutil.copy2(current_file, summaries_path)

        model_files = glob.glob("./models/*")
        os.makedirs(summaries_path + "/models", exist_ok=True)
        for model_file in model_files:
            if self.model_name in model_file.lower():
                shutil.copy2(model_file, summaries_path + "/models/")

        ### Placeholder definitions ###
        x = tf.placeholder(
            shape=[None, self.input_dims[0], self.input_dims[1], 3],
            dtype=tf.float32,
            name='input')
        label = tf.placeholder(shape=[None, self.num_classes],
                               dtype=tf.float32,
                               name='label')
        is_training = tf.placeholder(dtype=tf.bool, name='is_training')
        lr = tf.placeholder(dtype=tf.float32, name='learning_rate')

        ### Model definitions ###
        logits, output = self.model(x, is_training)

        ### Loss and accuracy definitions ###
        total_loss, loss_cls, loss_reg, top_1_accuracy, top_5_accuracy, top_1_correct, top_5_correct = self.get_loss_and_accuracy(
            label, logits, output)

        ### Optimizer definitions ###
        global_step = tf.Variable(initial_value=resume_from_gstep,
                                  dtype=tf.int32,
                                  trainable=False,
                                  name='global_step')

        # Run the UPDATE_OPS collection (e.g. batch-norm moving averages) before each step.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            opt = tf.train.AdamOptimizer(lr, beta1=0.9).minimize(total_loss, global_step)

        ### TF Records reading ###
        train_initializer, img_train_data, label_train_data = get_batch(
            config.train_tfrecord_list, config.batch_size, augment=True)
        img_val_data, label_val_data = get_batch(config.test_tfrecord_list,
                                                 config.batch_size,
                                                 augment=False,
                                                 is_validation_set=True)
        test_initializer, img_test_data, label_test_data = get_batch(
            config.test_tfrecord_list, config.batch_size, augment=False)

        ### For Learning rate scheduling ###
        steps_per_epoch = int(np.ceil(config.train_img_cnt /
                                      config.batch_size))
        self.step_size = self.step_factor * steps_per_epoch

        ### Checkpoint Saving mechanism ###
        saver = tf.train.Saver()
        restorer = tf.train.Saver()  # restores all variables when resuming

        ### Best accuracy is set to 0.0 before starting training. ###
        best_acc = 0

        ### Restrict the GPU usage for training if possible. ###
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.4

        ### Start training. ###
        with tf.Session(config=sess_config) as sess:
            if resume:
                restorer.restore(sess, restore_ckpt)
            else:
                sess.run(tf.global_variables_initializer())
            writer_train = tf.summary.FileWriter(summaries_path + "/train")
            writer_test = tf.summary.FileWriter(summaries_path + "/test")
            writer_valid = tf.summary.FileWriter(summaries_path + "/valid")
            writer_train.add_graph(sess.graph)

            for eps in range(resume_from_eps, config.epochs):
                file = open(summaries_path + "/log.txt", 'a')
                sess.run(train_initializer.initializer)

                for batch in range(steps_per_epoch):
                    # Decay the CLR ceiling once per full cycle (2 * step_size steps).
                    if (tf.train.global_step(sess, global_step) + 1) % (2 * self.step_size) == 0:
                        if self.maxLR > self.minLR:
                            multiplier = (tf.train.global_step(sess, global_step) + 1) // (2 * self.step_size)
                            self.maxLR = self.maxLR * (0.99 ** multiplier)
                        else:
                            self.maxLR = self.minLR

                    imgs_tr, label_tr = sess.run([img_train_data, label_train_data])
                    _, l, l_cls, l_reg, b_acc_t_1, b_acc_t_5 = sess.run(
                        [opt, total_loss, loss_cls, loss_reg, top_1_accuracy, top_5_accuracy],
                        feed_dict={
                            x: imgs_tr,
                            label: label_tr,
                            is_training: True,
                            lr: self.get_clr(tf.train.global_step(sess, global_step))
                        })

                    self.custom_summary(
                        writer_train, tf.train.global_step(sess, global_step),
                        None, l, l_cls, l_reg, b_acc_t_1, b_acc_t_5,
                        image=None,
                        lr=self.get_clr(tf.train.global_step(sess, global_step)))

                    print(
                        "Epoch: {}/{}, Batch No.: {}/{}, Total Loss: {:.2f}, Loss Cls: {:.2f}, Loss Reg: {:.2f}, Top-1 Accuracy: {:.2f}, Top-5 Accuracy: {:.2f}"
                        .format(eps + 1, config.epochs, batch + 1,
                                steps_per_epoch, l, l_cls, l_reg, b_acc_t_1,
                                b_acc_t_5))
                    file.write(
                        "Epoch: {}/{}, Batch No.: {}/{}, Total Loss: {:.2f}, Loss Cls: {:.2f}, Loss Reg: {:.2f}, Top-1 Accuracy: {:.2f}, Top-5 Accuracy: {:.2f}\n"
                        .format(eps + 1, config.epochs, batch + 1,
                                steps_per_epoch, l, l_cls, l_reg, b_acc_t_1,
                                b_acc_t_5))

                    if (batch + 1) % 100 == 0:
                        # get loss on 10 validation batches
                        val_l_total = []
                        val_l_cls_total = []
                        val_l_reg_total = []
                        val_acc_t_1 = []
                        val_acc_t_5 = []
                        for _ in range(10):
                            imgs_val, label_val = sess.run(
                                [img_val_data, label_val_data])
                            val_l, val_l_cls, val_l_reg, v_acc_t_1, v_acc_t_5 = sess.run(
                                [
                                    total_loss, loss_cls, loss_reg,
                                    top_1_accuracy, top_5_accuracy
                                ],
                                feed_dict={
                                    x: imgs_val,
                                    label: label_val,
                                    is_training: False
                                })
                            val_l_total.append(val_l)
                            val_l_cls_total.append(val_l_cls)
                            val_l_reg_total.append(val_l_reg)
                            val_acc_t_1.append(v_acc_t_1)
                            val_acc_t_5.append(v_acc_t_5)

                        self.custom_summary(
                            writer_valid,
                            tf.train.global_step(sess, global_step), None,
                            np.mean(val_l_total), np.mean(val_l_cls_total),
                            np.mean(val_l_reg_total), np.mean(val_acc_t_1),
                            np.mean(val_acc_t_5), None, None)
                        print(
                            "Epoch: {}/{}, Last Train Loss: {:.2f}, Valid Loss: {:.2f}, Valid Top-1 Accuracy: {:.2f}, Valid Top-5 Accuracy: {:.2f}"
                            .format(eps + 1, config.epochs, l,
                                    np.mean(val_l_total), np.mean(val_acc_t_1),
                                    np.mean(val_acc_t_5)))
                        file.write(
                            "Epoch: {}/{}, Last Train Loss: {:.2f}, Valid Loss: {:.2f}, Valid Top-1 Accuracy: {:.2f}, Valid Top-5 Accuracy: {:.2f}\n"
                            .format(eps + 1, config.epochs, l,
                                    np.mean(val_l_total), np.mean(val_acc_t_1),
                                    np.mean(val_acc_t_5)))

                        ### Log 8 randomly chosen training images to TensorBoard.
                        for ctr in range(8):
                            idx = np.random.choice(imgs_tr.shape[0], 1)[0]
                            single_img = np.uint8(imgs_tr[idx] * 255)
                            self.custom_summary(
                                writer_train,
                                tf.train.global_step(sess, global_step),
                                'image_train_' + str(ctr), None, None, None,
                                None, None, single_img, None)

                test_l_total = []
                test_l_cls_total = []
                test_l_reg_total = []
                test_acc_t_1 = 0
                test_acc_t_5 = 0
                sess.run(test_initializer.initializer)
                test_steps = int(np.ceil(config.test_img_cnt / config.batch_size))
                for _ in tqdm(range(test_steps)):
                    imgs_test, label_test = sess.run(
                        [img_test_data, label_test_data])
                    test_l, test_l_cls, test_l_reg, t_cnt_t_1, t_cnt_t_5 = sess.run(
                        [
                            total_loss, loss_cls, loss_reg, top_1_correct,
                            top_5_correct
                        ],
                        feed_dict={
                            x: imgs_test,
                            label: label_test,
                            is_training: False
                        })
                    test_l_total.append(test_l)
                    test_l_cls_total.append(test_l_cls)
                    test_l_reg_total.append(test_l_reg)
                    test_acc_t_1 += t_cnt_t_1
                    test_acc_t_5 += t_cnt_t_5

                test_acc_t_1 = (test_acc_t_1 * 100 / config.test_img_cnt)
                test_acc_t_5 = (test_acc_t_5 * 100 / config.test_img_cnt)

                self.custom_summary(writer_test,
                                    tf.train.global_step(sess, global_step),
                                    None,
                                    np.mean(test_l_total),
                                    np.mean(test_l_cls_total),
                                    np.mean(test_l_reg_total),
                                    test_acc_t_1, test_acc_t_5, None, None)
                print(
                    "Epoch: {}/{}, Test Loss: {:.2f}, Top-1 Accuracy: {:.2f}, Top-5 Accuracy: {:.2f}"
                    .format(eps + 1, config.epochs, np.mean(test_l_total),
                            test_acc_t_1, test_acc_t_5))
                file.write(
                    "Epoch: {}/{}, Test Loss: {:.2f}, Top-1 Accuracy: {:.2f}, Top-5 Accuracy: {:.2f}\n"
                    .format(eps + 1, config.epochs, np.mean(test_l_total),
                            test_acc_t_1, test_acc_t_5))

                if test_acc_t_1 > best_acc:
                    save_path = ckpt_path + self.model_name + "_eps{}-test_loss_{:.2f}-test_top_1_acc_{:.2f}.ckpt".format(
                        eps + 1, np.mean(test_l_total), test_acc_t_1)
                    saver.save(sess, save_path)
                    print("Best Checkpoint saved.")
                    file.write("Best Checkpoint saved.\n")
                    best_acc = test_acc_t_1
                print("Epoch {} completed.".format(eps + 1))
                file.write("Epoch {} completed.\n".format(eps + 1))
                file.close()
            file = open(summaries_path + "/log.txt", 'a')
            print("Training Completed.")
            file.write("Training Completed.\n")
            file.close()
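The cyclical learning rate helper `self.get_clr` is not part of the listing. Below is a minimal sketch of a triangular schedule in the style of Smith's CLR paper, consistent with the `minLR`, `maxLR`, and `step_size` attributes that `train` manipulates; the author's exact shape may differ.

    def get_clr(self, global_step):
        """Hypothetical triangular CLR: ramps minLR -> maxLR -> minLR over each
        cycle of 2 * step_size steps (a reconstruction, not the original code)."""
        cycle = np.floor(1 + global_step / (2 * self.step_size))
        x = np.abs(global_step / self.step_size - 2 * cycle + 1)
        return self.minLR + (self.maxLR - self.minLR) * max(0.0, 1.0 - x)

Likewise, `custom_summary` is only visible through its call sites, which consistently pass `(writer, step, name, total_loss, loss_cls, loss_reg, top_1, top_5, image, lr)`. A sketch that logs whichever arguments are not None as TensorBoard summaries; the tag names and the PIL-based image encoding are assumptions.

    # Assumes additionally: import io; from PIL import Image
    def custom_summary(self, writer, step, name, total_loss, loss_cls, loss_reg,
                       top_1, top_5, image, lr):
        """Hypothetical reconstruction: write non-None values to TensorBoard."""
        scalars = [('total_loss', total_loss), ('loss_cls', loss_cls),
                   ('loss_reg', loss_reg), ('top_1_accuracy', top_1),
                   ('top_5_accuracy', top_5), ('learning_rate', lr)]
        for tag, value in scalars:
            if value is not None:
                summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=float(value))])
                writer.add_summary(summary, step)
        if image is not None:
            # Encode the HxWx3 uint8 array as PNG for the image summary.
            buf = io.BytesIO()
            Image.fromarray(image).save(buf, format='PNG')
            img_sum = tf.Summary.Image(encoded_image_string=buf.getvalue(),
                                       height=image.shape[0], width=image.shape[1])
            writer.add_summary(tf.Summary(value=[tf.Summary.Value(tag=name, image=img_sum)]), step)
        writer.flush()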