def main(cfg):

    # cfg.num_classes = 1001
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    output_dir = cfg.output_dir

    os_utils.touch_dir(output_dir)

    args_file = os.path.join(cfg.output_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)

    log_file = os.path.join(cfg.output_dir, cfg.log_filename + '.txt')

    logger = log_utils.create_logger(log_file)

    img_name_ext = cfg.img_name
    img_name, _ = os.path.splitext(img_name_ext)
    datasets_dir = './input_imgs'
    test_img = imageio.imread('{}/{}'.format(datasets_dir, img_name_ext))
    test_img = cv2.resize(test_img, (const.frame_height, const.frame_height))
    with tf.Graph().as_default():

        images_ph = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, const.frame_height,
                                                    const.frame_height,
                                                    const.num_channels),
                                             name='input_img')
        lbls_ph = tf.compat.v1.placeholder(tf.int32,
                                           shape=(None, cfg.num_classes),
                                           name='class_lbls')
        logits_ph = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, cfg.num_classes),
                                             name='logits_lbls')
        per_class_logits_ph = tf.compat.v1.placeholder(tf.float32,
                                                       shape=(None,
                                                              cfg.num_classes),
                                                       name='logits_lbls')
        input_ph = nn_utils.adjust_color_space(images_ph, cfg.preprocess_func)
        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=input_ph, lbls_ph=lbls_ph)

        pre_atten_feat_map_tf = tf.compat.v1.get_default_graph(
        ).get_tensor_by_name(cfg.replicate_net_at)
        pre_atten_feat_map_tf_shape = pre_atten_feat_map_tf.shape
        sub_feat_map_ph = tf.compat.v1.placeholder(
            tf.float32,
            shape=[
                None, pre_atten_feat_map_tf_shape[1],
                pre_atten_feat_map_tf_shape[2], pre_atten_feat_map_tf_shape[3]
            ],
            name='feat_map_input')
        sub_network_class = locate(cfg.sub_network_name)
        sub_model = sub_network_class(cfg,
                                      images_ph=sub_feat_map_ph,
                                      lbls_ph=lbls_ph)
        sub_logits = sub_model.val_logits

        logits = model.val_logits

        sess = tf.compat.v1.InteractiveSession()

        atten_filter_position = cfg.atten_filter_position

        tf_atten_var = [
            v for v in tf.compat.v1.global_variables()
            if atten_filter_position.format('atten') in v.name
        ][-1]
        ## Didn't make a difference for tf_atten_var becuase tf_atten_var is created using get_varibale, i.e., shared
        tf_gate_atten_var = [
            v for v in tf.compat.v1.global_variables()
            if atten_filter_position.format('gate') in v.name
        ][-1]
        # print(tf_gate_atten_var)
        # optimizer = tf.train.AdamOptimizer(0.01)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        logger.info('Learning rate {} {}'.format(cfg.learning_rate,
                                                 cfg.max_iters))
        learning_rate = tf_utils.poly_lr(global_step, cfg)
        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate)

        class_specific = True if cfg.caf_variant == 'cls_specific' else False
        if class_specific:
            logger.info(
                'Solving class specific optimization problem -- classification network'
            )

            mult_logits_2 = per_class_logits_ph * sub_logits
            loss_sub = tf.reduce_sum(mult_logits_2)
            grads = optimizer.compute_gradients(loss_sub,
                                                var_list=[tf_atten_var])
            train_op = optimizer.apply_gradients(grads,
                                                 global_step=global_step)
        else:
            raise NotImplementedError('cls_oblivious version implemented yet')

        # train_op = optimizer.minimize(loss, var_list=[tf_atten_var])
        tf.compat.v1.global_variables_initializer().run()
        ckpt_file = tf.train.latest_checkpoint(output_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        saver = tf.compat.v1.train.Saver(
        )  # saves variables learned during training
        load_model_msg = model.load_model(output_dir,
                                          ckpt_file,
                                          sess,
                                          saver,
                                          load_logits=True)
        logger.info(load_model_msg)

        class_predictions, ground_logits = sess.run(
            [model.val_class_prediction, logits],
            feed_dict={images_ph: np.expand_dims(test_img, 0)})

        class_predictions = class_predictions[0]
        # print('Class Prediction {}'.format(imagenet_lbls[class_predictions]))

        k = 1
        top_k = np.argsort(np.squeeze(ground_logits))[::-1][:k]
        # top_k = [235,282,94,1,225]
        logger.info('Top K={} {}'.format(k, [imagenet_lbls[i] for i in top_k]))

        filter_type = cfg.filter_type
        if filter_type == 'gauss':
            rand_initilzalier = np.random.normal(
                0, 1, (tf_atten_var.shape[0], tf_atten_var.shape[1]))
        else:
            rand_initilzalier = np.random.normal(
                0, 1, (tf_atten_var.shape[0], tf_atten_var.shape[1], 1))

        close_gate = tf.compat.v1.assign(tf_gate_atten_var, False)
        open_gate = tf.compat.v1.assign(tf_gate_atten_var, True)
        random_init = tf.compat.v1.assign(tf_atten_var, rand_initilzalier)
        lr_reset = tf.compat.v1.assign(global_step, 0)
        MAX_INT = np.iinfo(np.int16).max
        # output_dir = cfg.output_dir
        for top_i in top_k:
            # top_i  = 207  # To control which top_i to work on directly
            sess.run([open_gate, random_init, lr_reset])
            # sess.run(open_gate)

            iteration = 0
            prev_loss = MAX_INT
            event_gif_images = []
            per_class_maximization = np.ones((1, cfg.num_classes))
            per_class_maximization[0, top_i] = -1

            while iteration < cfg.max_iters:

                if iteration == 0:
                    sess.run([close_gate])
                    _pre_atten_feat_map_tf, _atten_var = sess.run(
                        [pre_atten_feat_map_tf, tf_atten_var],
                        feed_dict={
                            # sub_feat_map_ph: _pre_atten_feat_map_tf,
                            images_ph: np.expand_dims(test_img, 0),
                            per_class_logits_ph: per_class_maximization
                        })
                    sess.run([open_gate])
                _atten_var, _sub_logits, _loss, _ = sess.run(
                    [tf_atten_var, sub_logits, loss_sub, train_op],
                    feed_dict={
                        sub_feat_map_ph: _pre_atten_feat_map_tf,
                        # images_ph:np.expand_dims(img_crops[crop_idx,:,:,:],0),
                        per_class_logits_ph: per_class_maximization
                    })

                if iteration % 50 == 0:
                    logger.info('Iter {0:2d}: {1:.5f} Top {2:3d} {3}'.format(
                        iteration, _loss, top_i, imagenet_lbls[top_i]))
                    # print(np.round(np.reshape(_atten_var,(7,7)),2))
                    if cfg.save_gif:
                        frame_mask = normalize_filter(filter_type, _atten_var,
                                                      tf_atten_var.shape[0],
                                                      tf_atten_var.shape[1])
                        if class_specific:
                            #
                            # heatmap_utils.save_heatmap(frame_mask,save=output_dir + img_name +'_msk_cls_{}_{}.png'.format(top_i,filter_type))
                            plt = heatmap_utils.apply_heatmap(
                                test_img / 255.0,
                                frame_mask,
                                alpha=0.7,
                                save=output_dir + img_name +
                                '_cls_{}_{}.png'.format(top_i, filter_type),
                                axis='off',
                                cmap='bwr')
                        else:
                            plt = heatmap_utils.apply_heatmap(
                                test_img / 255.0,
                                frame_mask,
                                alpha=0.7,
                                save=output_dir + img_name +
                                '_{}.png'.format(filter_type),
                                axis='off',
                                cmap='bwr')

                        fig = plt.gcf()
                        data = np.fromstring(fig.canvas.tostring_rgb(),
                                             dtype=np.uint8,
                                             sep='')
                        w, h = fig.canvas.get_width_height()
                        data_img = data.reshape((h, w, 3))
                        event_gif_images.append(data_img)
                        # imageio.imwrite(dump_dir + '{}_test.jpg'.format(iteration),data_img)
                        plt.close()

                    if np.abs(_loss - prev_loss) < 10e-5:
                        break

                    prev_loss = _loss

                iteration += 1

            frame_mask = normalize_filter(filter_type, _atten_var,
                                          tf_atten_var.shape[0],
                                          tf_atten_var.shape[1])
            if class_specific:
                # imageio.imwrite(output_dir + img_name + '_msk_cls_{}_{}.png'.format(top_i, filter_type), frame_mask)
                heatmap_utils.apply_heatmap(
                    test_img / 255.0,
                    frame_mask,
                    alpha=0.6,
                    save=output_dir + img_name +
                    '_cls_{}_{}.png'.format(top_i, filter_type),
                    axis='off',
                    cmap='bwr')
            else:
                heatmap_utils.apply_heatmap(test_img / 255.0,
                                            frame_mask,
                                            alpha=0.6,
                                            save=output_dir + img_name +
                                            '_{}.png'.format(filter_type),
                                            axis='off',
                                            cmap='bwr')
            if cfg.save_gif:
                if class_specific:
                    imageio.mimsave(
                        output_dir + img_name + '_cls_{}_{}.gif'.format(
                            top_i,
                            atten_filter_position[:-2].format('').replace(
                                '/', '')),
                        event_gif_images,
                        duration=1.0)
                else:
                    imageio.mimsave(
                        output_dir + img_name + '_cls_{}_{}.gif'.format(
                            filter_type,
                            atten_filter_position[:-2].format('').replace(
                                '/', '')),
                        event_gif_images,
                        duration=1.0)
def main(argv):

    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu

    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    args['csv_file'] = cfg.train_csv_file
    train_iter = img_generator_class(args)

    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)

    train_imgs, train_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()

    # Where to save the trained model
    save_model_dir = cfg.checkpoint_dir
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)


    ## Log experiment
    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    # os_utils.touch_dir(save_model_dir)

    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)

    logger = log_utils.create_logger(log_file)


    with tf.Graph().as_default():

        # Create train and val dataset following tensorflow Data API
        ## A dataset element has an image and lable
        train_dataset = TensorflowTupleLoader(train_imgs, train_lbls,cfg, is_training=True).dataset
        val_dataset = TensorflowTupleLoader(val_imgs, val_lbls,cfg, is_training=False, batch_size=cfg.batch_size,
                                       repeat=False).dataset

        handle = tf.placeholder(tf.string, shape=[])

        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()

        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()

        ## Load a pretrained network {resnet_v2 or densenet161} based on config.network_name configuration
        network_class = locate(cfg.network_name)
        model = network_class(cfg, is_training=True, images_ph=images_ph, lbls_ph=lbls_ph)


        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
            ## Creation of a list of variables with the same shape as the trainable ones
            # initialized with 0s
            accum_vars = [tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False) for tv in trainable_vars]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):

            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step,cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient

                grads = optimizer.compute_gradients(model.train_loss, trainable_vars)
                # Adds to each element from the list you initialized earlier with zeros its gradient (works because accum_vars and gvs are in the same order)
                accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(grads)]
                iter_size = cfg.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients([(accum_vars[i] / iter_size, gv[1]) for i, gv in enumerate(grads)],
                                                     global_step=global_step)

            else: # If accumulated gradient disabled, do regular training

                grads = optimizer.compute_gradients(model.train_loss)
                train_op = optimizer.apply_gradients(grads, global_step=global_step)

        # logger.info('=========================================================')
        # for v in tf.trainable_variables():
        #     mprint('trainable_variables:  {0} \t {1}'.format(str(v.name),str(v.shape)))


        sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())


        # now = datetime.now()
        # if (config.tensorbaord_file == None):
        #     tb_path = config.tensorbaord_dir + now.strftime("%Y%m%d-%H%M%S")
        # else:
        #     tb_path = config.tensorbaord_dir + config.tensorbaord_file

        start_iter = 1 # No Resume in this code version

        # train_writer = tf.summary.FileWriter(tb_path, sess.graph)

        saver = tf.train.Saver()  # saves variables learned during training

        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)
        print('Model Path ', ckpt_file)



        load_model_msg = model.load_model(save_model_dir, ckpt_file, sess, saver, is_finetuning=True)
        logger.info(load_model_msg)


        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy', model.val_accumulated_accuracy)

        logger.info('Start Training ***********')
        best_acc = 0
        best_model_step = 0
        for current_iter in range(start_iter, cfg.train_iters+1):
            start_time_train = time.time()
            feed_dict = {handle: training_handle}

            ## Here is where training and backpropagation start

            # In case accumulated gradient enabled, i.e. config.caffe_iter_size > 1
            for mini_batch in range(cfg.caffe_iter_size - 1):
                sess.run(accum_ops, feed_dict)


            model_loss_value, accuracy_value, _ = sess.run([model.train_loss, model.train_accuracy, train_op],
                                                           feed_dict)

            # In case accumulated gradient enabled, reset shadow variables
            if cfg.caffe_iter_size > 1:
                sess.run(zero_ops)

            ## Here is where training and backpropagation end

            train_time = time.time() - start_time_train


            if (current_iter % cfg.logging_threshold == 0 or current_iter ==1):
                logger.info(
                    'i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.format(current_iter, model_loss_value, accuracy_value,
                                                                                train_time))

                if (current_iter % cfg.test_interval == 0):
                    # run_metadata = tf.RunMetadata()

                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)

                    while True:
                        try:
                            feed_dict = {handle: validation_handle}
                            val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat = sess.run(
                                [val_loss, model.val_accuracy, model_acc_op, val_acc_op, model.val_accumulated_accuracy,
                                 model.val_confusion_mat], feed_dict)
                        except tf.errors.OutOfRangeError:
                            logger.info('Val Acc {0}'.format(_val_acc))
                            break



                    # train_writer.add_run_metadata(run_metadata, 'step%03d' % current_iter)
                    # train_writer.add_summary(val_loss_op, current_iter)
                    # train_writer.add_summary(_val_acc_op, current_iter)
                    # train_writer.add_summary(accuracy_op, current_iter)
                    #
                    # train_writer.flush()


                    if (current_iter % cfg.logging_threshold == 0):
                        saver.save(sess, ckpt_file)
                        if best_acc < _val_acc:
                            saver.save(sess, ckpt_file + 'best')
                            best_acc = _val_acc
                            best_model_step = current_iter
                        ## Early dropping style.
                        logger.info('Best Acc {0} at {1} == {2}'.format(best_acc, best_model_step, model_basename))

        saver.save(sess, ckpt_file)  ## Save final ckpt before closing
        sess.close()
Example #3
0
def main(argv):
    cfg = BaseConfig().parse(argv)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.gpu
    save_model_dir = cfg.checkpoint_dir
    model_basename = os.path.basename(save_model_dir)
    touch_dir(save_model_dir)

    args_file = os.path.join(cfg.checkpoint_dir, 'args.json')
    with open(args_file, 'w') as f:
        json.dump(vars(cfg), f, ensure_ascii=False, indent=2, sort_keys=True)
    # os_utils.touch_dir(save_model_dir)

    log_file = os.path.join(cfg.checkpoint_dir, cfg.log_filename + '.txt')
    os_utils.touch_dir(cfg.checkpoint_dir)
    logger = log_utils.create_logger(log_file)

    img_generator_class = locate(cfg.db_tuple_loader)
    args = dict()
    args['db_path'] = cfg.db_path
    args['tuple_loader_queue_size'] = cfg.tuple_loader_queue_size
    args['preprocess_func'] = cfg.preprocess_func
    args['batch_size'] = cfg.batch_size
    args['shuffle'] = False
    args['csv_file'] = cfg.train_csv_file
    args['img_size'] = const.max_frame_size
    args['gen_hot_vector'] = True
    train_iter = img_generator_class(args)
    args['batch_size'] = cfg.batch_size
    args['csv_file'] = cfg.test_csv_file
    val_iter = img_generator_class(args)

    trn_images, trn_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()

    with tf.Graph().as_default():
        if cfg.train_mode == 'semi_hard' or cfg.train_mode == 'hard' or cfg.train_mode == 'cntr':
            train_dataset = TripletTupleLoader(trn_images, trn_lbls,
                                               cfg).dataset
        elif cfg.train_mode == 'vanilla':
            train_dataset = QuickTupleLoader(trn_images,
                                             trn_lbls,
                                             cfg,
                                             is_training=True,
                                             shuffle=True,
                                             repeat=True).dataset
        else:
            raise NotImplementedError('{} is not a valid train mode'.format(
                cfg.train_mode))

        val_dataset = QuickTupleLoader(val_imgs,
                                       val_lbls,
                                       cfg,
                                       is_training=False,
                                       repeat=False).dataset
        handle = tf.placeholder(tf.string, shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()

        network_class = locate(cfg.network_name)
        model = network_class(cfg, images_ph=images_ph, lbls_ph=lbls_ph)

        # Which loss fn to impose. For example, softmax only is applied in vanilla mode,
        # while softmax + semi-hard triplet is applied in semi_hard mode.
        if cfg.train_mode == 'semi_hard':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            margin = cfg.margin
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_semi.triplet_semihard_loss(
                gt_lbls, embedding, margin)
            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss)
        elif cfg.train_mode == 'hard':
            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            margin = cfg.margin

            logger.info('Triplet loss lambda {}, with margin {}'.format(
                cfg.triplet_loss_lambda, margin))
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            metric_loss = triplet_hard.batch_hard(gt_lbls, embedding, margin)
            total_loss = model.train_loss + cfg.triplet_loss_lambda * tf.reduce_mean(
                metric_loss)
        elif cfg.train_mode == 'cntr':

            pre_logits = model.train_pre_logits
            _, w, h, channels = pre_logits.shape
            embed_dim = cfg.emb_dim
            embedding_net = ConvEmbed(emb_dim=embed_dim,
                                      n_input=channels,
                                      n_h=h,
                                      n_w=w)
            embedding = embedding_net.forward(pre_logits)
            embedding = tf.nn.l2_normalize(embedding, axis=-1, epsilon=1e-10)
            CENTER_LOSS_LAMBDA = 0.003
            CENTER_LOSS_ALPHA = 0.5
            num_fg_classes = cfg.num_classes
            gt_lbls = tf.argmax(model.gt_lbls, 1)
            center_loss_order, centroids, centers_update_op, appear_times, diff = center_loss.get_center_loss(
                embedding, gt_lbls, CENTER_LOSS_ALPHA, num_fg_classes)
            # sample_centroid = tf.reshape(tf.gather(centroids, gt_lbls), [-1, config.emb_dim])
            # center_loss_order = center_loss.center_loss(sample_centroid , embedding)
            logger.info('Center loss lambda {}'.format(CENTER_LOSS_LAMBDA))
            total_loss = model.train_loss + CENTER_LOSS_LAMBDA * tf.reduce_mean(
                center_loss_order)

        elif cfg.train_mode == 'vanilla':
            total_loss = model.train_loss

        logger.info('Train Mode {}'.format(cfg.train_mode))
        # variables_to_train = model.var_2_train();
        # logger.info('variables_to_train  ' + str(variables_to_train))

        trainable_vars = tf.trainable_variables()
        if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
            ## Creation of a list of variables with the same shape as the trainable ones
            # initialized with 0s
            accum_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in trainable_vars
            ]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if cfg.train_mode == const.Train_Mode.CNTR:
            update_ops.append(centers_update_op)

        # print(update_ops)

        with tf.control_dependencies(update_ops):

            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step, cfg)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
                # grads = tf.Print(grads,[grads],'Grad Print');
                grads = optimizer.compute_gradients(total_loss, trainable_vars)
                # Adds to each element from the list you initialized earlier with zeros its gradient (works because accum_vars and gvs are in the same order)
                accum_ops = [
                    accum_vars[i].assign_add(gv[0])
                    for i, gv in enumerate(grads)
                ]
                iter_size = cfg.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients(
                    [(accum_vars[i] / iter_size, gv[1])
                     for i, gv in enumerate(grads)],
                    global_step=global_step)

            else:
                grads = optimizer.compute_gradients(total_loss)
                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)

        sess = tf.InteractiveSession()
        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())

        tb_path = save_model_dir
        logger.info(tb_path)
        start_iter = tb_utils.get_latest_iteration(tb_path)

        train_writer = tf.summary.FileWriter(tb_path, sess.graph)
        tf.global_variables_initializer().run()
        saver = tf.train.Saver()  # saves variables learned during training

        ckpt_file = tf.train.latest_checkpoint(save_model_dir)
        logger.info('Model Path {}'.format(ckpt_file))
        load_model_msg = model.load_model(save_model_dir,
                                          ckpt_file,
                                          sess,
                                          saver,
                                          load_logits=False)
        logger.info(load_model_msg)

        ckpt_file = os.path.join(save_model_dir, cfg.checkpoint_filename)

        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)

        best_model_step = 0
        best_acc = 0
        logger.info('Start Training from {}, till {}'.format(
            start_iter, cfg.train_iters))
        # Start Training
        for step in range(start_iter + 1, cfg.train_iters + 1):

            start_time_train = time.time()

            # Update network weights while supporting caffe_iter_size
            for mini_batch in range(cfg.caffe_iter_size - 1):
                feed_dict = {handle: training_handle}
                model_loss_value, accuracy_value, _ = sess.run(
                    [model.train_loss, model.train_accuracy, accum_ops],
                    feed_dict)

            feed_dict = {handle: training_handle}
            model_loss_value, accuracy_value, _ = sess.run(
                [model.train_loss, model.train_accuracy, train_op], feed_dict)
            if cfg.caffe_iter_size > 1:  ## Accumulated Gradient
                sess.run(zero_ops)

            train_time = time.time() - start_time_train

            if (step == 1 or step % cfg.logging_threshold == 0):
                logger.info(
                    'i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.
                    format(step, model_loss_value, accuracy_value, train_time))

                if (step % cfg.test_interval == 0):
                    run_metadata = tf.RunMetadata()
                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)

                    _val_acc_op = 0
                    while True:
                        try:

                            # Eval network on validation/testing split
                            feed_dict = {handle: validation_handle}
                            val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat, macro_acc = sess.run(
                                [
                                    val_loss, model.val_accuracy, model_acc_op,
                                    val_acc_op, model.val_accumulated_accuracy,
                                    model.val_confusion_mat,
                                    model.val_per_class_acc_acc
                                ], feed_dict)
                        except tf.errors.OutOfRangeError:
                            logger.info('Val Acc {0}, Macro Acc: {1}'.format(
                                _val_acc, macro_acc))
                            break

                    train_writer.add_run_metadata(run_metadata,
                                                  'step%03d' % step)
                    train_writer.add_summary(val_loss_op, step)
                    train_writer.add_summary(_val_acc_op, step)
                    train_writer.add_summary(accuracy_op, step)
                    train_writer.flush()

                    if (step % 100 == 0):
                        saver.save(sess, ckpt_file)
                        if best_acc < _val_acc:
                            saver.save(sess, ckpt_file + 'best')
                            best_acc = _val_acc
                            best_model_step = step

                        logger.info('Best Acc {0} at {1} == {2}'.format(
                            best_acc, best_model_step, model_basename))

        logger.info('Triplet loss lambda {}'.format(cfg.triplet_loss_lambda))
        logger.info('Mode {}'.format(cfg.train_mode))
        logger.info('Loop complete')
        sess.close()
Example #4
0
def main():

    img_generator_class = locate(config.db_tuple_loader)
    args = dict()
    args['csv_file'] = config.train_csv_file
    train_iter = img_generator_class(args)
    args['csv_file'] = config.test_csv_file
    val_iter = img_generator_class(args)

    train_imgs, train_lbls = train_iter.imgs_and_lbls()
    val_imgs, val_lbls = val_iter.imgs_and_lbls()

    save_model_dir = config.model_save_path

    with tf.Graph().as_default():

        train_dataset = TensorflowTupleLoader(train_imgs,
                                              train_lbls,
                                              is_training=True).dataset
        val_dataset = TensorflowTupleLoader(val_imgs,
                                            val_lbls,
                                            is_training=False,
                                            batch_size=config.batch_size,
                                            repeat=False).dataset

        handle = tf.placeholder(tf.string, shape=[])

        iterator = tf.data.Iterator.from_string_handle(
            handle, train_dataset.output_types, train_dataset.output_shapes)
        images_ph, lbls_ph = iterator.get_next()

        training_iterator = train_dataset.make_one_shot_iterator()
        validation_iterator = val_dataset.make_initializable_iterator()

        network_class = locate(config.network_name)
        model = network_class(num_classes=config.num_classes,
                              is_training=True,
                              images_ph=images_ph,
                              lbls_ph=lbls_ph)

        trainable_vars = tf.trainable_variables()
        if config.caffe_iter_size > 1:  ## Accumulated Gradient
            ## Creation of a list of variables with the same shape as the trainable ones
            # initialized with 0s
            accum_vars = [
                tf.Variable(tf.zeros_like(tv.initialized_value()),
                            trainable=False) for tv in trainable_vars
            ]
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):

            global_step = tf.Variable(0, name='global_step', trainable=False)
            learning_rate = tf_utils.poly_lr(global_step)
            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)

            if config.caffe_iter_size > 1:  ## Accumulated Gradient

                grads = optimizer.compute_gradients(model.train_loss,
                                                    trainable_vars)
                # Adds to each element from the list you initialized earlier with zeros its gradient (works because accum_vars and gvs are in the same order)
                accum_ops = [
                    accum_vars[i].assign_add(gv[0])
                    for i, gv in enumerate(grads)
                ]
                iter_size = config.caffe_iter_size
                # Define the training step (part with variable value update)
                train_op = optimizer.apply_gradients(
                    [(accum_vars[i] / iter_size, gv[1])
                     for i, gv in enumerate(grads)],
                    global_step=global_step)

            else:
                grads = optimizer.compute_gradients(model.train_loss)
                train_op = optimizer.apply_gradients(grads,
                                                     global_step=global_step)

        # logger.info('=========================================================')
        # for v in tf.trainable_variables():
        #     mprint('trainable_variables:  {0} \t {1}'.format(str(v.name),str(v.shape)))

        sess = tf.InteractiveSession()
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        training_handle = sess.run(training_iterator.string_handle())
        validation_handle = sess.run(validation_iterator.string_handle())

        now = datetime.now()
        if (config.tensorbaord_file == None):
            tb_path = config.tensorbaord_dir + now.strftime("%Y%m%d-%H%M%S")
        else:
            tb_path = config.tensorbaord_dir + config.tensorbaord_file

        start_iter = 0  # No Resume in this code version

        train_writer = tf.summary.FileWriter(tb_path, sess.graph)

        saver = tf.train.Saver()  # saves variables learned during training

        ckpt_file = os.path.join(save_model_dir, config.model_save_name)
        print('Model Path ', ckpt_file)

        load_model_msg = model.load_model(save_model_dir,
                                          ckpt_file,
                                          sess,
                                          saver,
                                          is_finetuning=True)
        mprint(load_model_msg)

        val_loss = tf.summary.scalar('Val_Loss', model.val_loss)
        val_acc_op = tf.summary.scalar('Batch_Val_Acc', model.val_accuracy)
        model_acc_op = tf.summary.scalar('Split_Val_Accuracy',
                                         model.val_accumulated_accuracy)

        mprint('Start Training ***********')
        best_acc = 0
        best_model_step = 0
        for current_iter in range(start_iter, config.max_iter):
            start_time_train = time.time()

            for mini_batch in range(config.caffe_iter_size - 1):
                feed_dict = {handle: training_handle}
                sess.run(accum_ops, feed_dict)

            feed_dict = {handle: training_handle}
            model_loss_value, accuracy_value, _ = sess.run(
                [model.train_loss, model.train_accuracy, train_op], feed_dict)

            if config.caffe_iter_size > 1:  ## Accumulated Gradient
                sess.run(zero_ops)

            train_time = time.time() - start_time_train

            if (current_iter % config.logging_threshold == 0):
                mprint('i {0:04d} loss {1:4f} Acc {2:2f} Batch Time {3:3f}'.
                       format(current_iter, model_loss_value, accuracy_value,
                              train_time))

                if (current_iter != 0):
                    run_metadata = tf.RunMetadata()
                    tf.local_variables_initializer().run()
                    sess.run(validation_iterator.initializer)

                    while True:
                        try:
                            feed_dict = {handle: validation_handle}
                            val_loss_op, batch_accuracy, accuracy_op, _val_acc_op, _val_acc, c_cnf_mat = sess.run(
                                [
                                    val_loss, model.val_accuracy, model_acc_op,
                                    val_acc_op, model.val_accumulated_accuracy,
                                    model.val_confusion_mat
                                ], feed_dict)
                        except tf.errors.OutOfRangeError:
                            mprint('Val Acc {0} {1}'.format(
                                _val_acc, batch_accuracy))
                            break

                    train_writer.add_run_metadata(run_metadata,
                                                  'step%03d' % current_iter)
                    train_writer.add_summary(val_loss_op, current_iter)
                    train_writer.add_summary(_val_acc_op, current_iter)
                    train_writer.add_summary(accuracy_op, current_iter)

                    train_writer.flush()

                    if (current_iter % config.logging_threshold == 0):
                        saver.save(sess, ckpt_file)
                        if best_acc < _val_acc:
                            saver.save(sess, ckpt_file + 'best')
                            best_acc = _val_acc
                            best_model_step = current_iter
                        ## Early dropping style.
                        mprint('Best Acc {0} at {1} == {2}'.format(
                            best_acc, best_model_step, config.model_filename))

        saver.save(sess, ckpt_file)  ## Save final ckpt before closing
        ckpt = os.path.join(save_model_dir, str(current_iter),
                            config.model_save_name)
        saver.save(sess, ckpt)
        sess.close()