Code example #1
def main():
	"""Create the model and start the training."""
	args = get_arguments()
	
	h, w = map(int, args.input_size.split(','))
	input_size = (h, w)
	
	coord = tf.train.Coordinator()
	
	with tf.name_scope("create_inputs"):
		reader = ImageReader(
			DATA_DIR,
			DATA_LIST_PATH,
			input_size,
			args.random_scale,
			args.random_mirror,
			args.ignore_label,
			IMG_MEAN,
			coord)
		image_batch, label_batch = reader.dequeue(args.batch_size)

	net = ICNet_BN({'data': image_batch}, is_training=True, num_classes=args.num_classes, filter_scale=args.filter_scale)
	sub4_out = net.layers['sub4_out']
	sub24_out = net.layers['sub24_out']
	sub124_out = net.layers['conv6_cls']

	restore_var = tf.global_variables()
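	# Keep batch-norm beta/gamma out of the trainable set unless args.train_beta_gamma is set.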
	all_trainable = [v for v in tf.trainable_variables() if ('beta' not in v.name and 'gamma' not in v.name) or args.train_beta_gamma]

	loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes, args.ignore_label)
	loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes, args.ignore_label)
	loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes, args.ignore_label)

	l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
		
	reduced_loss = LAMBDA1 * loss_sub4 +  LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124 + tf.add_n(l2_losses)

	# Using Poly learning rate policy 
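	# lr = base_lr * (1 - step / num_steps) ** power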
	base_lr = tf.constant(args.learning_rate)
	step_ph = tf.placeholder(dtype=tf.float32, shape=())
	learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
		
	# Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
	if not args.update_mean_var:
		update_ops = None
	else:
		update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

	with tf.control_dependencies(update_ops):
		opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
		grads = tf.gradients(reduced_loss, all_trainable)
		train_op = opt_conv.apply_gradients(zip(grads, all_trainable))
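The create_loss helper used above is defined elsewhere in the project. As a rough guide only, a minimal sketch of such a loss, assuming it downsamples the labels to the logits' resolution and masks out ignore_label before a sparse softmax cross-entropy (the usual import tensorflow as tf is implied, as in the examples themselves):

def create_loss(output, label_batch, num_classes, ignore_label):
    # Nearest-neighbour resize keeps class ids intact.
    label = tf.image.resize_nearest_neighbor(label_batch, tf.shape(output)[1:3])
    raw_gt = tf.cast(tf.reshape(label, [-1]), tf.int32)
    raw_pred = tf.reshape(output, [-1, num_classes])
    # Keep only pixels whose label is a real class, not ignore_label.
    indices = tf.squeeze(tf.where(tf.not_equal(raw_gt, ignore_label)), 1)
    gt = tf.gather(raw_gt, indices)
    pred = tf.gather(raw_pred, indices)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=gt)
    return tf.reduce_mean(loss)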
Code example #2
File: run.py  Project: veronicachelu/catzndogs
def run():
    recreate_directory_structure()
    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader("./train.npy", True, coord)

        image_batch, label_list_batch = reader.dequeue(FLAGS.batch_size)

    global_step = tf.Variable(0,
                              dtype=tf.int32,
                              name='global_step',
                              trainable=False)
    net = CatznDogs({'data': image_batch}, global_step)
    net.train(image_batch, label_list_batch, coord)
Code example #3
def test_image_queue(h=321, w=321):
    input_size = (h, w)

    # Create queue coordinator
    coord = tf.train.Coordinator()

    # Load Image Reader
    with tf.name_scope('create_inputs'):
        reader = ImageReader(INDEX_FILE, DATA_DIRECTORY, MASK_DIRECTORY,
                             input_size, True, True, IGNORE_LABEL, IMG_MEAN,
                             coord)

        image_batch, mask_batch = reader.dequeue(BATCH_SIZE)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        for _ in range(10):
            images, masks = sess.run([image_batch, mask_batch])
            print(np.unique(masks))

        # Stop the queue threads once done.
        coord.request_stop()
        coord.join(threads)
Code example #4
def main():
    """Create the model and start the training."""
    args = get_arguments()
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    
    #tf.set_random_seed(args.random_seed)
    
    coord = tf.train.Coordinator()

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Exponential-decay learning rate policy: halve the lr every 20000 steps.
        base_lr = tf.constant(args.learning_rate)
        step_ph = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.train.exponential_decay(base_lr,
                                                   step_ph,
                                                   20000,
                                                   0.5,
                                                   staircase=True)

        tf.summary.scalar('lr', learning_rate)

        opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

        #opt = tf.train.RMSPropOptimizer(learning_rate, 0.9, momentum=0.9, epsilon=1e-10)

        #opt = tf.train.AdamOptimizer(learning_rate)

        losses = []
        train_op = []

        total_batch_size = args.batch_size*args.gpu_nums

        with tf.name_scope('DeepLabResNetModel') as scope:
            with tf.name_scope("create_inputs"):
                reader = ImageReader(
                    args.data_dir,
                    args.data_list,
                    input_size,
                    args.random_blur,
                    args.random_scale,
                    args.random_mirror,
                    args.random_rotate,
                    args.ignore_label,
                    IMG_MEAN,
                    coord)
                image_batch, label_batch = reader.dequeue(total_batch_size)

                images_splits = tf.split(axis=0, num_or_size_splits=args.gpu_nums, value=image_batch)
                labels_splits = tf.split(axis=0, num_or_size_splits=args.gpu_nums, value=label_batch)
   
            net = DeepLabResNetModel({'data': images_splits}, is_training=True, num_classes=args.num_classes)
    
            raw_output_list = net.layers['fc_voc12']

            num_valid_pixel = 0
            for i in range(len(raw_output_list)):
                with tf.device('/gpu:%d' % i):
                    raw_output_up = tf.image.resize_bilinear(raw_output_list[i], size=input_size, align_corners=True)

                    tf.summary.image('images_{}'.format(i), images_splits[i]+IMG_MEAN, max_outputs = 4)
                    tf.summary.image('labels_{}'.format(i), labels_splits[i], max_outputs = 4)

                    tf.summary.image('predict_{}'.format(i), tf.cast(tf.expand_dims(tf.argmax(raw_output_up, -1),3),tf.float32), max_outputs = 4)

                    all_trainable = [v for v in tf.trainable_variables()]

                    # Predictions: ignore all predictions with labels greater than or equal to num_classes
                    raw_prediction = tf.reshape(raw_output_up, [-1, args.num_classes])
                    label_proc = prepare_label(labels_splits[i], tf.stack(raw_output_up.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w]
                    raw_gt = tf.reshape(label_proc, [-1,])
                    #indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
                    indices = tf.where(tf.logical_and(tf.less(raw_gt, args.num_classes), tf.greater_equal(raw_gt, 0)))
                    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                    prediction = tf.gather(raw_prediction, indices)
                    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(tf.argmax(tf.nn.softmax(prediction), axis=-1), gt, num_classes=args.num_classes)
                    tf.summary.scalar('mean IoU_{}'.format(i), mIoU)
                    train_op.append(update_op)
                                                                                             
                    # Pixel-wise softmax loss.
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
                    num_valid_pixel += tf.shape(gt)[0]
 
                    losses.append(tf.reduce_sum(loss))

            l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
            reduced_loss = tf.truediv(tf.reduce_sum(losses), tf.cast(num_valid_pixel, tf.float32)) + tf.add_n(l2_losses)
            tf.summary.scalar('average_loss', reduced_loss) 

        grads = tf.gradients(reduced_loss, all_trainable, colocate_gradients_with_ops=True)

        variable_averages = tf.train.ExponentialMovingAverage(0.99, step_ph)
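        # Keep shadow EMA copies of the weights (decay 0.99); variables_averages_op is grouped into train_op below, so the shadow copies update every step.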

        variables_to_average = (tf.trainable_variables() + tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        train_op = tf.group(opt.apply_gradients(zip(grads, all_trainable)), *train_op)
        
        train_op = tf.group(train_op, variables_averages_op)

        summary_op = tf.summary.merge_all()
    
        # Set up tf session and initialize variables. 
        config = tf.ConfigProto()
        config.allow_soft_placement = True
        sess = tf.Session(config=config)
        init = [tf.global_variables_initializer(), tf.local_variables_initializer()]
        sess.run(init)
        # Saver for storing checkpoints of the model.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=2)

        
        # restore from the ResNet ImageNet checkpoint; 'biased' and 'local_step' variables live in the moving average
        #restore_var = [v for v in tf.trainable_variables() if 'fc' not in v.name]+[v for v in tf.global_variables() if ('moving_mean' in v.name or 'moving_variance' in v.name) and ('biased' not in v.name and 'local_step' not in v.name)]
        restore_var = [v for v in tf.trainable_variables() if 'fc' not in v.name]

        ckpt = tf.train.get_checkpoint_state(args.restore_from)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=restore_var)
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')

        """
        #restore from snapshot
        restore_var = tf.global_variables()

        ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=restore_var, allow_empty=True)
            load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')
            load_step = 0
        """
        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=sess.graph)
        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()
        
            feed_dict = {step_ph: step}
            if step % args.save_pred_every == 0 and step != 0:
                loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, step)
            elif step % 100 == 0:
                summary_str, loss_value, _, IOU = sess.run([summary_op, reduced_loss, train_op, mIoU], feed_dict=feed_dict)
                duration = time.time() - start_time
                summary_writer.add_summary(summary_str, step)
                print('step {:d} \t loss = {:.3f}, mean_IoU = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, IOU, duration))
            else:
                loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
        
        coord.request_stop()
        coord.join(threads)
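The save(...) and load(...) helpers called in this and the other examples are not shown. A minimal sketch consistent with how they are invoked here (import os and import tensorflow as tf assumed):

def save(saver, sess, logdir, step):
    # Writes logdir/model.ckpt-<step>.
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    saver.save(sess, os.path.join(logdir, 'model.ckpt'), global_step=step)
    print('The checkpoint has been created.')

def load(loader, sess, ckpt_path):
    # Restores the variables the loader was built with.
    loader.restore(sess, ckpt_path)
    print('Restored model parameters from {}'.format(ckpt_path))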
Code example #5
File: train.py  Project: alexw92/masterarbeit
def main():
    """Create the model and start the training."""
    args = get_arguments()
    print("SAVE TO " + args.snapshot_dir)
    datalists_epoch = {
        1: args.datalist_path_epoch1,
        2: args.datalist_path_epoch2,
        3: args.datalist_path_epoch3,
        4: args.datalist_path_epoch4,
        5: args.datalist_path_epoch5
    }
    if args.cross_val:
        val_epoch = int(args.cross_val)
        train_epochs = [1, 2, 3, 4, 5]
        train_epochs.remove(val_epoch)
        train_lists = [datalists_epoch[i] for i in train_epochs]
        val_lists = datalists_epoch[val_epoch]
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    max_runtime = args.max_runtime
    max_time_seconds = 3600 * max_runtime
    epochs_until_val = 3

    global dataset_class_weights
    if args.weights_for_dataset is None:
        dataset_class_weights = None
    elif args.weights_for_dataset == 'de_top15':
        dataset_class_weights = weights_detop15
    elif args.weights_for_dataset == 'eu_top25':
        dataset_class_weights = weights_eutop25
    elif args.weights_for_dataset == 'world2k':
        dataset_class_weights = weights_world2k
    elif args.weights_for_dataset == 'kaggle_dstl':
        dataset_class_weights = weights_kaggledstl
    elif args.weights_for_dataset == 'vaihingen':
        dataset_class_weights = weights_vaihingen
    elif args.weights_for_dataset == 'de_top15_nores':
        dataset_class_weights = weights_detop15_nores
    elif args.weights_for_dataset == 'eu_top25_nores':
        dataset_class_weights = weights_eutop25_nores
    elif args.weights_for_dataset == 'world2k_nores':
        dataset_class_weights = weights_world2k_nores

    coord = tf.train.Coordinator()

    if args.cross_val:
        with tf.name_scope("create_inputs"):
            reader = ImageReader(args.datadir, train_lists, input_size,
                                 args.random_scale, args.random_mirror,
                                 args.ignore_label, IMG_MEAN, coord)
            image_batch, label_batch = reader.dequeue(args.batch_size)

            # for validation
            reader_val = ImageReader(args.datadir, val_lists, input_size,
                                     args.random_scale, args.random_mirror,
                                     args.ignore_label, IMG_MEAN, coord)
            image_batch_val, label_batch_val = reader_val.dequeue(
                args.batch_size)
    else:

        with tf.name_scope("create_inputs"):
            reader = ImageReader(args.datadir, args.datalist_path, input_size,
                                 args.random_scale, args.random_mirror,
                                 args.ignore_label, IMG_MEAN, coord)
            image_batch, label_batch = reader.dequeue(args.batch_size)

            # for validation
            reader_val = ImageReader(args.datadir, args.datalist_path_val,
                                     input_size, args.random_scale,
                                     args.random_mirror, args.ignore_label,
                                     IMG_MEAN, coord)
            image_batch_val, label_batch_val = reader_val.dequeue(
                args.batch_size)

    net = ICNet_BN({'data': image_batch},
                   is_training=True,
                   num_classes=args.num_classes,
                   filter_scale=args.filter_scale)
    with tf.variable_scope("val"):
        net_val = ICNet_BN({'data': image_batch_val},
                           is_training=True,
                           num_classes=args.num_classes,
                           filter_scale=args.filter_scale)

    sub4_out = net.layers['sub4_out']
    sub24_out = net.layers['sub24_out']
    sub124_out = net.layers['conv6_cls']

    # early stop variables
    last_val_loss_tf = tf.Variable(10000.0, name="last_loss")
    steps_total_tf = tf.Variable(0, name="steps_total")
    val_increased_t_tf = tf.Variable(0, name="loss_increased_t")

    if args.not_restore_last:
        restore_var = [
            v for v in tf.global_variables() if 'conv6_cls' not in v.name
            and 'val' not in v.name and 'sub4_out' not in v.name
            and 'sub24_out' not in v.name and 'sub124_out' not in v.name
        ]
    else:
        # to load the last layer, line 78 in network.py has to be removed too and ignore_missing set to False
        # see https://github.com/hellochick/ICNet-tensorflow/issues/50 BCJuan
        # don't restore val vars
        restore_var = [
            v for v in tf.trainable_variables() if 'val' not in v.name
        ]  #tf.global_variables()
        # don't train val variables
    all_trainable = [
        v for v in tf.trainable_variables()
        if (('beta' not in v.name and 'gamma' not in v.name)
            or args.train_beta_gamma) and 'val' not in v.name
    ]
    # all_trainable = [v for v in tf.trainable_variables() if
    #                  ('beta' not in v.name and 'gamma' not in v.name) or args.train_beta_gamma]

    # print([v for v in tf.global_variables() if v.name in["last_val_loss","steps_total","val_increased_t"]])
    # restore_var.extend([v for v in tf.global_variables() if v.name in["last_val_loss","steps_total","val_increased_t"]])

    # assert not np.any(np.isnan(sub4_out))
    loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes,
                            args.ignore_label)
    loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes,
                             args.ignore_label)
    loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes,
                              args.ignore_label)
    # l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if ('weights' in v.name and 'val' not in v.name)
    ]
    reduced_loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124 + tf.add_n(
        l2_losses)

    ####################### Loss Calculation FOR VALIDATION

    sub4_out_val = net_val.layers['sub4_out']
    sub24_out_val = net_val.layers['sub24_out']
    sub124_out_val = net_val.layers['conv6_cls']

    loss_sub4_val = create_loss(sub4_out_val, label_batch_val,
                                args.num_classes, args.ignore_label)
    loss_sub24_val = create_loss(sub24_out_val, label_batch_val,
                                 args.num_classes, args.ignore_label)
    loss_sub124_val = create_loss(sub124_out_val, label_batch_val,
                                  args.num_classes, args.ignore_label)
    l2_losses_val = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if ('weights' in v.name and 'val' in v.name)
    ]

    reduced_loss_val = LAMBDA1 * loss_sub4_val + LAMBDA2 * loss_sub24_val + LAMBDA3 * loss_sub124_val + tf.add_n(
        l2_losses_val)
    ####################### End Loss Calculation FOR VALIDATION

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if not args.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # start time
    glob_start_time = time.time()

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    if '.npy' not in args.restore_from:
        ckpt = tf.train.get_checkpoint_state(args.restore_from)
    else:
        ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        vars_to_restore = get_tensors_in_checkpoint_file(
            file_name=ckpt.model_checkpoint_path)
        # print(vars_to_restore)
        # print([v.name for v in restore_var])
        # thanks to https://stackoverflow.com/a/50216949/8862202
        # v.name[:-2] to transform 'conv1_1_3x3_s2/weights:0' to 'conv1_1_3x3_s2/weights'
        vars_to_restore = [
            v for v in restore_var
            if 'val' not in v.name and v.name[:-2] in vars_to_restore
        ]
        # print(vars_to_restore)
        #loader = tf.train.Saver(var_list=restore_var)
        loader = tf.train.Saver(var_list=vars_to_restore)
        load_step = int(
            os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess)
    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    if args.reset_patience:
        z = tf.assign(val_increased_t_tf, 0)
        sess.run(z)

    print(sess.run(last_val_loss_tf))
    print(sess.run(steps_total_tf))
    print(sess.run(val_increased_t_tf))

    if not args.cross_val:
        val_epoch_len = len(reader_val.image_list)
        val_num_steps = val_epoch_len // args.batch_size
        # Iterate over training steps.
        last_val_loss = sess.run(last_val_loss_tf)
        val_increased_t = sess.run(val_increased_t_tf)
        best_model_step = 0
        total_steps = sess.run(steps_total_tf)
        for step in range(total_steps, args.num_steps + total_steps):
            start_time = time.time()
            feed_dict = {step_ph: step}
            if step % args.save_pred_every == 0:

                # validating
                if args.validate:
                    print("validating: ")
                    print_assign_vars(sess)
                    print("Assigned vars for validation. ")
                    loss_sum = 0
                    for val_step in trange(val_num_steps,
                                           desc='validation',
                                           leave=True):
                        loss_value_v, loss1_v, loss2_v, loss3_v = sess.run(
                            [
                                reduced_loss_val, loss_sub4_val,
                                loss_sub24_val, loss_sub124_val
                            ],
                            feed_dict=feed_dict)
                        loss_sum = loss_sum + loss_value_v
                    loss_avg = loss_sum / val_num_steps

                    if loss_avg > last_val_loss:
                        val_increased_t = val_increased_t + 1
                        if val_increased_t >= args.patience:
                            print(
                                "Terminated training; best model (at step %d) was saved %d validations ago"
                                % (best_model_step, args.patience))
                            f = open("./FINISHED_ICNET", "w+")
                            f.close()
                            break

                    else:
                        val_increased_t = 0
                        best_model_step = step

                    print(
                        'VALIDATION COMPLETE step {:d}\tVal_Loss Increased {:d}/{:d} times\t total loss = {:.3f}'
                        ' last loss = {:.3f}'.format(step, val_increased_t,
                                                     args.patience, loss_avg,
                                                     last_val_loss))

                    last_val_loss = loss_avg
                    steps_assign = tf.assign(steps_total_tf, step)
                    last_val_assign = tf.assign(last_val_loss_tf,
                                                last_val_loss)
                    increased_assign = tf.assign(val_increased_t_tf,
                                                 val_increased_t)
                    print("loss avg " + str(loss_avg))
                    print(sess.run(steps_assign))
                    print(sess.run(last_val_assign))
                    print(sess.run(increased_assign))

                # Saving

                loss_value, loss1, loss2, loss3, _ = sess.run(
                    [
                        reduced_loss, loss_sub4, loss_sub24, loss_sub124,
                        train_op
                    ],
                    feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, step)

                # check if max run time is already over
                elapsed = time.time() - glob_start_time
                if (elapsed + 300) > max_time_seconds:
                    print("Training stopped: max run time elapsed")
                    os.remove("./RUNNING_ICNET")
                    break
            else:
                loss_value, loss1, loss2, loss3, _ = sess.run(
                    [
                        reduced_loss, loss_sub4, loss_sub24, loss_sub124,
                        train_op
                    ],
                    feed_dict=feed_dict)
            duration = time.time() - start_time
            print(
                'step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'
                .format(step, loss_value, loss1, loss2, loss3, duration))
        train_duration = time.time() - glob_start_time
        print('Total training time: ' + str(train_duration))
    else:
        # Training with cross validation
        print("Training-Mode CROSS VALIDATION")
        val_epoch_len = len(reader_val.image_list)
        val_num_steps = val_epoch_len // args.batch_size
        print("Val epoch length %d, Num steps %d" %
              (val_epoch_len, val_num_steps))
        last_val_loss = math.inf
        val_not_imp_t = 0

        # train

        for step in range(1000000):
            feed_dict = {step_ph: step}
            train_start = time.time()
            loss_value, loss1, loss2, loss3, _ = sess.run(
                [reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op],
                feed_dict=feed_dict)
            duration_t = time.time() - train_start
            if args.print_steps:
                print(
                    'step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'
                    .format(step, loss_value, loss1, loss2, loss3, duration_t))

            if step % args.save_pred_every == 0:
                # save and validate
                # SAVE previously trained model
                save(saver, sess, args.snapshot_dir, step)
                # Validate
                print("validating: ")
                start_time = time.time()
                print_assign_vars(sess)
                print("Assigned vars for validation. ")
                loss_sum = 0
                for val_step in trange(val_num_steps,
                                       desc='validation',
                                       leave=True):
                    loss_value_v, loss1_v, loss2_v, loss3_v = sess.run(
                        [
                            reduced_loss_val, loss_sub4_val, loss_sub24_val,
                            loss_sub124_val
                        ],
                        feed_dict=feed_dict)
                    loss_sum = loss_sum + loss_value_v
                duration = time.time() - start_time
                loss_avg = loss_sum / val_num_steps
                print(
                    'VALIDATION COMPLETE step {:d} \t total loss = {:.3f} \t duration = {:.3f}'
                    .format(step, loss_avg, duration))

                # Early stopping: loss_avg is only fresh right after a validation
                # pass, so this comparison belongs inside the save/validate block.
                if loss_avg >= last_val_loss:
                    val_not_imp_t = val_not_imp_t + 1
                    if val_not_imp_t >= 4:
                        print(
                            "Terminated training; best model was saved 4 validations ago"
                        )
                        f = open("./FINISHED_ICNET", "w+")
                        f.close()
                        break

                else:
                    val_not_imp_t = 0

                last_val_loss = loss_avg

            # check if max run time is already over
            elapsed = time.time() - glob_start_time
            if (elapsed + 300) > max_time_seconds:
                print("Training stopped: max run time elapsed")
                os.remove("./RUNNING_ICNET")
                break

    coord.request_stop()
    coord.join(threads)
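print_assign_vars is project-specific and not shown. Judging only from how it is used here (the freshly trained weights must reach the 'val'-scoped twin network before validating), a plausible sketch; the names and the copy mechanism are assumptions:

def print_assign_vars(sess):
    # Copy each variable into its 'val/'-prefixed twin.
    # (Building assign ops on every call grows the graph; acceptable for a sketch.)
    by_name = {v.name: v for v in tf.global_variables()}
    assign_ops = [tf.assign(v, by_name[name[len('val/'):]])
                  for name, v in by_name.items()
                  if name.startswith('val/') and name[len('val/'):] in by_name]
    sess.run(assign_ops)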
Code example #6
    def _tf_common_init(self):
        gpu_count = len(self.device_ids)
        src_size = self.config['input_size']
        input_size_wh = (src_size['width'], src_size['height'])
        init_lr = self.config['lr']
        power = self.config['lr_decreasing']['power']
        momentum = self.config['momentum']
        weight_decay = self.config['weight_decay']
        num_classes = len(self.out_classes)
        train_beta_gamma = self.config['train_beta_gamma']
        update_mean_var = self.config['update_mean_var']

        with tf.device('/cpu:0'):
            self.coord = tf.train.Coordinator()
            splitted_images = {}
            splitted_labels = {}

            with tf.name_scope("create_inputs"):
                for name, need_shuffle in [
                    ('train', True),
                    ('val', False),
                ]:
                    reader = ImageReader(
                        ia_descrs=self.samples_dct[name],
                        input_size_wh=input_size_wh,
                        random_scale=False,
                        random_mirror=False,
                        img_mean=IMG_MEAN,
                        coord=self.coord,
                        in_pr_meta=self.helper.in_project_meta,
                        class_to_idx=self.class_title_to_idx,
                        shuffle=need_shuffle
                    )
                    batch_sz = self.config['batch_size'][name]
                    img_batch, lbl_batch = reader.dequeue(batch_sz * gpu_count)
                    split_images = tf.split(img_batch, gpu_count, 0)
                    split_labels = tf.split(lbl_batch, gpu_count, 0)
                    splitted_images[name] = split_images
                    splitted_labels[name] = split_labels

            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0), trainable=False, dtype=tf.int32)

            self.tf_label = tf.placeholder(dtype=tf.int32)  # , shape=[None])
            self.tf_prediction = tf.placeholder(dtype=tf.int32)  # , shape=[None])
            self.tf_metric, self.tf_metric_update = tf.metrics.accuracy(
                self.tf_label, self.tf_prediction, name="use_metric_acc"
            )
            running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="use_metric_acc")
            self.running_vars_initializer = tf.variables_initializer(var_list=running_vars)

            base_lr = tf.constant(init_lr)
            self.step_ph = tf.placeholder(dtype=tf.float32, shape=())
            learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - self.step_ph / self.total_train_iters), power))
            opt_conv = tf.train.MomentumOptimizer(learning_rate, momentum)
            opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, momentum)
            opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, momentum)

            all_grads_conv = []
            all_grads_fc_w = []
            all_grads_fc_b = []
            losses = []
            with tf.variable_scope(tf.get_variable_scope()):
                for curr_dev_id in self.device_ids:
                    with tf.device('/gpu:{}'.format(curr_dev_id)):
                        with tf.name_scope('clone_{}'.format(curr_dev_id)) as scope:
                            spl_img = splitted_images['train'][curr_dev_id]
                            spl_lbl = splitted_labels['train'][curr_dev_id]

                            net = get_model(spl_img, num_classes)

                            prediction, gt, self.v1, self.v2 = forward(net, spl_lbl, num_classes)
                            # print('shapes', tf.shape(prediction), tf.shape(gt), tf.shape(split_labels[i]))
                            reduced_loss = get_loss(prediction, gt, weight_decay)
                            losses.append(reduced_loss)
                            tf.get_variable_scope().reuse_variables()

                            grads_conv, grads_fc_w, grads_fc_b = get_grads(reduced_loss, train_beta_gamma, update_mean_var)
                            all_grads_conv.append(grads_conv)
                            all_grads_fc_w.append(grads_fc_w)
                            all_grads_fc_b.append(grads_fc_b)

            self.total_loss = tf.stack(values=losses)
            self.total_loss = tf.reduce_mean(self.total_loss)

            mean_grads_conv = average_gradients(all_grads_conv)
            mean_grads_fc_w = average_gradients(all_grads_fc_w)
            mean_grads_fc_b = average_gradients(all_grads_fc_b)

            conv_trainable, fc_w_trainable, fc_b_trainable = get_trainable_vars(train_beta_gamma)

            # Apply the gradients to adjust the shared variables.
            apply_gradient_conv_op = opt_conv.apply_gradients(zip(mean_grads_conv, conv_trainable), global_step=global_step)
            apply_gradient_fc_w_op = opt_fc_w.apply_gradients(zip(mean_grads_fc_w, fc_w_trainable), global_step=global_step)
            apply_gradient_fc_b_op = opt_fc_b.apply_gradients(zip(mean_grads_fc_b, fc_b_trainable), global_step=global_step)

            # Group all updates to into a single train op.
            self.train_op = tf.group(apply_gradient_conv_op, apply_gradient_fc_w_op,
                                     apply_gradient_fc_b_op)

            self.total_val_loss, self.v1_val, self.v2_val = get_val_loss(
                splitted_images['val'], splitted_labels['val'],
                num_classes, weight_decay, self.device_ids
            )

            # Set up tf session and initialize variables.
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            config.allow_soft_placement = True
            config.log_device_placement = False
            self.sess = tf.Session(config=config)
            init = tf.global_variables_initializer()

            self.sess.run(init)

            # Saver for storing checkpoints of the model.
            self.saver = tf.train.Saver(var_list=tf.global_variables(), save_relative_paths=True)
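average_gradients is not shown; given that get_grads apparently returns plain gradient tensors in a fixed variable order (see the zip(...) calls above), a minimal sketch, assuming no gradient is None:

def average_gradients(tower_grads):
    # tower_grads: one list of gradient tensors per GPU tower, all ordered like
    # the corresponding variable list; return the per-variable mean over towers.
    return [tf.reduce_mean(tf.stack(grads), axis=0) for grads in zip(*tower_grads)]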
Code example #7
def main():
    # lr_decay = 0.5
    # decay_every = 100
    """Create the model and start the training."""
    args = get_arguments()
    
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    
    tf.set_random_seed(args.random_seed)
    
    coord = tf.train.Coordinator()
    
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
    
    # Set up tf session and initialize variables. 
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.allow_soft_placement = True
    # config.intra_op_parallelism_threads = 1
    sess = tf.Session(config = config)
    net = unext(image_batch, is_train = True, reuse = False, n_out = NUM_CLASSES)
    
    # Predictions: ignore all predictions with labels greater than or equal to num_classes
    raw_output = net.outputs
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False) # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), dtype = tf.int32)
    prediction = tf.gather(raw_prediction, indices)
                                                                                            
    main_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = prediction, labels = gt)

    t_vars = tf.trainable_variables()
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in t_vars if 'kernel' in v.name]
    #reduced_loss = 0.5 * tf.reduce_mean(main_loss) + generalised_dice_loss(prediction, gt) + tf.add_n(l2_losses)
    reduced_loss = tf.reduce_mean(main_loss) + tf.add_n(l2_losses)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)
    
    # Image summary.
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
    
    total_summary = tf.summary.image('images', 
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 
                                     max_outputs=args.save_num_images) # Concatenate row-wise.
    loss_summary = tf.summary.scalar('TotalLoss', reduced_loss)
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Exponential-decay learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.train.exponential_decay(base_lr, step_ph, args.num_steps, args.power)

    lr_summary = tf.summary.scalar('LearningRate', learning_rate)
    #train_op = tf.train.MomentumOptimizer(learning_rate, args.momentum).minimize(reduced_loss, var_list = t_vars)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(reduced_loss, var_list = t_vars)
    init = tf.global_variables_initializer()
    sess.run(init)
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list = tf.global_variables(), max_to_keep = 10)

    ckpt = tf.train.get_checkpoint_state(SNAPSHOT_DIR)
    if ckpt and ckpt.model_checkpoint_path:
        #restore_vars = list([t for t in tf.global_variables() if not 'uconv1' in t.name])
        loader = tf.train.Saver(var_list = tf.global_variables())
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')
        load_step = 0

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord = coord, sess = sess)

    # Iterate over training steps.
    save_summary_every = 10
    for step in range(args.num_steps):
        start_time = time.time()
        
        feed_dict = {step_ph: step}
        if step % args.save_pred_every != 0:
            loss_value, _, l_summary, lr_summ = sess.run([reduced_loss, train_op, loss_summary, lr_summary], feed_dict=feed_dict)
            duration = time.time() - start_time
        else:
            loss_value, _, summary, l_summary, lr_summ = sess.run([reduced_loss, train_op, total_summary, loss_summary, lr_summary], feed_dict=feed_dict)
            duration = time.time() - start_time
            save(saver, sess, args.snapshot_dir, step)
            summary_writer.add_summary(summary, step)

        if step % save_summary_every == 0:
    
            summary_writer.add_summary(l_summary, step)
            summary_writer.add_summary(lr_summ, step)
        
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))
        
    coord.request_stop()
    coord.join(threads)
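prepare_label, used here and in examples #4 and #9 (there as prepare_labels), follows the widely copied tensorflow-deeplab-resnet helper; treat this as a sketch of that pattern rather than the project's exact code:

def prepare_label(input_batch, new_size, num_classes, one_hot=True):
    # Resize ground-truth labels to new_size and optionally one-hot encode them.
    with tf.name_scope('label_encode'):
        input_batch = tf.image.resize_nearest_neighbor(input_batch, new_size)  # no interpolation of class ids
        input_batch = tf.squeeze(input_batch, axis=[3])  # [batch, h, w]
        if one_hot:
            input_batch = tf.one_hot(input_batch, depth=num_classes)
    return input_batch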
Code example #8
def main():
    """Create the model and start the training."""
    args = get_arguments()
    
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    
    coord = tf.train.Coordinator()
    
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
    
    net = ICNet_BN({'data': image_batch}, is_training=True, num_classes=args.num_classes)
    
    sub4_out = net.layers['sub4_out']
    sub24_out = net.layers['sub24_out']
    sub124_out = net.layers['conv6_cls']

    fc_list = ['conv6_cls']

    all_trainable = [v for v in tf.trainable_variables() if ('beta' not in v.name and 'gamma' not in v.name) or args.train_beta_gamma]
    # Restore everything unless not_restore_last is set, in which case skip the final classifier layers.
    restore_var = [v for v in tf.global_variables() if not any(f in v.name for f in fc_list) or not args.not_restore_last]
   
    for v in restore_var:
        print(v.name)

    loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes, args.ignore_label, args.use_class_weights)
    loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes, args.ignore_label, args.use_class_weights)
    loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes, args.ignore_label, args.use_class_weights)
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    
    loss = LAMBDA1 * loss_sub4 +  LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124

    reduced_loss = loss + tf.add_n(l2_losses)


    ##############################
    # visualization and summary
    ##############################


    # Processed predictions: for visualisation.

    # Sub 4
    raw_output_up4 = tf.image.resize_bilinear(sub4_out, tf.shape(image_batch)[1:3])
    raw_output_up4 = tf.argmax(raw_output_up4, axis=3)
    pred4 = tf.expand_dims(raw_output_up4, axis=3)
    # Sub 24
    raw_output_up24 = tf.image.resize_bilinear(sub24_out, tf.shape(image_batch)[1:3])
    raw_output_up24 = tf.argmax(raw_output_up24, axis=3)
    pred24 = tf.expand_dims(raw_output_up24, axis=3)
    # Sub 124
    raw_output_up124 = tf.image.resize_bilinear(sub124_out, tf.shape(image_batch)[1:3])
    raw_output_up124 = tf.argmax(raw_output_up124, axis=3)
    pred124 = tf.expand_dims(raw_output_up124, axis=3)

    images_summary = tf.py_func(inv_preprocess, [image_batch, SAVE_NUM_IMAGES, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch,SAVE_NUM_IMAGES, args.num_classes], tf.uint8)

    preds_summary4 = tf.py_func(decode_labels, [pred4, SAVE_NUM_IMAGES, args.num_classes], tf.uint8)
    preds_summary24 = tf.py_func(decode_labels, [pred24, SAVE_NUM_IMAGES, args.num_classes], tf.uint8)
    preds_summary124 = tf.py_func(decode_labels, [pred124, SAVE_NUM_IMAGES, args.num_classes], tf.uint8)
    
    total_images_summary = tf.summary.image('images', 
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary124]), 
                                     max_outputs=SAVE_NUM_IMAGES) # Concatenate row-wise.

    total_summary = [total_images_summary]

    loss_summary = tf.summary.scalar('Total_loss', reduced_loss)

    total_summary.append(loss_summary)
    
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())
    ##############################
    ##############################

    # Using Poly learning rate policy 
    if LR_SHEDULE == {}:
        base_lr = tf.constant(args.learning_rate)
        step_ph = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    else:
        step_ph = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.Variable(LR_SHEDULE.popitem()[1], dtype=tf.float32, trainable=False)

    lr_summary = tf.summary.scalar('Learning_rate', learning_rate)
    total_summary.append(lr_summary)
    
    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if not args.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))
        
    # Set up tf session and initialize variables. 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    
    sess.run(init)
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list = tf.global_variables(), max_to_keep = 10)

    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess, ignore_layers = fc_list)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    summ_op = tf.summary.merge(total_summary)
    
    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        
        if LR_SHEDULE != {}:
            # When the scheduled step is reached, run an assign so the
            # learning-rate variable actually takes the scheduled value.
            next_step = min(LR_SHEDULE.keys())
            if step == next_step:
                sess.run(tf.assign(learning_rate, LR_SHEDULE.pop(next_step)))

        feed_dict = {step_ph: step}
        if step % args.save_pred_every == 0:
            
            loss_value, loss1, loss2, loss3, _, summary =\
                sess.run([reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op, summ_op], feed_dict = feed_dict)

            save(saver, sess, args.snapshot_dir, step)
            summary_writer.add_summary(summary, step)

        else:
            loss_value, loss1, loss2, loss3, _ = sess.run([reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op], feed_dict=feed_dict)
            
        duration = time.time() - start_time
        print('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'.format(step, loss_value, loss1, loss2, loss3, duration))
        
    coord.request_stop()
    coord.join(threads)
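The inv_preprocess and decode_labels functions passed to tf.py_func here (and in examples #7 and #9) are again project helpers; a minimal numpy sketch, with import numpy as np assumed and PALETTE standing in for whatever colour table the project defines:

def inv_preprocess(imgs, num_images, img_mean):
    # Undo the mean subtraction and BGR channel order for display.
    n, h, w, c = imgs.shape
    out = np.zeros((num_images, h, w, c), dtype=np.uint8)
    for i in range(num_images):
        out[i] = (imgs[i] + img_mean)[:, :, ::-1].astype(np.uint8)
    return out

def decode_labels(mask, num_images, num_classes):
    # Map integer class ids to RGB colours.
    n, h, w, _ = mask.shape
    out = np.zeros((num_images, h, w, 3), dtype=np.uint8)
    for i in range(num_images):
        for cls in range(num_classes):
            out[i][mask[i, :, :, 0] == cls] = PALETTE[cls]  # PALETTE: assumed colour table
    return out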
Code example #9
File: train.py  Project: tuckerdarby/tf-deeplab-v3
def main():
    # Create model and start training
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator
    coord = tf.train.Coordinator()

    # Load Image Reader
    with tf.name_scope('create_inputs'):
        reader = ImageReader(
            args.index_loc,
            args.data_dir,
            args.mask_dir,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)

        image_batch, label_batch = reader.dequeue(args.batch_size)

    mode = tf.contrib.learn.ModeKeys.TRAIN
    net = DeepLabResNetModel(image_batch, mode, args.num_classes, args.atrous_blocks)

    raw_output = net.output

    # Trainable Variables
    restore_vars = [v for v in tf.global_variables() if 'fc' not in v.name or not args.not_restore_last]
    all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable if 'fc' not in v.name]
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]

    # Predictions: ignore all predictions with labels greater than or equal to num_classes
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_labels(label_batch, tf.stack(raw_output.get_shape()[1:3]), num_classes=args.num_classes, one_hot=False)
    raw_gt = tf.reshape(label_proc, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise Softmax Loss
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
    variable_summaries(reduced_loss, name='loss')

    # Processed predictions: for visualization
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Define loss and optimization parameters
    base_lr = tf.constant(args.learning_rate, tf.float64)
    global_step = tf.Variable(0, trainable=False, name='global_step')
    increment_step = tf.assign(global_step, global_step + 1)
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - global_step / args.num_steps), args.power))
    learning_rate = tf.maximum(learning_rate, 8e-7)

    opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
    opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 5.0, args.momentum)
    opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)

    grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
    grads_conv = grads[:len(conv_trainable)]
    grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
    grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(increment_step, train_op_conv, train_op_fc_w, train_op_fc_b)

    # initial_learning_rate = 1e-2
    # learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step, 300, 0.96)
    # adam = tf.train.AdamOptimizer(learning_rate).minimize(reduced_loss, global_step=global_step)

    # Image Summary
    model_dir = args.snapshot_dir + args.model_name

    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred, args.save_num_images, args.num_classes], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.save_num_images, args.num_classes], tf.uint8)

    image_summaries = [images_summary, preds_summary, labels_summary]
    image_summary = tf.summary.image('images',
                                     tf.concat(axis=2, values=image_summaries),
                                     max_outputs=args.save_num_images)

    # Variable Summary
    variable_summaries(fc_w_trainable, 'fc_w')
    variable_summaries(fc_b_trainable, 'fc_b')
    variable_summaries(learning_rate, 'learning_rate')
    # variable_summaries(net.weights, 'aconv_w')
    # variable_summaries(net.biases, 'aconv_b')

    total_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(model_dir, graph=tf.get_default_graph())

    # Set up session

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(max_to_keep=3)
        if args.snapshot_dir is not None and args.model_name is not None and os.path.exists(model_dir):
            loader = tf.train.Saver()
            load_model(loader, sess, model_dir)

        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        # train_op = adam

        for step in range(args.num_steps):
            start_time = time.time()

            if step % args.save_pred_every == 0:
                feed = [reduced_loss, image_batch, label_batch, pred, total_summary, global_step, train_op]
                loss_value, images, labels, preds, summary, total_steps, _ = sess.run(feed)
                summary_writer.add_summary(summary, total_steps)
                save_model(saver, sess, model_dir, global_step)
            else:
                feed = [reduced_loss, global_step, train_op]
                loss_value, total_steps, _ = sess.run(feed)

            duration = time.time() - start_time
            results = 'global step: {:d}, step: {:d} \t loss = {:.3f}, ({:.3f} secs)'\
                .format(total_steps, step, loss_value, duration)
            if step % WRITE_EVERY == 0:
                with open(WRITE_FILE, 'a') as f:
                    f.write(results + '\n')
            print(results)

        coord.request_stop()
        coord.join(threads)
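variable_summaries is called above both with single tensors (the loss, the learning rate) and with lists of variables; a sketch that tolerates both, with the exact statistics logged being an assumption:

def variable_summaries(var, name):
    # Scalar stats plus a histogram for each tensor.
    tensors = var if isinstance(var, (list, tuple)) else [var]
    for i, v in enumerate(tensors):
        with tf.name_scope('{}_{}'.format(name, i)):
            mean = tf.reduce_mean(v)
            tf.summary.scalar('mean', mean)
            tf.summary.scalar('stddev', tf.sqrt(tf.reduce_mean(tf.square(v - mean))))
            tf.summary.histogram('histogram', v)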
Code example #10
def evaluate_checkpoint(model_path, args):
    coord = tf.train.Coordinator()

    tf.reset_default_graph()

    reader = ImageReader(
            args.data_list,
            INPUT_SIZE,
            random_scale = False,
            random_mirror = False,
            ignore_label = IGNORE_LABEL,
            img_mean = IMG_MEAN,
            coord = coord,
            train = False)
    image_batch, label_batch = reader.dequeue(batch_size)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord = coord, sess = sess)

    # Create network.
    net = ICNet_BN({'data': image_batch}, num_classes = num_classes)
    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['conv6_cls']

    raw_output_up = tf.image.resize_bilinear(raw_output, size = INPUT_SIZE, align_corners = True)
    raw_output_up = tf.argmax(raw_output_up, axis = 3)
    pred = tf.expand_dims(raw_output_up, axis = 3)

    # mIoU
    pred_flatten = tf.reshape(pred, [-1,])
    raw_gt = tf.reshape(label_batch, [-1,])
    if args.ignore_zero:
        indices = tf.squeeze(tf.where(
            tf.logical_and(
                tf.less_equal(raw_gt, num_classes - 1),
                tf.greater(raw_gt, 0)
                ),), 
            1)
    else:
        indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, num_classes - 1)), 1)

    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    # update_op accumulates the confusion matrix batch by batch; mIoU reads the running result.
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes = num_classes)
    
    # Summaries
    miou_op = tf.summary.scalar('mIOU', mIoU)
    start = time.time()
    logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                        time.gmtime()))
    
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    saver = tf.train.Saver(var_list = restore_var)
    load(saver, sess, model_path)
    

    for step in range(num_steps):
        preds, _ = sess.run([pred, update_op])

        if step % 500 == 0:
            print('Finish {0}/{1}'.format(step + 1, num_steps))

    iou, summ = sess.run([mIoU, miou_op])

    sess.close()

    coord.request_stop()
    #coord.join(threads)

    return summ, iou
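Since evaluate_checkpoint resets the default graph on each call, it can be driven over every snapshot of a training run; a sketch of such a driver, with the args fields assumed:

ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
for path in ckpt.all_model_checkpoint_paths:
    summ, iou = evaluate_checkpoint(path, args)
    print('{}: mIoU = {:.4f}'.format(path, iou))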
Code example #11
def main():
    """Create the model and start the training."""
    args = get_arguments()
    
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    if args.center_crop_size is None:
        center_crop_size = None
    else:
        hc, wc = map(int, args.center_crop_size.split(','))
        center_crop_size = (hc, wc)

    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            DATA_DIR,
            DATA_LIST_PATH,
            input_size,
            center_crop_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = ICNet_BN({'data': image_batch}, is_training=True, num_classes=args.num_classes, filter_scale=args.filter_scale)

    sub4_recls, sub24_recls, sub124_recls = bn_common.extend_reclassifier(net)

    restore_var = tf.global_variables()
    all_trainable = [v for v in tf.trainable_variables() if ('beta' not in v.name and 'gamma' not in v.name) or args.train_beta_gamma]
   
    loss_sub4 = create_loss(sub4_recls, label_batch, args)
    loss_sub24 = create_loss(sub24_recls, label_batch, args)
    loss_sub124 = create_loss(sub124_recls, label_batch, args)
    
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
                 if ('weights' in v.name) or ('kernel' in v.name)]
    
    reduced_loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124 + tf.add_n(l2_losses)

    # Using Poly learning rate policy 
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    
    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if not args.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Set up tf session and initialize variables. 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=99)

    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess)

    # Start queue threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        
        feed_dict = {step_ph: step}
        loss_value, loss1, loss2, loss3, _ = sess.run(
            [reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op], feed_dict=feed_dict)
        if step % args.save_pred_every == 0:
            save(saver, sess, args.snapshot_dir, step)
        duration = time.time() - start_time
        print('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'.format(step, loss_value, loss1, loss2, loss3, duration))
        
    coord.request_stop()
    coord.join(threads)

    sess.close()
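
The hand-rolled poly schedule used above, base_lr * (1 - step / num_steps) ** power, traces the same curve as tf.train.polynomial_decay with an end learning rate of 0. A tiny self-contained sketch (the 1e-3 / 30000 / 0.9 values are placeholders, not taken from the snippet):

def poly_lr(step, base_lr=1e-3, num_steps=30000, power=0.9):
    # Polynomial decay from base_lr down to 0 over num_steps.
    return base_lr * (1.0 - step / float(num_steps)) ** power

for s in (0, 10000, 20000, 29999):
    print(s, poly_lr(s))  # decays monotonically toward 0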
コード例 #12
0
    def run(self):
        tf.set_random_seed(self.random_seed)
        coord = tf.train.Coordinator()

        # Load the data.
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.data_dir, self.data_train_list,
                                 self.input_size, self.random_scale,
                                 self.random_mirror, self.ignore_label,
                                 self.img_mean, coord)
            image_batch, label_batch = reader.dequeue(self.batch_size)

        # Build the network.
        net = PSPNet({'data': image_batch},
                     is_training=True,
                     num_classes=self.num_classes)
        raw_output = net.layers['conv6']

        # According to the prototxt in the Caffe implementation, the learning rate must be multiplied by 10.0 in the pyramid module.
        fc_list = [
            'conv5_3_pool1_conv', 'conv5_3_pool2_conv', 'conv5_3_pool3_conv',
            'conv5_3_pool6_conv', 'conv6', 'conv5_4'
        ]
        # All variables.
        restore_var = tf.global_variables()
        # All trainable variables.
        all_trainable = [
            v for v in tf.trainable_variables()
            if ('beta' not in v.name and 'gamma' not in v.name)
            or self.train_beta_gamma
        ]
        # Trainable variables split into the fully connected layers in fc_list and the convolutional layers.
        fc_trainable = [
            v for v in all_trainable if v.name.split('/')[0] in fc_list
        ]
        conv_trainable = [
            v for v in all_trainable if v.name.split('/')[0] not in fc_list
        ]  # lr * 1.0
        fc_w_trainable = [v for v in fc_trainable
                          if 'weights' in v.name]  # lr * 10.0
        fc_b_trainable = [v for v in fc_trainable
                          if 'biases' in v.name]  # lr * 20.0
        # Sanity checks.
        assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
        assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

        # Predictions: ignoring all predictions with labels greater or equal than n_classes
        raw_prediction = tf.reshape(raw_output, [-1, self.num_classes])
        label_process = prepare_label(label_batch,
                                      tf.stack(raw_output.get_shape()[1:3]),
                                      num_classes=self.num_classes,
                                      one_hot=False)  # [batch_size, h, w]
        raw_gt = tf.reshape(label_process, [
            -1,
        ])
        indices = tf.squeeze(
            tf.where(tf.less_equal(raw_gt, self.num_classes - 1)), 1)
        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        prediction = tf.gather(raw_prediction, indices)

        # Pixel-wise softmax loss.
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction, labels=gt)
        l2_losses = [
            self.weight_decay * tf.nn.l2_loss(v)
            for v in tf.trainable_variables() if 'weights' in v.name
        ]
        reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

        # Using Poly learning rate policy
        base_lr = tf.constant(self.learning_rate)
        step_ph = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(
            base_lr, tf.pow((1 - step_ph / self.num_steps), self.power))

        # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
        update_ops = None if not self.update_mean_var else tf.get_collection(
            tf.GraphKeys.UPDATE_OPS)

        # Optimize the variable groups at different learning rates: compute the gradients once, apply them per group.
        with tf.control_dependencies(update_ops):
            opt_conv = tf.train.MomentumOptimizer(learning_rate, self.momentum)
            opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                                  self.momentum)
            opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                                  self.momentum)

            grads = tf.gradients(
                reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
            grads_conv = grads[:len(conv_trainable)]
            grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                                    len(fc_w_trainable))]
            grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

            train_op_conv = opt_conv.apply_gradients(
                zip(grads_conv, conv_trainable))
            train_op_fc_w = opt_fc_w.apply_gradients(
                zip(grads_fc_w, fc_w_trainable))
            train_op_fc_b = opt_fc_b.apply_gradients(
                zip(grads_fc_b, fc_b_trainable))

            train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
            pass

        sess = tf.Session(config=self.config)
        sess.run(tf.global_variables_initializer())

        # Saver for storing checkpoints of the model.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

        # Load the model.
        ckpt = tf.train.get_checkpoint_state(self.log_dir)
        if ckpt and ckpt.model_checkpoint_path:
            tf.train.Saver(var_list=restore_var).restore(
                sess, ckpt.model_checkpoint_path)
            Tools.print_info("Restored model parameters from {}".format(
                ckpt.model_checkpoint_path))
        else:
            Tools.print_info('No checkpoint file found.')
            pass

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        # Iterate over training steps.
        for step in range(self.num_steps):
            start_time = time.time()
            loss_value, _ = sess.run([reduced_loss, train_op],
                                     feed_dict={step_ph: step})
            if step % self.save_pred_freq == 0:
                saver.save(sess, self.checkpoint_path, global_step=step)
                Tools.print_info('The checkpoint has been created.')
            duration = time.time() - start_time
            Tools.print_info(
                'step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
                    step, loss_value, duration))

        coord.request_stop()
        coord.join(threads)
        pass
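
The per-group optimizer pattern above (one tf.gradients call, sliced into conv / fc-weight / fc-bias lists, each applied by its own MomentumOptimizer) is the standard TF1 way to get layer-wise learning rates. A self-contained two-group sketch of the same idea, with illustrative toy variables:

import tensorflow as tf

# Toy groups: the backbone trains at lr, the head at 10 * lr.
backbone = [tf.Variable(1.0, name='backbone_w')]
head = [tf.Variable(1.0, name='head_w')]
loss = tf.square(backbone[0] - 2.0) + tf.square(head[0] - 2.0)

lr = 0.1
opt_backbone = tf.train.MomentumOptimizer(lr, 0.9)
opt_head = tf.train.MomentumOptimizer(lr * 10.0, 0.9)

grads = tf.gradients(loss, backbone + head)  # one backward pass for both groups
g_backbone, g_head = grads[:len(backbone)], grads[len(backbone):]

train_op = tf.group(
    opt_backbone.apply_gradients(zip(g_backbone, backbone)),
    opt_head.apply_gradients(zip(g_head, head)))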
コード例 #13
0
def main():

    for params, value in sorted(FLAGS.__flags.items()):
        print('{}: {}'.format(params, value))

    input_size = (FLAGS.train_image_size, FLAGS.train_image_size)

    tf.set_random_seed(1234)

    coord = tf.train.Coordinator()

    reader = ImageReader(FLAGS.data_dir, FLAGS.data_list, input_size,
                         FLAGS.random_scale, FLAGS.random_mirror,
                         FLAGS.ignore_label, IMG_MEAN, coord)
    image_batch, label_batch = reader.dequeue(FLAGS.batch_size)

    raw_output = MobileNet(image_batch,
                           isTraining=True,
                           updateBeta=FLAGS.update_beta)

    psp_list = [
        'conv_ds_15a', 'conv_ds_15b', 'conv_ds_15c', 'conv_ds_15d',
        'conv_ds_16', 'conv_ds_17'
    ]
    all_trainable = [v for v in tf.trainable_variables()]
    if not FLAGS.update_beta:
        all_trainable = [v for v in all_trainable if 'beta' not in v.name]
    psp_trainable = [
        v for v in all_trainable if v.name.split('/')[1] in psp_list and (
            'weights' in v.name or 'biases' in v.name)
    ]
    conv_trainable = [v for v in all_trainable
                      if v not in psp_trainable]  # lr * 1.0
    psp_w_trainable = [v for v in psp_trainable
                       if 'weights' in v.name]  # lr * 10.0
    psp_b_trainable = [v for v in psp_trainable
                       if 'biases' in v.name]  # lr * 20.0

    assert (len(all_trainable) == len(psp_trainable) + len(conv_trainable))
    assert (len(psp_trainable) == len(psp_w_trainable) + len(psp_b_trainable))

    # Predictions: ignoring all predictions with labels greater or equal than n_classes
    raw_prediction = tf.reshape(raw_output, [-1, FLAGS.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=FLAGS.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [
        -1,
    ])
    indices = tf.squeeze(
        tf.where(tf.less_equal(raw_gt, FLAGS.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    # Regularisation loss
    l2_losses = [
        FLAGS.weight_decay * tf.nn.l2_loss(v)
        for v in tf.trainable_variables() if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
    # TODO: auxiliary loss

    # Using Poly learning rate policy
    current_epoch = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.train.polynomial_decay(
        FLAGS.start_learning_rate,
        current_epoch,
        FLAGS.decay_steps,
        end_learning_rate=FLAGS.end_learning_rate,
        power=FLAGS.learning_rate_decay_power,
        name="poly_learning_rate")

    if not FLAGS.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        if FLAGS.optimizer == 'momentum':
            opt_conv = tf.train.MomentumOptimizer(learning_rate,
                                                  FLAGS.momentum)
            opt_psp_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                                   FLAGS.momentum)
            opt_psp_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                                   FLAGS.momentum)
        elif FLAGS.optimizer == 'rmsprop':
            opt_conv = tf.train.RMSPropOptimizer(
                learning_rate,
                decay=FLAGS.rmsprop_decay,
                momentum=FLAGS.rmsprop_momentum,
                epsilon=FLAGS.opt_epsilon)
            opt_psp_w = tf.train.RMSPropOptimizer(
                learning_rate * 10.0,
                decay=FLAGS.rmsprop_decay,
                momentum=FLAGS.rmsprop_momentum,
                epsilon=FLAGS.opt_epsilon)
            opt_psp_b = tf.train.RMSPropOptimizer(
                learning_rate * 20.0,
                decay=FLAGS.rmsprop_decay,
                momentum=FLAGS.rmsprop_momentum,
                epsilon=FLAGS.opt_epsilon)
        else:
            raise ValueError('Unsupported optimizer: {}'.format(FLAGS.optimizer))

        grads = tf.gradients(
            reduced_loss, conv_trainable + psp_w_trainable + psp_b_trainable)
        grads_conv = grads[:len(conv_trainable)]
        grads_psp_w = grads[len(conv_trainable):(len(conv_trainable) +
                                                 len(psp_w_trainable))]
        grads_psp_b = grads[(len(conv_trainable) + len(psp_w_trainable)):]

        train_op_conv = opt_conv.apply_gradients(
            zip(grads_conv, conv_trainable))
        train_op_psp_w = opt_psp_w.apply_gradients(
            zip(grads_psp_w, psp_w_trainable))
        train_op_psp_b = opt_psp_b.apply_gradients(
            zip(grads_psp_b, psp_b_trainable))

        train_op = tf.group(train_op_conv, train_op_psp_w, train_op_psp_b)

    restore_var = tf.global_variables()

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=500)

    load(sess, FLAGS.pretrained_checkpoint, restore_var)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for epoch in range(FLAGS.start_epoch,
                       FLAGS.start_epoch + FLAGS.num_epochs):

        total_loss = 0.0
        for step in range(1, FLAGS.num_steps + 1):

            start_time = time.time()

            feed_dict = {current_epoch: epoch}
            loss_value, _ = sess.run([reduced_loss, train_op],
                                     feed_dict=feed_dict)

            duration = time.time() - start_time
            print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))
            # TODO: ignore NaN loss
            total_loss += loss_value

        save(saver, sess, FLAGS.log_dir, epoch)
        total_loss /= FLAGS.num_steps
        print('Epoch {:d} completed! Total Loss = {:.3f}'.format(
            epoch, total_loss))

    coord.request_stop()
    coord.join(threads)
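
The flatten / tf.where / tf.gather steps seen in these snippets exist because sparse_softmax_cross_entropy_with_logits has no ignore-label argument in TF1: out-of-range labels must be filtered out before the loss is computed. A toy, self-contained version of that masking (the shapes and the ignore value 255 are illustrative, not from the snippet):

import tensorflow as tf

num_classes = 3
logits = tf.reshape(tf.random_normal([2, 2, num_classes]), [-1, num_classes])
labels = tf.constant([0, 255, 2, 1], dtype=tf.int64)  # 255 marks ignored pixels

keep = tf.squeeze(tf.where(tf.less_equal(labels, num_classes - 1)), 1)
gt = tf.cast(tf.gather(labels, keep), tf.int32)       # valid labels only
logits_kept = tf.gather(logits, keep)                 # matching logit rows

loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_kept, labels=gt))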
コード例 #14
0
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_list, input_size, args.random_scale,
                             args.random_mirror, args.ignore_label, IMG_MEAN,
                             coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = PSPNet50({'data': image_batch},
                   is_training=True,
                   num_classes=args.num_classes)

    raw_output = net.layers['conv6']

    # According to the prototxt in the Caffe implementation, the learning rate must be multiplied by 10.0 in the pyramid module.
    fc_list = [
        'conv5_3_pool1_conv', 'conv5_3_pool2_conv', 'conv5_3_pool3_conv',
        'conv5_3_pool6_conv', 'conv6', 'conv5_4'
    ]
    restore_var = [
        v for v in tf.global_variables()
        if not any(f in v.name for f in fc_list) or not args.not_restore_last
    ]
    all_trainable = [
        v for v in tf.trainable_variables()
        if 'gamma' not in v.name and 'beta' not in v.name
    ]
    fc_trainable = [
        v for v in all_trainable if v.name.split('/')[0] in fc_list
    ]
    conv_trainable = [
        v for v in all_trainable if v.name.split('/')[0] not in fc_list
    ]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable
                      if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable
                      if 'biases' in v.name]  # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignoring all predictions with labels greater or equal than n_classes
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [
        -1,
    ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)),
                         1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)

    # Make mistakes for class N more important for network
    if USE_CLASS_WEIGHTS:
        if len(CLASS_WEIGHTS) != NUM_CLASSES:
            print('Incorrect class weights; they will not be used')
        else:
            mask = tf.zeros_like(loss)

            for i, w in enumerate(CLASS_WEIGHTS):
                mask = mask + tf.cast(tf.equal(gt, i),
                                      tf.float32) * tf.constant(w)
            loss = loss * mask

    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3])
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images, args.num_classes],
        tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)

    total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if not args.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                              args.momentum)
        opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                              args.momentum)

        grads = tf.gradients(reduced_loss,
                             conv_trainable + fc_w_trainable + fc_b_trainable)
        grads_conv = grads[:len(conv_trainable)]
        grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                                len(fc_w_trainable))]
        grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

        train_op_conv = opt_conv.apply_gradients(
            zip(grads_conv, conv_trainable))
        train_op_fc_w = opt_fc_w.apply_gradients(
            zip(grads_fc_w, fc_w_trainable))
        train_op_fc_b = opt_fc_b.apply_gradients(
            zip(grads_fc_b, fc_b_trainable))

        train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.allow_soft_placement = True
    # config.intra_op_parallelism_threads = 1
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    ckpt = tf.train.get_checkpoint_state(SNAPSHOT_DIR)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(
            os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')
        load_step = 0

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        feed_dict = {step_ph: step}
        if step % args.save_pred_every == 0:
            loss_value, _, summary = sess.run(
                [reduced_loss, train_op, total_summary], feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            z, t, o, p, loss_value, _ = sess.run(
                [raw_gt, raw_output, gt, prediction, reduced_loss, train_op],
                feed_dict=feed_dict)
            print(z.shape, t.shape, o.shape, p.shape)
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
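
The CLASS_WEIGHTS block in this snippet scales each pixel's loss by the weight of its ground-truth class via a Python loop over classes. The same mask can be built with a single tf.gather; a self-contained sketch with illustrative values:

import tensorflow as tf

loss = tf.constant([0.7, 0.2, 1.1])           # per-pixel cross-entropy (illustrative)
gt = tf.constant([0, 2, 1])                   # ground-truth class of each pixel
class_weights = tf.constant([1.0, 2.0, 0.5])  # one weight per class (illustrative)

weighted = loss * tf.gather(class_weights, gt)  # pixel i scaled by its class weight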
コード例 #15
0
def evaluate_checkpoint(model_path, args):
    coord = tf.train.Coordinator()

    tf.reset_default_graph()

    reader = ImageReader(
            args.data_list,
            INPUT_SIZE,
            random_scale=False,
            random_mirror=False,
            ignore_label=IGNORE_LABEL,
            img_mean=IMG_MEAN,
            coord=coord,
            train=False)
    image_batch, label_batch = reader.dequeue(args.batch_size)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Create network.
    #net = ICNet_BN({'data': image_batch}, is_training = False, num_classes = num_classes)
    net = unext(image_batch, is_train=False, n_out=NUM_CLASSES)

    # Predictions.
    #raw_output = net.layers['conv6']
    raw_output = net.outputs

    raw_output_up = tf.image.resize_bilinear(raw_output, size=INPUT_SIZE, align_corners=True)
    raw_output_up = tf.argmax(raw_output_up, axis=3)
    pred = tf.expand_dims(raw_output_up, axis=3)

    # mIoU
    pred_flatten = tf.reshape(pred, [-1,])
    raw_gt = tf.reshape(label_batch, [-1,])
    indices = tf.squeeze(tf.where(tf.not_equal(raw_gt, IGNORE_LABEL)), 1)

    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    iou_metric, iou_op = tf.metrics.mean_iou(labels=gt, predictions=pred, num_classes=num_classes)
    acc_metric, acc_op = tf.metrics.accuracy(labels=gt, predictions=pred)

    # Summaries
    iou_summ_op = tf.summary.scalar('mIOU', iou_metric)
    acc_summ_op = tf.summary.scalar('Accuracy', acc_metric)
    start = time.time()
    logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                        time.gmtime()))

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    saver = tf.train.Saver(var_list=tf.global_variables())
    load(saver, sess, model_path)

    for step in range(int(num_steps / batch_size)):
        preds, _, _ = sess.run([raw_output_up, iou_op, acc_op])

        if step % int(100 / batch_size) == 0:
            print('Finish {0}/{1}'.format(step + 1, int(num_steps / batch_size)))

    iou, iou_summ, acc, acc_summ = sess.run([iou_metric, iou_summ_op, acc_metric, acc_summ_op])

    coord.request_stop()
    sess.close()

    return iou, iou_summ, acc, acc_summ
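
One detail worth noting in the snippet above: tf.metrics.mean_iou and tf.metrics.accuracy keep their running totals in local variables, which is why tf.local_variables_initializer() is run alongside the global initializer. A minimal demonstration with hand-picked values:

import tensorflow as tf

labels = tf.constant([0, 1, 1, 0])
preds = tf.constant([0, 1, 0, 0])
acc, acc_op = tf.metrics.accuracy(labels=labels, predictions=preds)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric state lives in local variables
    sess.run(acc_op)                            # accumulate one batch
    print(sess.run(acc))                        # 0.75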
コード例 #16
0
def main(argv=None):

    input_size = (cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
    # Create queue coordinator.
    coord = tf.train.Coordinator()
    # Load reader.
    # Train
    print('Train ' + cfg.train_data_list)
    with tf.name_scope("create_inputs"):
        reader = ImageReader(cfg.train_data_dir, cfg.train_data_list,
                             input_size, cfg.random_scale, cfg.random_resize,
                             cfg.random_mirror, cfg.random_color,
                             cfg.random_crop_pad, cfg.ignore_label,
                             cfg.IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(cfg.batch_size)

    # Define Network
    pred_annotation, logits = inference_deeplabv3_plus_16(
        image_batch, is_training=True)  # Modified
    logits_loss = cross_entropy_loss(logits,
                                     label_batch)  # loss1 for ECP dataset
    # logits_loss = weighted_cross_entropy_loss(logits, label_batch)                # loss2 for RueMonge dataset
    # logits_loss = weighted_cross_entropy_loss_4class(logits, label_batch)

    # # PSPNet
    # pred_annotation, logits, logits_dsn = inference_pspnet(image_batch, is_training=True)  # PSPNet
    # # logits_loss = cross_entropy_loss(logits, label_batch) + \
    # #               cross_entropy_loss(logits_dsn, label_batch)  # loss1 for ECP dataset
    # logits_loss = weighted_cross_entropy_loss(logits, label_batch) + \
    #               weighted_cross_entropy_loss(logits_dsn, label_batch)  # loss2 for RueMonge dataset

    ce_loss = logits_loss  # cross entropy loss

    # Show acc for validation or train dataset
    if cfg.is_time_acc or cfg.is_epoch_acc:
        with tf.variable_scope('', reuse=True):
            val_image_batch = tf.placeholder(
                tf.float32,
                shape=[1, IMAGE_HEIGHT, IMAGE_WIDTH, 3],
                name="input_image")
            f = open(cfg.val_data_list, 'r')
            val_img_list = []
            val_label_list = []
            for line in f:
                try:
                    image_name, label = line.strip("\n").split(' ')
                except ValueError:  # Adhoc for test.
                    image_name = label = line.strip("\n")
                val_img_list.append(cfg.val_data_dir + image_name)
                val_label_list.append(cfg.val_data_dir + label)

            _, val_logits = inference_deeplabv3_plus_16_init(
                val_image_batch, is_training=False)  # Modified
            # _, val_logits, _ = inference_pspnet(val_image_batch, is_training=False)                   # PSPNet

            val_logits_softmax = tf.nn.softmax(val_logits)

    l2_loss = [
        weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name or 'w' in v.name or 'W' in v.name
    ]  # encode: W, facade: weights
    l2_losses = tf.add_n(l2_loss)
    # Total loss
    loss = ce_loss + l2_losses  # + stru_loss

    tf.summary.scalar("loss_ce", ce_loss)
    tf.summary.scalar("l2_losses", l2_losses)
    tf.summary.scalar("total_loss", loss)

    step_ph = tf.placeholder(dtype=tf.float32, shape=())

    # Using Poly learning rate policy
    base_lr = tf.constant(cfg.learning_rate)
    learning_rate = tf.scalar_mul(base_lr,
                                  tf.pow((1 - step_ph / global_step), power))

    trainable_var = tf.trainable_variables()

    # Optimizer
    if cfg.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate)
        print('Optimizer: Adam')
    elif cfg.optimizer == 'Adam2':
        optimizer = tf.train.AdamOptimizer(learning_rate,
                                           beta1=0.9,
                                           beta2=0.99)
    elif cfg.optimizer == 'SGD':
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    elif cfg.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
        print('Optimizer: Momentum')
    elif cfg.optimizer == 'RMSProp':
        optimizer = tf.train.RMSPropOptimizer(learning_rate)

    # grads = optimizer.compute_gradients(loss, var_list=trainable_var)
    # train_op = optimizer.apply_gradients(grads)

    ## Optimizer definition - nothing different from any classical example
    opt = optimizer

    ## Retrieve all trainable variables you defined in your graph
    if cfg.freeze_bn:
        tvs = [
            v for v in tf.trainable_variables()
            if 'beta' not in v.name and 'gamma' not in v.name
        ]
    else:
        tvs = [v for v in tf.trainable_variables()]

    ## Creation of a list of variables with the same shape as the trainable ones
    # initialized with 0s
    accum_vars = [
        tf.Variable(tf.zeros_like(tv.initialized_value()), trainable=False)
        for tv in tvs
    ]
    zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]

    ## Calls the compute_gradients function of the optimizer to obtain... the list of gradients
    gvs = opt.compute_gradients(loss, tvs)

    ## Adds to each element from the list you initialized earlier with zeros its gradient (works because accum_vars and gvs are in the same order)
    accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(gvs)]

    ## Define the training step (part with variable value update)
    train_step = opt.apply_gradients([(accum_vars[i], gv[1])
                                      for i, gv in enumerate(gvs)])

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    # Set gpu usage
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 1.0
    sess = tf.Session(config=config)
    print("Setting up Saver...")
    saver = tf.train.Saver(max_to_keep=cfg.model_save_num)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # create two summary writers to show training loss and validation loss in the same graph
    # need to create two folders 'train' and 'validation' inside FLAGS.logs_dir
    if not os.path.exists(cfg.logs_dir):
        os.makedirs(cfg.logs_dir)
    train_writer = tf.summary.FileWriter(cfg.logs_dir + 'train', sess.graph)
    test_writer = tf.summary.FileWriter(cfg.logs_dir + 'test')

    if not os.path.exists(cfg.save_dir):
        os.makedirs(cfg.save_dir)
    count = 0
    files = os.path.join(cfg.save_dir, 'model.ckpt-*.index')
    sfile = glob.glob(files)
    if len(sfile) > 0:
        sess.run(tf.global_variables_initializer())
        sfile = glob.glob(files)
        steps = []
        for s in sfile:
            part = s.split('.')
            step = int(part[1].split('-')[1])
            steps.append(step)
        count = max(steps)
        model = cfg.save_dir + 'model.ckpt-' + str(count)
        print('\nRestoring weights from: ' + model)
        saver.restore(sess, model)
        print('End Restore')
    else:
        # Restore from a model pre-trained on ImageNet.
        variables = tf.global_variables()
        sess.run(tf.variables_initializer(variables, name='init'))

        # TensorFlow checkpoint
        if os.path.exists(cfg.pre_trained_model) or os.path.exists(
                cfg.pre_trained_model + '.index'):
            var_keep_dic = get_variables_in_checkpoint_file(
                cfg.pre_trained_model)
            # Get the variables to restore, ignoring the variables to fix
            variables_to_restore = get_variables_to_restore(
                variables, var_keep_dic)
            if len(variables_to_restore) > 0:
                restorer = tf.train.Saver(variables_to_restore)
                restorer.restore(sess, cfg.pre_trained_model)
                print('Model pre-train loaded from ' + cfg.pre_trained_model)
            else:
                print('Model inited random.')
        else:
            print('Model inited random.')

        # RGB -> BGR
        if 'res' in cfg.pre_trained_model:
            conv1_rgb = tf.get_variable("conv1_rgb", [7, 7, 3, 64],
                                        trainable=False)
            restorer_fc = tf.train.Saver(
                {'resnet_v1_50/conv1/weights': conv1_rgb})
            restorer_fc.restore(sess, cfg.pre_trained_model)
            sess.run(tf.assign(variables[0], tf.reverse(conv1_rgb, [2])))
            print('ResNet Conv 1 RGB->BGR')
        elif 'vgg' in cfg.pre_trained_model:
            conv1_rgb = tf.get_variable("conv1_rgb", [3, 3, 3, 64],
                                        trainable=False)
            restorer_fc = tf.train.Saver(
                {'vgg_16/conv1/conv1_1/weights': conv1_rgb})
            restorer_fc.restore(sess, cfg.pre_trained_model)
            sess.run(tf.assign(variables[0], tf.reverse(conv1_rgb, [2])))
            print('Vgg Conv 1 RGB->BGR')

    _mask = pred_annotation[0]
    _img = image_batch[0]
    _gt = label_batch[0]
    if not os.path.exists(cfg.save_dir + 'temp_img'):
        os.mkdir(cfg.save_dir + 'temp_img')

    print('Start train ' + cfg.data_dir)
    # Hyper-parameters, printed to stdout and mirrored to cfg.txt in the save dir.
    hyper_params = [
        ('batch_size', cfg.batch_size),
        ('Gradient Accumulation', cfg.Gradient_Accumulation),
        ('image height', cfg.IMAGE_HEIGHT),
        ('image width', cfg.IMAGE_WIDTH),
        ('learning rate', cfg.learning_rate),
        ('GPU', cfg.use_gpu),
        ('optimizer', cfg.optimizer),
        ('class num', cfg.NUM_OF_CLASSESS),
        ('total iter', cfg.total_iter),
        ('Time acc', cfg.is_time_acc),
        ('Acc interval', cfg.acc_interval),
        ('Start Acc iter', cfg.start_show_iter),
        ('Is save step', cfg.is_save_step),
        ('Start save step', cfg.start_save_step),
        ('save epoch', cfg.save_step_inter),
        ('model save num', cfg.model_save_num),
        ('summary interval', cfg.summary_interval),
        ('weight decay', cfg.weight_decay),
        ('Freeze BN', cfg.freeze_bn),
        ('Decay rate', cfg.decay_rate),
        ('minScale', cfg.minScale),
        ('maxScale', cfg.maxScale),
        ('random scale', cfg.random_scale),
        ('random mirror', cfg.random_mirror),
        ('random crop', cfg.random_crop_pad),
        ('Validation on', cfg.val_data_list),
        ('Pre-trained', cfg.pre_trained_model),
    ]
    print('---------------Hyper Paras---------------')
    for name, value in hyper_params:
        print('-- {}: {}'.format(name, value))
    print('----------------End---------------------')
    with open(cfg.save_dir + 'cfg.txt', 'w') as fcfg:
        for name, value in hyper_params:
            fcfg.write('-- {}: {}\n'.format(name, value))

    last_summary_time = time.time()
    last_acc_time = time.time()
    record = train_number / cfg.batch_size  # iter number of each epoch
    if cfg.is_save_step:  # resume counters when saving by step
        running_count = count
        epo = int(count / record)
    if cfg.is_save_epoch:  # resume counters when saving by epoch
        epo = count
        running_count = int(epo * record)

    best_acc = 0.5
    best_step = 0
    train_start_time = time.time()
    start_step = running_count
    lossTr_list = []
    stepes = []
    Acc_val_list = []

    # Finalize the graph; it is read-only from here on.
    sess.graph.finalize()
    while running_count < cfg.total_iter:
        time_start = time.time()
        itr = 0
        while itr < int(record):
            itr += 1
            running_count += 1

            # Save each of the last 10 models.
            if running_count > (cfg.total_iter -
                                10) and cfg.is_save_last10_model:
                saver.save(sess, cfg.save_dir + 'model.ckpt',
                           int(running_count))
                print('Model has been saved:' + str(running_count))

            # Past the total iteration count: stop training.
            if running_count > cfg.total_iter:
                break

            feed_dict = {step_ph: (running_count)}

            # save summary
            now = time.time()
            if now - last_summary_time > cfg.summary_interval:
                summary_str = sess.run(summary_op,
                                       feed_dict={step_ph: running_count})
                train_writer.add_summary(summary_str, running_count)
                last_summary_time = now
                score_map, img, gt = sess.run([_mask, _img, _gt],
                                              feed_dict=feed_dict)
                img = np.array(img + cfg.IMG_MEAN, np.uint8)
                score_map = score_map * 20
                gt = gt * 20

                save_temp = np.zeros(
                    (cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH * 3, 3), np.uint8)
                save_temp[0:cfg.IMAGE_HEIGHT, 0:cfg.IMAGE_WIDTH, :] = img
                save_temp[0:cfg.IMAGE_HEIGHT,
                          cfg.IMAGE_WIDTH:cfg.IMAGE_WIDTH * 2, :] = gt
                save_temp[0:cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH *
                          2:cfg.IMAGE_WIDTH * 3, :] = score_map
                cv2.imwrite(
                    cfg.save_dir + 'temp_img/' + str(now) + '_mask.jpg',
                    save_temp)

            time_s = time.time()

            # Run the zero_ops to initialize it
            sess.run(zero_ops)

            # Accumulate the gradients 'n_minibatches' times in accum_vars using accum_ops
            for i in range(cfg.Gradient_Accumulation):
                sess.run(accum_ops, feed_dict=feed_dict)
            train_loss, ls_ce, ls_l2, lr = sess.run(
                [loss, ce_loss, l2_losses, learning_rate], feed_dict=feed_dict)
            if running_count > 50:
                lossTr_list.append(ls_ce)
                if start_step == 0:
                    start_step = 50

            # Run the train_step ops to update the weights based on your accumulated gradients
            sess.run(train_step, feed_dict=feed_dict)

            time_e = time.time()

            print(
                "Epo: %d, Step: %d, Train_loss:%g, ce: %g, l2:%g,  lr:%g, time:%g"
                % (epo, running_count, train_loss, ls_ce, ls_l2, lr,
                   time_e - time_s))

            # check accuracy per step of training data
            if cfg.is_time_acc and running_count >= cfg.start_show_iter and \
                            running_count <= cfg.total_iter and (now-last_acc_time) > cfg.acc_interval:
                # Test accuracy in val
                hist = np.zeros((cfg.NUM_OF_CLASSESS, cfg.NUM_OF_CLASSESS))
                for i, img_name in enumerate(val_img_list):
                    true_val = np.expand_dims(misc.imread(val_label_list[i]),
                                              axis=2)
                    pred_val = evaluate_accuracy(val_logits_softmax, sess,
                                                 val_image_batch, img_name)
                    hist += fast_hist(true_val.flatten(), pred_val.flatten(),
                                      cfg.NUM_OF_CLASSESS)

                hist[0, :] = 0
                # overall accuracy
                over_acc = np.diag(hist).sum() / hist.sum()
                print('>>> Step', running_count, 'overall accuracy', over_acc)
                if over_acc > best_acc:
                    saver.save(sess, cfg.save_dir + 'best.ckpt')
                    best_acc = over_acc
                    best_step = running_count
                    # Touch a marker file recording the best accuracy and step.
                    open(cfg.save_dir + 'acc: ' + str(best_acc) + ', step: ' +
                         str(best_step), 'w').close()

                print('>>> best acc: ', best_acc, 'best step: ', best_step)

                # per-class accuracy
                acc = np.diag(hist) / hist.sum(0)
                print('>>> Step', running_count, 'mean accuracy', acc)
                last_acc_time = now

                stepes.append(running_count)
                Acc_val_list.append(over_acc)
                # draw plots for visualization ----------------------------

                # Redraw the figures at each accuracy check.
                import matplotlib.pyplot as plt
                fig1, ax1 = plt.subplots(figsize=(11, 8))

                ax1.plot(range(start_step, running_count), lossTr_list)
                ax1.set_title("Average training loss vs steps")
                ax1.set_xlabel("Steps")
                ax1.set_ylabel("Current loss")

                plt.savefig(cfg.save_dir + "loss_vs_steps.png")

                plt.clf()

                fig2, ax2 = plt.subplots(figsize=(11, 8))

                ax2.plot(stepes, Acc_val_list, label="Val total acc.")
                ax2.set_title(" Acc vs steps")
                ax2.set_xlabel("Steps")
                ax2.set_ylabel("Current Acc")
                plt.legend(loc='lower right')

                plt.savefig(cfg.save_dir + "acc_vs_steps.png")

                plt.close('all')
                # ----------------------------------------------------------

            # Save step model
            if cfg.is_save_step and (running_count % cfg.save_step_inter) == 0 \
                    and running_count >= cfg.start_save_step:
                saver.save(sess, cfg.save_dir + 'model.ckpt',
                           int(running_count))
                print('Model has been saved:' + str(running_count))
                files = os.path.join(cfg.save_dir,
                                     'model.ckpt-*.data-00000-of-00001')
                sfile = glob.glob(files)
                if len(sfile) > cfg.model_save_num:
                    steps = []
                    for s in sfile:
                        part = s.split('.')
                        re = int(part[1].split('-')[1])
                        steps.append(re)
                    re = min(steps)
                    model = cfg.save_dir + 'model.ckpt-' + str(re)
                    os.remove(model + '.data-00000-of-00001')
                    os.remove(model + '.index')
                    os.remove(model + '.meta')
                    print('Remove Model:' + model)

        # Check accuracy per Epoch of training data
        if cfg.is_epoch_acc and running_count >= cfg.start_show_iter \
                and running_count <= cfg.total_iter:
            # Test accuracy in val
            hist = np.zeros((cfg.NUM_OF_CLASSESS, cfg.NUM_OF_CLASSESS))
            for i, img_name in enumerate(val_img_list):
                true_val = np.expand_dims(misc.imread(val_label_list[i]),
                                          axis=2)
                pred_val = evaluate_accuracy(val_logits_softmax, sess,
                                             val_image_batch, img_name)
                hist += fast_hist(pred_val.flatten(), true_val.flatten(),
                                  cfg.NUM_OF_CLASSESS)

            hist[:, 0] = 0
            # overall accuracy
            over_acc = np.diag(hist).sum() / hist.sum()
            print('>>> Step', running_count, 'overall accuracy', over_acc)
            if over_acc > best_acc:
                saver.save(sess, cfg.save_dir + 'best.ckpt')
                best_acc = over_acc
                best_step = running_count
                # Touch a marker file recording the best accuracy and step.
                open(cfg.save_dir + 'acc: ' + str(best_acc) + ', step: ' +
                     str(best_step), 'w').close()

            print('>>> best acc: ', best_acc, 'best step: ', best_step)

            # per-class accuracy
            acc = np.diag(hist) / hist.sum(0)
            print('>>> Step', running_count, 'mean accuracy', acc)

        epo += 1
        # Save epoch model
        if cfg.is_save_epoch and (epo % cfg.save_epoch_inter
                                  ) == 0 and epo >= cfg.start_save_epoch:
            saver.save(sess, cfg.save_dir + 'model.ckpt', epo)
            print('Model has been saved:' + str(epo))
            files = os.path.join(cfg.save_dir,
                                 'model.ckpt-*.data-00000-of-00001')
            sfile = glob.glob(files)
            if len(sfile) > cfg.model_save_num:
                steps = []
                for s in sfile:
                    part = s.split('.')
                    re = int(part[1].split('-')[1])
                    steps.append(re)
                re = min(steps)
                model = cfg.save_dir + 'model.ckpt-' + str(re)
                os.remove(model + '.data-00000-of-00001')
                os.remove(model + '.index')
                os.remove(model + '.meta')
                print('Remove Model:' + model)

        time_end = time.time()
        print('Epo ' + str(epo) + ' use time: ' + str(time_end - time_start))

    # saver.save(sess, cfg.save_dir + 'last.ckpt')    # save last model

    train_end_time = time.time()
    print('Train total use: ' +
          str((train_end_time - train_start_time) / 3600) + ' h')
    coord.request_stop()
    coord.join(threads)
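
The zero_ops / accum_ops / train_step trio in this snippet is the usual TF1 recipe for gradient accumulation: simulate a batch Gradient_Accumulation times larger than what fits in memory by summing gradients across micro-batches before one weight update. A stripped-down, self-contained version (the toy variable and loss are illustrative):

import tensorflow as tf

w = tf.Variable(0.0)
loss = tf.square(w - 3.0)
opt = tf.train.GradientDescentOptimizer(0.1)

tvs = [w]
accum_vars = [tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
              for v in tvs]
zero_ops = [a.assign(tf.zeros_like(a)) for a in accum_vars]
gvs = opt.compute_gradients(loss, tvs)
accum_ops = [accum_vars[i].assign_add(g) for i, (g, _) in enumerate(gvs)]
# Note: this applies the *summed* gradient; scale inside accum_ops to average instead.
train_step = opt.apply_gradients([(accum_vars[i], v) for i, (_, v) in enumerate(gvs)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(zero_ops)
    for _ in range(4):        # four micro-batches
        sess.run(accum_ops)
    sess.run(train_step)      # one update with the accumulated gradient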
コード例 #17
0
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(DATA_DIR, DATA_LIST_PATH, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = ICNet_BN({'data': image_batch},
                   is_training=True,
                   num_classes=args.num_classes,
                   filter_scale=args.filter_scale)

    sub4_out = net.layers['sub4_out']
    sub24_out = net.layers['sub24_out']
    sub124_out = net.layers['conv6_cls']

    restore_var = tf.global_variables()
    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]

    with tf.name_scope('loss'):
        loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes,
                                args.ignore_label)
        loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes,
                                 args.ignore_label)
        loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes,
                                  args.ignore_label)
        l2_losses = [
            args.weight_decay * tf.nn.l2_loss(v)
            for v in tf.trainable_variables() if 'weights' in v.name
        ]

        reduced_loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124 + tf.add_n(
            l2_losses)

        tf.summary.scalar('sub4', loss_sub4)
        tf.summary.scalar('sub24', loss_sub24)
        tf.summary.scalar('sub124', loss_sub124)
        tf.summary.scalar('total_loss', reduced_loss)

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if not args.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=20)
    summ = tf.summary.merge_all()
    tenboard_dir = tfboard_dir + str(LEARNING_RATE) + '_' + str(NUM_STEPS)

    writer = tf.summary.FileWriter(tenboard_dir)
    writer.add_graph(sess.graph)
    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)

    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(
            os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess,
             './snapshots/3wDataSet/model.ckpt-' + str(START_STEP))
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(START_STEP, args.num_steps):
        start_time = time.time()

        feed_dict = {step_ph: step}
        s, loss_value, loss1, loss2, loss3, _ = sess.run(
            [summ, reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op],
            feed_dict=feed_dict)
        writer.add_summary(s, step)
        if step % args.save_pred_every == 0:
            save(saver, sess, args.snapshot_dir, step)
        duration = time.time() - start_time
        print(
            'step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'
            .format(step, loss_value, loss1, loss2, loss3, duration))

    coord.request_stop()
    coord.join(threads)
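
The restore logic above follows the common TF1 resume pattern: probe the snapshot directory with get_checkpoint_state, restore if a checkpoint exists, and otherwise fall back to pre-trained weights. Reduced to a reusable skeleton (the function and the load_pretrained callback are illustrative, not from the snippet):

import tensorflow as tf

def restore_or_init(sess, snapshot_dir, restore_var, load_pretrained):
    # Resume from the newest checkpoint in snapshot_dir, else use the fallback.
    ckpt = tf.train.get_checkpoint_state(snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        tf.train.Saver(var_list=restore_var).restore(sess, ckpt.model_checkpoint_path)
        # The step number is encoded in the filename suffix: model.ckpt-<step>
        return int(ckpt.model_checkpoint_path.split('-')[-1])
    load_pretrained(sess)  # caller-supplied fallback
    return 0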
コード例 #18
0
def main():
    """Create the model and start the training."""
    args = get_arguments()
    
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)
    
    coord = tf.train.Coordinator()
    
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            ' ',
            args.data_list,
            input_size,
            args.random_scale,
            args.random_mirror,
            args.ignore_label,
            IMG_MEAN,
            coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
    
    net = ICNet_BN({'data': image_batch}, is_training=True, num_classes=args.num_classes)
    
    sub4_out = net.layers['sub4_out']
    sub24_out = net.layers['sub24_out']
    sub124_out = net.layers['conv6_cls']

    restore_var = tf.global_variables()
    all_trainable = [v for v in tf.trainable_variables() if ('beta' not in v.name and 'gamma' not in v.name) or args.train_beta_gamma]
   
    loss_sub4 = create_loss(sub4_out, label_batch, args.num_classes, args.ignore_label)
    loss_sub24 = create_loss(sub24_out, label_batch, args.num_classes, args.ignore_label)
    loss_sub124 = create_loss(sub124_out, label_batch, args.num_classes, args.ignore_label)
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    
    reduced_loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124 + tf.add_n(l2_losses)

    # Using Poly learning rate policy 
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    
    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if not args.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))
        
    # Set up tf session and initialize variables. 
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    
    sess.run(init)
    
    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

    ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('Restore from pre-trained model...')
        net.load(args.restore_from, sess)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        
        feed_dict = {step_ph: step}
        loss_value, loss1, loss2, loss3, _ = sess.run([reduced_loss, loss_sub4, loss_sub24, loss_sub124, train_op], feed_dict=feed_dict)
        if step % args.save_pred_every == 0:
            save(saver, sess, args.snapshot_dir, step)
        duration = time.time() - start_time
        print('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f} ({:.3f} sec/step)'.format(step, loss_value, loss1, loss2, loss3, duration))
        
    coord.request_stop()
    coord.join(threads)
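
The learning-rate schedule above is the "poly" policy shared by all of these
training scripts: the rate decays from base_lr to zero over num_steps. A
minimal standalone sketch of the same formula (the default values below are
illustrative, not taken from the script's argument parser):

def poly_lr(step, base_lr=1e-3, num_steps=60000, power=0.9):
    # lr(step) = base_lr * (1 - step / num_steps) ** power
    return base_lr * (1.0 - step / float(num_steps)) ** power

# Full base_lr at step 0, decaying monotonically to 0 at num_steps.
for s in (0, 30000, 59999):
    print(s, poly_lr(s))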
Code example #19
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = PSPNet101({'data': image_batch},
                    is_training=True,
                    num_classes=args.num_classes)

    raw_output = net.layers['conv6']

    # Following the prototxt in the Caffe implementation, the learning rate
    # must be multiplied by 10.0 for the pyramid-module layers
    fc_list = [
        'conv5_3_pool1_conv', 'conv5_3_pool2_conv', 'conv5_3_pool3_conv',
        'conv5_3_pool6_conv', 'conv6', 'conv5_4'
    ]
    restore_var = tf.global_variables()
    all_trainable = [
        v for v in tf.trainable_variables()
        if ('beta' not in v.name and 'gamma' not in v.name)
        or args.train_beta_gamma
    ]
    fc_trainable = [
        v for v in all_trainable if v.name.split('/')[0] in fc_list
    ]
    conv_trainable = [
        v for v in all_trainable if v.name.split('/')[0] not in fc_list
    ]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable
                      if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable
                      if 'biases' in v.name]  # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignore all pixels whose label is greater than or equal to num_classes
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)),
                         1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)
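    # prediction and gt now contain only valid pixels, so the softmax loss
    # below never sees the ignore label.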

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if not args.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                              args.momentum)
        opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                              args.momentum)

        grads = tf.gradients(reduced_loss,
                             conv_trainable + fc_w_trainable + fc_b_trainable)
        grads_conv = grads[:len(conv_trainable)]
        grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                                len(fc_w_trainable))]
        grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
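        # tf.gradients returns one gradient per variable, in the order of the
        # concatenated list (conv, fc weights, fc biases), so slicing by group
        # length recovers the three parameter groups for their optimizers.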

        train_op_conv = opt_conv.apply_gradients(
            zip(grads_conv, conv_trainable))
        train_op_fc_w = opt_fc_w.apply_gradients(
            zip(grads_fc_w, fc_w_trainable))
        train_op_fc_b = opt_fc_b.apply_gradients(
            zip(grads_fc_b, fc_b_trainable))

        train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    ckpt = tf.train.get_checkpoint_state(SNAPSHOT_DIR)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(
            os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')
        load_step = 0

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        feed_dict = {step_ph: step}
        # Run a training step; checkpoint every save_pred_every steps.
        loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
        if step % args.save_pred_every == 0:
            save(saver, sess, args.snapshot_dir, step)
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
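
The loss construction above first flattens logits and labels, then keeps only
the pixels whose label is a valid class id, so the ignore label never reaches
the softmax. A NumPy sketch of that masking (the function name and shapes are
mine; logits is [N, num_classes], labels is [N] integer ids):

import numpy as np

def masked_softmax_xent(logits, labels, num_classes):
    # Drop ignored pixels (label >= num_classes, e.g. the 255 ignore id),
    # mirroring the tf.where/tf.gather pair above.
    keep = labels <= num_classes - 1
    logits, labels = logits[keep], labels[keep]
    # Numerically stable log-softmax.
    shifted = logits - logits.max(axis=1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    return -log_probs[np.arange(labels.size), labels].mean()

# Example: 4 pixels, 3 classes, one ignored pixel labelled 255.
print(masked_softmax_xent(np.random.randn(4, 3), np.array([0, 2, 255, 1]), 3))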
Code example #20
class Evaluator():
    def __init__(self):
        self.stop = False

        if not tf.gfile.Exists(FLAGS.test_load_queue_path):
            self.settings = {
                "best_acc": None,
                "best_checkpoint": None,
                "last_checkpoint": None,
                "acc_increasing": None,
                "last_accs": deque()
            }
        else:
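            # np.save stores the dict as a 0-d object array; indexing with
            # [()] unwraps it back into a plain Python dict.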
            self.settings = np.load(FLAGS.test_load_queue_path)[()]

        self.setup()

    def setup(self):
        self.recreate_directory_structure()
        self.coord = tf.train.Coordinator()
        # Load reader.
        with tf.name_scope("create_inputs"):
            self.reader = ImageReader("./val.npy", True, self.coord)
            self.image_batch, self.label_list_batch = self.reader.dequeue(
                FLAGS.batch_size)

        global_step = tf.Variable(0,
                                  dtype=tf.int32,
                                  name='global_step',
                                  trainable=False)
        self.net = CatznDogs({'data': self.image_batch}, global_step)

        # Set up tf session and initialize variables.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)

        # Start queue threads.
        self.threads = tf.train.start_queue_runners(coord=self.coord,
                                                    sess=self.sess)

        self.ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)

    def run(self):

        while not self.stop:

            all_models_paths = self.ckpt.all_model_checkpoint_paths
            index_current_model = list(all_models_paths).index(
                self.ckpt.model_checkpoint_path)
            if self.settings["last_checkpoint"]:
                index_last_evaluated_model = list(all_models_paths).index(
                    self.settings["last_checkpoint"])
            else:
                index_last_evaluated_model = -1

            if index_current_model != index_last_evaluated_model:
                index_model_under_evaluation = index_last_evaluated_model + 1
                self.settings["last_checkpoint"] = all_models_paths[
                    index_model_under_evaluation]

                print("Evaluator started evaluating")
                acc_pred = self.net.test(
                    self.image_batch,
                    self.label_list_batch,
                    self.coord,
                    self.sess,
                    self.reader.nb_samples,
                    checkpoint_iteration=index_model_under_evaluation)
                self.settings["last_accs"].append(acc_pred)

                # NOTE: '<' keeps the checkpoint with the *lowest* acc_pred,
                # i.e. it treats acc_pred as an error rate; flip the comparison
                # to '>' if net.test returns a plain accuracy.
                if (not self.settings["best_acc"]
                        or acc_pred < self.settings["best_acc"]):
                    self.settings["best_acc"] = acc_pred
                    self.settings["best_checkpoint"] = self.settings[
                        "last_checkpoint"]
                    self.settings["acc_increasing"] = 0
                else:
                    self.settings["acc_increasing"] += 1

                # "acc_increasing" counts evaluations since the last best;
                # give up after 5 evaluations without improvement.
                if self.settings["acc_increasing"] >= 5:
                    self.stop = True

                np.save(FLAGS.test_load_queue_path, self.settings)
                print("Best model is {} with best Acc {}".format(
                    self.settings["best_checkpoint"],
                    self.settings["best_acc"]))
            else:
                time.sleep(10)

        self.coord.request_stop()
        self.coord.join(self.threads)
        print("Best model is {} with best Acc {}".format(
            self.settings["best_checkpoint"], self.settings["best_acc"]))

    def recreate_directory_structure(self):
        if not tf.gfile.Exists(FLAGS.test_summaries_dir):
            tf.gfile.MakeDirs(FLAGS.test_summaries_dir)
        else:
            tf.gfile.DeleteRecursively(FLAGS.test_summaries_dir)
            tf.gfile.MakeDirs(FLAGS.test_summaries_dir)
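
The polling loop in Evaluator.run is early stopping with a patience of five:
score each new checkpoint, remember the best, and quit after five evaluations
without a new best. Reduced to its bookkeeping, it looks like the sketch below
(class and attribute names are mine, and it assumes a higher score is better,
unlike the '<' comparison above):

class EarlyStopper:
    """Patience-based stopping over a stream of evaluation scores."""

    def __init__(self, patience=5):
        self.best = None
        self.since_best = 0
        self.patience = patience

    def update(self, score):
        # Reset the counter on a new best; otherwise count a stale evaluation.
        if self.best is None or score > self.best:
            self.best = score
            self.since_best = 0
        else:
            self.since_best += 1
        return self.since_best >= self.patience  # True means stop

# Example: stops only after five scores in a row fail to beat the best.
stopper = EarlyStopper()
for acc in [0.60, 0.70, 0.69, 0.68, 0.67, 0.66, 0.65]:
    if stopper.update(acc):
        print("stop; best =", stopper.best)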