Code example #1
def main(args):
    train_loader, val_loader = load_data(args)
    # args.weight_labels = torch.tensor(calculate_weigths_labels('cityscape', train_loader, args.n_classes)).float()
    if args.cuda:
        # args.weight_labels = args.weight_labels.cuda()
        pass

    model = PSPNet()
    if args.cuda:
        model = model.cuda()
    if args.distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                    output_device=args.local_rank,
                                                    find_unused_parameters=True)

    if args.evaluation:
        checkpoint = torch.load('./checkpoint/checkpoint_model_50000.pth', map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        eval(val_loader, model, args)
    else:
        # criterion = SegmentationLosses(weight=args.weight_labels, cuda=True)
        criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_mask, weight=None).cuda()

        backbone_params = nn.ParameterList()
        decoder_params = nn.ParameterList()

        for name, param in model.named_parameters():
            if 'backbone' in name:
                backbone_params.append(param)
            else:
                decoder_params.append(param)

        params_list = [{'params': backbone_params},
                       {'params': decoder_params, 'lr': args.lr * 10}]

        optimizer = optim.SGD(params_list,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay,
                              nesterov=True)
        scheduler = PolyLr(optimizer, gamma=args.gamma,
                           max_iteration=args.max_iteration,
                           warmup_iteration=args.warmup_iteration)

        global_step = 0
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        while global_step < args.max_iteration:
            global_step = train(global_step, train_loader, model, optimizer, criterion, scheduler, args)
        eval(val_loader, model, args)
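The training script above relies on a PolyLr scheduler that is imported but not shown. A minimal sketch, assuming PolyLr implements the common (1 - step / max_iteration) ** gamma policy with linear warmup; the project's actual class may differ:

# Sketch only: assumed behavior of the PolyLr scheduler used above.
from torch.optim.lr_scheduler import _LRScheduler

class PolyLr(_LRScheduler):
    def __init__(self, optimizer, gamma, max_iteration, warmup_iteration=0, last_epoch=-1):
        self.gamma = gamma
        self.max_iteration = max_iteration
        self.warmup_iteration = warmup_iteration
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        step = self.last_epoch
        if self.warmup_iteration and step < self.warmup_iteration:
            scale = step / self.warmup_iteration  # linear warmup toward the base lr
        else:
            scale = max(0.0, 1.0 - step / self.max_iteration) ** self.gamma  # poly decay
        return [base_lr * scale for base_lr in self.base_lrs]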
Code example #2
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_list, input_size, args.random_scale,
                             args.random_mirror, args.ignore_label, IMG_MEAN,
                             coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = PSPNet({'data': image_batch},
                 is_training=True,
                 num_classes=args.num_classes)

    raw_output = net.layers['conv6']

    # According to the prototxt in the Caffe implementation, the learning rate must be multiplied by 10.0 in the pyramid module.
    fc_list = [
        'conv5_3_pool1_conv', 'conv5_3_pool2_conv', 'conv5_3_pool3_conv',
        'conv5_3_pool6_conv', 'conv6', 'conv5_4'
    ]
    restore_var = [
        v for v in tf.global_variables()
        if not (len([f for f in fc_list
                     if f in v.name])) or not args.not_restore_last
    ]
    all_trainable = [
        v for v in tf.trainable_variables()
        if 'gamma' not in v.name and 'beta' not in v.name
    ]
    fc_trainable = [
        v for v in all_trainable if v.name.split('/')[0] in fc_list
    ]
    conv_trainable = [
        v for v in all_trainable if v.name.split('/')[0] not in fc_list
    ]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable
                      if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable
                      if 'biases' in v.name]  # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignore all predictions whose labels are greater than or equal to num_classes.
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)),
                         1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)

    # Weight the loss so that mistakes on class N are more costly for the network.
    if USE_CLASS_WEIGHTS:
        if len(CLASS_WEIGHTS) != NUM_CLASSES:
            print('Incorrect class weights; they will not be used')
        else:
            mask = tf.zeros_like(loss)

            for i, w in enumerate(CLASS_WEIGHTS):
                mask = mask + tf.cast(tf.equal(gt, i),
                                      tf.float32) * tf.constant(w)
            loss = loss * mask

    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images, args.num_classes],
        tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)

    total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate side by side (along width).
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Using Poly learning rate policy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if not args.update_mean_var:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                              args.momentum)
        opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                              args.momentum)

        grads = tf.gradients(reduced_loss,
                             conv_trainable + fc_w_trainable + fc_b_trainable)
        grads_conv = grads[:len(conv_trainable)]
        grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) +
                                                len(fc_w_trainable))]
        grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

        train_op_conv = opt_conv.apply_gradients(
            zip(grads_conv, conv_trainable))
        train_op_fc_w = opt_fc_w.apply_gradients(
            zip(grads_fc_w, fc_w_trainable))
        train_op_fc_b = opt_fc_b.apply_gradients(
            zip(grads_fc_b, fc_b_trainable))

        train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.allow_soft_placement = True
    # config.intra_op_parallelism_threads = 1
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    ckpt = tf.train.get_checkpoint_state(SNAPSHOT_DIR)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(
            os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')
        load_step = 0

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()

        feed_dict = {step_ph: step}
        if step % args.save_pred_every == 0:
            loss_value, _, summary = sess.run(
                [reduced_loss, train_op, total_summary], feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, train_op],
                                     feed_dict=feed_dict)
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
Code example #3
	# opening assumed, mirroring the data_val call below
	data_train = pd.read_csv(train_path,
							skiprows=0,
							nrows=num_train_samples,
							header=0,
							usecols=[0,2,5])
	rows_train = data_train.values.tolist()

	data_val = pd.read_csv(val_path, 
							skiprows=0,
							nrows=num_val_samples,
							header=0,
							usecols=[0,2,5])
	rows_val = data_val.values.tolist()


	# MODEL
	# -----------------------------
	model = PSPNet(n_classes=10, n_blocks=[3, 4, 6, 3], pyramids=[6, 3, 2, 1])
	if torch.cuda.device_count() > 1:
		print("Let's use", torch.cuda.device_count(), "GPUs!")
		# dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
		model = nn.DataParallel(model)
	model.to(device)

	best_acc = 0

	# LOSS
	# -----------------------------
	if loss_type == 'bce':
		# BCELoss expects probabilities in [0, 1], so the model output must be sigmoid-activated.
		criterion = nn.BCELoss()
	else:
		criterion = nn.MSELoss()
	optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Code example #4
  model.compile(optimizer=sgd, loss=loss_fn(), weighted_metrics=["accuracy", MeanIoU(num_classes)])

  # verbose=2 gives 1 line per epoch, which is good when logging to a file
  model.fit(x=train_dataset, verbose=1, epochs=epochs, validation_data=val_dataset, callbacks=[TensorBoard(), ModelCheckpoint("backup" + str(epochs)), DisplayCallback(model, train_dataset, saveimg=True)])

# Create dataset loader

# NOTE: A simple overfit-test can be made by loading only 1 image, but the generated 
# images will look bad. This is due to poor estimation of batch norm parameters, and 
# can be hotfixed by switching to training mode during image generation in display.py
loader = DataLoader('cityscapes', '50%', batch_size=8)

# Prepare all the data before usage. This includes:
# - Casting the images to float32.
# - Normalizing all the data according to the normalization strategy for ResNet50. 
# - Applying a random flip to the training data every time it is encountered.
# - Batching the data.
# - Prefetching 1 batch to improve the data throughput.

loader.prepareDatasets()
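The preparation steps listed in the comments above map naturally onto a tf.data pipeline. A hypothetical sketch of what prepareDatasets might do internally; the DataLoader class is project-specific, so the helper name, the mean values, and the flip handling are assumptions:

# Sketch only: an assumed tf.data pipeline matching the comments above.
import tensorflow as tf

def prepare(ds, training, mean=(123.68, 116.78, 103.94), batch_size=8):
    def cast_and_normalize(img, label):
        img = tf.cast(img, tf.float32) - tf.constant(mean)  # ResNet50-style mean subtraction
        return img, label

    def random_flip(img, label):
        flip = tf.random.uniform([]) > 0.5
        img = tf.cond(flip, lambda: tf.image.flip_left_right(img), lambda: img)
        label = tf.cond(flip, lambda: tf.image.flip_left_right(label), lambda: label)
        return img, label

    ds = ds.map(cast_and_normalize, num_parallel_calls=tf.data.AUTOTUNE)
    if training:
        ds = ds.map(random_flip, num_parallel_calls=tf.data.AUTOTUNE)  # augment train data only
    return ds.batch(batch_size).prefetch(1)  # prefetch 1 batch to improve throughput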

# Load the datasets
train_ds = loader.getTrainData()
val_ds = loader.getValData()

# Define model
num_classes = 34
model = PSPNet(feature_dim=(256,512,2048), num_classes=num_classes, use_ppm=True, bins=[1, 2, 3, 6])

# Train the model
train_model(model, train_ds, val_ds, num_classes, SparseCategoricalCrossentropy, epochs=60)
Code example #5
File: train.py Project: mducducd/Segmentation_PSPNet
from model import PSPNet
from torch import nn as nn
import torch.nn.functional as F
from torch import optim 
import math
import torch 
import torch.utils.data as data
import time
from data import make_datapath_list, MyDataset, DataTransform


#model 
model = PSPNet(n_classes=21)

#loss
class PSPLoss(nn.Module):
    def __init__(self, aux_weight=0.4):
        super(PSPLoss, self).__init__()
        self.aux_weight = aux_weight

    def forward(self, outputs, targets):
        loss = F.cross_entropy(outputs[0], targets, reduction='mean')
        loss_aux = F.cross_entropy(outputs[1], targets, reduction='mean')

        return loss + self.aux_weight*loss_aux

criterion = PSPLoss(aux_weight=0.4)

#optimizer
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.SGD([
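The listing is cut off inside the per-parameter-group call. A plausible completion, assuming this PSPNet exposes backbone and decoder submodules whose learning rates are staged; all group names below are hypothetical:

# Hypothetical continuation; the real group names depend on this PSPNet definition.
optimizer = optim.SGD([
    {'params': model.feature_conv.parameters(), 'lr': 1e-3},    # backbone stem
    {'params': model.decode_feature.parameters(), 'lr': 1e-2},  # decoder, 10x lr
    {'params': model.aux.parameters(), 'lr': 1e-2},             # auxiliary head
], momentum=0.9, weight_decay=1e-4)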
Code example #6
File: train.py Project: zhangzheng1993/PSPNet
def train(cfg):
    torch.backends.cudnn.benchmark = True
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = PSPNet(n_classes=cfg.n_classes)
    model = model.to(device)

    train_dataset = MyDataset(img_dir=cfg.img_dir,
                              anno_dir=cfg.anno_dir,
                              phase='train')
    n_train_img = len(train_dataset)
    val_dataset = MyDataset(img_dir=cfg.img_dir,
                            anno_dir=cfg.anno_dir,
                            phase='val')
    n_val_img = len(val_dataset)
    train_loader = DataLoader(train_dataset,
                              batch_size=cfg.batch,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=cfg.num_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=cfg.batch,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=cfg.num_workers)
    loss_func = PSPLoss(aux_weight=0.4)
    optimizer = optim.Adam(model.parameters(), lr=cfg.lr)
    # scheduler = schedule_func(optimizer)
    min_val_avg_loss, cnt = 1e4, 0

    for epoch in range(1, cfg.epochs + 1):

        train_avg_loss = 0.
        model.train()
        t1 = time()
        for t, (img, anno_img) in enumerate(train_loader, 1):
            if img.size(0) != cfg.batch:
                break  # to avoid batch normalization error
            img = img.to(device)
            anno_img = anno_img.to(device)
            outputs = model(img)
            loss = loss_func(outputs, anno_img.long())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # scheduler.step()

            train_avg_loss += loss.item()

            if t % (cfg.batch_verbose) == 0:
                t2 = time()
                print(
                    'train Epoch: %d, batch: %d, sample: %d/%d, Avg Loss: %1.3f, %dmin%dsec'
                    %
                    (epoch, t, t * cfg.batch, n_train_img, train_avg_loss / t,
                     (t2 - t1) // 60, (t2 - t1) % 60))
                if cfg.islogger:
                    if t == cfg.batch_verbose and epoch == 1:
                        train_csv_path = '%s%s_train.csv' % (cfg.log_dir, cfg.dump_date)  # cfg.log_dir = ./Csv/
                        print(f'generate {train_csv_path}')
                        with open(train_csv_path, 'w') as f:
                            f.write('time,epoch,batch,sample,train avg loss\n')
                    with open(train_csv_path, 'a') as f:
                        f.write('%dmin%dsec,%d,%d,%d,%1.3f\n' %
                                ((t2 - t1) // 60, (t2 - t1) % 60, epoch, t,
                                 t * cfg.batch, train_avg_loss / t))
                t1 = time()

        val_avg_loss = 0.
        model.eval()
        t3 = time()
        t5 = time()
        with torch.no_grad():
            for t, (img, anno_img) in enumerate(val_loader, 1):
                if img.size(0) != cfg.batch:
                    break  # to avoid batch normalization error

                img = img.to(device)
                anno_img = anno_img.to(device)
                outputs = model(img)
                loss = loss_func(outputs, anno_img.long())
                val_avg_loss += loss.item()

                if t % (cfg.batch_verbose) == 0:
                    t4 = time()
                    print(
                        'val Epoch: %d, batch: %d, sample: %d/%d, Avg Loss: %1.3f, %dmin%dsec'
                        %
                        (epoch, t, t * cfg.batch, n_val_img, val_avg_loss / t,
                         (t4 - t3) // 60, (t4 - t3) % 60))
                    """
					if cfg.islogger:
						if t == cfg.batch_verbose and epoch == 1:
							log_path = '%s%s_val.csv'%(cfg.log_dir, cfg.dump_date)#cfg.log_dir = ./Csv/
							print(f'generate {log_path}')
							with open(log_path, 'w') as f:
								f.write('time,epoch,batch,sample,loss\n')
						with open(log_path, 'a') as f:
							f.write('%dmin%dsec,%d,%d,%d, %1.3f\n'%(
								(t4-t3)//60, (t4-t3)%60, epoch, t, t*cfg.batch, val_avg_loss/t))
					"""
                    t3 = time()

        if epoch == 1:
            param_path = '%s%s_param.csv' % (cfg.log_dir, cfg.dump_date)  # cfg.log_dir = ./Csv/
            print(f'generate {param_path}')
            with open(param_path, 'w') as f:
                f.write(''.join('%s,%s\n' % item
                                for item in vars(cfg).items()))
                f.write('n_train_img,%d\n' % (n_train_img))
                f.write('n_val_img,%d\n' % (n_val_img))

        t6 = time()
        if val_avg_loss / t < min_val_avg_loss:
            min_val_avg_loss = val_avg_loss / t
            print('update min_val_avg_loss: ', min_val_avg_loss)

            weight_path = '%s%s_epoch%s.pt' % (cfg.weight_dir, cfg.dump_date,
                                               epoch)
            torch.save(model.state_dict(), weight_path)
            print(f'generate {weight_path}')

            if cfg.islogger:
                if epoch == 1:
                    val_csv_path = '%s%s_val.csv' % (cfg.log_dir, cfg.dump_date)  # cfg.log_dir = ./Csv/
                    print(f'generate {val_csv_path}')
                    with open(val_csv_path, 'w') as f:
                        f.write('time,epoch,val avg loss\n')
                with open(val_csv_path, 'a') as f:
                    f.write('%dmin%dsec,%d,%1.3f\n' %
                            ((t6 - t5) // 60,
                             (t6 - t5) % 60, epoch, val_avg_loss / t))

        else:  # val_avg_loss/t >= min_val_avg_loss:
            cnt += 1
            print(f'current cnt: {cnt}/7')
            if cnt >= 7:
                print('early stopping')
                break
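schedule_func appears only in the commented-out lines above. If a scheduler were enabled, a simple per-iteration decay could slot into those hooks; a sketch, not the project's actual schedule:

# Hypothetical schedule_func for the commented-out scheduler hooks above.
from torch.optim.lr_scheduler import LambdaLR

def schedule_func(optimizer, decay_every=20000, factor=0.5):
    # Halve the learning rate every `decay_every` optimizer steps.
    return LambdaLR(optimizer, lr_lambda=lambda step: factor ** (step // decay_every))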
Code example #7
import cv2
from model import PSPNet
import torch
from data import make_datapath_list, DataTransform
from PIL import Image
import numpy as np

# load trained model
net = PSPNet(n_classes=21)
#state_dict = torch.load('./pspnet50_49.pth', map_location={'cuda:0': 'cpu'}) #for cpu
state_dict = torch.load('./pspnet50_49.pth',
                        map_location={'cuda:0': 'cuda:0'})  # for gpu
net.load_state_dict(state_dict)
net.eval()

# 1500 images for training
# 1500 images for validation
rootpath = "./data/VOCdevkit/VOC2012/"
train_img_list, train_anno_list, val_img_list, val_anno_list = make_datapath_list(
    rootpath)
color_mean = (0.485, 0.456, 0.406)
color_std = (0.229, 0.224, 0.225)

anno_file_path = val_anno_list[1]
anno_class_img = Image.open(anno_file_path)
p_palette = anno_class_img.getpalette()

transform = DataTransform(input_size=475,
                          color_mean=color_mean,
                          color_std=color_std)
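The snippet ends after building the transform. A hypothetical continuation for single-image inference with the loaded palette; the DataTransform call signature and the (main, aux) output tuple are assumptions based on the training code:

# Hypothetical continuation: segment one validation image and save a paletted mask.
img = Image.open(val_img_list[1]).convert('RGB')
anno = Image.open(anno_file_path)
img_t, _ = transform('val', img, anno)  # assumed signature: (phase, img, anno)

with torch.no_grad():
    out = net(img_t.unsqueeze(0))  # add a batch dimension
    logits = out[0] if isinstance(out, tuple) else out
    pred = logits[0].argmax(dim=0).byte().cpu().numpy()

result = Image.fromarray(pred, mode='P')
result.putpalette(p_palette)
result.save('./result.png')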
Code example #8
def main():
    args = get_arguments()
    print(args)

    coord = tf.train.Coordinator()

    tf.reset_default_graph()
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            DATA_DIRECTORY,
            DATA_LIST_PATH,
            input_size,
            None,
            None,
            ignore_label,
            IMG_MEAN,
            coord)
        image, label = reader.image, reader.label
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension.

    # Create network.
    net = PSPNet({'data': image_batch}, is_training=False, num_classes=num_classes)

    with tf.variable_scope('', reuse=True):
        flipped_img = tf.image.flip_left_right(image)
        flipped_img = tf.expand_dims(flipped_img, dim=0)
        net2 = PSPNet({'data': flipped_img}, is_training=False, num_classes=num_classes)


    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['conv6']

    if args.flipped_eval:
        flipped_output = tf.image.flip_left_right(tf.squeeze(net2.layers['conv6']))
        flipped_output = tf.expand_dims(flipped_output, dim=0)
        raw_output = tf.add_n([raw_output, flipped_output])

    raw_output_up = tf.image.resize_bilinear(raw_output, size=input_size, align_corners=True)
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # mIoU
    pred_flatten = tf.reshape(pred, [-1,])
    raw_gt = tf.reshape(label_batch, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=num_classes)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    restore_var = tf.global_variables()

    ckpt = tf.train.get_checkpoint_state(args.model)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    for step in range(num_steps):
        preds, _ = sess.run([pred, update_op])
        
        if step > 0 and args.measure_time:
            calculate_time(sess, net)

        if step % 10 == 0:
            print('Finish {0}/{1}'.format(step, num_steps))
            print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))

    print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))

    coord.request_stop()
    coord.join(threads)
Code example #9
def main():
    """Create the model and start the training."""
    args = get_arguments()
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    #tf.set_random_seed(args.random_seed)

    coord = tf.train.Coordinator()

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Using Poly learning rate policy
        base_lr = tf.constant(args.learning_rate)
        step_ph = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.train.exponential_decay(base_lr,
                                                   step_ph,
                                                   20000,
                                                   0.5,
                                                   staircase=True)

        tf.summary.scalar('lr', learning_rate)

        opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

        #opt = tf.train.RMSPropOptimizer(learning_rate, 0.9, momentum=0.9, epsilon=1e-10)

        #opt = tf.train.AdamOptimizer(learning_rate)

        losses = []
        train_op = []

        total_batch_size = args.batch_size * args.gpu_nums

        with tf.name_scope('PSPnet') as scope:
            with tf.name_scope("create_inputs"):
                reader = ImageReader(args.data_dir, args.data_list, input_size,
                                     args.random_blur, args.random_scale,
                                     args.random_mirror, args.random_rotate,
                                     args.ignore_label, IMG_MEAN, coord)
                image_batch, label_batch = reader.dequeue(total_batch_size)

                images_splits = tf.split(axis=0,
                                         num_or_size_splits=args.gpu_nums,
                                         value=image_batch)
                labels_splits = tf.split(axis=0,
                                         num_or_size_splits=args.gpu_nums,
                                         value=label_batch)

            net = PSPNet({'data': images_splits},
                         is_training=True,
                         num_classes=args.num_classes)

            raw_output_list = net.layers['conv6']

            add_list = [
                'conv5_3_pool1_conv', 'conv5_3_pool1_conv_bn',
                'conv5_3_pool2_conv', 'conv5_3_pool2_conv_bn',
                'conv5_3_pool3_conv', 'conv5_3_pool3_conv_bn',
                'conv5_3_pool6_conv', 'conv5_3_pool6_conv_bn', 'conv5_4',
                'conv5_4_bn', 'conv6'
            ]

            num_valid_pixel = 0
            for i in range(len(raw_output_list)):
                with tf.device('/gpu:%d' % i):
                    raw_output_up = tf.image.resize_bilinear(
                        raw_output_list[i],
                        size=input_size,
                        align_corners=True)

                    tf.summary.image('images_{}'.format(i),
                                     images_splits[i] + IMG_MEAN,
                                     max_outputs=4)
                    tf.summary.image('labels_{}'.format(i),
                                     labels_splits[i],
                                     max_outputs=4)

                    tf.summary.image('predict_{}'.format(i),
                                     tf.cast(
                                         tf.expand_dims(
                                             tf.argmax(raw_output_up, -1), 3),
                                         tf.float32),
                                     max_outputs=4)

                    all_trainable = [v for v in tf.trainable_variables()]

                    # Predictions: ignore all predictions whose labels are greater than or equal to num_classes.
                    raw_prediction = tf.reshape(raw_output_up,
                                                [-1, args.num_classes])
                    label_proc = prepare_label(
                        labels_splits[i],
                        tf.stack(raw_output_up.get_shape()[1:3]),
                        num_classes=args.num_classes,
                        one_hot=False)  # [batch_size, h, w]
                    raw_gt = tf.reshape(label_proc, [-1])
                    #indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
                    indices = tf.where(
                        tf.logical_and(tf.less(raw_gt, args.num_classes),
                                       tf.greater_equal(raw_gt, 0)))
                    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                    prediction = tf.gather(raw_prediction, indices)
                    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
                        tf.argmax(tf.nn.softmax(prediction), axis=-1),
                        gt,
                        num_classes=args.num_classes)
                    tf.summary.scalar('mean IoU_{}'.format(i), mIoU)
                    train_op.append(update_op)

                    # Pixel-wise softmax loss.
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=prediction, labels=gt)
                    num_valid_pixel += tf.shape(gt)[0]

                    losses.append(tf.reduce_sum(loss))

            l2_losses = [
                args.weight_decay * tf.nn.l2_loss(v)
                for v in tf.trainable_variables() if 'weights' in v.name
            ]
            reduced_loss = tf.truediv(
                tf.reduce_sum(losses),
                tf.cast(num_valid_pixel, tf.float32)) + tf.add_n(l2_losses)
            tf.summary.scalar('average_loss', reduced_loss)

        grads = tf.gradients(reduced_loss,
                             all_trainable,
                             colocate_gradients_with_ops=True)

        variable_averages = tf.train.ExponentialMovingAverage(0.99, step_ph)

        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        train_op = tf.group(opt.apply_gradients(zip(grads, all_trainable)),
                            *train_op)

        train_op = tf.group(train_op, variables_averages_op)

        summary_op = tf.summary.merge_all()

        # Set up tf session and initialize variables.
        config = tf.ConfigProto()
        config.allow_soft_placement = True
        sess = tf.Session(config=config)
        init = [
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ]
        sess.run(init)
        # Saver for storing checkpoints of the model.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=2)

        # Restore from the ResNet ImageNet checkpoint; 'biased' and 'local_step' variables belong to the moving averages, so they are excluded.
        restore_var = [
            v for v in tf.trainable_variables() if 'conv5_3_pool' not in v.name
            and 'conv5_4' not in v.name and 'conv6' not in v.name
        ] + [
            v for v in tf.global_variables()
            if ('moving_mean' in v.name or 'moving_variance' in v.name) and
            ('conv5_3_pool' not in v.name and 'conv5_4_bn' not in v.name
             and 'biased' not in v.name and 'local_step' not in v.name)
        ]

        ckpt = tf.train.get_checkpoint_state(args.restore_from)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=restore_var)
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')
        """
        #restore from snapshot
        restore_var = tf.global_variables()

        ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=restore_var, allow_empty=True)
            load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')
            load_step = 0
        """
        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                               graph=sess.graph)
        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()

            feed_dict = {step_ph: step}
            if step % args.save_pred_every == 0 and step != 0:
                loss_value, _ = sess.run([reduced_loss, train_op],
                                         feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, step)
            elif step % 100 == 0:
                summary_str, loss_value, _, IOU = sess.run(
                    [summary_op, reduced_loss, train_op, mIoU],
                    feed_dict=feed_dict)
                duration = time.time() - start_time
                summary_writer.add_summary(summary_str, step)
                print(
                    'step {:d} \t loss = {:.3f}, mean_IoU = {:.3f}, ({:.3f} sec/step)'
                    .format(step, loss_value, IOU, duration))
            else:
                loss_value, _ = sess.run([reduced_loss, train_op],
                                         feed_dict=feed_dict)

        coord.request_stop()
        coord.join(threads)
Code example #10
import torch
from torch.utils.data import DataLoader
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image

from model import PSPNet
from dataset import MyDataset
from config import test_parser
from visualize import unnormalize_show, PIL_show

if __name__ == '__main__':
    arg = test_parser()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = PSPNet(n_classes=arg.n_classes)
    if arg.path is not None:
        print('load model ...')
        model.load_state_dict(torch.load(arg.path, map_location=device))

    model = model.to(device)
    test_dataset = MyDataset(img_dir=arg.img_dir,
                             anno_dir=arg.anno_dir,
                             phase='test')
    n_test_img = len(test_dataset)
    test_loader = DataLoader(test_dataset,
                             batch_size=arg.batch,
                             shuffle=True,
                             pin_memory=True,
                             num_workers=arg.num_workers)
    model.eval()
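    # Hypothetical continuation (not in the original): a minimal test loop, assuming
    # the model returns a (main, aux) tuple as in example #6 and that unnormalize_show
    # displays a denormalized input image.
    with torch.no_grad():
        for img, anno_img in test_loader:
            img = img.to(device)
            outputs = model(img)
            main_out = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
            pred = main_out.argmax(dim=1).cpu()  # [batch, H, W] class indices
            unnormalize_show(img[0].cpu())  # assumed helper: show the input image
            plt.imshow(pred[0], cmap='tab20')
            plt.show()
            break  # inspect the first batch only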
Code example #11
def main():
    args = get_arguments()

    img, filename = load_img(args.img_path)
    img_shape = tf.shape(img)
    h = tf.maximum(crop_size[0], img_shape[0])
    w = tf.maximum(crop_size[1], img_shape[1])

    img = preprocess(img, h, w)

    # Create network.
    net = PSPNet({'data': img}, is_training=False, num_classes=num_classes)
    with tf.variable_scope('', reuse=True):
        flipped_img = tf.image.flip_left_right(tf.squeeze(img))
        flipped_img = tf.expand_dims(flipped_img, dim=0)
        net2 = PSPNet({'data': flipped_img},
                      is_training=False,
                      num_classes=num_classes)

    raw_output = net.layers['conv6']
    if args.flipped_eval:
        flipped_output = tf.image.flip_left_right(
            tf.squeeze(net2.layers['conv6']))
        flipped_output = tf.expand_dims(flipped_output, dim=0)
        raw_output = tf.add_n([raw_output, flipped_output])

    # Predictions.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             size=[h, w],
                                             align_corners=True)
    raw_output_up = tf.image.crop_to_bounding_box(raw_output_up, 0, 0,
                                                  img_shape[0], img_shape[1])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Init tf Session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    restore_var = tf.global_variables()

    ckpt = tf.train.get_checkpoint_state(args.model)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(
            os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    preds = sess.run(pred)

    msk = decode_labels(preds, num_classes=num_classes)
    im = Image.fromarray(msk[0])
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    im.save(args.save_dir + filename)
Code example #12
def main():
    args = get_arguments()
    print(args)

    coord = tf.train.Coordinator()

    tf.reset_default_graph()

    image_list, label_list = read_labeled_image_list(DATA_DIRECTORY, DATA_LIST_PATH)

    # Create network.
    image_batch = tf.placeholder(tf.float32, [None, input_size, input_size, 3])

    net = PSPNet({'data': [image_batch]}, is_training=False, num_classes=num_classes)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['conv6'][0]

    raw_output_up = tf.image.resize_bilinear(raw_output, size=[input_size,input_size], align_corners=True)

    # mIoU
    pred_all = tf.placeholder(tf.float32, [None,None])
    raw_all = tf.placeholder(tf.float32, [None,None,None, None])
    pred_flatten = tf.reshape(pred_all, [-1,])
    raw_gt = tf.reshape(raw_all, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred_label = tf.gather(pred_flatten, indices)

    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred_label, gt, num_classes=num_classes)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    restore_var = tf.global_variables()

    ckpt = tf.train.get_checkpoint_state(args.model)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')


    for step in range(len(image_list)):
        image, label = cv2.imread(image_list[step], 1), cv2.imread(label_list[step], 0)
        label = np.reshape(label, [1, label.shape[0], label.shape[1], 1])

        imgsplitter = ImageSplitter(image, 1.0, input_size, IMG_MEAN)
        feed_dict = {image_batch: imgsplitter.get_split_crops()}

        logits = sess.run(raw_output_up, feed_dict=feed_dict)
        total_logits = imgsplitter.reassemble_crops(logits)

        # mirror
        image_mirror = image[:, ::-1]
        imgsplitter_mirror = ImageSplitter(image_mirror, 1.0, input_size, IMG_MEAN)
        feed_dict = {image_batch: imgsplitter_mirror.get_split_crops()}
        logits_mirror = sess.run(raw_output_up, feed_dict=feed_dict)
        logits_mirror = imgsplitter_mirror.reassemble_crops(logits_mirror)
        total_logits += logits_mirror[:, ::-1]

        prediction = np.argmax(total_logits, axis=-1)

        #=====================================================#
        sess.run([update_op], feed_dict={pred_all: prediction, raw_all: label})
        
        if step > 0 and args.measure_time:
            calculate_time(sess, net)

        if step % 10 == 0:
            print('Finish {0}/{1}'.format(step, len(image_list)))
            print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))

    print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))
Code example #13
        Returns
        loss : torch tensor
        """
        loss = F.cross_entropy(outputs[0], gts, reduction='mean')
        loss_aux = F.cross_entropy(outputs[1], gts, reduction='mean')
        return loss + self.aux_weight * loss_aux


if __name__ == '__main__':
    loss_func = PSPLoss(aux_weight=0.4)
    img_dir = 'data/leftImg8bit/'
    anno_dir = 'data/gtFine/'
    phase = 'val'
    batch = 2
    num_workers = 4
    model = PSPNet(n_classes=35)
    dataset = MyDataset(img_dir=img_dir,
                        anno_dir=anno_dir,
                        phase=phase,
                        h=512,
                        w=1024)
    loader = DataLoader(dataset,
                        batch_size=batch,
                        shuffle=True,
                        pin_memory=True,
                        num_workers=num_workers)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    for t, (img, anno_img) in enumerate(loader, 1):
        print(img.size())
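        # Hypothetical extension (not in the original): exercise the loss on one
        # batch, assuming anno_img holds class-index maps as in example #6.
        img, anno_img = img.to(device), anno_img.to(device)
        outputs = model(img)
        loss = loss_func(outputs, anno_img.long())
        print('loss:', loss.item())
        break  # one batch is enough for a smoke test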
Code example #14
    def _build_model(self):
        net = PSPNet({'data': self._images},
                     is_training=self.training,
                     num_classes=self.num_classes)

        self.logits = tf.squeeze(net.layers['conv6'], [1, 2])
Code example #15
    #     CLASSES = ['sky', 'building', 'pole', 'road', 'pavement',
    #                'tree', 'signsymbol', 'fence', 'car',
    #                'pedestrian', 'bicyclist', 'unlabelled']

    # Load model
    if FLAGS.resume_train:
        model = torch.load(FLAGS.resume_train)
    else:
        model = PSPNet(
            preupsample=False,
            upsampling=8,
            encoder_name="mobilenetv1",
            encoder_weights=False,
            encoder_depth=5,
            psp_out_channels=512,  # PSP out channels after concat not yet final
            psp_use_batchnorm=True,
            psp_dropout=0.2,
            in_channels=3,
            classes=len(CLASSES),
            activation='sigmoid',  # Optional[Union[str, callable]]
            dilated=False,
            #             aux_params={'classes': 1, 'height': 320,
            #                         'width': 320, 'dropout': 0.2},  # Opt
        )

    # Define parameters
    loss = losses.DiceLoss()
    # NOTE: this list shadows the `metrics` module after assignment; the RHS is
    # evaluated first, so the module lookups below still resolve correctly.
    metrics = [
        metrics.IoU(threshold=0.5),
        metrics.Accuracy(),
        metrics.Recall()
    ]
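    # Hypothetical continuation, assuming segmentation_models.pytorch's utils API,
    # which these loss/metric objects appear to follow; the rest of the script is
    # not shown, so this wiring is an assumption.
    from segmentation_models_pytorch.utils import train as smp_train

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    train_epoch = smp_train.TrainEpoch(model, loss=loss, metrics=metrics,
                                       optimizer=optimizer, device='cuda', verbose=True)
    valid_epoch = smp_train.ValidEpoch(model, loss=loss, metrics=metrics,
                                       device='cuda', verbose=True)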