def main(args):
    train_loader, val_loader = load_data(args)
    # args.weight_labels = torch.tensor(calculate_weigths_labels('cityscape', train_loader, args.n_classes)).float()
    if args.cuda:
        # args.weight_labels = args.weight_labels.cuda()
        pass

    model = PSPNet()
    if args.cuda:
        model = model.cuda()
    if args.distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[args.local_rank],
                                                    output_device=args.local_rank,
                                                    find_unused_parameters=True)

    if args.evaluation:
        checkpoint = torch.load('./checkpoint/checkpoint_model_50000.pth', map_location='cpu')
        model.load_state_dict(checkpoint['model_state_dict'])
        eval(val_loader, model, args)
    else:
        # criterion = SegmentationLosses(weight=args.weight_labels, cuda=True)
        criterion = nn.CrossEntropyLoss(ignore_index=args.ignore_mask, weight=None).cuda()

        backbone_params = nn.ParameterList()
        decoder_params = nn.ParameterList()
        for name, param in model.named_parameters():
            if 'backbone' in name:
                backbone_params.append(param)
            else:
                decoder_params.append(param)
        params_list = [{'params': backbone_params},
                       {'params': decoder_params, 'lr': args.lr * 10}]

        optimizer = optim.SGD(params_list,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay,
                              nesterov=True)
        scheduler = PolyLr(optimizer,
                           gamma=args.gamma,
                           max_iteration=args.max_iteration,
                           warmup_iteration=args.warmup_iteration)

        global_step = 0
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')

        while global_step < args.max_iteration:
            global_step = train(global_step, train_loader, model, optimizer, criterion, scheduler, args)
        eval(val_loader, model, args)
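# A minimal sketch of the PolyLr scheduler used above, assuming the usual poly
# policy lr = base_lr * (1 - step / max_iteration) ** gamma with a linear
# warmup; the class actually shipped with this repo may differ.
from torch.optim.lr_scheduler import _LRScheduler

class PolyLrSketch(_LRScheduler):
    def __init__(self, optimizer, gamma, max_iteration, warmup_iteration=0, last_epoch=-1):
        self.gamma = gamma
        self.max_iteration = max_iteration
        self.warmup_iteration = warmup_iteration
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        step = self.last_epoch
        if self.warmup_iteration > 0 and step < self.warmup_iteration:
            scale = step / self.warmup_iteration                   # linear warmup
        else:
            scale = (1 - step / self.max_iteration) ** self.gamma  # poly decay
        return [base_lr * scale for base_lr in self.base_lrs]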
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    coord = tf.train.Coordinator()

    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_list, input_size, args.random_scale,
                             args.random_mirror, args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)

    net = PSPNet({'data': image_batch}, is_training=True, num_classes=args.num_classes)
    raw_output = net.layers['conv6']

    # According to the prototxt in the Caffe implementation, the learning rate
    # must be multiplied by 10.0 in the pyramid module.
    fc_list = ['conv5_3_pool1_conv', 'conv5_3_pool2_conv', 'conv5_3_pool3_conv',
               'conv5_3_pool6_conv', 'conv6', 'conv5_4']
    restore_var = [v for v in tf.global_variables()
                   if not (len([f for f in fc_list if f in v.name])) or not args.not_restore_last]
    all_trainable = [v for v in tf.trainable_variables()
                     if 'gamma' not in v.name and 'beta' not in v.name]
    fc_trainable = [v for v in all_trainable if v.name.split('/')[0] in fc_list]
    conv_trainable = [v for v in all_trainable if v.name.split('/')[0] not in fc_list]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]   # lr * 20.0

    assert len(all_trainable) == len(fc_trainable) + len(conv_trainable)
    assert len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable)

    # Predictions: ignore all predictions whose labels are greater than or equal to n_classes.
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    raw_gt = tf.reshape(label_proc, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    prediction = tf.gather(raw_prediction, indices)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)

    # Make mistakes on class N more important to the network.
    if USE_CLASS_WEIGHTS:
        if len(CLASS_WEIGHTS) != NUM_CLASSES:
            print('Incorrect class weights, they will not be used')
        else:
            mask = tf.zeros_like(loss)
            for i, w in enumerate(CLASS_WEIGHTS):
                mask = mask + tf.cast(tf.equal(gt, i), tf.float32) * tf.constant(w)
            loss = loss * mask

    l2_losses = [args.weight_decay * tf.nn.l2_loss(v)
                 for v in tf.trainable_variables() if 'weights' in v.name]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels,
                                [label_batch, args.save_num_images, args.num_classes], tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes], tf.uint8)

    total_summary = tf.summary.image(
        'images',
        tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.
    summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=tf.get_default_graph())

    # Using the poly learning rate policy.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))

    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS.
    if args.update_mean_var == False:
        update_ops = None
    else:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, args.momentum)
        opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, args.momentum)
        opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, args.momentum)

        grads = tf.gradients(reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
        grads_conv = grads[:len(conv_trainable)]
        grads_fc_w = grads[len(conv_trainable):(len(conv_trainable) + len(fc_w_trainable))]
        grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]

        train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
        train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
        train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

        train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    # Set up the tf session and initialize variables.
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # config.allow_soft_placement = True
    # config.intra_op_parallelism_threads = 1
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    ckpt = tf.train.get_checkpoint_state(SNAPSHOT_DIR)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')
        load_step = 0

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = {step_ph: step}
        if step % args.save_pred_every == 0:
            loss_value, _, summary = sess.run([reduced_loss, train_op, total_summary],
                                              feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(step, loss_value, duration))

    coord.request_stop()
    coord.join(threads)
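# The tf.where/tf.gather pattern above simply drops ignore-label pixels before
# the softmax loss. The same filtering in plain NumPy, as a self-contained
# illustration (all names here are made up for the example):
import numpy as np

num_classes = 3
logits = np.random.randn(6, num_classes)   # flattened per-pixel logits
labels = np.array([0, 2, 255, 1, 255, 0])  # 255 marks ignored pixels

valid = labels <= num_classes - 1          # same test as tf.less_equal
valid_logits = logits[valid]               # tf.gather(raw_prediction, indices)
valid_labels = labels[valid]               # tf.gather(raw_gt, indices)

# pixel-wise softmax cross-entropy over the kept pixels only
shifted = valid_logits - valid_logits.max(axis=1, keepdims=True)
log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
print(-log_probs[np.arange(len(valid_labels)), valid_labels].mean())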
# (opening of this call reconstructed from the parallel data_val call below;
# `train_path` is an assumed name)
data_train = pd.read_csv(train_path, skiprows=0,
                         nrows=num_train_samples, header=0, usecols=[0, 2, 5])
rows_train = data_train.values.tolist()

data_val = pd.read_csv(val_path, skiprows=0,
                       nrows=num_val_samples, header=0, usecols=[0, 2, 5])
rows_val = data_val.values.tolist()

# MODEL
# -----------------------------
model = PSPNet(n_classes=10, n_blocks=[3, 4, 6, 3], pyramids=[6, 3, 2, 1])

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)
model.to(device)

best_acc = 0

# LOSS
# -----------------------------
if loss_type == 'bce':
    criterion = nn.BCELoss()
else:
    criterion = nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
model.compile(optimizer=sgd, loss=loss_fn(),
              weighted_metrics=["accuracy", MeanIoU(num_classes)])

# verbose=2 gives 1 line per epoch, which is good when logging to a file
model.fit(x=train_dataset, verbose=1, epochs=epochs, validation_data=val_dataset,
          callbacks=[TensorBoard(),
                     ModelCheckpoint("backup" + str(epochs)),
                     DisplayCallback(model, train_dataset, saveimg=True)])

# Create dataset loader
# NOTE: A simple overfit test can be made by loading only 1 image, but the generated
# images will look bad. This is due to poor estimation of the batch norm parameters, and
# can be hotfixed by switching to training mode during image generation in display.py
loader = DataLoader('cityscapes', '50%', batch_size=8)

# Prepare all the data before usage. This includes:
# - Casting the images to float32.
# - Normalizing all the data according to the normalization strategy for ResNet50.
# - Applying a random flip to the training data every time it is encountered.
# - Batching the data.
# - Prefetching 1 batch to improve the data throughput.
loader.prepareDatasets()

# Load the datasets
train_ds = loader.getTrainData()
val_ds = loader.getValData()

# Define model
num_classes = 34
model = PSPNet(feature_dim=(256, 512, 2048), num_classes=num_classes,
               use_ppm=True, bins=[1, 2, 3, 6])

# Train the model
train_model(model, train_ds, val_ds, num_classes, SparseCategoricalCrossentropy, epochs=60)
from model import PSPNet
from torch import nn
import torch.nn.functional as F
from torch import optim
import math
import torch
import torch.utils.data as data
import time
from data import make_datapath_list, MyDataset, DataTransform

# model
model = PSPNet(n_classes=21)

# loss
class PSPLoss(nn.Module):
    def __init__(self, aux_weight=0.4):
        super(PSPLoss, self).__init__()
        self.aux_weight = aux_weight

    def forward(self, outputs, targets):
        loss = F.cross_entropy(outputs[0], targets, reduction='mean')
        loss_aux = F.cross_entropy(outputs[1], targets, reduction='mean')
        return loss + self.aux_weight * loss_aux

criterion = PSPLoss(aux_weight=0.4)

# optimizer
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.SGD([
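# The call above is cut off mid-statement in the source. A hedged completion
# sketch, assuming the common recipe of a 10x learning rate for everything
# outside the backbone (the 'backbone' substring test is an assumption about
# this model's parameter names):
backbone_params, head_params = [], []
for name, param in model.named_parameters():
    (backbone_params if 'backbone' in name else head_params).append(param)
optimizer = optim.SGD([
    {'params': backbone_params, 'lr': 1e-3},
    {'params': head_params, 'lr': 1e-2},  # 10x for the PSP head and classifier
], momentum=0.9, weight_decay=1e-4)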
def train(cfg):
    torch.backends.cudnn.benchmark = True
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = PSPNet(n_classes=cfg.n_classes)
    model = model.to(device)

    train_dataset = MyDataset(img_dir=cfg.img_dir, anno_dir=cfg.anno_dir, phase='train')
    n_train_img = len(train_dataset)
    val_dataset = MyDataset(img_dir=cfg.img_dir, anno_dir=cfg.anno_dir, phase='val')
    n_val_img = len(val_dataset)
    train_loader = DataLoader(train_dataset, batch_size=cfg.batch, shuffle=True,
                              pin_memory=True, num_workers=cfg.num_workers)
    val_loader = DataLoader(val_dataset, batch_size=cfg.batch, shuffle=False,
                            pin_memory=True, num_workers=cfg.num_workers)

    loss_func = PSPLoss(aux_weight=0.4)
    optimizer = optim.Adam(model.parameters(), lr=cfg.lr)
    # scheduler = schedule_func(optimizer)

    min_val_avg_loss, cnt = 1e4, 0
    for epoch in range(1, cfg.epochs + 1):
        train_avg_loss = 0.
        model.train()
        t1 = time()
        for t, (img, anno_img) in enumerate(train_loader, 1):
            if img.size(0) != cfg.batch:
                break  # to avoid a batch normalization error on a short final batch
            img = img.to(device)
            anno_img = anno_img.to(device)

            outputs = model(img)
            loss = loss_func(outputs, anno_img.long())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # scheduler.step()

            train_avg_loss += loss.item()
            if t % cfg.batch_verbose == 0:
                t2 = time()
                print('train Epoch: %d, batch: %d, sample: %d/%d, Avg Loss: %1.3f, %dmin%dsec'
                      % (epoch, t, t * cfg.batch, n_train_img, train_avg_loss / t,
                         (t2 - t1) // 60, (t2 - t1) % 60))
                if cfg.islogger:
                    if t == cfg.batch_verbose and epoch == 1:
                        train_csv_path = '%s%s_train.csv' % (cfg.log_dir, cfg.dump_date)  # cfg.log_dir = ./Csv/
                        print(f'generate {train_csv_path}')
                        with open(train_csv_path, 'w') as f:
                            f.write('time,epoch,batch,sample,train avg loss\n')
                    with open(train_csv_path, 'a') as f:
                        f.write('%dmin%dsec,%d,%d,%d,%1.3f\n'
                                % ((t2 - t1) // 60, (t2 - t1) % 60, epoch, t,
                                   t * cfg.batch, train_avg_loss / t))
                t1 = time()

        val_avg_loss = 0.
        model.eval()
        t3 = time()
        t5 = time()
        with torch.no_grad():
            for t, (img, anno_img) in enumerate(val_loader, 1):
                if img.size(0) != cfg.batch:
                    break  # to avoid a batch normalization error on a short final batch
                img = img.to(device)
                anno_img = anno_img.to(device)

                outputs = model(img)
                loss = loss_func(outputs, anno_img.long())
                val_avg_loss += loss.item()

                if t % cfg.batch_verbose == 0:
                    t4 = time()
                    print('val Epoch: %d, batch: %d, sample: %d/%d, Avg Loss: %1.3f, %dmin%dsec'
                          % (epoch, t, t * cfg.batch, n_val_img, val_avg_loss / t,
                             (t4 - t3) // 60, (t4 - t3) % 60))
                    """
                    if cfg.islogger:
                        if t == cfg.batch_verbose and epoch == 1:
                            log_path = '%s%s_val.csv' % (cfg.log_dir, cfg.dump_date)  # cfg.log_dir = ./Csv/
                            print(f'generate {log_path}')
                            with open(log_path, 'w') as f:
                                f.write('time,epoch,batch,sample,loss\n')
                        with open(log_path, 'a') as f:
                            f.write('%dmin%dsec,%d,%d,%d,%1.3f\n' % (
                                (t4 - t3) // 60, (t4 - t3) % 60, epoch, t,
                                t * cfg.batch, val_avg_loss / t))
                    """
                    t3 = time()

        if epoch == 1:
            param_path = '%s%s_param.csv' % (cfg.log_dir, cfg.dump_date)  # cfg.log_dir = ./Csv/
            print(f'generate {param_path}')
            with open(param_path, 'w') as f:
                f.write(''.join('%s,%s\n' % item for item in vars(cfg).items()))
                f.write('n_train_img,%d\n' % n_train_img)
                f.write('n_val_img,%d\n' % n_val_img)

        t6 = time()
        if val_avg_loss / t < min_val_avg_loss:
            min_val_avg_loss = val_avg_loss / t
            print('update min_val_avg_loss: ', min_val_avg_loss)
            weight_path = '%s%s_epoch%s.pt' % (cfg.weight_dir, cfg.dump_date, epoch)
            torch.save(model.state_dict(), weight_path)
            print(f'generate {weight_path}')

            if cfg.islogger:
                if epoch == 1:
                    val_csv_path = '%s%s_val.csv' % (cfg.log_dir, cfg.dump_date)  # cfg.log_dir = ./Csv/
                    print(f'generate {val_csv_path}')
                    with open(val_csv_path, 'w') as f:
                        f.write('time,epoch,val avg loss\n')
                with open(val_csv_path, 'a') as f:
                    f.write('%dmin%dsec,%d,%1.3f\n'
                            % ((t6 - t5) // 60, (t6 - t5) % 60, epoch, val_avg_loss / t))
        else:  # val_avg_loss / t >= min_val_avg_loss
            cnt += 1
            print(f'current cnt: {cnt}/7')
            if cnt >= 7:
                print('early stopping')
                break
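# The patience counter above (min_val_avg_loss / cnt) can be factored into a
# small helper; a sketch of the same logic with the threshold of 7 epochs
# without improvement used in the loop:
class EarlyStopping:
    def __init__(self, patience=7):
        self.patience = patience
        self.best = float('inf')
        self.count = 0

    def step(self, val_loss):
        """Return True when training should stop."""
        if val_loss < self.best:
            self.best = val_loss
            self.count = 0
        else:
            self.count += 1
        return self.count >= self.patience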
import cv2
from model import PSPNet
import torch
from data import make_datapath_list, DataTransform
from PIL import Image
import numpy as np

# load trained model
net = PSPNet(n_classes=21)
# state_dict = torch.load('./pspnet50_49.pth', map_location={'cuda:0': 'cpu'})  # for cpu
state_dict = torch.load('./pspnet50_49.pth', map_location={'cuda:0': 'cuda:0'})  # for gpu
net.load_state_dict(state_dict)
net.eval()

# 1500 images for training
# 1500 images for validation
rootpath = "./data/VOCdevkit/VOC2012/"
train_img_list, train_anno_list, val_img_list, val_anno_list = make_datapath_list(rootpath)

color_mean = (0.485, 0.456, 0.406)
color_std = (0.229, 0.224, 0.225)

anno_file_path = val_anno_list[1]
anno_class_img = Image.open(anno_file_path)
p_palette = anno_class_img.getpalette()

transform = DataTransform(input_size=475, color_mean=color_mean, color_std=color_std)
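# How the palette is typically used downstream: run one validation image through
# the net and re-attach the VOC palette to the predicted mask. A minimal sketch;
# the phase-first transform signature and the tuple output of `net` are
# assumptions about this repo:
img = Image.open(val_img_list[1]).convert('RGB')
img_t, _ = transform('val', img, anno_class_img)

with torch.no_grad():
    out = net(img_t.unsqueeze(0))
    out = out[0] if isinstance(out, (tuple, list)) else out  # main head
    pred = out.argmax(dim=1)[0].byte().cpu().numpy()

mask = Image.fromarray(pred, mode='P')
mask.putpalette(p_palette)
mask.save('prediction.png')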
def main():
    args = get_arguments()
    print(args)

    coord = tf.train.Coordinator()

    tf.reset_default_graph()
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            DATA_DIRECTORY,
            DATA_LIST_PATH,
            input_size,
            None,
            None,
            ignore_label,
            IMG_MEAN,
            coord)
        image, label = reader.image, reader.label
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)  # Add one batch dimension.

    # Create network.
    net = PSPNet({'data': image_batch}, is_training=False, num_classes=num_classes)

    with tf.variable_scope('', reuse=True):
        flipped_img = tf.image.flip_left_right(image)
        flipped_img = tf.expand_dims(flipped_img, dim=0)
        net2 = PSPNet({'data': flipped_img}, is_training=False, num_classes=num_classes)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['conv6']

    if args.flipped_eval:
        flipped_output = tf.image.flip_left_right(tf.squeeze(net2.layers['conv6']))
        flipped_output = tf.expand_dims(flipped_output, dim=0)
        raw_output = tf.add_n([raw_output, flipped_output])

    raw_output_up = tf.image.resize_bilinear(raw_output, size=input_size, align_corners=True)
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # mIoU
    pred_flatten = tf.reshape(pred, [-1, ])
    raw_gt = tf.reshape(label_batch, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred, gt, num_classes=num_classes)

    # Set up the tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)
    sess.run(tf.local_variables_initializer())

    restore_var = tf.global_variables()

    ckpt = tf.train.get_checkpoint_state(args.model)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    for step in range(num_steps):
        preds, _ = sess.run([pred, update_op])

        if step > 0 and args.measure_time:
            calculate_time(sess, net)

        if step % 10 == 0:
            print('Finish {0}/{1}'.format(step, num_steps))
            print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))
    print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))

    coord.request_stop()
    coord.join(threads)
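# streaming_mean_iou accumulates a running confusion matrix across sess.run
# calls and derives mean IoU from it. The same arithmetic in NumPy (a sketch of
# the math, not of the tf.contrib internals):
import numpy as np

def mean_iou(conf):
    """conf[i, j] = number of pixels with ground-truth class i predicted as j."""
    tp = np.diag(conf).astype(np.float64)
    denom = conf.sum(axis=0) + conf.sum(axis=1) - tp  # TP + FP + FN per class
    valid = denom > 0                                 # skip classes never observed
    return (tp[valid] / denom[valid]).mean()

conf = np.array([[50, 2, 0],
                 [3, 40, 1],
                 [0, 4, 20]])
print(mean_iou(conf))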
def main():
    """Create the model and start the training."""
    args = get_arguments()

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    # tf.set_random_seed(args.random_seed)

    coord = tf.train.Coordinator()

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Using the poly learning rate policy.
        base_lr = tf.constant(args.learning_rate)
        step_ph = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.train.exponential_decay(base_lr, step_ph, 20000, 0.5, staircase=True)
        tf.summary.scalar('lr', learning_rate)

        opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
        # opt = tf.train.RMSPropOptimizer(learning_rate, 0.9, momentum=0.9, epsilon=1e-10)
        # opt = tf.train.AdamOptimizer(learning_rate)

        losses = []
        train_op = []
        total_batch_size = args.batch_size * args.gpu_nums

        with tf.name_scope('PSPnet') as scope:
            with tf.name_scope("create_inputs"):
                reader = ImageReader(args.data_dir, args.data_list, input_size,
                                     args.random_blur, args.random_scale,
                                     args.random_mirror, args.random_rotate,
                                     args.ignore_label, IMG_MEAN, coord)
                image_batch, label_batch = reader.dequeue(total_batch_size)
                images_splits = tf.split(axis=0, num_or_size_splits=args.gpu_nums, value=image_batch)
                labels_splits = tf.split(axis=0, num_or_size_splits=args.gpu_nums, value=label_batch)

            net = PSPNet({'data': images_splits}, is_training=True, num_classes=args.num_classes)

            raw_output_list = net.layers['conv6']

            add_list = ['conv5_3_pool1_conv', 'conv5_3_pool1_conv_bn',
                        'conv5_3_pool2_conv', 'conv5_3_pool2_conv_bn',
                        'conv5_3_pool3_conv', 'conv5_3_pool3_conv_bn',
                        'conv5_3_pool6_conv', 'conv5_3_pool6_conv_bn',
                        'conv5_4', 'conv5_4_bn', 'conv6']

            num_valid_pixels = 0
            for i in range(len(raw_output_list)):
                with tf.device('/gpu:%d' % i):
                    raw_output_up = tf.image.resize_bilinear(raw_output_list[i],
                                                             size=input_size, align_corners=True)
                    tf.summary.image('images_{}'.format(i), images_splits[i] + IMG_MEAN, max_outputs=4)
                    tf.summary.image('labels_{}'.format(i), labels_splits[i], max_outputs=4)
                    tf.summary.image('predict_{}'.format(i),
                                     tf.cast(tf.expand_dims(tf.argmax(raw_output_up, -1), 3), tf.float32),
                                     max_outputs=4)

                    all_trainable = [v for v in tf.trainable_variables()]

                    # Predictions: ignore all predictions whose labels are greater than or equal to n_classes.
                    raw_prediction = tf.reshape(raw_output_up, [-1, args.num_classes])
                    label_proc = prepare_label(labels_splits[i],
                                               tf.stack(raw_output_up.get_shape()[1:3]),
                                               num_classes=args.num_classes,
                                               one_hot=False)  # [batch_size, h, w]
                    raw_gt = tf.reshape(label_proc, [-1, ])
                    # indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)), 1)
                    indices = tf.where(tf.logical_and(tf.less(raw_gt, args.num_classes),
                                                      tf.greater_equal(raw_gt, 0)))
                    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                    prediction = tf.gather(raw_prediction, indices)

                    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
                        tf.argmax(tf.nn.softmax(prediction), axis=-1), gt,
                        num_classes=args.num_classes)
                    tf.summary.scalar('mean IoU_{}'.format(i), mIoU)
                    train_op.append(update_op)

                    # Pixel-wise softmax loss.
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
                    num_valid_pixels += tf.shape(gt)[0]
                    losses.append(tf.reduce_sum(loss))

            l2_losses = [args.weight_decay * tf.nn.l2_loss(v)
                         for v in tf.trainable_variables() if 'weights' in v.name]
            reduced_loss = tf.truediv(tf.reduce_sum(losses),
                                      tf.cast(num_valid_pixels, tf.float32)) + tf.add_n(l2_losses)
            tf.summary.scalar('average_loss', reduced_loss)

            grads = tf.gradients(reduced_loss, all_trainable, colocate_gradients_with_ops=True)

        variable_averages = tf.train.ExponentialMovingAverage(0.99, step_ph)
        variables_to_average = (tf.trainable_variables() + tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        train_op = tf.group(opt.apply_gradients(zip(grads, all_trainable)), *train_op)
        train_op = tf.group(train_op, variables_averages_op)

        summary_op = tf.summary.merge_all()

        # Set up the tf session and initialize variables.
        config = tf.ConfigProto()
        config.allow_soft_placement = True
        sess = tf.Session(config=config)
        init = [tf.global_variables_initializer(), tf.local_variables_initializer()]
        sess.run(init)

        # Saver for storing checkpoints of the model.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=2)

        # Restore from the ResNet ImageNet checkpoint; 'biased' and 'local_step'
        # variables live in the moving average and are excluded.
        restore_var = [v for v in tf.trainable_variables()
                       if 'conv5_3_pool' not in v.name and 'conv5_4' not in v.name
                       and 'conv6' not in v.name] + \
                      [v for v in tf.global_variables()
                       if ('moving_mean' in v.name or 'moving_variance' in v.name)
                       and ('conv5_3_pool' not in v.name and 'conv5_4_bn' not in v.name
                            and 'biased' not in v.name and 'local_step' not in v.name)]

        ckpt = tf.train.get_checkpoint_state(args.restore_from)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=restore_var)
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')

        """
        # Restore from a snapshot instead.
        restore_var = tf.global_variables()
        ckpt = tf.train.get_checkpoint_state(args.snapshot_dir)
        if ckpt and ckpt.model_checkpoint_path:
            loader = tf.train.Saver(var_list=restore_var, allow_empty=True)
            load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
            load(loader, sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found.')
            load_step = 0
        """

        # Start queue threads.
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        summary_writer = tf.summary.FileWriter(args.snapshot_dir, graph=sess.graph)

        # Iterate over training steps.
        for step in range(args.num_steps):
            start_time = time.time()
            feed_dict = {step_ph: step}
            if step % args.save_pred_every == 0 and step != 0:
                loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)
                save(saver, sess, args.snapshot_dir, step)
            elif step % 100 == 0:
                summary_str, loss_value, _, IOU = sess.run(
                    [summary_op, reduced_loss, train_op, mIoU], feed_dict=feed_dict)
                duration = time.time() - start_time
                summary_writer.add_summary(summary_str, step)
                print('step {:d} \t loss = {:.3f}, mean_IoU = {:.3f}, ({:.3f} sec/step)'
                      .format(step, loss_value, IOU, duration))
            else:
                loss_value, _ = sess.run([reduced_loss, train_op], feed_dict=feed_dict)

        coord.request_stop()
        coord.join(threads)
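# tf.train.ExponentialMovingAverage above maintains, for every tracked variable,
# shadow = decay * shadow + (1 - decay) * value. The update rule in plain
# Python, just to show the arithmetic:
def ema_update(shadow, value, decay=0.99):
    return decay * shadow + (1 - decay) * value

s = 0.0
for v in [1.0, 1.0, 1.0]:
    s = ema_update(s, v)
print(s)  # creeps toward 1.0 as updates accumulate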
import torch
from torch.utils.data import DataLoader
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image

from model import PSPNet
from dataset import MyDataset
from config import test_parser
from visualize import unnormalize_show, PIL_show

if __name__ == '__main__':
    arg = test_parser()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = PSPNet(n_classes=arg.n_classes)
    if arg.path is not None:
        print('load model ...')
        model.load_state_dict(torch.load(arg.path, map_location=device))
    model = model.to(device)

    test_dataset = MyDataset(img_dir=arg.img_dir, anno_dir=arg.anno_dir, phase='test')
    n_test_img = len(test_dataset)
    test_loader = DataLoader(test_dataset, batch_size=arg.batch, shuffle=True,
                             pin_memory=True, num_workers=arg.num_workers)

    model.eval()
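    # A minimal continuation sketch for the evaluation loop: forward passes
    # under no_grad and per-pixel argmax. Assumes the model's first output is
    # the main prediction, matching the PSPLoss convention used elsewhere here.
    with torch.no_grad():
        for img, anno_img in test_loader:
            img = img.to(device)
            outputs = model(img)
            out = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
            pred = out.argmax(dim=1)  # [batch, H, W] class indices
            print(pred.shape, pred.unique())
            break  # sketch: inspect one batch only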
def main():
    args = get_arguments()

    img, filename = load_img(args.img_path)
    img_shape = tf.shape(img)
    h, w = (tf.maximum(crop_size[0], img_shape[0]),
            tf.maximum(crop_size[1], img_shape[1]))
    img = preprocess(img, h, w)

    # Create network.
    net = PSPNet({'data': img}, is_training=False, num_classes=num_classes)

    with tf.variable_scope('', reuse=True):
        flipped_img = tf.image.flip_left_right(tf.squeeze(img))
        flipped_img = tf.expand_dims(flipped_img, dim=0)
        net2 = PSPNet({'data': flipped_img}, is_training=False, num_classes=num_classes)

    raw_output = net.layers['conv6']

    if args.flipped_eval:
        flipped_output = tf.image.flip_left_right(tf.squeeze(net2.layers['conv6']))
        flipped_output = tf.expand_dims(flipped_output, dim=0)
        raw_output = tf.add_n([raw_output, flipped_output])

    # Predictions.
    raw_output_up = tf.image.resize_bilinear(raw_output, size=[h, w], align_corners=True)
    raw_output_up = tf.image.crop_to_bounding_box(raw_output_up, 0, 0, img_shape[0], img_shape[1])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Init tf Session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)

    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    restore_var = tf.global_variables()
    ckpt = tf.train.get_checkpoint_state(args.model)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    preds = sess.run(pred)

    msk = decode_labels(preds, num_classes=num_classes)
    im = Image.fromarray(msk[0])
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    im.save(args.save_dir + filename)
def main():
    args = get_arguments()
    print(args)

    coord = tf.train.Coordinator()

    tf.reset_default_graph()
    image_list, label_list = read_labeled_image_list(DATA_DIRECTORY, DATA_LIST_PATH)

    # Create network.
    image_batch = tf.placeholder(tf.float32, [None, input_size, input_size, 3])
    net = PSPNet({'data': [image_batch]}, is_training=False, num_classes=num_classes)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.layers['conv6'][0]
    raw_output_up = tf.image.resize_bilinear(raw_output, size=[input_size, input_size],
                                             align_corners=True)

    # mIoU
    pred_all = tf.placeholder(tf.float32, [None, None])
    raw_all = tf.placeholder(tf.float32, [None, None, None, None])
    pred_flatten = tf.reshape(pred_all, [-1, ])
    raw_gt = tf.reshape(raw_all, [-1, ])
    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, num_classes - 1)), 1)
    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    pred_label = tf.gather(pred_flatten, indices)

    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred_label, gt, num_classes=num_classes)

    # Set up the tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()
    sess.run(init)
    sess.run(tf.local_variables_initializer())

    restore_var = tf.global_variables()

    ckpt = tf.train.get_checkpoint_state(args.model)
    if ckpt and ckpt.model_checkpoint_path:
        loader = tf.train.Saver(var_list=restore_var)
        load_step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])
        load(loader, sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint file found.')

    for step in range(len(image_list)):
        image, label = cv2.imread(image_list[step], 1), cv2.imread(label_list[step], 0)
        label = np.reshape(label, [1, label.shape[0], label.shape[1], 1])

        imgsplitter = ImageSplitter(image, 1.0, input_size, IMG_MEAN)
        feed_dict = {image_batch: imgsplitter.get_split_crops()}
        logits = sess.run(raw_output_up, feed_dict=feed_dict)
        total_logits = imgsplitter.reassemble_crops(logits)

        # mirror
        image_mirror = image[:, ::-1]
        imgsplitter_mirror = ImageSplitter(image_mirror, 1.0, input_size, IMG_MEAN)
        feed_dict = {image_batch: imgsplitter_mirror.get_split_crops()}
        logits_mirror = sess.run(raw_output_up, feed_dict=feed_dict)
        logits_mirror = imgsplitter_mirror.reassemble_crops(logits_mirror)
        total_logits += logits_mirror[:, ::-1]

        prediction = np.argmax(total_logits, axis=-1)
        # =====================================================#
        sess.run([update_op], feed_dict={pred_all: prediction, raw_all: label})

        if step > 0 and args.measure_time:
            calculate_time(sess, net)

        if step % 10 == 0:
            print('Finish {0}/{1}'.format(step, len(image_list)))
            print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))
    print('step {0} mIoU: {1}'.format(step, sess.run(mIoU)))
        Returns
        -------
        loss : torch tensor
        """
        loss = F.cross_entropy(outputs[0], gts, reduction='mean')
        loss_aux = F.cross_entropy(outputs[1], gts, reduction='mean')
        return loss + self.aux_weight * loss_aux


if __name__ == '__main__':
    loss_func = PSPLoss(aux_weight=0.4)

    img_dir = 'data/leftImg8bit/'
    anno_dir = 'data/gtFine/'
    phase = 'val'
    batch = 2
    num_workers = 4

    model = PSPNet(n_classes=35)
    dataset = MyDataset(img_dir=img_dir, anno_dir=anno_dir, phase=phase, h=512, w=1024)
    loader = DataLoader(dataset, batch_size=batch, shuffle=True,
                        pin_memory=True, num_workers=num_workers)

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    for t, (img, anno_img) in enumerate(loader, 1):
        print(img.size())
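        # Continuing the smoke test: one forward pass through the aux-weighted
        # loss (a sketch; assumes the model returns (main_out, aux_out),
        # matching what PSPLoss expects):
        img = img.to(device)
        anno_img = anno_img.to(device)
        outputs = model(img)
        loss = loss_func(outputs, anno_img.long())
        print('loss:', loss.item())
        break  # one batch is enough for the check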
    def _build_model(self):
        net = PSPNet({'data': self._images},
                     is_training=self.training,
                     num_classes=self.num_classes)
        self.logits = tf.squeeze(net.layers['conv6'], [1, 2])
# CLASSES = ['sky', 'building', 'pole', 'road', 'pavement',
#            'tree', 'signsymbol', 'fence', 'car',
#            'pedestrian', 'bicyclist', 'unlabelled']

# Load model
if FLAGS.resume_train:
    model = torch.load(FLAGS.resume_train)
else:
    model = PSPNet(
        preupsample=False,
        upsampling=8,
        encoder_name="mobilenetv1",
        encoder_weights=False,
        encoder_depth=5,
        psp_out_channels=512,  # PSP out channels after concat, not yet final
        psp_use_batchnorm=True,
        psp_dropout=0.2,
        in_channels=3,
        classes=len(CLASSES),
        activation='sigmoid',  # Optional[Union[str, callable]]
        dilated=False,
        # aux_params={'classes': 1, 'height': 320,
        #             'width': 320, 'dropout': 0.2},  # Optional
    )

# Define parameters
loss = losses.DiceLoss()
metrics = [
    metrics.IoU(threshold=0.5),
    metrics.Accuracy(),
    metrics.Recall()
]
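# DiceLoss optimizes soft set overlap rather than per-pixel likelihood. The
# underlying formula as a self-contained PyTorch sketch (not the library's
# exact implementation):
import torch

def soft_dice_loss(probs, target, eps=1.0):
    """probs, target: [batch, classes, H, W]; target is one-hot encoded."""
    dims = (0, 2, 3)
    intersection = (probs * target).sum(dims)
    cardinality = probs.sum(dims) + target.sum(dims)
    dice = (2.0 * intersection + eps) / (cardinality + eps)
    return 1.0 - dice.mean()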