def evaluate(model, loader, n_class, device, dtype, iter_idx, writer):
    hist = np.zeros((n_class, n_class))
    for batch_idx, (data, target) in enumerate(loader):
        data = data.to(device=device, dtype=dtype)
        with torch.no_grad():
            output = model(data)

        _, h, w = target.shape
        output = torch.nn.functional.interpolate(output, size=(h, w), mode='bilinear',
                                                 align_corners=True)

        output, target = output.data.cpu().numpy(), target.data.cpu().numpy()
        output = np.argmax(output, axis=1)
        hist += fast_hist(target.flatten(), output.flatten(), n_class)

        if batch_idx == 0:
            writer.add_image('val/input',
                             vutils.make_grid(data, normalize=True, scale_each=True, padding=0),
                             iter_idx)
            writer.add_image('val/output', decode_labels(output[0]), iter_idx)
            writer.add_image('val/gt', decode_labels(target[0]), iter_idx)

    m_iou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    return np.sum(m_iou) / len(m_iou)
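# evaluate() above (and the VOC evaluation further below) rely on a fast_hist
# helper to accumulate a confusion matrix. A minimal sketch, assuming the usual
# FCN-style formulation (pixels whose ground-truth label falls outside
# [0, n_class) are ignored); this repo's version may differ in detail:
import numpy as np

def fast_hist(label_true, label_pred, n_class):
    # Keep only pixels whose ground-truth label is a valid class index.
    mask = (label_true >= 0) & (label_true < n_class)
    # Encode each (true, pred) pair as a single integer, count occurrences,
    # then reshape the counts into an n_class x n_class confusion matrix.
    return np.bincount(
        n_class * label_true[mask].astype(int) + label_pred[mask].astype(int),
        minlength=n_class ** 2).reshape(n_class, n_class)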
def save_train_result(args, step, images, labels, preds):
    fig, axes = plt.subplots(args.save_num_images, 2, figsize=(16, 12))
    for i in range(args.save_num_images):
        # Undo mean subtraction and reverse channel order before writing.
        cv2.imwrite(args.save_dir + str(step) + "_%d.png" % i,
                    (images[i] + IMG_MEAN)[:, :, ::-1].astype(np.uint8))
        axes.flat[i * 2 + 0].set_title('mask')
        axes.flat[i * 2 + 0].imshow(decode_labels(labels[i, :, :, 0]))
        axes.flat[i * 2 + 1].set_title('pred')
        axes.flat[i * 2 + 1].imshow(decode_labels(preds[i, :, :, 0]))
    plt.savefig(args.save_dir + str(step) + ".png")
    plt.close(fig)
def save_val_result(args, step, images, labels, preds, i):
    for j in range(BATCH_SIZE):
        fig, axes = plt.subplots(1, 2, figsize=(16, 12))
        if j < 1:
            cv2.imwrite(args.save_dir + str(step) + '_' + str(i * BATCH_SIZE + j) + "test_img.png",
                        (images[j] + IMG_MEAN)[:, :, ::-1].astype(np.uint8))
        axes.flat[0].set_title('mask')
        axes.flat[0].imshow(decode_labels(labels[j, :, :, 0]))
        axes.flat[1].set_title('pred')
        axes.flat[1].imshow(decode_labels(preds[j, :, :, 0]))
        plt.savefig(args.save_dir + str(step) + '_' + str(i * BATCH_SIZE + j) + "test.png")
        plt.close(fig)
def main():
    args = docopt(docstr, version='v0.1')
    print(args)

    gpu0 = int(args['--gpu0'])

    model = deeplab_resnet.Res_Deeplab(21, True, 4, 1e-2)
    model.load_state_dict(torch.load(args['--snapshots']))
    model.eval().cuda(gpu0)

    im_path = args['--img_path']
    img = cv2.imread(im_path).astype(float)
    img_original = img.copy() / 255.0
    # Subtract the BGR channel means used during training.
    img[:, :, 0] = img[:, :, 0] - 104.008
    img[:, :, 1] = img[:, :, 1] - 116.669
    img[:, :, 2] = img[:, :, 2] - 122.675

    with torch.no_grad():
        output = model(*[
            torch.from_numpy(inp[np.newaxis, :].transpose(0, 3, 1, 2)).float().cuda(gpu0)
            for inp in [img, img_original]
        ])
    output = output.cpu().data[0].numpy().transpose(1, 2, 0)
    output = np.argmax(output, axis=2)

    vis_output = decode_labels(output)
    output_directory = os.path.dirname(im_path)
    output_name = os.path.splitext(os.path.basename(im_path))[0]
    save_path = os.path.join(output_directory, '{}_labels.png'.format(output_name))
    imsave(save_path, vis_output)
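# decode_labels is called throughout with slightly different signatures (a bare
# 2-D label map here, (labels, num_images) or (labels, num_classes) elsewhere).
# A minimal NumPy sketch for the single-image case; VOC_PALETTE is a
# hypothetical stand-in for the repo's color table, shown here truncated to the
# first five PASCAL VOC colors for illustration:
import numpy as np

VOC_PALETTE = np.array(
    [(0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128)],
    dtype=np.uint8)

def decode_labels_single(label_map):
    # Index the palette with the integer class map: (h, w) -> (h, w, 3) RGB.
    return VOC_PALETTE[label_map]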
def main():
    args = docopt(docstr, version='v0.1')
    print(args)

    gpu0 = int(args['--gpu0'])
    im_path = args['--testIMpath']
    gt_path = args['--testGTpath']

    model = deeplab_resnet.Res_Deeplab(int(args['--NoLabels']), args['--dgf'], 4, 1e-2)
    model.eval().cuda(gpu0)

    img_list = open('data/list/val.txt').readlines()
    saved_state_dict = torch.load(args['--snapshots'])
    model.load_state_dict(saved_state_dict)

    save_path = os.path.join('data', args['--exp'])
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    max_label = int(args['--NoLabels']) - 1  # labels from 0, 1, ..., 20 (for VOC)
    hist = np.zeros((max_label + 1, max_label + 1))
    for idx, i in enumerate(img_list):
        print('{}/{} ...'.format(idx + 1, len(img_list)))

        img = cv2.imread(os.path.join(im_path, i[:-1] + '.jpg')).astype(float)
        img_original = img.copy() / 255.0
        img[:, :, 0] = img[:, :, 0] - 104.008
        img[:, :, 1] = img[:, :, 1] - 116.669
        img[:, :, 2] = img[:, :, 2] - 122.675

        if args['--dgf']:
            inputs = [img, img_original]
        else:
            # Without the guided filter, pad the image into a fixed 513x513 canvas.
            inputs = [np.zeros((513, 513, 3))]
            inputs[0][:img.shape[0], :img.shape[1], :] = img

        with torch.no_grad():
            output = model(*[
                torch.from_numpy(inp[np.newaxis, :].transpose(0, 3, 1, 2)).float().cuda(gpu0)
                for inp in inputs
            ])

        if not args['--dgf']:
            interp = nn.Upsample(size=(513, 513), mode='bilinear', align_corners=True)
            output = interp(output)
            output = output[:, :, :img.shape[0], :img.shape[1]]

        output = output.cpu().data[0].numpy().transpose(1, 2, 0)
        output = np.argmax(output, axis=2)

        vis_output = decode_labels(output)
        imsave(os.path.join(save_path, i[:-1] + '.png'), vis_output)

        gt = cv2.imread(os.path.join(gt_path, i[:-1] + '.png'), 0)
        hist += fast_hist(gt.flatten(), output.flatten(), max_label + 1)

    miou = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
    print("Mean IoU = ", np.sum(miou) / len(miou))
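# The two main() functions above subtract the same per-channel BGR means that
# save_train_result/save_val_result add back via IMG_MEAN, and the TensorFlow
# script below subtracts as IMG_MEAN directly. The constant is presumably:
import numpy as np

IMG_MEAN = np.array((104.008, 116.669, 122.675), dtype=np.float32)  # B, G, R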
def main():
    args = get_arguments()

    # Read image.
    img = tf.image.decode_jpeg(tf.read_file(args.img_path), channels=3)
    # Convert RGB to BGR.
    red, green, blue = tf.split(axis=2, num_or_size_splits=3, value=img)
    img = tf.cast(tf.concat(axis=2, values=[blue, green, red]), dtype=tf.float32)
    # Subtract mean.
    img -= IMG_MEAN

    # Create network.
    net = DeepLabResNetModel(tf.expand_dims(img, dim=0), ModeKeys.TRAIN,
                             args.num_classes, args.atrous_blocks)

    # Predictions.
    raw_output = net.output
    raw_output_up = tf.image.resize_bilinear(raw_output, tf.shape(img)[0:2])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Init.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        restorer = tf.train.Saver()
        load_model(restorer, sess, args.model_weights)

        preds = sess.run(pred)

        msk = decode_labels(preds, num_classes=args.num_classes)
        im = Image.fromarray(msk[0])
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        im.save(args.save_dir + 'mask.png')
        print('Image saved')
def plot_images(imdata, lbl_preds, lbl_trues, epoch, split='test', crop_size=None):
    img = unNormalize(imdata.data).cpu().numpy()[0] * 255
    img = img.astype(np.uint8)  # (c, h, w)

    pred = []
    gt = []
    for lbl_pred, lbl_true in zip(lbl_preds, lbl_trues):
        predtmp = decode_labels(lbl_pred, num_images=1)[0]  # (h, w, c)
        gttmp = decode_labels(lbl_true, num_images=1)[0]
        pred.append(np.moveaxis(predtmp, 2, 0))  # (c, h, w)
        gt.append(np.moveaxis(gttmp, 2, 0))

    if crop_size is not None:
        # center crop; if the crop is larger than the image, keep the full extent
        _, h, w = img.shape
        tx, ty, bx, by = 0, 0, w, h
        ch, cw = crop_size
        if ch > h:
            ty, by = 0, h
        else:
            ty, by = (h - ch) // 2, h - (h - ch) // 2
        if cw > w:
            tx, bx = 0, w
        else:
            tx, bx = (w - cw) // 2, w - (w - cw) // 2
        img = img[:, ty:by, tx:bx]
        for i in range(len(pred)):
            pred[i] = pred[i][:, ty:by, tx:bx]
            gt[i] = gt[i][:, ty:by, tx:bx]

    grid_imgs = [img]
    for g, p in zip(gt, pred):
        grid_imgs = grid_imgs + [g] + [p]

    plotter.plot_images(grid_imgs, split, epoch, nrow=7,
                        exp_name=args.name + '_' + args.dataset)
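# plot_images above (and test() below) assume an unNormalize helper that
# inverts a torchvision-style Normalize transform. A minimal sketch; the
# ImageNet mean/std defaults here are an assumption, the actual values depend
# on the training transform:
import torch

def unNormalize(batch, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    # batch: (n, c, h, w) normalized tensor; returns values back in [0, 1].
    mean = torch.tensor(mean, device=batch.device).view(1, -1, 1, 1)
    std = torch.tensor(std, device=batch.device).view(1, -1, 1, 1)
    return batch * std + mean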
def main(test_data_dir, save_img=False):
    if save_img and not os.path.exists(IMG_DIR):
        os.makedirs(IMG_DIR)
        print('INFO : IMG_DIR does not exist, created the directory!')

    mha_list = read_mha_files(test_file)
    length = len(mha_list) * 155  # 155 axial slices per volume

    image_batch = net_inputs_test(batch_size, test_data_dir)
    image_batch = tf.cast(image_batch, tf.float32)

    output = Model(image_batch, dcr_type, dilated_rates=dilated_rates)
    out_mask = tf.expand_dims(tf.cast(tf.arg_max(output, dimension=3), tf.uint8), -1)
    out_mask = tf.image.resize_images(out_mask, [240, 240])

    with tf.Session() as sess:
        mha = []
        ckpt = tf.train.get_checkpoint_state(CKPT_PATH)
        if not ckpt:
            raise RuntimeError('No Checkpoint Found !')
        else:
            saver = tf.train.Saver()
            ckpt_path = ckpt.model_checkpoint_path
            saver.restore(sess, ckpt_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for step in range(1, length + 1):
            stdout.write('\rProcessing {} / {} ...'.format(step, length))
            _out_mask = sess.run(out_mask)
            _out_mask = np.round(_out_mask).astype(np.uint8)
            re_img = _out_mask.reshape([240, 240, 1])
            mha.append(re_img)

            if save_img:
                img = utils.decode_labels(
                    np.round(_out_mask).astype(np.uint8)).reshape([240, 240, 3])
                scipy.misc.imsave('{}/{}.jpg'.format(IMG_DIR, step), img)

            if step % 155 == 0 and step != 0:
                # One full volume collected: derive its output name and reset the buffer.
                file_id = mha_list.pop().split(',')[0].strip()[1:-1].split('.')[-2]
                mha_name = ''.join(['VSD.Seg_HG_001.', str(file_id), '.mha'])
                mha = []

        coord.request_stop()
        coord.join(threads)
    print()
def predict(self):
    self.predict_setup()

    self.sess.run(tf.global_variables_initializer())
    self.sess.run(tf.local_variables_initializer())

    # load checkpoint
    # checkpointfile = self.conf.modeldir + '/model.ckpt-' + str(self.conf.valid_step)
    # checkpointfile = 'deeplab_resnet_init.ckpt'
    checkpointfile = tf.train.latest_checkpoint("./model_multigpu_bs10/")
    # checkpointfile = './model_crf_test0' + '/model.ckpt-' + '0'
    self.load(self.loader, checkpointfile)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

    # img_name_list
    image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

    # Predict!
    for step in range(self.conf.test_num_steps):
        preds = self.sess.run(self.pred)
        resized_dec = self.sess.run(self.resized_decoder100)
        img_name = image_list[step].split('/')[2].split('.')[0]

        # Save raw predictions, i.e. each pixel is an integer in [0, 20].
        im = Image.fromarray(preds[0, :, :, 0], mode='L')
        filename = '/%s_mask.png' % (img_name)
        im.save(self.conf.out_dir + '/prediction' + filename)

        # resized = Image.fromarray(resized_dec[0], mode='RGB')
        # fn = '/%s_resized_dec.png' % (img_name)
        # resized.save(self.conf.out_dir + '/resized_decoding' + fn)

        # Save predictions for visualization.
        # See utils/label_utils.py for the color setting;
        # needs to be modified based on the dataset.
        if self.conf.visual:
            msk = decode_labels(preds, num_classes=self.conf.num_classes)
            im = Image.fromarray(msk[0], mode='RGB')
            filename = '/%s_mask_visual.png' % (img_name)
            im.save(self.conf.out_dir + '/visual_prediction' + filename)

        if step % 100 == 0:
            print('step {:d}'.format(step))

    print('The output files have been saved to {}'.format(self.conf.out_dir))

    # finish
    self.coord.request_stop()
    self.coord.join(threads)
def predict(self):
    normal_color = "\033[0;37;40m"
    self.predict_setup()

    self.sess.run(tf.global_variables_initializer())
    self.sess.run(tf.local_variables_initializer())

    # load checkpoint
    checkpointfile = self.conf.modeldir + '/model.ckpt-' + str(self.conf.test_step)
    self.load(self.loader, checkpointfile)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

    # img_name_list
    image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

    # Predict!
    for step in range(self.conf.test_num_steps):
        # raw_output_ is presumably created in predict_setup(); raw_preds is unused below.
        preds, raw_preds = self.sess.run([self.pred, raw_output_])
        img_name = image_list[step].split('/')[2].split('.')[0]

        # Save raw predictions, i.e. each pixel is an integer in [0, 20].
        prior1 = self.conf.prior
        im = Image.fromarray(preds[0, :, :, 0], mode='L')
        filename = '/%s_mask.png' % (img_name)
        im.save(self.conf.out_dir + '/prediction' + '/' + str(prior1) + filename)

        # Save predictions for visualization.
        # See utils/label_utils.py for the color setting;
        # needs to be modified based on the dataset.
        if self.conf.visual:
            msk = decode_labels(preds, num_classes=self.conf.num_classes)
            im = Image.fromarray(msk[0], mode='RGB')
            filename = '/%s_mask_visual.png' % (img_name)
            im.save(self.conf.out_dir + '/visual_prediction' + '/' + str(prior1) + filename)

        if step % 100 == 0:
            print('step {:d}'.format(step))

    print('The output files have been saved to {}'.format(self.conf.out_dir) + normal_color)

    # finish
    self.coord.request_stop()
    self.coord.join(threads)
def image_summary(image, truth, prediction, image_mean, image_std=None,
                  num_classes=2, max_output=10):
    """
    :param image: 4-D array (N, H, W, 3)
    :param truth: 4-D array (N, H, W, 1)
    :param prediction: 4-D array (N, H, W, 1)
    :param image_mean: [B, G, R]
    :param image_std: [B, G, R]
    :param num_classes: scalar
    :param max_output: scalar, must be less than N
    :return: 4-D array (max_output, H, 3*W, 3)
    """
    images = inv_preprocess(image, max_output, image_mean, image_std)
    labels = decode_labels(truth, max_output, num_classes)
    predictions = decode_labels(prediction, max_output, num_classes)
    return np.concatenate([images, labels, predictions], axis=2)
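# A quick usage sketch for image_summary with random stand-in data (shapes per
# the docstring above). The values are placeholders, not real statistics, and
# the assert merely restates the documented output layout:
def _demo_image_summary():
    import numpy as np
    image = np.random.rand(4, 64, 64, 3).astype(np.float32)
    truth = np.random.randint(0, 2, size=(4, 64, 64, 1))
    prediction = np.random.randint(0, 2, size=(4, 64, 64, 1))
    grid = image_summary(image, truth, prediction,
                         image_mean=[104.008, 116.669, 122.675],
                         num_classes=2, max_output=4)
    # The output packs image | label | prediction side by side along the width.
    assert grid.shape == (4, 64, 3 * 64, 3)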
def predict(self):
    self.predict_setup()

    self.sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

    # checkpoint file
    checkpoint_file = tf.train.latest_checkpoint(self.conf.modeldir)
    if (not os.path.exists("{}.meta".format(checkpoint_file))) and (self.conf.pretrain_file is not None):
        self.load(self.loader, self.conf.pretrain_file)
    elif os.path.exists("{}.meta".format(checkpoint_file)):
        self.load(self.loader, checkpoint_file)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

    # img_name_list
    image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

    # Predict!
    for step in range(self.conf.test_num_steps):
        preds = self.sess.run(self.pred)
        img_name = image_list[step].split('/')[2].split('.')[0]

        # Save raw predictions, i.e. each pixel is an integer in [0, 20].
        im = Image.fromarray(preds[0, :, :, 0], mode='L')
        filename = '/%s_mask.png' % (img_name)
        im.save(self.conf.out_dir + '/prediction' + filename)

        # Save predictions for visualization.
        if self.conf.visual:
            msk = decode_labels(preds, num_classes=self.conf.num_classes)
            im = Image.fromarray(msk[0], mode='RGB')
            im.save(self.conf.out_dir + '/visual_prediction' + '/{}_mask_visual.png'.format(img_name))

            # original image
            origin_image = "{}{}".format(self.conf.data_dir, image_list[step])
            im = Image.open(origin_image)
            filename, ext = os.path.splitext(filename)
            im.save(self.conf.out_dir + '/visual_prediction' + filename + "_original" + ext)

        if step % 100 == 0:
            print('step {:d}'.format(step))

    print('The output files have been saved to {}'.format(self.conf.out_dir))

    # finish
    self.coord.request_stop()
    self.coord.join(threads)
def predict(self):
    self.predict_setup()

    self.sess.run([tf.local_variables_initializer(), tf.global_variables_initializer()])

    # load checkpoint
    self.load(self.loader, self.conf.modeldir + '/model.ckpt-' + str(self.conf.valid_step))

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

    # img_name_list
    image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

    # Predict!
    for step in range(self.conf.test_num_steps):
        preds = self.sess.run(self.pred)
        img_name = image_list[step].split('/')[2].split('.')[0]

        # Save raw predictions, i.e. each pixel is an integer in [0, 20].
        im = Image.fromarray(preds[0, :, :, 0], mode='L')
        im.save(self.conf.out_dir + '/prediction/{}_mask.png'.format(img_name))

        if self.conf.visual:
            msk = decode_labels(preds, num_classes=self.conf.num_classes)
            im = Image.fromarray(msk[0], mode='RGB')
            im.save(self.conf.out_dir + '/visual_prediction/{}_mask_visual.png'.format(img_name))

        if step % 100 == 0:
            print('step {:d}'.format(step))

    print('The output files have been saved to {}'.format(self.conf.out_dir))

    # finish
    self.coord.request_stop()
    self.coord.join(threads)
def predict(self):
    self.predict_setup()

    self.sess.run(tf.global_variables_initializer())
    self.sess.run(tf.local_variables_initializer())

    # load checkpoint
    checkpointfile = self.conf.modeldir + '/model.ckpt-' + str(self.conf.valid_step)
    self.load(self.loader, checkpointfile)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=self.coord, sess=self.sess)

    # img_name_list
    image_list, _ = read_labeled_image_list('', self.conf.test_data_list)

    # Predict!
    for step in range(self.conf.test_num_steps):
        preds = self.sess.run(self.pred)
        img_name = image_list[step].split('/')[2].split('.')[0]

        # Save raw predictions, i.e. each pixel is an integer in [0, 20].
        im = Image.fromarray(preds[0, :, :, 0], mode='L')
        filename = '/%s_mask.png' % (img_name)
        im.save(self.conf.out_dir + '/prediction' + filename)

        # Save predictions for visualization.
        # See utils/label_utils.py for the color setting;
        # needs to be modified based on the dataset.
        if self.conf.visual:
            msk = decode_labels(preds, num_classes=self.conf.num_classes)
            im = Image.fromarray(msk[0], mode='RGB')
            filename = '/%s_mask_visual.png' % (img_name)
            im.save(self.conf.out_dir + '/visual_prediction' + filename)

        if step % 100 == 0:
            print('step {:d}'.format(step))

    print('The output files have been saved to {}'.format(self.conf.out_dir))

    # finish
    self.coord.request_stop()
    self.coord.join(threads)
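# The predict() variants above all call read_labeled_image_list to recover the
# file names behind the input queue. A minimal sketch, assuming the common
# DeepLab-TensorFlow list format of one "image_path label_path" pair per line;
# this repo's version may handle extra cases:
def read_labeled_image_list(data_dir, data_list):
    images, masks = [], []
    with open(data_list, 'r') as f:
        for line in f:
            try:
                image, mask = line.strip().split(' ')
            except ValueError:  # image-only lines (no ground truth)
                image = mask = line.strip()
            images.append(data_dir + image)
            masks.append(data_dir + mask)
    return images, masks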
def build_computational_graph(self):
    """Build the ICNet with ResNet50 backbone and 4-level PSP module."""

    def bottleneck_module(inputs, lvl, pad, is_training, filters, strides,
                          data_format='channels_last', bottleneck_factor=4):
        """Implement the bottleneck module proposed in ResNet:
        1x1 conv -> 3x3 conv -> 1x1 conv.
        """
        # 1x1 reduce component
        x = tf.layers.conv2d(inputs, filters=filters // bottleneck_factor, kernel_size=1,
                             strides=strides, data_format=data_format, use_bias=False,
                             name="conv{}_1x1_reduce".format(lvl))
        x = tf.layers.batch_normalization(x, momentum=0.95, epsilon=1e-5,
                                          training=is_training,
                                          name="conv{}_1x1_reduce_bn".format(lvl))
        x = tf.nn.relu(x)

        # 3x3 component
        x = zero_padding(x, pad)
        x = tf.layers.conv2d(x, filters=filters // bottleneck_factor, kernel_size=3,
                             strides=1, dilation_rate=pad, data_format=data_format,
                             use_bias=False, name="conv{}_3x3".format(lvl))
        x = tf.layers.batch_normalization(x, momentum=0.95, epsilon=1e-5,
                                          training=is_training,
                                          name="conv{}_3x3_bn".format(lvl))
        x = tf.nn.relu(x)

        # 1x1 increase component
        x = tf.layers.conv2d(x, filters=filters, kernel_size=1, strides=1,
                             data_format=data_format, use_bias=False,
                             name="conv{}_1x1_increase".format(lvl))
        x = tf.layers.batch_normalization(x, momentum=0.95, epsilon=1e-5,
                                          training=is_training,
                                          name="conv{}_1x1_increase_bn".format(lvl))

        # 1x1 project (if needed)
        if data_format == "channels_last":
            _, h, w, d = inputs.get_shape().as_list()
            _, hh, ww, dd = x.get_shape().as_list()
        else:
            _, d, h, w = inputs.get_shape().as_list()
            _, dd, hh, ww = x.get_shape().as_list()

        if h != hh or d != dd:
            conv_proj = tf.layers.conv2d(inputs, filters, kernel_size=1, strides=strides,
                                         use_bias=False, name="conv{}_1x1_proj".format(lvl))
            conv_proj_bn = tf.layers.batch_normalization(conv_proj, momentum=0.95,
                                                         epsilon=1e-5, training=is_training,
                                                         name="conv{}_1x1_proj_bn".format(lvl))
            out = x + conv_proj_bn
        else:
            out = x + inputs
        return tf.nn.relu(out)

    def build_dilated_residual_network(input_layer):
        """Construct a 34-layer variant dilated residual network."""
        is_training = self.placeholders["is_training"]

        conv1_1 = tf.layers.conv2d(input_layer, filters=32, kernel_size=3, strides=2,
                                   padding="same", use_bias=False, name="conv1_1_3x3_s2")
        conv1_1_bn = tf.layers.batch_normalization(conv1_1, momentum=0.95, epsilon=1e-5,
                                                   training=is_training,
                                                   name="conv1_1_3x3_s2_bn")
        conv1_1_relu = tf.nn.relu(conv1_1_bn)

        conv1_2 = tf.layers.conv2d(conv1_1_relu, filters=32, kernel_size=3, strides=1,
                                   padding="same", use_bias=False, name="conv1_2_3x3")
        conv1_2_bn = tf.layers.batch_normalization(conv1_2, momentum=0.95, epsilon=1e-5,
                                                   training=is_training, name="conv1_2_3x3_bn")
        conv1_2_relu = tf.nn.relu(conv1_2_bn)

        conv1_3 = tf.layers.conv2d(conv1_2_relu, filters=64, kernel_size=3, strides=1,
                                   padding="same", use_bias=False, name="conv1_3_3x3")
        conv1_3_bn = tf.layers.batch_normalization(conv1_3, momentum=0.95, epsilon=1e-5,
                                                   training=is_training, name="conv1_3_3x3_bn")
        conv1_3_relu = tf.nn.relu(conv1_3_bn)

        padding0 = zero_padding(conv1_3_relu, paddings=1)
        pool1 = tf.layers.max_pooling2d(padding0, pool_size=3, strides=2,
                                        padding='valid', name="pool1")

        conv2_1_block = bottleneck_module(pool1, lvl="2_1", pad=1,
                                          is_training=is_training, filters=128, strides=1)
        conv2_2_block = bottleneck_module(conv2_1_block, lvl="2_2", pad=1,
                                          is_training=is_training, filters=128, strides=1)
        conv2_3_block = bottleneck_module(conv2_2_block, lvl="2_3", pad=1,
                                          is_training=is_training, filters=128, strides=1)
        conv3_1_block = bottleneck_module(conv2_3_block, lvl="3_1", pad=1,
                                          is_training=is_training, filters=256, strides=2)

        # We share weights for the low and med resolution levels;
        # conv3_1_sub4 is a hook into the end of the med resolution level.
        conv3_1_sub4 = tf.image.resize_bilinear(conv3_1_block,
                                                tf.shape(conv3_1_block)[1:-1] // 2,
                                                align_corners=True, name="conv3_1_sub4")

        conv3_2_block = bottleneck_module(conv3_1_sub4, lvl="3_2", pad=1,
                                          is_training=is_training, filters=256, strides=1)
        conv3_3_block = bottleneck_module(conv3_2_block, lvl="3_3", pad=1,
                                          is_training=is_training, filters=256, strides=1)
        conv3_4_block = bottleneck_module(conv3_3_block, lvl="3_4", pad=1,
                                          is_training=is_training, filters=256, strides=1)

        # Pad is used as the dilation rate internally in the bottleneck module.
        conv4_1_block = bottleneck_module(conv3_4_block, lvl="4_1", pad=2,
                                          is_training=is_training, filters=512, strides=1)
        conv4_2_block = bottleneck_module(conv4_1_block, lvl="4_2", pad=2,
                                          is_training=is_training, filters=512, strides=1)
        conv4_3_block = bottleneck_module(conv4_2_block, lvl="4_3", pad=2,
                                          is_training=is_training, filters=512, strides=1)
        conv4_4_block = bottleneck_module(conv4_3_block, lvl="4_4", pad=2,
                                          is_training=is_training, filters=512, strides=1)
        conv4_5_block = bottleneck_module(conv4_4_block, lvl="4_5", pad=2,
                                          is_training=is_training, filters=512, strides=1)
        conv4_6_block = bottleneck_module(conv4_5_block, lvl="4_6", pad=2,
                                          is_training=is_training, filters=512, strides=1)

        conv5_1_block = bottleneck_module(conv4_6_block, lvl="5_1", pad=4,
                                          is_training=is_training, filters=1024, strides=1)
        conv5_2_block = bottleneck_module(conv5_1_block, lvl="5_2", pad=4,
                                          is_training=is_training, filters=1024, strides=1)
        conv5_3_block = bottleneck_module(conv5_2_block, lvl="5_3", pad=4,
                                          is_training=is_training, filters=1024, strides=1)

        return conv3_1_block, conv5_3_block

    input_shape = tf.shape(self.placeholders["image"])
    processed_image = self.placeholders["image"]
    is_training = self.placeholders["is_training"]

    with tf.variable_scope("ICNet"):
        # Assume NHWC
        data_sub2 = tf.image.resize_bilinear(processed_image, (input_shape[1:-1] // 2),
                                             align_corners=True, name="data_sub2")
        conv3_1, drn = build_dilated_residual_network(data_sub2)

        # According to the paper: "1/4 sized image is fed into PSPNet with
        # downsampling rate 8, resulting in a 1/32-resolution feature map".
        # However, according to the author's Cityscapes prototxt, we feed a
        # 1/2 sized image into PSPNet with downsampling rate 16. Either way
        # results in a 1/32-resolution feature map; compute those dimensions.
        h, w = self.input_shape[0] // 32, self.input_shape[1] // 32
        pool_sizes = strides_list = [(h, w), (h // 2, w // 2), (h // 3, w // 3), (h // 4, w // 4)]
        # These are used to match the names of pretrained weights.
        level_indices = [1, 2, 3, 6]
        psp = pyramid_pooling_module(drn, filters=256, pool_sizes=pool_sizes,
                                     strides_list=strides_list, level_indices=level_indices,
                                     is_training=is_training, name_prefix="conv5_3",
                                     convolve=False)

        conv5_4 = tf.layers.conv2d(psp, filters=256, kernel_size=1, strides=1,
                                   padding="same", use_bias=False, name="conv5_4_k1")
        conv5_4_bn = tf.layers.batch_normalization(conv5_4, momentum=0.95, epsilon=1e-5,
                                                   training=is_training, name="conv5_4_k1_bn")
        conv5_4_bn = tf.nn.relu(conv5_4_bn)

        # Build light high resolution CNN on top of input
        conv1_sub1 = tf.layers.conv2d(processed_image, kernel_size=3, filters=32, strides=2,
                                      padding="same", use_bias=False, name="conv1_sub1")
        conv1_sub1_bn = tf.layers.batch_normalization(conv1_sub1, momentum=0.95, epsilon=1e-5,
                                                      training=is_training, name="conv1_sub1_bn")
        conv1_sub1_relu = tf.nn.relu(conv1_sub1_bn)

        conv2_sub1 = tf.layers.conv2d(conv1_sub1_relu, kernel_size=3, filters=32, strides=2,
                                      padding="same", use_bias=False, name="conv2_sub1")
        conv2_sub1_bn = tf.layers.batch_normalization(conv2_sub1, momentum=0.95, epsilon=1e-5,
                                                      training=is_training, name="conv2_sub1_bn")
        conv2_sub1_relu = tf.nn.relu(conv2_sub1_bn)

        conv3_sub1 = tf.layers.conv2d(conv2_sub1_relu, kernel_size=3, filters=64, strides=2,
                                      padding="same", use_bias=False, name="conv3_sub1")
        conv3_sub1_bn = tf.layers.batch_normalization(conv3_sub1, momentum=0.95, epsilon=1e-5,
                                                      training=is_training, name="conv3_sub1_bn")
        conv3_sub1_relu = tf.nn.relu(conv3_sub1_bn)

        # Do cascade feature fusion for sub24
        sub24_cff_names = {
            "f1_conv": "conv_sub4",
            "f1_bn": "conv_sub4_bn",
            "f2_conv": "conv3_1_sub2_proj",
            "f2_bn": "conv3_1_sub2_proj_bn",
            "out": "sub24_sum"
        }
        conv5_4_interp, sub24_sum_relu = cascade_feature_fusion_module(
            f1=conv5_4_bn, f2=conv3_1, c3=128, is_training=is_training,
            names=sub24_cff_names)

        # Do cascade feature fusion for sub12
        sub12_cff_names = {
            "f1_conv": "conv_sub2",
            "f1_bn": "conv_sub2_bn",
            "f2_conv": "conv3_sub1_proj",
            "f2_bn": "conv3_sub1_proj_bn",
            "out": "sub12_sum"
        }
        sub24_sum_interp, sub12_sum_relu = cascade_feature_fusion_module(
            f1=sub24_sum_relu, f2=conv3_sub1_relu, c3=128, is_training=is_training,
            names=sub12_cff_names)

        # Get the sub outputs to use in cascade label guidance
        low_res_logits = tf.layers.conv2d(conv5_4_interp, kernel_size=1,
                                          filters=self.num_classes, strides=1,
                                          name="sub4_out")
        med_res_logits = tf.layers.conv2d(sub24_sum_interp, kernel_size=1,
                                          filters=self.num_classes, strides=1,
                                          name="sub24_out")

        # Interpolate to output feature map size (1/4 input) and project to
        # the final number of classes to get logits.
        output_shape = (self.input_shape[0] // 4, self.input_shape[1] // 4)
        sub12_sum_interp = tf.image.resize_bilinear(sub12_sum_relu, output_shape,
                                                    name="sub12_sum_interp")
        conv6_cls = tf.layers.conv2d(sub12_sum_interp, kernel_size=1,
                                     filters=self.num_classes, strides=1,
                                     name="conv6_cls")
        high_res_logits = conv6_cls

        # Upscale the logits and decode the prediction to get the final result.
        logits_up = tf.image.resize_bilinear(high_res_logits, size=self.input_shape,
                                             align_corners=True)
        logits_up_cropped = tf.image.crop_to_bounding_box(logits_up, 0, 0,
                                                          self.input_shape[0],
                                                          self.input_shape[1])

        # Create output node for evaluation
        raw_predict = tf.argmax(logits_up, axis=3)
        predict = tf.expand_dims(raw_predict, axis=3, name="predict")

        # Create output node for inference
        output_classes = tf.argmax(logits_up_cropped, axis=3)
        output = decode_labels(output_classes, self.input_shape, self.num_classes)

    return low_res_logits, med_res_logits, high_res_logits, predict, output
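# The ICNet graph above pads explicitly with zero_padding before 'valid'
# convolutions and pooling. A minimal sketch, assuming symmetric spatial
# padding of an NHWC tensor; the repo's helper may also support other layouts:
import tensorflow as tf

def zero_padding(inputs, paddings):
    # Pad height and width by `paddings` pixels on every side; the batch and
    # channel dimensions are left untouched.
    pad = [[0, 0], [paddings, paddings], [paddings, paddings], [0, 0]]
    return tf.pad(inputs, paddings=pad, name='padding')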
def _train_epoch(self, epoch):
    self.model.train()
    epoch_start = time.time()
    batch_start = time.time()
    train_loss = 0.
    running_metric_melons = runningScore(3)
    lr = self.optimizer.param_groups[0]['lr']

    for i, batch in enumerate(self.train_loader):
        if i >= self.train_loader_len:
            break
        self.global_step += 1
        lr = self.optimizer.param_groups[0]['lr']

        # Move batch tensors to the GPU.
        for key, value in batch.items():
            if value is not None:
                if isinstance(value, torch.Tensor):
                    batch[key] = value.to(self.device)
        cur_batch_size = batch['img'].size()[0]

        self.optimizer.zero_grad()
        preds = self.model(batch['img'])
        loss_dict = self.criterion(preds, batch)

        # backward
        if isinstance(preds, tuple):
            preds = preds[0]
        reduce_loss = self.all_reduce_tensor(loss_dict['loss'])
        # Enable anomaly detection while computing gradients.
        with torch.autograd.detect_anomaly():
            loss_dict['loss'].backward()
        self.optimizer.step()
        if self.config['lr_scheduler']['type'] == 'WarmupPolyLR':
            self.scheduler.step()

        # acc / iou
        target = batch['label']
        h, w = target.size(1), target.size(2)
        scale_pred = F.interpolate(input=preds, size=(h, w), mode='bilinear',
                                   align_corners=True)
        label_preds = torch.argmax(scale_pred, dim=1)
        running_metric_melons.update(target.data.cpu().numpy(),
                                     label_preds.data.cpu().numpy())
        score_, _ = running_metric_melons.get_scores()

        # Record loss and acc to the log.
        loss_str = 'loss: {:.4f}, '.format(reduce_loss.item())
        for idx, (key, value) in enumerate(loss_dict.items()):
            loss_dict[key] = value.item()
            if key == 'loss':
                continue
            loss_str += '{}: {:.4f}'.format(key, loss_dict[key])
            if idx < len(loss_dict) - 1:
                loss_str += ', '
        train_loss += loss_dict['loss']

        acc = score_['Mean Acc']
        iou_Mean_map = score_['Mean IoU']

        if self.global_step % self.log_iter == 0:
            batch_time = time.time() - batch_start
            self.logger_info(
                '[{}/{}], [{}/{}], global_step: {}, speed: {:.1f} samples/sec, '
                'acc: {:.4f}, iou_Mean_map: {:.4f}, {}, lr:{:.6}, time:{:.2f}'.format(
                    epoch, self.epochs, i + 1, self.train_loader_len, self.global_step,
                    self.log_iter * cur_batch_size / batch_time, acc, iou_Mean_map,
                    loss_str, lr, batch_time))
            batch_start = time.time()

        if self.tensorboard_enable and self.config['local_rank'] == 0:
            # write tensorboard
            for key, value in loss_dict.items():
                self.writer.add_scalar('TRAIN/LOSS/{}'.format(key), value, self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/acc', acc, self.global_step)
            self.writer.add_scalar('TRAIN/ACC_IOU/iou_Mean_map', iou_Mean_map, self.global_step)
            self.writer.add_scalar('TRAIN/lr', lr, self.global_step)
            if self.global_step % self.show_images_iter == 0:
                # show images on tensorboard
                self.inverse_normalize(batch['img'])
                preds_colors = decode_predictions(preds, cur_batch_size, 3)
                self.writer.add_images('TRAIN/imgs', batch['img'][0].unsqueeze(0),
                                       self.global_step)
                target = batch['label']  # (8, 256, 320, 3)
                targets_colors = decode_labels(target, cur_batch_size, 3)
                self.writer.add_image('TRAIN/labels', targets_colors[0], self.global_step,
                                      dataformats='HWC')
                self.writer.add_image('TRAIN/preds', preds_colors[0], self.global_step,
                                      dataformats='HWC')

    return {
        'train_loss': train_loss / self.train_loader_len,
        'lr': lr,
        'time': time.time() - epoch_start,
        'epoch': epoch,
        'MeanIoU': iou_Mean_map
    }
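# _train_epoch steps a WarmupPolyLR scheduler once per iteration. A minimal
# sketch of such a schedule built on PyTorch's LambdaLR, assuming a linear
# warmup followed by the usual poly decay (1 - step/max_steps) ** power; the
# actual scheduler class used by this repo may differ, and warmup_poly_lr is a
# hypothetical helper name:
import torch

def warmup_poly_lr(optimizer, max_steps, warmup_steps=1000, power=0.9):
    def factor(step):
        if step < warmup_steps:
            return (step + 1) / warmup_steps  # linear warmup toward the base lr
        progress = (step - warmup_steps) / max(1, max_steps - warmup_steps)
        return (1 - progress) ** power  # poly decay toward zero
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=factor)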
def test(test_loader, net, criterion, epoch, showall=False):
    cnn0_loss, cnn0_accs, cnn0_mIoUs, cnn0_acc_clss, cnn0_fscore = \
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    cnn1_loss, cnn1_accs, cnn1_mIoUs, cnn1_acc_clss, cnn1_fscore = \
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    cnn2_loss, cnn2_accs, cnn2_mIoUs, cnn2_acc_clss, cnn2_fscore = \
        AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()

    # switch to evaluation mode
    net.eval()

    start_time = time.time()
    for batch_idx, (datas, targets) in enumerate(test_loader):
        if args.cuda:
            datas = datas.cuda()
        datas = Variable(datas, volatile=True)

        # compute output
        scores = net(datas)
        multi_targets = combine_label(targets, COMB_DICTs)
        multi_targets_tensor = torch.from_numpy(multi_targets).long()
        if args.cuda:
            multi_targets_tensor = multi_targets_tensor.cuda()

        testlosses = []
        for i, score in enumerate(scores):
            targets_i = Variable(multi_targets_tensor[i, :, :, :])
            testlosses.append(criterion(score, targets_i))
        testloss = sum(testlosses)

        # measure accuracy and record loss
        preds = []
        for score in scores:
            p = score.data.max(1)[1]
            preds.append(p)

        for i, lbl_pred in enumerate(preds):
            lbl_pred = lbl_pred.cpu().numpy()[:, :, :]  # (n_batch, h, w)
            lbl_true = multi_targets[i, :, :, :]
            acc, acc_cls, mIoU, fscore = label_accuracy_score(lbl_true, lbl_pred,
                                                              n_class=NUM_CLASSES[i])
            locals()['cnn%d_loss' % (i)].update(testlosses[i].data[0], datas.size(0))
            locals()['cnn%d_accs' % (i)].update(acc, datas.size(0))
            locals()['cnn%d_acc_clss' % (i)].update(acc_cls, datas.size(0))
            locals()['cnn%d_mIoUs' % (i)].update(mIoU, datas.size(0))
            locals()['cnn%d_fscore' % (i)].update(fscore, datas.size(0))

        if showall:
            trues = decode_labels(targets, num_images=len(targets))
            for i, t in enumerate(trues):
                Image.fromarray(t).save('runs/{}_{}/results/{}_{}_gt.png'.format(
                    args.name, args.dataset, batch_idx, i))
                Image.fromarray(
                    (unNormalize(datas.data).transpose(1, 2).transpose(2, 3)
                     .cpu().numpy()[i] * 255).astype(np.uint8)).save(
                    'runs/{}_{}/results/{}_{}_img.png'.format(
                        args.name, args.dataset, batch_idx, i))
            lbl_pred = preds[2]
            pred = decode_labels(lbl_pred, num_images=len(lbl_pred))
            for i, p in enumerate(pred):
                Image.fromarray(p).save('runs/{}_{}/results/{}_{}_pred.png'.format(
                    args.name, args.dataset, batch_idx, i))

        if showall and args.visdom:
            plot_images(datas, [p.cpu().numpy() for p in preds], multi_targets, epoch,
                        split='test', crop_size=map(int, args.input_size.split(',')))

    duration = time.time() - start_time
    print('\nTest set: Loss: {:.4f}, Acc: {:.2f}%, mIoU: {:.4f}, Acc_cls: {:.2f}%, '
          'f-score: {:.2f}% ({:.3f} sec)\n'.format(
              cnn2_loss.avg, 100. * cnn2_accs.avg, cnn2_mIoUs.avg,
              100 * cnn2_acc_clss.avg, 100 * cnn2_fscore.avg, duration))

    if args.visdom:
        for i in range(3):
            plotter.plot('cnn%d_acc' % (i), 'test', epoch,
                         locals()['cnn%d_accs' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_loss' % (i), 'test', epoch,
                         locals()['cnn%d_loss' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_mIoU' % (i), 'test', epoch,
                         locals()['cnn%d_mIoUs' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_acc_cls' % (i), 'test', epoch,
                         locals()['cnn%d_acc_clss' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_fscore' % (i), 'test', epoch,
                         locals()['cnn%d_fscore' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)

        # plot images in a grid
        if epoch == 1 or epoch % 10 == 0:
            plot_images(datas, [p.cpu().numpy() for p in preds], multi_targets, epoch,
                        split='test', crop_size=map(int, args.input_size.split(',')))

    return cnn2_accs.avg, cnn2_mIoUs.avg
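# test() tracks its statistics with AverageMeter objects. A minimal sketch of
# that standard helper (running sum / count), as popularized by the PyTorch
# ImageNet example; this repo's version is assumed to behave the same way:
class AverageMeter(object):
    """Computes and stores the current value and the running average."""

    def __init__(self):
        self.val = 0.0    # most recent value
        self.sum = 0.0    # weighted sum of all values seen so far
        self.count = 0    # total weight (e.g. number of samples)
        self.avg = 0.0    # running average

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count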