def train(args, train_loader, model, criterion, optimizer, epoch, scheduler=None):
    losses = AverageMeter()
    ious = AverageMeter()
    dices_1s = AverageMeter()
    dices_2s = AverageMeter()

    model.train()

    for i, (input, target) in tqdm(enumerate(train_loader), total=len(train_loader)):
        input = input.cuda()
        target = target.cuda()

        # compute output
        if args.deepsupervision:
            outputs = model(input)
            loss = 0
            for output in outputs:
                loss += criterion(output, target)
            loss /= len(outputs)
            # score the final (deepest) output
            iou = iou_score(outputs[-1], target)
            dice_1 = dice_coef(outputs[-1], target)[0]
            dice_2 = dice_coef(outputs[-1], target)[1]
        else:
            output = model(input)
            loss = criterion(output, target)
            iou = iou_score(output, target)
            dice_1 = dice_coef(output, target)[0]
            dice_2 = dice_coef(output, target)[1]

        losses.update(loss.item(), input.size(0))
        ious.update(iou, input.size(0))
        dices_1s.update(torch.tensor(dice_1), input.size(0))
        dices_2s.update(torch.tensor(dice_2), input.size(0))

        # compute gradient and do optimizing step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
        ('dice_1', dices_1s.avg),
        ('dice_2', dices_2s.avg),
    ])

    return log
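# `train` relies on an AverageMeter helper for weighted running averages.
# The repo's own implementation is not shown here; the following is a minimal
# sketch consistent with how `.update(value, n)` and `.avg` are used above.
class AverageMeter:
    """Tracks a weighted running average of a scalar metric (minimal sketch)."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # `val` is the per-batch value, `n` the batch size used as its weight
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count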
def compute_ious(df, max_dist=10):
    """
    Computes ious between boxes. If boxes are too far apart, the iou is set to 0.

    Args:
        df (pandas DataFrame): Predicted boxes.
        max_dist (int, optional): Maximum frame distance to compute iou for. Defaults to 10.

    Returns:
        np array [len(df) x len(df)]: ious between boxes.
    """
    ious = np.zeros((len(df), len(df)))

    for i in range(len(df)):
        for j in range(len(df)):
            frames = df["frame"].values[[i, j]]
            if np.abs(frames[0] - frames[1]) > max_dist:
                continue

            # Boxes are stored either as (left, width, top, height) or (x, w, y, h)
            try:
                boxes = df[["left", "width", "top", "height"]].values[[i, j]]
            except KeyError:
                boxes = df[["x", "w", "y", "h"]].values[[i, j]]

            # Convert to (x1, y1, x2, y2)
            boxes[:, 1] += boxes[:, 0]
            boxes[:, 3] += boxes[:, 2]
            boxes = boxes[:, [0, 2, 1, 3]]

            iou = iou_score(boxes[0], boxes[1])
            ious[i, j] = iou
            ious[j, i] = iou

    return ious
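# `compute_ious` hands single boxes to `iou_score` in (x1, y1, x2, y2) order.
# The repo's `iou_score` is not shown here (elsewhere in this section the same
# name is used for mask IoU); the following is a minimal sketch of a pairwise
# box IoU under that coordinate convention.
import numpy as np


def box_iou(box_a, box_b):
    """IoU of two boxes given as (x1, y1, x2, y2). Minimal sketch."""
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])

    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter

    return inter / union if union > 0 else 0.0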
def validate(args, val_loader, model, criterion):
    losses = AverageMeter()
    ious = AverageMeter()
    dices_1s = AverageMeter()
    dices_2s = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (input, target) in tqdm(enumerate(val_loader), total=len(val_loader)):
            input = input.cuda()
            target = target.cuda()

            # compute output
            if args.deepsupervision:
                outputs = model(input)
                loss = 0
                for output in outputs:
                    loss += criterion(output, target)
                loss /= len(outputs)
                # score the final (deepest) output
                iou = iou_score(outputs[-1], target)
                dice_1 = dice_coef(outputs[-1], target)[0]
                dice_2 = dice_coef(outputs[-1], target)[1]
            else:
                output = model(input)
                loss = criterion(output, target)
                iou = iou_score(output, target)
                dice_1 = dice_coef(output, target)[0]
                dice_2 = dice_coef(output, target)[1]

            losses.update(loss.item(), input.size(0))
            ious.update(iou, input.size(0))
            dices_1s.update(torch.tensor(dice_1), input.size(0))
            dices_2s.update(torch.tensor(dice_2), input.size(0))

    log = OrderedDict([
        ('loss', losses.avg),
        ('iou', ious.avg),
        ('dice_1', dices_1s.avg),
        ('dice_2', dices_2s.avg),
    ])

    return log
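# Both loops index `dice_coef(output, target)` with [0] and [1], i.e. one Dice
# value per class channel. The repo's implementation is not shown; below is a
# minimal per-channel Dice sketch under that assumption (the sigmoid activation
# and smoothing constant are assumptions, not taken from the original code).
import torch


def dice_coef(output, target, smooth=1e-5):
    """Per-channel Dice for (N, C, H, W) logits vs. targets (minimal sketch)."""
    probs = torch.sigmoid(output)
    dices = []
    for c in range(probs.shape[1]):
        p = probs[:, c].reshape(-1)
        t = target[:, c].reshape(-1).float()
        intersection = (p * t).sum()
        dices.append(((2.0 * intersection + smooth) /
                      (p.sum() + t.sum() + smooth)).item())
    return dices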
def __call__(self, features, labels, mode, params):

    if "debug_verbosity" not in params.keys():
        raise RuntimeError("Parameter `debug_verbosity` is missing...")

    if mode == tf.estimator.ModeKeys.TRAIN:
        if "rmsprop_decay" not in params.keys():
            raise RuntimeError("Parameter `rmsprop_decay` is missing...")
        if "rmsprop_momentum" not in params.keys():
            raise RuntimeError("Parameter `rmsprop_momentum` is missing...")
        if "learning_rate" not in params.keys():
            raise RuntimeError("Parameter `learning_rate` is missing...")
        if "learning_rate_decay_steps" not in params.keys():
            raise RuntimeError("Parameter `learning_rate_decay_steps` is missing...")
        if "learning_rate_decay_factor" not in params.keys():
            raise RuntimeError("Parameter `learning_rate_decay_factor` is missing...")
        if "weight_decay" not in params.keys():
            raise RuntimeError("Parameter `weight_decay` is missing...")
        if "loss_fn_name" not in params.keys():
            raise RuntimeError("Parameter `loss_fn_name` is missing...")

    if mode == tf.estimator.ModeKeys.PREDICT:
        y_pred, y_pred_logits = self.build_model(
            features,
            training=False,
            reuse=False,
            debug_verbosity=params["debug_verbosity"])

        predictions = {'logits': y_pred}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    input_image, mask_image = features

    with tf.device("/gpu:0"):
        tf.identity(input_image, name="input_image_ref")
        tf.identity(mask_image, name="mask_image_ref")
        tf.identity(labels, name="labels_ref")

        y_pred, y_pred_logits = self.build_model(
            input_image,
            training=mode == tf.estimator.ModeKeys.TRAIN,
            reuse=False,
            debug_verbosity=params["debug_verbosity"])

        all_trainable_vars = tf.reduce_sum(
            [tf.reduce_prod(v.shape) for v in tf.trainable_variables()])
        tf.identity(all_trainable_vars, name='trainable_parameters_count_ref')

        if mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = dict()

        # ==================== Samples ==================== #

        image_uint8 = tf.cast((input_image + 1) * 127.5, dtype=tf.uint8)
        input_image_jpeg = tf.image.encode_jpeg(
            image_uint8[0], format='grayscale', quality=100)
        tf.identity(input_image_jpeg, name="input_image_jpeg_ref")

        for threshold in [None, 0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99]:
            binarize_img, binarize_img_jpeg = image_processing.binarize_output(
                y_pred[0], threshold=threshold)

            tf.identity(binarize_img_jpeg, name="output_sample_ths_%s_ref" % threshold)
            tf.summary.image('output_sample_ths_%s' % threshold, binarize_img, 10)

        # ==================== Evaluation Metrics ==================== #

        with tf.name_scope("IoU_Metrics"):
            for threshold in [0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99]:
                iou_score = metrics.iou_score(
                    y_pred=y_pred, y_true=mask_image, threshold=threshold)

                tf.identity(iou_score, name='iou_score_ths_%s_ref' % threshold)
                tf.summary.scalar('iou_score_ths_%s' % threshold, iou_score)

                if mode == tf.estimator.ModeKeys.EVAL:
                    eval_metrics["IoU_THS_%s" % threshold] = tf.metrics.mean(iou_score)

        labels = tf.cast(labels, tf.float32)
        labels_preds = tf.reduce_max(y_pred, axis=(1, 2, 3))

        # The original code asserted a (tensor, message) tuple, which always
        # passes; verify at graph level that clipping is (nearly) a no-op instead.
        clip_check = tf.debugging.assert_near(
            labels_preds,
            tf.clip_by_value(labels_preds, 0, 1),
            atol=0.00001,
            message="Clipping labels_preds introduces non-trivial loss.")
        with tf.control_dependencies([clip_check]):
            labels_preds = tf.clip_by_value(labels_preds, 0, 1)

        with tf.variable_scope("Confusion_Matrix") as scope:
            thresholds = [0.05, 0.125, 0.25, 0.5, 0.75, 0.85, 0.95, 0.99]

            tp, update_tp = tf.metrics.true_positives_at_thresholds(
                labels=labels, predictions=labels_preds, thresholds=thresholds)

            tn, update_tn = tf.metrics.true_negatives_at_thresholds(
                labels=labels, predictions=labels_preds, thresholds=thresholds)
            fp, update_fp = tf.metrics.false_positives_at_thresholds(
                labels=labels, predictions=labels_preds, thresholds=thresholds)

            fn, update_fn = tf.metrics.false_negatives_at_thresholds(
                labels=labels, predictions=labels_preds, thresholds=thresholds)

            if mode == tf.estimator.ModeKeys.TRAIN:
                local_vars = tf.get_collection(
                    tf.GraphKeys.LOCAL_VARIABLES, scope=scope.name)
                confusion_matrix_reset_op = tf.initializers.variables(
                    local_vars, name='reset_op')

                with tf.control_dependencies([confusion_matrix_reset_op]):
                    with tf.control_dependencies(
                            [update_tp, update_tn, update_fp, update_fn]):
                        tp = tf.identity(tp)
                        tn = tf.identity(tn)
                        fp = tf.identity(fp)
                        fn = tf.identity(fn)

            else:
                eval_metrics["Confusion_Matrix_TP"] = tp, update_tp
                eval_metrics["Confusion_Matrix_TN"] = tn, update_tn
                eval_metrics["Confusion_Matrix_FP"] = fp, update_fp
                eval_metrics["Confusion_Matrix_FN"] = fn, update_fn

            tf.identity(tp, name='true_positives_ref')    # Confusion_Matrix/true_positives_ref:0
            tf.identity(tn, name='true_negatives_ref')    # Confusion_Matrix/true_negatives_ref:0
            tf.identity(fp, name='false_positives_ref')   # Confusion_Matrix/false_positives_ref:0
            tf.identity(fn, name='false_negatives_ref')   # Confusion_Matrix/false_negatives_ref:0

            tf.summary.scalar('true_positives', tp[3])    # For Ths = 0.5
            tf.summary.scalar('true_negatives', tn[3])    # For Ths = 0.5
            tf.summary.scalar('false_positives', fp[3])   # For Ths = 0.5
            tf.summary.scalar('false_negatives', fn[3])   # For Ths = 0.5

        binarized_mask, binarized_mask_jpeg = image_processing.binarize_output(
            mask_image[0], threshold=0.5)
        tf.identity(binarized_mask_jpeg, name="mask_sample_ref")
        tf.summary.image('sample_mask', binarized_mask, 10)

        ##########################

        mask_max_val = tf.reduce_max(mask_image)
        tf.identity(mask_max_val, name='mask_max_val_ref')

        mask_min_val = tf.reduce_min(mask_image)
        tf.identity(mask_min_val, name='mask_min_val_ref')

        mask_mean_val = tf.reduce_mean(mask_image)
        tf.identity(mask_mean_val, name='mask_mean_val_ref')

        mask_std_val = tf.math.reduce_std(mask_image)
        tf.identity(mask_std_val, name='mask_std_val_ref')

        ##########################

        output_max_val = tf.reduce_max(y_pred)
        tf.identity(output_max_val, name='output_max_val_ref')

        output_min_val = tf.reduce_min(y_pred)
        tf.identity(output_min_val, name='output_min_val_ref')

        output_mean_val = tf.reduce_mean(y_pred)
        tf.identity(output_mean_val, name='output_mean_val_ref')

        output_std_val = tf.math.reduce_std(y_pred)
        tf.identity(output_std_val, name='output_std_val_ref')

        with tf.variable_scope("losses"):
            # ==================== Reconstruction Loss ==================== #

            if params["loss_fn_name"] == "x-entropy":
                reconstruction_loss = losses.reconstruction_x_entropy(
                    y_pred=y_pred, y_true=mask_image)

            elif params["loss_fn_name"] == "l2_loss":
                reconstruction_loss = losses.reconstruction_l2loss(
                    y_pred=y_pred, y_true=mask_image)

            elif params["loss_fn_name"] == "dice_sorensen":
                reconstruction_loss = 1 - losses.dice_coe(
                    y_pred=y_pred, y_true=mask_image, loss_type='sorensen')

            elif params["loss_fn_name"] == "dice_jaccard":
                reconstruction_loss = 1 - losses.dice_coe(
                    y_pred=y_pred, y_true=mask_image, loss_type='jaccard')

            elif params["loss_fn_name"] == "adaptive_loss":
                reconstruction_loss = losses.adaptive_loss(
                    y_pred=y_pred,
                    y_pred_logits=y_pred_logits,
                    y_true=mask_image,
                    switch_at_threshold=0.3,
                    loss_type='sorensen')

            else:
                raise ValueError("Unknown loss function received: %s" %
                                 params["loss_fn_name"])

            tf.identity(reconstruction_loss, name='reconstruction_loss_ref')
            tf.summary.scalar('reconstruction_loss', reconstruction_loss)

            if mode == tf.estimator.ModeKeys.TRAIN:
                # ==================== Regularization Loss ==================== #
                l2_loss = losses.regularization_l2loss(
                    weight_decay=params["weight_decay"])

                tf.identity(l2_loss, name='l2_loss_ref')
                tf.summary.scalar('l2_loss', l2_loss)

                total_loss = tf.add(reconstruction_loss, l2_loss, name="total_loss")

            else:
                total_loss = reconstruction_loss

            tf.identity(total_loss, name='total_loss_ref')
            tf.summary.scalar('total_loss', total_loss)

        if mode == tf.estimator.ModeKeys.TRAIN:

            with tf.variable_scope("optimizers"):
                # Update Global Step
                global_step = tf.train.get_or_create_global_step()
                tf.identity(global_step, name="global_step_ref")

                learning_rate = tf.train.exponential_decay(
                    learning_rate=params["learning_rate"],
                    decay_steps=params["learning_rate_decay_steps"],
                    decay_rate=params["learning_rate_decay_factor"],
                    global_step=global_step,
                    staircase=True)

                tf.identity(learning_rate, name="learning_rate_ref")
                tf.summary.scalar('learning_rate_ref', learning_rate)

                opt = tf.train.RMSPropOptimizer(
                    learning_rate=learning_rate,
                    use_locking=False,
                    centered=True,
                    decay=params["rmsprop_decay"],
                    momentum=params["rmsprop_momentum"],
                )

                if hvd_utils.is_using_hvd():
                    # Apply gradient compression using GRACE.
                    from grace_dl.tensorflow.communicator.allgather import Allgather
                    from grace_dl.tensorflow.compressor.topk import TopKCompressor
                    from grace_dl.tensorflow.memory.residual import ResidualMemory

                    world_size = hvd.size()
                    grc = Allgather(TopKCompressor(0.3), ResidualMemory(), world_size)
                    opt = hvd.DistributedOptimizer(opt, grace=grc, device_dense='/gpu:0')

                if params["apply_manual_loss_scaling"]:
                    # if not hvd_utils.is_using_hvd() or hvd.rank() == 0:
                    #     Logger.log("Applying manual Loss Scaling ...")

                    loss_scale_manager = tf.contrib.mixed_precision.ExponentialUpdateLossScaleManager(
                        init_loss_scale=2**32,  # 4,294,967,296
                        incr_every_n_steps=1000)
                    opt = tf.contrib.mixed_precision.LossScaleOptimizer(
                        opt, loss_scale_manager)

                deterministic = True
                gate_gradients = (tf.train.Optimizer.GATE_OP
                                  if deterministic else tf.train.Optimizer.GATE_NONE)

                backprop_op = opt.minimize(total_loss,
                                           gate_gradients=gate_gradients,
                                           global_step=global_step)

                train_op = tf.group(
                    backprop_op, tf.get_collection(tf.GraphKeys.UPDATE_OPS))

                return tf.estimator.EstimatorSpec(
                    mode,
                    loss=total_loss,
                    train_op=train_op,
                )

        elif mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode,
                loss=total_loss,
                eval_metric_ops=eval_metrics,
                predictions={"output": y_pred})

        else:
            raise NotImplementedError('Unknown mode {}'.format(mode))
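# Hypothetical wiring of this model function into a tf.estimator.Estimator.
# Only the `params` keys mirror what `__call__` checks above; the class name
# `UNetModel`, the model_dir and all hyperparameter values are placeholders
# (`apply_manual_loss_scaling` is included because the TRAIN branch reads it
# even though it is not validated at the top).
model_fn = UNetModel()  # placeholder name for the class defining __call__ above

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="/tmp/unet_run",
    params={
        "debug_verbosity": 0,
        "rmsprop_decay": 0.9,
        "rmsprop_momentum": 0.8,
        "learning_rate": 1e-4,
        "learning_rate_decay_steps": 500,
        "learning_rate_decay_factor": 0.8,
        "weight_decay": 1e-4,
        "loss_fn_name": "adaptive_loss",
        "apply_manual_loss_scaling": False,
    },
)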
def train(model, data_loader, criterion, optimizer, scheduler, num_epochs=5, epochs_earlystopping=10):
    logdir = './logs/' + time.strftime("%Y%m%d_%H%M%S")
    pathlib.Path(logdir).mkdir(parents=True, exist_ok=True)
    tb_writer = SummaryWriter(log_dir=logdir)

    best_acc = 0.0
    best_loss = sys.float_info.max
    best_iou = 0.0

    # number of epochs since the last improvement on the validation IoU
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        result = []
        epochs_without_improvement += 1

        for phase in ['train', 'val']:
            if phase == 'train':
                # put the model in training mode
                model.train()
            else:
                # put the model in validation mode
                model.eval()

            # keep track of training and validation loss
            batch_nums = 0
            running_loss = 0.0
            running_iou = 0.0
            running_corrects = 0.0

            for (data, labels) in data_loader[phase]:
                # load the data and target to respective device
                (data, labels) = (data.to(device), labels.to(device))

                with torch.set_grad_enabled(phase == 'train'):
                    # feed the input
                    output = model(data)
                    # calculate the loss
                    loss = criterion(output, labels)

                    if phase == 'train':
                        # backward pass: compute gradient of the loss with respect to model parameters
                        loss.backward()
                        optimizer.step()
                        # zero the grad to stop it from accumulating
                        optimizer.zero_grad()

                # statistics
                batch_nums += 1
                running_loss += loss.item()
                running_iou += iou_score(output, labels)
                running_corrects += multi_acc(output, labels)

            if phase == 'train':
                scheduler.step(running_iou)

            # epoch statistics
            epoch_loss = running_loss / batch_nums
            epoch_iou = running_iou / batch_nums
            epoch_acc = running_corrects / batch_nums

            result.append('{} Loss: {:.4f} Acc: {:.4f} IoU: {:.4f}'.format(
                phase, epoch_loss, epoch_acc, epoch_iou))
            tb_writer.add_scalar('Loss/' + phase, epoch_loss, epoch)
            tb_writer.add_scalar('IoU/' + phase, epoch_iou, epoch)
            tb_writer.add_scalar('Accuracy/' + phase, epoch_acc, epoch)

            if phase == 'val' and epoch_iou > best_iou:
                epochs_without_improvement = 0
                best_acc = epoch_acc
                best_loss = epoch_loss
                best_iou = epoch_iou
                saveCheckpoint(CHECKPOINT_PATH, epoch, model, optimizer, BATCH_SIZE)
                print('Checkpoint saved - Loss: {:.4f} Acc: {:.4f} IoU: {:.4f}'.format(
                    epoch_loss, epoch_acc, epoch_iou))

        print(result)

        # stop if the validation IoU has not improved for `epochs_earlystopping` epochs
        # (the original counter started at `epochs_earlystopping` and compared against a
        # hard-coded 10, which made the check unreachable until after a first improvement)
        if epochs_without_improvement >= epochs_earlystopping:
            break

    print('-----------------------------------------')
    print('Final Result: Loss: {:.4f} Acc: {:.4f}'.format(best_loss, best_acc))
    print('-----------------------------------------')
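# Hypothetical call site for the training loop above. `train_dataset`,
# `val_dataset` and `model` are placeholders; `device`, CHECKPOINT_PATH and
# BATCH_SIZE are assumed to be module-level globals, as the loop uses them.
# A plateau scheduler fits here because `scheduler.step()` receives a metric.
from torch.utils.data import DataLoader

data_loader = {
    'train': DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True),
    'val': DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False),
}

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3)

train(model, data_loader, criterion, optimizer, scheduler,
      num_epochs=50, epochs_earlystopping=10)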
def test_iou_score(self):
    a = np.array([1, 0, 0, 1])
    b = np.array([1, 1, 0, 0])

    iou = metrics.iou_score(a, b)
    expected = 1 / 3

    np.testing.assert_almost_equal(iou, expected, decimal=3)
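# A minimal binary-mask IoU consistent with the test above (intersection 1,
# union 3 for those arrays). The actual `metrics.iou_score` may additionally
# handle thresholds, batches or empty masks.
import numpy as np


def iou_score(y_pred, y_true):
    """Binary-mask IoU: |A ∩ B| / |A ∪ B| (minimal sketch)."""
    y_pred = np.asarray(y_pred).astype(bool)
    y_true = np.asarray(y_true).astype(bool)

    intersection = np.logical_and(y_pred, y_true).sum()
    union = np.logical_or(y_pred, y_true).sum()

    return intersection / union if union > 0 else 0.0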