def get_train_utils(opt, model_parameters):
    """Assemble everything needed to run training.

    Builds the spatial/temporal transform pipelines, the training dataset
    and loader (with a DistributedSampler when distributed), the epoch and
    batch loggers (master node only), the SGD optimizer, and the LR
    scheduler.

    Returns:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) -- the two loggers are None on non-master
        nodes; train_sampler is None when not distributed.
    """
    assert opt.train_crop in ['random', 'corner', 'center']

    # ---- spatial transform pipeline ------------------------------------
    spatial = []
    if opt.train_crop == 'random':
        spatial.append(
            RandomResizedCrop(
                opt.sample_size,
                (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        # Five crop scales, geometrically spaced by a factor of 2**(-1/4).
        shrink = 1 / (2 ** (1 / 4))
        scales = [1.0]
        for _ in range(4):
            scales.append(scales[-1] * shrink)
        spatial.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial.extend([Resize(opt.sample_size), CenterCrop(opt.sample_size)])

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial.append(RandomHorizontalFlip())
    if opt.colorjitter:
        spatial.append(ColorJitter())
    spatial.append(ToTensor())
    if opt.input_type == 'flow':
        # Optical-flow input keeps only its first two channels.
        spatial.append(PickFirstChannels(n=2))
    spatial.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial)

    # ---- temporal transform pipeline -----------------------------------
    assert opt.train_t_crop in ['random', 'center']
    temporal = []
    if opt.sample_t_stride > 1:
        temporal.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal)

    # ---- dataset, sampler, loader --------------------------------------
    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=opt.batch_size,
        # The distributed sampler already shuffles; never both.
        shuffle=(train_sampler is None),
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=train_sampler,
        worker_init_fn=worker_init_fn)

    # ---- logging (only on the master node) -----------------------------
    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    else:
        train_logger = None
        train_batch_logger = None

    # ---- optimizer and LR scheduler ------------------------------------
    # Nesterov momentum requires zero dampening.
    dampening = 0 if opt.nesterov else opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    # Plateau scheduling needs validation losses to step on.
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
def get_train_utils(opt, model_parameters):
    """Assemble the PaddlePaddle training pipeline.

    Builds the spatial/temporal transforms, wraps the dataset reader with
    paddle.batch, creates the epoch/batch loggers, the LR scheduler, and a
    Momentum optimizer with L2 weight decay.

    Returns:
        (train_loader, train_logger, train_batch_logger, optimizer,
         scheduler)
    """
    assert opt.train_crop in ['random', 'corner', 'center']

    # ---- spatial transform pipeline ------------------------------------
    spatial = []
    if opt.train_crop == 'random':
        spatial.append(
            RandomResizedCrop(
                opt.sample_size,
                (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        # Five crop scales, geometrically spaced by a factor of 2**(-1/4).
        shrink = 1 / (2 ** (1 / 4))
        scales = [1.0]
        for _ in range(4):
            scales.append(scales[-1] * shrink)
        spatial.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial.extend([Resize(opt.sample_size), CenterCrop(opt.sample_size)])

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial.append(RandomHorizontalFlip())
    spatial.append(ToArray())
    if opt.colorjitter:
        spatial.append(ColorJitter())
    if opt.input_type == 'flow':
        # Optical-flow input keeps only its first two channels.
        spatial.append(PickFirstChannels(n=2))
    spatial.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial)

    # ---- temporal transform pipeline -----------------------------------
    assert opt.train_t_crop in ['random', 'center']
    temporal = []
    if opt.sample_t_stride > 1:
        temporal.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal)

    # ---- dataset, loader, logging --------------------------------------
    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    train_loader = paddle.batch(train_data.reader, batch_size=opt.batch_size)
    train_logger = Logger(opt.result_path / 'train.log',
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        opt.result_path / 'train_batch.log',
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    # ---- LR scheduler and optimizer ------------------------------------
    assert opt.lr_scheduler in ['plateau', 'multistep']
    # Plateau scheduling needs validation losses to step on.
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(learning_rate=opt.learning_rate,
                                      mode='min',
                                      patience=opt.plateau_patience)
    else:
        scheduler = MultiStepDecay(learning_rate=opt.learning_rate,
                                   milestones=opt.multistep_milestones)
    # The scheduler object itself is passed as the learning rate.
    optimizer = fluid.optimizer.MomentumOptimizer(
        learning_rate=scheduler,
        momentum=opt.momentum,
        parameter_list=model_parameters,
        use_nesterov=opt.nesterov,
        regularization=fluid.regularizer.L2Decay(
            regularization_coeff=opt.weight_decay))

    return (train_loader, train_logger, train_batch_logger, optimizer,
            scheduler)
def main_worker():
    """Train a FasterRCNN relationship-detection model end to end.

    Seeds RNGs, builds train/val loaders, groups parameters into
    backbone/predicate-branch x bias/non-bias groups with per-group LR and
    weight decay, then runs an iteration-based training loop with periodic
    validation, checkpointing, and metric logging.
    """
    # Fix all RNG seeds for reproducibility.
    seed = 1
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    opt = parse_opts()
    train_data = get_training_data(cfg)
    val_data = get_validation_data(cfg)
    train_loader = DataLoader(train_data, num_workers=opt.num_workers,
                              collate_fn=collater, batch_size=opt.batch_size,
                              shuffle=True)
    val_loader = DataLoader(val_data, num_workers=opt.num_workers,
                            collate_fn=collater, batch_size=opt.batch_size,
                            shuffle=True)
    print(f"Training dataset size : {len(train_loader.dataset)}")
    print(f"Validation dataset size : {len(val_loader.dataset)}")
    # Manual iterator: the loop below is iteration-based, not epoch-based,
    # and re-creates this iterator whenever the loader is exhausted.
    dataiterator = iter(train_loader)
    faster_rcnn = FasterRCNN()
    # if torch.cuda.device_count() > 1 and opt.multi_gpu :
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     faster_rcnn = nn.DataParallel(faster_rcnn)
    # loading model from a ckpt
    if opt.weight_path:
        load_from_ckpt(opt, faster_rcnn)
    faster_rcnn.to(cfg.DEVICE)
    # Command-line options override config values when provided.
    if opt.lr is not None:
        cfg.TRAIN.LEARNING_RATE = opt.lr
    lr = cfg.TRAIN.LEARNING_RATE
    print(f"Learning rate : {lr}")
    if opt.weight_decay is not None:
        cfg.TRAIN.WEIGHT_DECAY = opt.weight_decay
    print(f"Weight Decay : {cfg.TRAIN.WEIGHT_DECAY}")

    ### Optimizer ###
    # record backbone params, i.e., conv_body and box_head params
    # Partition trainable parameters into four groups: backbone vs
    # predicate-branch, and within each, bias vs non-bias (biases may get a
    # doubled LR and optionally no weight decay, per Detectron convention).
    backbone_bias_params = []
    backbone_bias_param_names = []
    prd_branch_bias_params = []
    prd_branch_bias_param_names = []
    backbone_nonbias_params = []
    backbone_nonbias_param_names = []
    prd_branch_nonbias_params = []
    prd_branch_nonbias_param_names = []
    for key, value in dict(faster_rcnn.named_parameters()).items():
        if value.requires_grad:
            # Detection backbone: FPN, box head/predictor, RPN.
            if 'fpn' in key or 'box_head' in key or 'box_predictor' in key or 'rpn' in key:
                if 'bias' in key:
                    backbone_bias_params.append(value)
                    backbone_bias_param_names.append(key)
                else:
                    backbone_nonbias_params.append(value)
                    backbone_nonbias_param_names.append(key)
            else:
                # Everything else belongs to the predicate (relationship)
                # branch.
                if 'bias' in key:
                    prd_branch_bias_params.append(value)
                    prd_branch_bias_param_names.append(key)
                else:
                    prd_branch_nonbias_params.append(value)
                    prd_branch_nonbias_param_names.append(key)
    params = [
        {
            'params': backbone_nonbias_params,
            'lr': cfg.TRAIN.LEARNING_RATE,
            'weight_decay': cfg.TRAIN.WEIGHT_DECAY
        },
        {
            'params': backbone_bias_params,
            # DOUBLE_BIAS doubles the bias LR when set (factor 2), else 1.
            'lr': cfg.TRAIN.LEARNING_RATE * (cfg.TRAIN.DOUBLE_BIAS + 1),
            'weight_decay': cfg.TRAIN.WEIGHT_DECAY if cfg.TRAIN.BIAS_DECAY else 0
        },
        {
            'params': prd_branch_nonbias_params,
            'lr': cfg.TRAIN.LEARNING_RATE,
            'weight_decay': cfg.TRAIN.WEIGHT_DECAY
        },
        {
            'params': prd_branch_bias_params,
            'lr': cfg.TRAIN.LEARNING_RATE * (cfg.TRAIN.DOUBLE_BIAS + 1),
            'weight_decay': cfg.TRAIN.WEIGHT_DECAY if cfg.TRAIN.BIAS_DECAY else 0
        },
    ]
    # NOTE(review): an unrecognized cfg.TRAIN.TYPE / opt.scheduler leaves
    # `optimizer` / `scheduler` undefined and fails later with NameError.
    if cfg.TRAIN.TYPE == "ADAM":
        optimizer = torch.optim.Adam(params)
    elif cfg.TRAIN.TYPE == "SGD":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    # scheduler
    if opt.scheduler == "plateau":
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                                   patience=5)
    elif opt.scheduler == "multi_step":
        # Milestones are iteration counts, matching the step-based loop.
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[83631, 111508])
    elif opt.scheduler == "step_lr":
        scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1,
                                        last_epoch=-1)
    # Resuming: restores optimizer/scheduler state and the start iteration.
    if opt.weight_path:
        opt.begin_iter = load_train_utils(opt, optimizer, scheduler)

    # lr of non-backbone parameters, for commmand line outputs.
    lr = optimizer.param_groups[0]['lr']
    # lr of backbone parameters, for commmand line outputs.
    # backbone_lr = optimizer.param_groups[0]['lr']

    summary_writer = Metrics(log_dir='tf_logs')
    losses_sbj = AverageMeter('Sbj loss: ', ':.2f')
    losses_obj = AverageMeter('Obj loss: ', ':.2f')
    losses_rel = AverageMeter('Rel loss: ', ':.2f')
    losses_total = AverageMeter('Total loss: ', ':.2f')
    progress = ProgressMeter(
        [losses_sbj, losses_obj, losses_rel, losses_total], prefix='Train: ')
    faster_rcnn.train()
    # Best-loss threshold for the (currently disabled) conditional save below.
    th = 10000
    for step in range(opt.begin_iter, opt.max_iter):
        # Pull the next batch, restarting the loader when exhausted.
        try:
            input_data = next(dataiterator)
        except StopIteration:
            dataiterator = iter(train_loader)
            input_data = next(dataiterator)
        images, targets = input_data
        _, metrics = faster_rcnn(images, targets)
        # Total loss: RPN + box head + subject/object/relationship branches.
        final_loss = metrics["loss_objectness"] + metrics["loss_rpn_box_reg"] + \
            metrics["loss_classifier"] + metrics["loss_box_reg"] + \
            metrics["loss_sbj"] + metrics["loss_obj"] + metrics["loss_rlp"]
        optimizer.zero_grad()
        final_loss.backward()
        optimizer.step()
        losses_sbj.update(metrics["loss_sbj"].item(), len(images))
        losses_obj.update(metrics["loss_obj"].item(), len(images))
        losses_rel.update(metrics["loss_rlp"].item(), len(images))
        losses_total.update(final_loss.item(), len(images))
        # Plateau scheduler steps on validation loss below, not per
        # iteration.
        if opt.scheduler != "plateau":
            scheduler.step()
        if (step) % 10 == 0:
            progress.display(step)
        # Periodic validation + checkpoint every 2500 iterations.
        if step % 2500 == 0:
            train_losses = {}
            train_losses['total_loss'] = losses_total.avg
            train_losses['sbj_loss'] = losses_sbj.avg
            train_losses['obj_loss'] = losses_obj.avg
            train_losses['rel_loss'] = losses_rel.avg
            val_losses = val_epoch(faster_rcnn, val_loader)
            if opt.scheduler == "plateau":
                scheduler.step(val_losses['total_loss'])
            lr = optimizer.param_groups[0]['lr']
            # if val_losses['total_loss'] < th:
            #     save_model(faster_rcnn, optimizer, scheduler, step)
            #     print(f"*** Saved model ***")
            #     th = val_losses['total_loss']
            save_model(faster_rcnn, optimizer, scheduler, step)
            # write summary
            summary_writer.log_metrics(train_losses, val_losses, step, lr)
            print(
                f"* Average training loss : {train_losses['total_loss']:.3f}")
            print(
                f"* Average validation loss : {val_losses['total_loss']:.3f}")
            # Reset running averages for the next 2500-iteration window.
            losses_sbj.reset()
            losses_obj.reset()
            losses_rel.reset()
            losses_total.reset()
            # val_epoch switches the model to eval mode; restore training
            # mode.
            faster_rcnn.train()
from datasets.vrd import collater
from opts import parse_opts

# Accumulators, presumably for a dataset mean/std computation — they are not
# updated anywhere in this chunk; TODO confirm against the rest of the file.
mean = 0.
std = 0.
nb_samples = 0.

# Fix all RNG seeds for reproducibility.
seed = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

opt = parse_opts()
train_data = get_training_data(cfg)
val_data = get_validation_data(cfg)
# batch_size=1: each sample is processed individually here.
train_loader = DataLoader(
    train_data,
    num_workers=opt.num_workers,
    collate_fn=collater,
    batch_size=1,
    shuffle=True)


def _resize_image_and_masks(image, self_min_size=800, self_max_size=1333):
    """Rescale `image` so its shorter spatial side is `self_min_size`.

    If that scale would push the longer side past `self_max_size`, scale by
    the longer side instead so the image never exceeds the cap. Uses
    bilinear interpolation over the last two (H, W) dimensions.

    NOTE(review): despite the name, no masks are resized here — only the
    image tensor is returned.
    """
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))
    max_size = float(torch.max(im_shape))
    scale_factor = self_min_size / min_size
    # Clamp the scale when the long side would exceed the maximum.
    if max_size * scale_factor > self_max_size:
        scale_factor = self_max_size / max_size
    # Add a leading batch dim for interpolate, then strip it off again.
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode='bilinear',
        recompute_scale_factor=True, align_corners=False)[0]
    return image
def run_training(batch_size, learning_rate, epochs, run_number):
    """Train the model for `epochs` steps in a fresh TF1 graph/session.

    Each step trains over the full training set in mini-batches, prints the
    loss every other step, evaluates on the validation set every 5 steps,
    and on the final step (or early stop) also evaluates on the training and
    test sets and saves a checkpoint.
    """
    with tf.Graph().as_default():
        # Build the graph: inputs, model, loss, train op, and eval metric.
        images_placeholder, labels_placeholder = placeholder_inputs(batch_size)
        logits = define_model(images_placeholder)
        lossFunction = define_loss(logits, labels_placeholder)
        train_op = training(lossFunction, learning_rate)
        eval_correct = evaluation(logits, labels_placeholder)
        # summary = tf.summary.merge_all()
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            #session = tf_debug.LocalCLIDebugWrapperSession(session)
            logdir = "log/" + str(run_number)
            # summary_writer = tf.summary.FileWriter(logdir, session.graph)
            session.run(init)
            # NOTE(review): each "step" is a full pass over the training
            # data, i.e. an epoch.
            for step in range(epochs):
                training_data, training_labels = dataset.get_training_data()
                # Slice the data into consecutive mini-batches.
                batches = [(training_data[i:i + batch_size],
                            training_labels[i:i + batch_size])
                           for i in range(0, len(training_data), batch_size)]
                epochLoss = 0
                for batch in batches:
                    image_data = batch[0]
                    label_data = batch[1]
                    feed_dict = {
                        images_placeholder: image_data,
                        labels_placeholder: label_data
                    }
                    activations, loss_value = session.run(
                        [train_op, lossFunction], feed_dict=feed_dict)
                    epochLoss += loss_value
                # Report progress every other step. NOTE(review): this
                # prints the last batch's loss, not the accumulated
                # epochLoss (see the commented-out line).
                if step % 2 == 0:
                    #print('Step %d: loss = %.2f' % (step, epochLoss))
                    print('Step %d: loss = %.2f' % (step, loss_value))
                    sys.stdout.flush()
                    # summary_str = session.run(summary, feed_dict=feed_dict)
                    # summary_writer.add_summary(summary_str, step)
                    # summary_writer.flush()
                early_stop = False
                # Validate every 5 steps and on the last step; evaluation
                # may request an early stop.
                if (step + 1) % 5 == 0 or (step + 1) == epochs:
                    validation_data = dataset.get_validation_data(batch_size)
                    print("Doing evaluation on validation Set")
                    sys.stdout.flush()
                    early_stop = do_evaluation(session, eval_correct,
                                               validation_data, batch_size,
                                               images_placeholder,
                                               labels_placeholder)
                # Final (or early-stop) evaluation on train and test sets,
                # then checkpoint.
                if (step + 1) == epochs or early_stop:
                    print("Doing evaluation on training set")
                    sys.stdout.flush()
                    do_evaluation(session, eval_correct,
                                  (training_data, training_labels),
                                  batch_size, images_placeholder,
                                  labels_placeholder)
                    print("Doing evaluation on the test set")
                    sys.stdout.flush()
                    test_data = dataset.get_test_data(batch_size)
                    do_evaluation(session, eval_correct, test_data,
                                  batch_size, images_placeholder,
                                  labels_placeholder)
                    saver.save(session, "model.ckpt")
                    if (early_stop):
                        print("Achieved desired precision at step %d" % step)
                        return