# Example 1 (PyTorch implementation)
def get_train_utils(opt, model_parameters):
    """Assemble everything needed to train: loader, sampler, loggers,
    optimizer and LR scheduler.

    Args:
        opt: parsed options namespace (crop/normalization settings, paths,
            batch size, optimizer hyper-parameters, ...).
        model_parameters: iterable of parameters handed to the SGD optimizer.

    Returns:
        Tuple ``(train_loader, train_sampler, train_logger,
        train_batch_logger, optimizer, scheduler)``; the loggers are ``None``
        on non-master nodes and the sampler is ``None`` outside distributed
        mode.
    """
    # --- spatial (per-frame) transform pipeline ---
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_ops = []
    if opt.train_crop == 'random':
        crop = RandomResizedCrop(
            opt.sample_size,
            (opt.train_crop_min_scale, 1.0),
            (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio))
        spatial_ops.append(crop)
    elif opt.train_crop == 'corner':
        # Five geometrically shrinking scales: 1, s, s^2, s^3, s^4 with
        # s = 2^(-1/4).
        shrink = 1 / (2 ** (1 / 4))
        scales = [1.0]
        for _ in range(4):
            scales.append(scales[-1] * shrink)
        spatial_ops.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_ops.extend(
            [Resize(opt.sample_size), CenterCrop(opt.sample_size)])
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_ops.append(RandomHorizontalFlip())
    if opt.colorjitter:
        spatial_ops.append(ColorJitter())
    spatial_ops.append(ToTensor())
    if opt.input_type == 'flow':
        # Optical-flow clips carry two meaningful channels only.
        spatial_ops.append(PickFirstChannels(n=2))
    spatial_ops.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_ops)

    # --- temporal (frame-selection) transform pipeline ---
    assert opt.train_t_crop in ['random', 'center']
    temporal_ops = []
    if opt.sample_t_stride > 1:
        temporal_ops.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_ops.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_ops.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_ops)

    # --- dataset, sampler and loader ---
    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    # shuffle and sampler are mutually exclusive in DataLoader.
    train_loader = torch.utils.data.DataLoader(
        train_data,
        batch_size=opt.batch_size,
        shuffle=(train_sampler is None),
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=train_sampler,
        worker_init_fn=worker_init_fn)

    # --- loggers: only the master node writes log files ---
    train_logger = None
    train_batch_logger = None
    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    # --- optimizer: Nesterov momentum requires zero dampening ---
    dampening = 0 if opt.nesterov else opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    # --- LR scheduler ('plateau' needs validation losses to react to) ---
    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
# Example 2 (PaddlePaddle implementation)
def get_train_utils(opt, model_parameters):
    """Build the Paddle training utilities: batched reader, loggers,
    LR scheduler and Momentum optimizer.

    Args:
        opt: parsed options namespace (crop/normalization settings, paths,
            batch size, optimizer hyper-parameters, ...).
        model_parameters: parameter list handed to the fluid optimizer.

    Returns:
        Tuple ``(train_loader, train_logger, train_batch_logger, optimizer,
        scheduler)``.
    """
    # --- spatial (per-frame) transform pipeline ---
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_ops = []
    if opt.train_crop == 'random':
        spatial_ops.append(RandomResizedCrop(
            opt.sample_size,
            (opt.train_crop_min_scale, 1.0),
            (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        # Five geometrically shrinking scales: 1, s, s^2, s^3, s^4 with
        # s = 2^(-1/4).
        shrink = 1 / (2 ** (1 / 4))
        scales = [1.0]
        for _ in range(4):
            scales.append(scales[-1] * shrink)
        spatial_ops.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_ops.extend(
            [Resize(opt.sample_size), CenterCrop(opt.sample_size)])
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_ops.append(RandomHorizontalFlip())
    # Convert to ndarray before the array-based transforms below.
    spatial_ops.append(ToArray())
    if opt.colorjitter:
        spatial_ops.append(ColorJitter())
    if opt.input_type == 'flow':
        # Optical-flow clips carry two meaningful channels only.
        spatial_ops.append(PickFirstChannels(n=2))
    spatial_ops.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_ops)

    # --- temporal (frame-selection) transform pipeline ---
    assert opt.train_t_crop in ['random', 'center']
    temporal_ops = []
    if opt.sample_t_stride > 1:
        temporal_ops.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_ops.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_ops.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_ops)

    # --- dataset and batched reader ---
    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    train_loader = paddle.batch(train_data.reader, batch_size=opt.batch_size)

    # --- loggers (written unconditionally in this variant) ---
    train_logger = Logger(opt.result_path / 'train.log',
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        opt.result_path / 'train_batch.log',
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    # --- LR scheduler ('plateau' needs validation losses to react to) ---
    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(learning_rate=opt.learning_rate,
                                      mode='min',
                                      patience=opt.plateau_patience)
    else:
        scheduler = MultiStepDecay(learning_rate=opt.learning_rate,
                                   milestones=opt.multistep_milestones)

    # Paddle takes the scheduler object itself as the learning rate.
    optimizer = fluid.optimizer.MomentumOptimizer(
        learning_rate=scheduler,
        momentum=opt.momentum,
        parameter_list=model_parameters,
        use_nesterov=opt.nesterov,
        regularization=fluid.regularizer.L2Decay(
            regularization_coeff=opt.weight_decay))

    return (train_loader, train_logger, train_batch_logger, optimizer,
            scheduler)
def main_worker():
    """Train a Faster R-CNN based relationship-detection model end to end.

    Builds train/val loaders, the model, a four-group optimizer (separate
    LR/weight-decay for backbone vs. predicate-branch and bias vs. non-bias
    parameters) and an LR scheduler, then runs an iteration-based loop with
    periodic validation, checkpointing and metric logging.

    NOTE(review): depends on module-level names not visible in this chunk
    (`cfg`, `parse_opts`, `collater`, `FasterRCNN`, `val_epoch`,
    `save_model`, `load_from_ckpt`, `load_train_utils`, `Metrics`,
    `AverageMeter`, `ProgressMeter`) -- confirm against the full file.
    """
    # Fix all RNG seeds (Python, NumPy, PyTorch CPU) for reproducibility.
    seed = 1
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    opt = parse_opts()
    # Datasets are built from the global `cfg`, not from `opt`.
    train_data = get_training_data(cfg)
    val_data = get_validation_data(cfg)
    train_loader = DataLoader(train_data,
                              num_workers=opt.num_workers,
                              collate_fn=collater,
                              batch_size=opt.batch_size,
                              shuffle=True)
    val_loader = DataLoader(val_data,
                            num_workers=opt.num_workers,
                            collate_fn=collater,
                            batch_size=opt.batch_size,
                            shuffle=True)

    print(f"Training dataset size : {len(train_loader.dataset)}")
    print(f"Validation dataset size : {len(val_loader.dataset)}")

    # Manual iterator so the iteration-based loop can restart the loader on
    # StopIteration (epoch boundary) without leaving the loop.
    dataiterator = iter(train_loader)

    faster_rcnn = FasterRCNN()

    # if torch.cuda.device_count() > 1 and opt.multi_gpu :
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     faster_rcnn = nn.DataParallel(faster_rcnn)

    # loading model from a ckpt
    if opt.weight_path:
        load_from_ckpt(opt, faster_rcnn)
    faster_rcnn.to(cfg.DEVICE)

    # Command-line LR / weight-decay overrides take precedence over cfg.
    if opt.lr is not None:
        cfg.TRAIN.LEARNING_RATE = opt.lr
    lr = cfg.TRAIN.LEARNING_RATE
    print(f"Learning rate : {lr}")

    if opt.weight_decay is not None:
        cfg.TRAIN.WEIGHT_DECAY = opt.weight_decay
    print(f"Weight Decay : {cfg.TRAIN.WEIGHT_DECAY}")

    ### Optimizer ###
    # record backbone params, i.e., conv_body and box_head params
    # Split trainable parameters four ways -- (backbone | predicate branch)
    # x (bias | non-bias) -- so each group gets its own LR / weight decay.
    backbone_bias_params = []
    backbone_bias_param_names = []
    prd_branch_bias_params = []
    prd_branch_bias_param_names = []
    backbone_nonbias_params = []
    backbone_nonbias_param_names = []
    prd_branch_nonbias_params = []
    prd_branch_nonbias_param_names = []
    for key, value in dict(faster_rcnn.named_parameters()).items():
        if value.requires_grad:
            if 'fpn' in key or 'box_head' in key or 'box_predictor' in key or 'rpn' in key:
                if 'bias' in key:
                    backbone_bias_params.append(value)
                    backbone_bias_param_names.append(key)
                else:
                    backbone_nonbias_params.append(value)
                    backbone_nonbias_param_names.append(key)
            else:
                if 'bias' in key:
                    prd_branch_bias_params.append(value)
                    prd_branch_bias_param_names.append(key)
                else:
                    prd_branch_nonbias_params.append(value)
                    prd_branch_nonbias_param_names.append(key)
    # Bias groups get doubled LR when cfg.TRAIN.DOUBLE_BIAS is truthy, and
    # skip weight decay unless cfg.TRAIN.BIAS_DECAY is set.
    params = [
        {
            'params': backbone_nonbias_params,
            'lr': cfg.TRAIN.LEARNING_RATE,
            'weight_decay': cfg.TRAIN.WEIGHT_DECAY
        },
        {
            'params': backbone_bias_params,
            'lr': cfg.TRAIN.LEARNING_RATE * (cfg.TRAIN.DOUBLE_BIAS + 1),
            'weight_decay':
            cfg.TRAIN.WEIGHT_DECAY if cfg.TRAIN.BIAS_DECAY else 0
        },
        {
            'params': prd_branch_nonbias_params,
            'lr': cfg.TRAIN.LEARNING_RATE,
            'weight_decay': cfg.TRAIN.WEIGHT_DECAY
        },
        {
            'params': prd_branch_bias_params,
            'lr': cfg.TRAIN.LEARNING_RATE * (cfg.TRAIN.DOUBLE_BIAS + 1),
            'weight_decay':
            cfg.TRAIN.WEIGHT_DECAY if cfg.TRAIN.BIAS_DECAY else 0
        },
    ]

    # NOTE(review): if cfg.TRAIN.TYPE is neither "ADAM" nor "SGD",
    # `optimizer` is never bound and the code below raises NameError.
    if cfg.TRAIN.TYPE == "ADAM":
        optimizer = torch.optim.Adam(params)

    elif cfg.TRAIN.TYPE == "SGD":
        optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

    # scheduler
    # NOTE(review): same unbound-name hazard if opt.scheduler is not one of
    # the three handled values.
    if opt.scheduler == "plateau":
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   'min',
                                                   patience=5)
    elif opt.scheduler == "multi_step":
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[83631, 111508])
    elif opt.scheduler == "step_lr":
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=5,
                                        gamma=0.1,
                                        last_epoch=-1)

    # Resuming also restores optimizer/scheduler state and the start iter.
    if opt.weight_path:
        opt.begin_iter = load_train_utils(opt, optimizer, scheduler)

    # lr of non-backbone parameters, for commmand line outputs.
    lr = optimizer.param_groups[0]['lr']
    # lr of backbone parameters, for commmand line outputs.
    # backbone_lr = optimizer.param_groups[0]['lr']

    summary_writer = Metrics(log_dir='tf_logs')

    # Running loss meters, reset after every validation pass.
    losses_sbj = AverageMeter('Sbj loss: ', ':.2f')
    losses_obj = AverageMeter('Obj loss: ', ':.2f')
    losses_rel = AverageMeter('Rel loss: ', ':.2f')
    losses_total = AverageMeter('Total loss: ', ':.2f')
    progress = ProgressMeter(
        [losses_sbj, losses_obj, losses_rel, losses_total], prefix='Train: ')

    faster_rcnn.train()
    # `th` is only referenced by the commented-out best-checkpoint logic
    # below; currently unused.
    th = 10000
    for step in range(opt.begin_iter, opt.max_iter):
        # Pull the next batch; restart the loader at epoch boundaries.
        try:
            input_data = next(dataiterator)
        except StopIteration:
            dataiterator = iter(train_loader)
            input_data = next(dataiterator)

        images, targets = input_data
        # The model returns (detections, loss dict); only losses are used
        # during training.
        _, metrics = faster_rcnn(images, targets)
        final_loss = metrics["loss_objectness"] + metrics["loss_rpn_box_reg"] + \
            metrics["loss_classifier"] + metrics["loss_box_reg"] + \
            metrics["loss_sbj"] + metrics["loss_obj"] + metrics["loss_rlp"]

        optimizer.zero_grad()
        final_loss.backward()
        optimizer.step()

        losses_sbj.update(metrics["loss_sbj"].item(), len(images))
        losses_obj.update(metrics["loss_obj"].item(), len(images))
        losses_rel.update(metrics["loss_rlp"].item(), len(images))
        losses_total.update(final_loss.item(), len(images))

        # Plateau scheduling steps on validation loss instead (below).
        if opt.scheduler != "plateau":
            scheduler.step()

        if (step) % 10 == 0:
            progress.display(step)

        # Periodic validation + checkpoint every 2500 iterations.
        if step % 2500 == 0:
            train_losses = {}
            train_losses['total_loss'] = losses_total.avg
            train_losses['sbj_loss'] = losses_sbj.avg
            train_losses['obj_loss'] = losses_obj.avg
            train_losses['rel_loss'] = losses_rel.avg
            val_losses = val_epoch(faster_rcnn, val_loader)

            if opt.scheduler == "plateau":
                scheduler.step(val_losses['total_loss'])

            lr = optimizer.param_groups[0]['lr']

            # if val_losses['total_loss'] < th:
            #     save_model(faster_rcnn, optimizer, scheduler, step)
            #     print(f"*** Saved model ***")
            #     th = val_losses['total_loss']
            # Checkpoint unconditionally (best-only saving is disabled above).
            save_model(faster_rcnn, optimizer, scheduler, step)

            # write summary
            summary_writer.log_metrics(train_losses, val_losses, step, lr)

            print(
                f"* Average training loss : {train_losses['total_loss']:.3f}")
            print(
                f"* Average validation loss : {val_losses['total_loss']:.3f}")

            losses_sbj.reset()
            losses_obj.reset()
            losses_rel.reset()
            losses_total.reset()
            # val_epoch put the model in eval mode; switch back to training.
            faster_rcnn.train()
from datasets.vrd import collater
from opts import parse_opts


# Accumulators for a dataset mean/std computation pass.
# NOTE(review): the loop that updates mean/std/nb_samples is not visible in
# this chunk -- confirm they are used further down the file.
mean = 0.
std = 0.
nb_samples = 0.

# Fix RNG seeds (Python, NumPy, PyTorch CPU) for reproducibility.
seed = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)


opt = parse_opts()
# Datasets are built from the module-level `cfg`; batch_size=1 keeps the
# per-sample statistics pass simple.
train_data = get_training_data(cfg)
val_data = get_validation_data(cfg)
train_loader = DataLoader(
    train_data, num_workers=opt.num_workers, collate_fn=collater, batch_size=1, shuffle=True)

def _resize_image_and_masks(image, self_min_size=800, self_max_size=1333):
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))
    max_size = float(torch.max(im_shape))
    scale_factor = self_min_size / min_size
    if max_size * scale_factor > self_max_size:
        scale_factor = self_max_size / max_size
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode='bilinear', recompute_scale_factor=True,
        align_corners=False)[0]
    return image
def run_training(batch_size, learning_rate, epochs, run_number):
    """Train a TF1 graph-mode model for `epochs` full passes over the data.

    Builds the graph (placeholders, model, loss, train op, eval op), then
    runs mini-batch training inside a tf.Session. Every 2 epochs prints the
    last batch's loss; every 5 epochs evaluates on the validation set, which
    can trigger early stopping; at the final epoch (or on early stop) it
    evaluates on train and test sets and saves a checkpoint.

    NOTE(review): relies on module-level helpers not visible in this chunk
    (placeholder_inputs, define_model, define_loss, training, evaluation,
    do_evaluation, dataset) -- confirm against the full file.
    """
    with tf.Graph().as_default():
        images_placeholder, labels_placeholder = placeholder_inputs(batch_size)

        logits = define_model(images_placeholder)
        lossFunction = define_loss(logits, labels_placeholder)
        train_op = training(lossFunction, learning_rate)

        eval_correct = evaluation(logits, labels_placeholder)

        #       summary = tf.summary.merge_all()
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        with tf.Session() as session:
            #session = tf_debug.LocalCLIDebugWrapperSession(session)

            # NOTE(review): logdir is only used by the commented-out
            # summary writer below; currently unused.
            logdir = "log/" + str(run_number)
            #           summary_writer = tf.summary.FileWriter(logdir, session.graph)
            session.run(init)

            # One "step" here is a full epoch over the training data.
            for step in range(epochs):
                training_data, training_labels = dataset.get_training_data()
                # Slice the epoch's data into contiguous mini-batches.
                batches = [(training_data[i:i + batch_size],
                            training_labels[i:i + batch_size])
                           for i in range(0, len(training_data), batch_size)]
                epochLoss = 0
                for batch in batches:
                    image_data = batch[0]
                    label_data = batch[1]

                    feed_dict = {
                        images_placeholder: image_data,
                        labels_placeholder: label_data
                    }
                    activations, loss_value = session.run(
                        [train_op, lossFunction], feed_dict=feed_dict)
                    epochLoss += loss_value
                if step % 2 == 0:
                    # NOTE(review): prints the LAST batch's loss, not the
                    # accumulated epochLoss (see commented alternative).
                    #print('Step %d: loss = %.2f' % (step, epochLoss))
                    print('Step %d: loss = %.2f' % (step, loss_value))
                    sys.stdout.flush()


#                    summary_str = session.run(summary, feed_dict=feed_dict)
#                    summary_writer.add_summary(summary_str, step)
#                    summary_writer.flush()

                # Validate every 5 epochs and at the final epoch; the
                # evaluation helper decides whether to stop early.
                early_stop = False
                if (step + 1) % 5 == 0 or (step + 1) == epochs:
                    validation_data = dataset.get_validation_data(batch_size)
                    print("Doing evaluation on validation Set")
                    sys.stdout.flush()
                    early_stop = do_evaluation(session, eval_correct,
                                               validation_data, batch_size,
                                               images_placeholder,
                                               labels_placeholder)

                # Final wrap-up: full train/test evaluation plus checkpoint.
                if (step + 1) == epochs or early_stop:
                    print("Doing evaluation on training set")
                    sys.stdout.flush()
                    do_evaluation(session, eval_correct,
                                  (training_data, training_labels), batch_size,
                                  images_placeholder, labels_placeholder)

                    print("Doing evaluation on the test set")
                    sys.stdout.flush()
                    test_data = dataset.get_test_data(batch_size)
                    do_evaluation(session, eval_correct, test_data, batch_size,
                                  images_placeholder, labels_placeholder)

                    saver.save(session, "model.ckpt")

                    if (early_stop):
                        print("Achieved desired precision at step %d" % step)
                        return