Example #1
def start_training(model_class, model_args, model_kwargs, chkpt_num, lr,
                   train_sets, val_sets, data_dir, **params):

    #PyTorch Model
    net = utils.create_network(model_class, model_args, model_kwargs)
    monitor = utils.LearningMonitor()

    #Loading model checkpoint (if applicable)
    if chkpt_num != 0:
        utils.load_chkpt(net, monitor, chkpt_num, params["model_dir"],
                         params["log_dir"])

    #DataProvider Sampler
    Sampler = params["sampler_class"]
    train_sampler = utils.AsyncSampler(
        Sampler(data_dir,
                dsets=train_sets,
                mode="train",
                resize=params["resize"]))

    val_sampler = utils.AsyncSampler(
        Sampler(data_dir, dsets=val_sets, mode="val", resize=params["resize"]))

    loss_fn = loss.BinomialCrossEntropyWithLogits()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    train.train(net,
                loss_fn,
                optimizer,
                train_sampler,
                val_sampler,
                last_iter=chkpt_num,
                monitor=monitor,
                **params)
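
For orientation, a hypothetical params dict for this start_training variant could look like the sketch below. The keys model_dir, log_dir, sampler_class and resize are the ones the function reads directly; the remaining keys are forwarded to train.train. SamplerStub and all values are placeholders, not part of the original project.

# Hypothetical params dict for the start_training variant above.
# SamplerStub is a placeholder for a real DataProvider sampler class.
class SamplerStub:
    def __init__(self, data_dir, dsets, mode, resize):
        self.data_dir, self.dsets = data_dir, dsets
        self.mode, self.resize = mode, resize

params = {
    "model_dir": "experiments/exp0/models",  # read by utils.load_chkpt/save_chkpt
    "log_dir": "experiments/exp0/logs",
    "sampler_class": SamplerStub,            # wrapped by utils.AsyncSampler
    "resize": 1,                             # passed through to the sampler
    # remaining keys (max_iter, batch_size, ...) are forwarded to train.train
}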
Example #2
def train(model, loss_fn, optimizer, sampler, val_sampler=None, last_iter=0,
          train_writer=None, val_writer=None, monitor=None, **params):
    """ Generalized training function """

    assert params_defined(params), "Params under-specified"

    if monitor is None:
        monitor = utils.LearningMonitor()

    #Determine the names of inputs, labels, masks
    sample_spec = utils.SampleSpec(sampler().keys())
    mask_names = sample_spec.get_masks()

    print("======= BEGIN TRAINING LOOP ========")
    for i in range(last_iter, params['max_iter']):
        start = time.time()

        # Make sure no mask is empty (data for all tasks)
        sample = fetch_nonempty_sample(sampler, mask_names, params['batch_size'])

        inputs, labels, masks = group_sample(sample, sample_spec, "train")

        #Running forward pass
        preds = model(*inputs)

        losses, nmsks = eval_error(preds, labels, masks, loss_fn, sample_spec)

        update_model(optimizer, losses)

        log_errors(monitor, losses, nmsks, i)

        # Elapsed time.
        elapsed = time.time() - start
        log_elapsed_time(monitor, elapsed, i, "train")

        if val_sampler is not None and i % params["test_intv"] == 0:
            run_validation(model, val_sampler, params["test_iter"],
                           loss_fn, sample_spec, monitor, val_writer, i)

        if i % params["avgs_intv"] == 0 or i < last_iter + params["warm_up"] - 1:
            monitor.compute_avgs(i, "train")

            #Displaying stats (both to console and TensorBoard)
            avg_losses = {k: round(monitor.get_last_value(k, "train"), 5)
                          for k in losses.keys()}
            avg_time = round(monitor.get_last_value("iter_time", "train"), 5)

            write_averages(train_writer, avg_losses, avg_time, i)
            print("iter: {}; avg losses = {} (iter_time = {} s on avg)".format(
                i, avg_losses, avg_time))

        if i % params["chkpt_intv"] == 0 and i != last_iter:
            print("SAVE CHECKPOINT: {} iters.".format(i))
            utils.save_chkpt(model, monitor, i, params["model_dir"],
                             params["log_dir"])
Example #3
def start_training(model_class, model_args, model_kwargs, sampler_class,
                   sampler_spec, augmentor_constr, chkpt_num, lr, train_sets,
                   val_sets, data_dir, model_dir, log_dir, tb_train, tb_val,
                   **params):

    #PyTorch Model
    net = utils.create_network(model_class, model_args, model_kwargs)
    train_writer = tensorboardX.SummaryWriter(tb_train)
    val_writer = tensorboardX.SummaryWriter(tb_val)
    monitor = utils.LearningMonitor()

    #Loading model checkpoint (if applicable)
    if chkpt_num != 0:
        utils.load_chkpt(net, monitor, chkpt_num, model_dir, log_dir)

    #DataProvider Stuff
    train_aug = augmentor_constr(True)
    train_sampler = utils.AsyncSampler(
        sampler_class(data_dir,
                      sampler_spec,
                      vols=train_sets,
                      mode="train",
                      aug=train_aug))

    val_aug = augmentor_constr(False)
    val_sampler = utils.AsyncSampler(
        sampler_class(data_dir,
                      sampler_spec,
                      vols=val_sets,
                      mode="val",
                      aug=val_aug))

    loss_fn = loss.BinomialCrossEntropyWithLogits()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    train.train(net,
                loss_fn,
                optimizer,
                train_sampler,
                val_sampler,
                train_writer=train_writer,
                val_writer=val_writer,
                last_iter=chkpt_num,
                model_dir=model_dir,
                log_dir=log_dir,
                monitor=monitor,
                **params)
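
The augmentor_constr argument is called with True for the training sampler and False for the validation sampler. A minimal sketch of such a constructor, assuming the sampler accepts either a callable or None for its aug argument, is:

import numpy as np

# Illustrative augmentor constructor; the flip-only pipeline is an assumption,
# not the augmentation used in the original project.
def make_augmentor(is_train):
    if not is_train:
        return None  # no augmentation for validation
    def augment(sample):
        # Randomly mirror every volume in the sample along its last axis.
        if np.random.rand() < 0.5:
            return {k: v[..., ::-1].copy() for k, v in sample.items()}
        return sample
    return augment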
Example #4
def train(model,
          loss_fn,
          optimizer,
          sampler,
          val_sampler=None,
          last_iter=0,
          monitor=None,
          **params):
    """ Generalized training fn """

    assert params_defined(params), "Params under-specified"

    if monitor is None:
        monitor = utils.LearningMonitor()

    #Determine the names of inputs, labels, masks
    sample_spec = utils.SampleSpec(sampler.get().keys())
    mask_names = sample_spec.get_masks()

    start = time.time()
    print("======= BEGIN TRAINING LOOP ========")
    for i in range(last_iter, params['max_iter']):

        # Make sure no mask is empty (data for all tasks)
        sample = fetch_nonempty_sample(sampler, mask_names,
                                       params['batch_size'])
        #print("sample type, size in training loop", type(sample), sample.get(sample.keys()[0]).shape)
        inputs, labels, masks = make_variables(sample, sample_spec, "train")

        #Running forward pass
        preds = model(*inputs)

        if params["resize"] != 1:
            print("Type of Preds[0]:", type(preds[0]))
            #preds = misc.imresize(preds, 1.0*params["resize"], interp="bilinear")
            #print("Resized!")

        losses, nmsks = eval_error(preds, labels, masks, loss_fn, sample_spec)

        update_model(optimizer, losses)

        log_errors(monitor, losses, nmsks)

        # Elapsed time.
        elapsed = time.time() - start
        log_elapsed_time(monitor, elapsed, "train")
        start = time.time()

        if val_sampler is not None and i % params["test_intv"] == 0:
            run_validation(model, val_sampler, params["test_iter"], loss_fn,
                           sample_spec, monitor, i)
            start = time.time()  #ignore validation time

        if i % params["avgs_intv"] == 0 or i < last_iter + params["warm_up"] - 1:
            monitor.compute_avgs(i, "train")

            #Displaying stats
            avg_losses = {
                k: round(monitor.get_last_value(k, "train"), 5)
                for k in losses.keys()
            }
            avg_time = round(monitor.get_last_value("iter_time", "train"), 5)
            print("iter: {}; avg losses = {} (iter_time = {} s on avg)".format(
                i, avg_losses, avg_time))

        if i % params["chkpt_intv"] == 0 and i != last_iter:
            print("SAVE CHECKPOINT: {} iters.".format(i))
            utils.save_chkpt(model, monitor, i, params["model_dir"],
                             params["log_dir"])