def train(args, train_loader, model, criterion, optimizer, epoch):
    """
    args:
       train_loader: loader for the training dataset
       model: model to train
       criterion: loss function
       optimizer: optimization algorithm, such as Adam or SGD
       epoch: current epoch number
    return: average epoch loss and the last learning rate
    """

    model.train()
    epoch_loss = []

    total_batches = len(train_loader)
    print("=====> the number of iterations per epoch: ", total_batches)
    st = time.time()
    for iteration, batch in enumerate(train_loader, 0):

        args.per_iter = total_batches
        args.max_iter = args.max_epochs * args.per_iter
        args.cur_iter = epoch * args.per_iter + iteration

        # learning rate scheduling
        if args.lr_schedule == 'poly':
            lambda1 = lambda epoch: math.pow(
                (1 - (args.cur_iter / args.max_iter)), args.poly_exp)
            scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                                    lr_lambda=lambda1)
        elif args.lr_schedule == 'warmpoly':
            scheduler = WarmupPolyLR(optimizer,
                                     T_max=args.max_iter,
                                     cur_iter=args.cur_iter,
                                     warmup_factor=1.0 / 3,
                                     warmup_iters=args.warmup_iters,
                                     power=0.9)

        lr = optimizer.param_groups[0]['lr']

        start_time = time.time()
        images, labels, _, _ = batch

        if torch_ver == '0.3':
            images = Variable(images).cuda()
            labels = Variable(labels.long()).cuda()
        else:
            images = images.cuda()
            labels = labels.long().cuda()

        output = model(images)
        loss = criterion(output, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        scheduler.step()  # In PyTorch 1.1.0 and later, optimizer.step() should be called before lr_scheduler.step()

        epoch_loss.append(loss.item())
        time_taken = time.time() - start_time

        print(
            '=====> epoch[%d/%d] iter: (%d/%d) \tcur_lr: %.6f loss: %.3f time:%.2f'
            % (epoch + 1, args.max_epochs, iteration + 1, total_batches, lr,
               loss.item(), time_taken))

    time_taken_epoch = time.time() - st
    remain_time = time_taken_epoch * (args.max_epochs - 1 - epoch)
    m, s = divmod(remain_time, 60)
    h, m = divmod(m, 60)
    print("Remaining training time = %d hour %d minutes %d seconds" %
          (h, m, s))

    average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

    return average_epoch_loss_train, lr
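For reference, the 'poly' schedule above scales the base learning rate by (1 - cur_iter / max_iter) ** poly_exp every iteration. A minimal standalone sketch of that decay follows; base_lr, max_iter, and poly_exp are assumed placeholder values, not values taken from args:

# Standalone illustration of the poly decay used above.
# base_lr, max_iter and poly_exp are assumed placeholder values.
base_lr = 5e-4
max_iter = 300 * 1000          # max_epochs * iterations per epoch
poly_exp = 0.9

def poly_lr(cur_iter):
    return base_lr * (1.0 - cur_iter / max_iter) ** poly_exp

print(poly_lr(0))               # 5.0e-4 at the start of training
print(poly_lr(max_iter // 2))   # ~2.7e-4 halfway through
print(poly_lr(max_iter - 1))    # approaches 0 at the end of training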
Example #2
def train(args, train_loader, model, criterion, optimizer, epoch):
    """
    args:
       train_loader: loader for the training dataset
       model: model to train
       criterion: loss function
       optimizer: optimization algorithm, such as Adam or SGD
       epoch: current epoch number
    return: average epoch loss and the last learning rate
    """

    model.train()
    epoch_loss = []

    total_batches = len(train_loader)
    st = time.time()
    pbar = tqdm(iterable=enumerate(train_loader),
                total=total_batches,
                desc='Epoch {}/{}'.format(epoch, args.max_epochs))
    for iteration, batch in pbar:

        args.per_iter = total_batches
        args.max_iter = args.max_epochs * args.per_iter
        args.cur_iter = epoch * args.per_iter + iteration
        # learning rate scheduling
        if args.lr_schedule == 'poly':
            lambda1 = lambda epoch: math.pow(
                (1 - (args.cur_iter / args.max_iter)), args.poly_exp)
            scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                                    lr_lambda=lambda1)
        elif args.lr_schedule == 'warmpoly':
            scheduler = WarmupPolyLR(optimizer,
                                     T_max=args.max_iter,
                                     cur_iter=args.cur_iter,
                                     warmup_factor=1.0 / 3,
                                     warmup_iters=args.warmup_iters,
                                     power=0.9)

        lr = optimizer.param_groups[0]['lr']

        images, labels, _, _ = batch

        images = images.cuda()
        labels = labels.long().cuda()
        if args.model == 'PSPNet50':
            x, aux = model(images)
            main_loss = criterion(x, labels)
            aux_loss = criterion(aux, labels)
            loss = 0.6 * main_loss + 0.4 * aux_loss
        else:
            output = model(images)
            if type(output) is tuple:
                output = output[0]
            loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()  # In PyTorch 1.1.0 and later, optimizer.step() should be called before lr_scheduler.step()
        epoch_loss.append(loss.item())

    time_taken_epoch = time.time() - st
    remain_time = time_taken_epoch * (args.max_epochs - 1 - epoch)
    m, s = divmod(remain_time, 60)
    h, m = divmod(m, 60)
    print("Remaining training time = %d hour %d minutes %d seconds" %
          (h, m, s))

    average_epoch_loss_train = sum(epoch_loss) / len(epoch_loss)

    return average_epoch_loss_train, lr
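Either variant can be driven from a plain epoch loop. A hedged sketch of such a driver is below; the argument values, loss, and optimizer are assumptions, and build_model / build_train_loader stand in for whatever project-specific helpers construct the network and the DataLoader (they are not defined in the examples above):

# Illustrative driver loop; argument values and helper names are assumptions.
import argparse
import torch.nn as nn
import torch.optim as optim

args = argparse.Namespace(max_epochs=300, lr_schedule='poly', poly_exp=0.9,
                          warmup_iters=500, model='ENet', lr=5e-4)

model = build_model(args).cuda()          # build_model / build_train_loader are
train_loader = build_train_loader(args)   # hypothetical project helpers
criterion = nn.CrossEntropyLoss(ignore_index=255)  # a common choice for segmentation
optimizer = optim.Adam(model.parameters(), lr=args.lr)

for epoch in range(args.max_epochs):
    avg_loss, lr = train(args, train_loader, model, criterion, optimizer, epoch)
    print('Epoch %d done: average loss %.3f, last lr %.6f' % (epoch + 1, avg_loss, lr))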