Example #1
def predict(args, model):
    """Entrypoint for predict mode"""

    test_loader = dataset.get_test_loader(args)
    _, val_loader = dataset.get_train_val_loader(args, predict=True)  # train split unused in predict mode

    if args.fp16:
        model = amp.initialize(model, opt_level='O1')

    logging.info('Starting prediction')

    output = {}
    for k, loader in [('test', test_loader), ('val', val_loader)]:
        output[k] = {}
        res = infer(args, model, loader)

        for i, v in res.items():
            d = loader.dataset.data[i]
            name = '{}_{}_{}'.format(d[0], d[1], d[2])
            output[k].setdefault(name, []).append(v)

    out_path = args.load + '.output' + args.pred_suffix
    logging.info('Saving predictions to {}'.format(out_path))
    with open(out_path, 'wb') as file:
        pickle.dump(output, file)
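The infer helper is not part of the snippet; judging from how its result is consumed (res.items() with dataset indices as keys), it appears to return a dict mapping sample index to prediction. A minimal sketch, assuming the loader yields (indices, inputs) batches; both the batch layout and the body are assumptions, not the original code:

import torch

def infer(args, model, loader):
    # Hypothetical sketch: run the model over a loader and collect one
    # output per sample, keyed by dataset index. The (indices, inputs)
    # batch layout is an assumption.
    model.eval()
    results = {}
    with torch.no_grad():
        for indices, inputs in loader:
            outputs = model(inputs.cuda()).cpu()
            for idx, out in zip(indices.tolist(), outputs):
                results[idx] = out.numpy()
    return results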
Example #2
def train(args, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                           step_size=args.lr_decay_step,
                                           gamma=args.lr_decay)
    criterion = nn.MSELoss()

    if args.retrain:
        optimizer.load_state_dict(torch.load(args.load + '_optimizer'))
        # reset lr to initial learning rate
        for g in optimizer.param_groups:
            g['lr'] = args.lr

    train_loader, validation_loader = dataset.get_train_val_loader(args)
    train_iterations = len(train_loader)
    val_iterations = len(validation_loader)

    best_loss = float('inf')  # best (lowest) validation loss seen so far

    for epoch in range(args.epochs):
        current_lr = optimizer.param_groups[0]['lr']
        logging.info(f'Train: epoch {epoch}   learning rate: {current_lr}')

        model.train()
        optimizer.zero_grad()

        # Train set
        for i, (images, targets) in enumerate(train_loader):
            # rotate and resize batch if requested
            if args.transform:
                # Choose a random rotation angle and scaling for this batch
                angle = random.choice(range(360))
                scale = random.choice(np.linspace(0.2, 2, 49))
                new_height = int(np.round(images.size()[2] * scale))
                new_width = int(np.round(images.size()[3] * scale))
                # Transform the first sample, then append the rest of the
                # batch (starting at index 1 to avoid duplicating sample 0)
                new_ims, new_targets = transform_input(images[0], targets[0],
                                                       angle, new_height,
                                                       new_width)
                for l in range(1, len(images)):
                    new_im, new_target = transform_input(
                        images[l], targets[l], angle, new_height, new_width)
                    new_ims = torch.cat((new_ims, new_im), dim=0)
                    new_targets = torch.cat((new_targets, new_target), dim=0)

                images, targets = new_ims, new_targets

            images = images.to(device)
            targets = targets.to(device)

            output = model(images).to(torch.double)

            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # Log training progress every 5th iteration
            if (i + 1) % 5 == 0:
                logging.info(
                    f'epoch {epoch + 1}/{args.epochs}, step {i + 1}/{train_iterations},  loss {loss.item():.6f}'
                )

        # Validation set
        model.eval()
        loss_log = np.zeros(val_iterations)

        for i, (images, targets) in enumerate(validation_loader):
            # rotate and resize if requested
            if args.transform:
                # Choose a random rotation angle and scaling for this batch
                angle = random.choice(range(360))
                scale = random.choice(np.linspace(0.2, 2, 49))
                new_height = int(np.round(images.size()[2] * scale))
                new_width = int(np.round(images.size()[3] * scale))
                new_ims, new_targets = transform_input(images[0], targets[0],
                                                       angle, new_height,
                                                       new_width)
                for l in range(1, len(images)):
                    new_im, new_target = transform_input(
                        images[l], targets[l], angle, new_height, new_width)
                    new_ims = torch.cat((new_ims, new_im), dim=0)
                    new_targets = torch.cat((new_targets, new_target), dim=0)

                images, targets = new_ims, new_targets

            images = images.to(device)
            targets = targets.to(device)

            with torch.no_grad():
                output = model(images)
                loss = criterion(output, targets)

            loss_log[i] = loss.item()

            # Log validation loss
            logging.info(
                f'===== VALIDATION epoch {epoch + 1}/{args.epochs}, step {i + 1}/{val_iterations}, '
                f'validation loss {loss.item():.6f} =====')

        mean_val_loss = np.mean(loss_log)
        if mean_val_loss < best_loss:
            best_loss = mean_val_loss
            logging.info(f'Saving best to {args.save} with loss {best_loss}')
            torch.save(model.state_dict(),
                       str(args.save + '/' + args.backbone))
            torch.save(optimizer.state_dict(),
                       str(args.save + '/' + args.backbone + '_optimizer'))

        exp_lr_scheduler.step()
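Both loops above lean on a transform_input helper that is not shown. Since its outputs are concatenated along dim 0, it must return tensors with a leading batch dimension. A minimal sketch using torchvision, with target handling left as a pass-through because adjusting targets (e.g. rotating keypoint coordinates) is task-specific; everything here is an assumption about the original helper:

import torchvision.transforms.functional as TF

def transform_input(image, target, angle, new_height, new_width):
    # Hypothetical sketch: rotate a single (C, H, W) image tensor by
    # `angle` degrees, resize it to (new_height, new_width), and return
    # tensors with a leading batch dim so the caller can torch.cat
    # along dim 0.
    image = TF.rotate(image.unsqueeze(0), angle)
    image = TF.resize(image, [new_height, new_width])
    # Adjusting `target` for the rotation/scaling is task-specific and
    # omitted here.
    return image, target.unsqueeze(0)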
Example #3
def train(args, model):
    train_loader, val_loader = dataset.get_train_val_loader(args)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0,
                                 weight_decay=args.wd)

    if args.horovod:
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=model.named_parameters(),
            backward_passes_per_step=args.gradient_accumulation)
        hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    if args.fp16:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    if args.load is not None:
        best_acc = score(args, model, val_loader)
    else:
        best_acc = float('-inf')

    if args.mode == 'val':
        return

    if args.pl_epoch is not None:
        test_loader = dataset.get_test_loader(args, exclude_leak=True)
        pl_data = set()

    for epoch in range(args.start_epoch, args.epochs):
        if args.pl_epoch is not None:
            pseudo_label(args, epoch, pl_data, model, val_loader, test_loader,
                         train_loader)

        with torch.no_grad():
            avg_norm = np.mean([v.norm().item() for v in model.parameters()])

        logging.info('Train: epoch {}   avg_norm: {}'.format(epoch, avg_norm))

        model.train()
        optimizer.zero_grad()

        cum_loss = 0
        cum_acc = 0
        cum_count = 0
        tic = time.time()
        for i, (X, S, _, Y) in enumerate(train_loader):
            lr = get_learning_rate(args, epoch + i / len(train_loader))
            for g in optimizer.param_groups:
                g['lr'] = lr

            X = X.cuda()
            S = S.cuda()
            Y = Y.cuda()
            X, S, Y = transform_input(args, X, S, Y)

            loss, acc = model.train_forward(X, S, Y)
            if args.fp16:
                apply_grads = (i + 1) % args.gradient_accumulation == 0
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                    if hasattr(optimizer, "synchronize") and apply_grads:
                        optimizer.synchronize()
                if apply_grads:
                    if hasattr(optimizer, "skip_synchronize"):
                        with optimizer.skip_synchronize():
                            optimizer.step()
                    optimizer.zero_grad()
            else:
                loss.backward()
                if (i + 1) % args.gradient_accumulation == 0:
                    optimizer.step()
                    optimizer.zero_grad()

            cum_count += 1
            cum_loss += loss.item()
            cum_acc += acc
            if (i + 1) % args.disp_batches == 0:
                logging.info(
                    'Epoch: {:3d} Iter: {:4d}  ->  speed: {:6.1f}   lr: {:.9f}   loss: {:.6f}   acc: {:.6f}'
                    .format(epoch, i + 1,
                            cum_count * args.batch_size / (time.time() - tic),
                            optimizer.param_groups[0]['lr'],
                            cum_loss / cum_count, cum_acc / cum_count))
                cum_loss = 0
                cum_acc = 0
                cum_count = 0
                tic = time.time()

        acc = score(args, model, val_loader)
        torch.save(model.state_dict(), str(args.save + '.{}'.format(epoch)))
        if acc >= best_acc:
            best_acc = acc
            logging.info('Saving best to {} with score {}'.format(
                args.save, best_acc))
            torch.save(model.state_dict(), str(args.save))
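The per-iteration learning rate comes from a get_learning_rate(args, fractional_epoch) helper that is not shown; the optimizer is created with lr=0, so the schedule fully owns the rate. One plausible shape is linear warmup followed by cosine decay. This is a sketch, not the original schedule, and args.warmup is an assumed field:

import math

def get_learning_rate(args, epoch):
    # Hypothetical sketch: linear warmup to args.lr over the first
    # args.warmup epochs, then cosine decay to zero at args.epochs.
    # `epoch` may be fractional; args.warmup is an assumption.
    if epoch < args.warmup:
        return args.lr * epoch / args.warmup
    progress = (epoch - args.warmup) / max(args.epochs - args.warmup, 1e-8)
    return args.lr * 0.5 * (1.0 + math.cos(math.pi * progress))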
Example #4
def train(args, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=args.lr_decay_step, gamma=args.lr_decay)
    criterion = nn.MSELoss()

    if args.retrain:
        optimizer.load_state_dict(torch.load(args.load + '_optimizer'))
        # reset lr to initial learning rate
        for g in optimizer.param_groups:
            g['lr'] = args.lr

    train_loader, validation_loader = dataset.get_train_val_loader(args)
    train_iterations = len(train_loader)
    val_iterations = len(validation_loader)

    best_loss = float('inf')  # best (lowest) validation loss seen so far

    for epoch in range(args.epochs):
        current_lr = optimizer.param_groups[0]['lr']
        logging.info(f'Train: epoch {epoch}   learning rate: {current_lr}')

        model.train()
        optimizer.zero_grad()

        # Train set
        for i, (images, targets) in enumerate(train_loader):
            # rotate and resize batch if requested

            if args.transform:
                # Choose random rotation angle and scaling for this batch
                angle = random.choice(range(360))
                scale = random.choice(np.linspace(0.2, 2, 49))

                new_height = int(np.round(images.size()[2] * scale))
                new_width = int(np.round(images.size()[3] * scale))
                # Get transformed images and targets
                for image_ind in range(len(images)):
                    if image_ind == 0:
                        new_ims, new_targets = transform_input(images[0],
                                                               targets[0],
                                                               angle,
                                                               new_height,
                                                               new_width)
                    else:
                        new_im, new_target = transform_input(images[image_ind],
                                                             targets[image_ind],
                                                             angle,
                                                             new_height,
                                                             new_width)
                        new_ims = torch.cat((new_ims, new_im), dim=0)
                        new_targets = torch.cat((new_targets, new_target), dim=0)
                images, targets = new_ims, new_targets

            # For debugging - check transformed images and targets
            # testim = images[0, 0, :, :].detach().numpy()
            # testtargs = targets[0, :].detach().numpy()
            #
            # trck_pts = np.zeros([2, 8])
            # trck_pts[0, :] = testtargs[0:8] * testim.shape[0]
            # trck_pts[1, :] = testtargs[8:16] * testim.shape[1]
            # trck_pts = np.transpose(trck_pts)
            # for ind in range(trck_pts.shape[0]):
            #     pt = trck_pts[ind, :]
            #     testim[int(pt[0]) - 4:int(pt[0]) + 4, int(pt[1]) - 4:int(pt[1]) + 4] = 1
            #
            # plt.imshow(testim)
            # plt.savefig(args.save + 'test.png', dpi=300, format='png')

            images = images.to(device)
            targets = targets.to(device)

            output = model(images).to(torch.double)

            loss = criterion(output, targets)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # Log training progress every 5th iteration
            if (i + 1) % 5 == 0:
                logging.info(f'epoch {epoch + 1}/{args.epochs}, step {i + 1}/{train_iterations},  loss {loss.item():.6f}')

        # Validation set
        model.eval()
        loss_log = np.zeros(val_iterations)

        for i, (images, targets) in enumerate(validation_loader):
            # rotate and resize if requested
            if args.transform:
                # Choose random rotation angle and scaling for this batch
                angle = random.choice(range(360))
                scale = random.choice(np.linspace(0.2, 2, 49))

                new_height = int(np.round(images.size()[2] * scale))
                new_width = int(np.round(images.size()[3] * scale))
                # Get transformed images and targets
                for image_ind in range(len(images)):
                    if image_ind == 0:
                        new_ims, new_targets = transform_input(images[0],
                                                               targets[0],
                                                               angle,
                                                               new_height,
                                                               new_width)
                    else:
                        new_im, new_target = transform_input(images[image_ind],
                                                             targets[image_ind],
                                                             angle, new_height,
                                                             new_width)
                        new_ims = torch.cat((new_ims, new_im), dim=0)
                        new_targets = torch.cat((new_targets, new_target), dim=0)

                images, targets = new_ims, new_targets

            images = images.to(device)
            targets = targets.to(device)

            with torch.no_grad():
                output = model(images)
                loss = criterion(output, targets)

            loss_log[i] = loss.item()

            # Log validation loss
            logging.info(f'===== VALIDATION epoch {epoch + 1}/{args.epochs}, step {i + 1}/{val_iterations}, '
                         f'validation loss {loss.item():.6f} =====')

        mean_val_loss = np.mean(loss_log)
        if mean_val_loss < best_loss:
            best_loss = mean_val_loss
            logging.info(f'Saving best to {args.save} with loss {best_loss}')
            torch.save(model.state_dict(), str(args.save + '/' + args.backbone))
            torch.save(optimizer.state_dict(), str(args.save + '/' + args.backbone + '_optimizer'))

        exp_lr_scheduler.step()
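The batch-level rotate/scale above rebuilds every batch inside the training loop. An alternative worth noting (a sketch, not the original pipeline) is to apply the same augmentation per sample in a Dataset wrapper, so that DataLoader workers parallelize it:

import random
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset

class AugmentedDataset(Dataset):
    # Hypothetical wrapper applying a random rotation and scaling per
    # sample instead of per batch; assumes `base` yields (C, H, W)
    # image tensors. Target adjustment is task-specific and omitted,
    # as with transform_input above.
    def __init__(self, base):
        self.base = base

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        image, target = self.base[idx]
        angle = random.uniform(0, 360)
        scale = random.uniform(0.2, 2.0)
        h, w = image.shape[-2:]
        image = TF.rotate(image, angle)
        image = TF.resize(image, [int(round(h * scale)),
                                  int(round(w * scale))])
        return image, target

Note that per-sample scales produce variable image sizes, so this needs batch_size=1 or a custom collate_fn; the batch-level version keeps sizes uniform within a batch, which is presumably why it was written that way.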
Example #5
def train(args, model):
    train_loader, val_loader = dataset.get_train_val_loader(args)

    optimizer = torch.optim.Adam(model.parameters(), lr=0, weight_decay=args.wd)

    if args.fp16:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    if args.load is not None:
        best_acc = score(args, model, val_loader)
    else:
        best_acc = float("-inf")

    if args.mode == "val":
        return

    if args.pl_epoch is not None:
        test_loader = dataset.get_test_loader(args, exclude_leak=True)
        pl_data = set()

    for epoch in range(args.start_epoch, args.epochs):
        if args.pl_epoch is not None:
            pseudo_label(
                args, epoch, pl_data, model, val_loader, test_loader, train_loader
            )

        with torch.no_grad():
            avg_norm = np.mean([v.norm().item() for v in model.parameters()])

        logging.info("Train: epoch {}   avg_norm: {}".format(epoch, avg_norm))

        model.train()
        optimizer.zero_grad()

        cum_loss = 0
        cum_acc = 0
        cum_count = 0
        tic = time.time()
        for i, (X, S, _, Y) in enumerate(train_loader):
            lr = get_learning_rate(args, epoch + i / len(train_loader))
            for g in optimizer.param_groups:
                g["lr"] = lr

            X = X.cuda()
            S = S.cuda()
            Y = Y.cuda()
            X, S, Y = transform_input(args, X, S, Y)

            loss, acc = model.train_forward(X, S, Y)
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            if (i + 1) % args.gradient_accumulation == 0:
                optimizer.step()
                optimizer.zero_grad()

            cum_count += 1
            cum_loss += loss.item()
            cum_acc += acc
            if (i + 1) % args.disp_batches == 0:
                logging.info(
                    "Epoch: {:3d} Iter: {:4d}  ->  speed: {:6.1f}   lr: {:.9f}   loss: {:.6f}   acc: {:.6f}".format(
                        epoch,
                        i + 1,
                        cum_count * args.batch_size / (time.time() - tic),
                        optimizer.param_groups[0]["lr"],
                        cum_loss / cum_count,
                        cum_acc / cum_count,
                    )
                )
                cum_loss = 0
                cum_acc = 0
                cum_count = 0
                tic = time.time()

        acc = score(args, model, val_loader)
        torch.save(model.state_dict(), str(args.save + ".{}".format(epoch)))
        if acc >= best_acc:
            best_acc = acc
            logging.info("Saving best to {} with score {}".format(args.save, best_acc))
            torch.save(model.state_dict(), str(args.save))
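apex.amp, used via amp.initialize above, is deprecated in favor of PyTorch's native mixed precision. The same train step can be written with torch.cuda.amp; a sketch reusing the loop's own names, with the transform_input step omitted for brevity:

import torch

scaler = torch.cuda.amp.GradScaler()

for i, (X, S, _, Y) in enumerate(train_loader):
    with torch.cuda.amp.autocast():
        loss, acc = model.train_forward(X.cuda(), S.cuda(), Y.cuda())
    scaler.scale(loss).backward()
    if (i + 1) % args.gradient_accumulation == 0:
        scaler.step(optimizer)  # unscales gradients, then optimizer.step()
        scaler.update()
        optimizer.zero_grad()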