コード例 #1
0
def main():
    """Render the autograd graph of one ResNet-50 forward pass as an SVG.

    Builds ``ResNet(50, "deconv3")`` with 4 input channels and a 192x256
    image shape, moves it to the GPU, pushes a single random input through
    it and passes the mean of the output to ``torchviz.make_dot`` together
    with the model's named parameters. The resulting graph object is
    rendered with ``render("resnet50.gv", "resnet50_render", view=True)``.
    """
    # The parsed namespace is not used below; the call is kept so that
    # whatever command-line handling ``parser`` performs still takes place.
    parser.parse_args()
    import torch

    mdl = ResNet(50, "deconv3", in_channels=4, image_shape=(192, 256)).cuda()
    # Tensors track autograd history themselves, so the deprecated
    # ``torch.autograd.Variable`` wrapper is not needed.
    x = torch.randn(1, mdl.in_channels, *mdl.image_shape).cuda()
    y = mdl(x)
    model_graph = torchviz.make_dot(y.mean(), dict(mdl.named_parameters()))
    model_graph.format = "svg"
    model_graph.render("resnet50.gv", "resnet50_render", view=True)
コード例 #2
0
def _build_model(args, train_loader):
    """Construct the (not yet GPU-resident) network selected by ``args.arch``.

    Raises:
        ValueError: if ``args.arch`` is not one of the supported names.
    """
    arch = args.arch
    if arch in ('resnet50', 'resnet18'):
        return ResNet(layers=50 if arch == 'resnet50' else 18,
                      decoder=args.decoder,
                      output_size=train_loader.dataset.output_size,
                      in_channels=len(args.modality),
                      pretrained=args.pretrained)
    if arch == 'SmallNet':
        return SmallNet()
    if arch == 'UNET':
        return DepthCompletionNet(args)
    if arch == 'DRNSeg':
        return DRNSeg("drn_d_22", 1, pretrained_model=None, pretrained=False)
    if arch == 'ERF':
        return ERF()
    raise ValueError("unsupported architecture '{}'".format(arch))


def main():
    """Evaluate a saved model, resume training, or train a new model.

    Behaviour is selected from the module-level ``args``:

    * ``args.evaluate`` (path): load that checkpoint, run ``validate`` once
      on the validation loader and return.
    * ``args.resume`` (path): restore args, model, optimizer, epoch and best
      result from that checkpoint and continue training.
    * otherwise: build the model named by ``args.arch`` and an Adam
      optimizer over its trainable parameters.

    After each training epoch the model is validated, the best result (lowest
    ``rmse``) is written to ``best.txt`` and a checkpoint is saved through
    ``utils.save_checkpoint``.

    Rebinds the globals ``args``, ``best_result``, ``output_directory``,
    ``train_csv`` and ``test_csv``.

    Raises:
        AssertionError: if the evaluate/resume checkpoint path is not a file.
        ValueError: if ``args.arch`` or ``args.criterion`` is not supported.
    """
    global args, best_result, output_directory, train_csv, test_csv

    start_epoch = 0

    # evaluation mode
    if args.evaluate:
        assert os.path.isfile(args.evaluate), \
            "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        # SECURITY: torch.load unpickles the file; only load trusted checkpoints.
        # NOTE(review): recent PyTorch releases default to weights_only=True,
        # which may reject the pickled model object stored here -- confirm
        # against the torch version in use.
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        _, val_loader = create_data_loaders(args)
        args.evaluate = True
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    if args.resume:
        # resume from a checkpoint
        chkpt_path = args.resume
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        # SECURITY: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True
    else:
        # create new model
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        # Move the model to the GPU before building the optimizer so the
        # optimizer is constructed over the parameters in their final location.
        model = _build_model(args, train_loader).cuda()
        print("=> model created.")
        # Previously this list was only built for some architectures, so the
        # resnet variants crashed with NameError when creating the optimizer.
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(trainable_params, lr=args.lr,
                                     weight_decay=args.weight_decay)

    # define loss functions
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    else:
        raise ValueError("unsupported criterion '{}'".format(args.criterion))
    smoothloss = criteria.SmoothnessLoss().cuda()
    photometric_loss = criteria.PhotometricLoss().cuda()

    # create results folder, if not already exists
    # NOTE(review): this overwrites the directory derived from the checkpoint
    # path when resuming -- confirm that get_output_directory maps the
    # restored args back to the same folder.
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    if not args.resume:
        for csv_path in (train_csv, test_csv):
            with open(csv_path, 'w') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        # train for one epoch
        train(train_loader, model, criterion, smoothloss, photometric_loss, optimizer, epoch)
        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write("epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".
                    format(epoch, result.mse, result.rmse, result.absrel, result.lg10, result.mae, result.delta1, result.gpu_time))
            if img_merge is not None:
                img_filename = os.path.join(output_directory, 'comparison_best.png')
                utils.save_image(img_merge, img_filename)

        # The whole model and optimizer objects are stored, which is what the
        # evaluate/resume branches above read back.
        utils.save_checkpoint({
            'args': args,
            'epoch': epoch,
            'arch': args.arch,
            'model': model,
            'best_result': best_result,
            'optimizer': optimizer,
        }, is_best, epoch, output_directory)
コード例 #3
0
def train(k, epochs):
    """Train ``ResNet(k=k)`` with Adam and cross-entropy, then save the results.

    Each epoch iterates once over the module-level ``train_loader`` (with
    parameter updates) and once over ``val_loader`` (without gradients). The
    recorded losses are the sums of the per-batch ``loss.item()`` values;
    when ``use_horovod`` is set they are passed through ``average_loss``.

    After the last epoch the model's ``state_dict`` and the loss history are
    written under ``models/modelsdata/``.

    Args:
        k: forwarded to ``ResNet(k=k)`` and used in the output file names.
        epochs: number of passes over the training data.

    Returns:
        dict with keys ``'epoch'``, ``'train'`` and ``'val'``, each a list
        holding one entry per epoch.
    """
    model = ResNet(k=k)
    opt = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    if use_gpu:
        model.to('cuda')
        if use_horovod:
            # broadcast parameters and optimizer state from root device to other devices
            hvd.broadcast_parameters(model.state_dict(), root_rank=0)
            hvd.broadcast_optimizer_state(opt, root_rank=0)

            # Wraps the optimizer for multi-GPU operation
            opt = hvd.DistributedOptimizer(
                opt, named_parameters=model.named_parameters(), op=hvd.Adasum)

    loss_dict = {'epoch': [], 'train': [], 'val': []}

    for epoch in range(epochs):
        train_loss = 0
        val_loss = 0

        # train block
        # Re-enter training mode every epoch because the validation block
        # below switches the model to eval mode.
        model.train()
        for img_batch, labels_batch in train_loader:
            if use_gpu:
                img_batch = img_batch.to('cuda')
                labels_batch = labels_batch.to('cuda')

            pred = model(img_batch)

            opt.zero_grad()
            loss = criterion(pred, labels_batch)
            loss.backward()
            opt.step()
            train_loss += loss.item()

        # val block
        # torch.no_grad() only disables gradient tracking; eval() is what makes
        # layers such as BatchNorm/Dropout use their inference behaviour and
        # keeps validation batches from updating running statistics.
        model.eval()
        with torch.no_grad():
            for img_batch, labels_batch in val_loader:
                if use_gpu:
                    img_batch = img_batch.to('cuda')
                    labels_batch = labels_batch.to('cuda')

                pred = model(img_batch)
                loss = criterion(pred, labels_batch)
                val_loss += loss.item()

        if use_horovod:
            train_loss = average_loss(train_loss, 'avg_train_loss')
            val_loss = average_loss(val_loss, 'avg_val_loss')

        loss_dict['epoch'].append(epoch + 1)
        loss_dict['train'].append(train_loss)
        loss_dict['val'].append(val_loss)

        # One "key:value" pair per tracked series for the epoch just finished.
        print(",".join([
            "{}:{:.2f}".format(key, val[epoch])
            for key, val in loss_dict.items()
        ]))

    torch.save(model.state_dict(),
               "models/modelsdata/ResNet18_Cifar10_d{}.ckpt".format(k))
    save_obj(loss_dict,
             "models/modelsdata/losses/ResNet18_Cifar10_d{}".format(k))
    return loss_dict