def demo_from_best_model(resnet_layer, pretrained, num_classes, path):
    """Load the best checkpoint and report its validation accuracy."""
    assert resnet_layer in (18, 50), "only ResNet-18 and ResNet-50 are supported"

    net_best = ResNet(layer_num=resnet_layer, pretrained=pretrained, num_classes=num_classes)
    net_best = net_best.to(device)
    # map_location keeps the checkpoint loadable on CPU-only machines
    net_best.load_state_dict(torch.load(path, map_location=device))
    net_best.eval()
    best_acc = save_confusion_matrix(net_best, val_loader, 'backup_demo/cm_best.png')
    print('test_best_accuracy = %.2f' % best_acc)
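A minimal call site for the demo above; the checkpoint path, class count, and layer choice here are illustrative placeholders, not values from the source:

# hypothetical invocation, for illustration only
demo_from_best_model(resnet_layer=18,
                     pretrained=True,
                     num_classes=10,
                     path='backup_demo/best_model.pth')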
Example 2
log_columns = [
    'epoch', 'bce', 'lwlrap', 'bce_noisy', 'lwlrap_noisy', 'val_bce',
    'val_lwlrap', 'time'
]
os.environ["CUDA_VISIBLE_DEVICES"] = '0,1,2,3'  # comma-separated, no spaces

for fold, (ids_train_split, ids_valid_split) in enumerate(folds):
    if fold + 1 not in FOLD_LIST: continue

    print("fold: {}".format(fold + 1))

    train_log = pd.DataFrame(columns=log_columns)

    # build model
    model = ResNet(NUM_CLASS)
    model.to(device)

    # prepare data loaders
    df_train_fold = df_train.iloc[ids_train_split].reset_index(drop=True)
    dataset_train = MelDataset(
        df_train_fold['path'],
        df_train_fold[labels].values,
        crop=CROP_LENGTH,
        crop_mode='random',
        mixup=True,
        freqmask=True,
        gain=True,
    )
    train_loader = DataLoader(
        dataset_train,
        batch_size=BATCH_SIZE,
        shuffle=True,  # assumed: the original snippet is truncated at this point
    )
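The `folds` iterable consumed by the loop above is not defined in the snippet. A plausible construction with scikit-learn's KFold (the split count and seed are assumptions) would be:

from sklearn.model_selection import KFold

# each split yields (train_indices, valid_indices), matching the loop's unpacking
kf = KFold(n_splits=5, shuffle=True, random_state=42)
folds = list(kf.split(df_train))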
Example 3
def main():
    global args, best_result, output_directory, train_csv, test_csv, device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args = parser.parse_args()
    if args.modality == 'rgb' and args.num_samples != 0:
        print("number of samples is forced to be 0 when input modality is rgb")
        args.num_samples = 0
    if args.modality == 'rgb' and args.max_depth != 0.0:
        print("max depth is forced to be 0.0 when input modality is rgb/rgbd")
        args.max_depth = 0.0

    best_result = None  # set once a checkpoint is loaded or a validation pass completes
    sparsifier = None
    max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
    if args.sparsifier == UniformSampling.name:
        sparsifier = UniformSampling(num_samples=args.num_samples,
                                     max_depth=max_depth)
    elif args.sparsifier == SimulatedStereo.name:
        sparsifier = SimulatedStereo(num_samples=args.num_samples,
                                     max_depth=max_depth)

    # create results folder, if not already exists
    output_directory = os.path.join(
        'results',
        '{}.sparsifier={}.modality={}.arch={}.decoder={}.criterion={}.lr={}.bs={}'
        .format(args.data, sparsifier, args.modality, args.arch, args.decoder,
                args.criterion, args.lr, args.batch_size))
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # define loss function (criterion)
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().cuda()
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().cuda()
    else:
        raise ValueError("unsupported criterion: {}".format(args.criterion))
    out_channels = 1

    # Data loading code
    print("=> creating data loaders ...")
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')

    train_dataset = NYUDataset(traindir,
                               type='train',
                               modality=args.modality,
                               sparsifier=sparsifier)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)

    # set batch size to be 1 for validation
    val_dataset = NYUDataset(valdir,
                             type='val',
                             modality=args.modality,
                             sparsifier=sparsifier)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    print("=> data loaders created.")

    # evaluation mode
    if args.evaluate:
        best_model_filename = os.path.join(output_directory,
                                           'model_best.pth.tar')
        if not os.path.isfile(best_model_filename):
            # bail out early: otherwise `model` below would be undefined
            print("=> no best model found at '{}'".format(best_model_filename))
            return
        print("=> loading best model '{}'".format(best_model_filename))
        checkpoint = torch.load(best_model_filename)
        args.start_epoch = checkpoint['epoch']
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
        return

    # optionally resume from a checkpoint
    elif args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            best_result = checkpoint['best_result']
            model = checkpoint['model']
            optimizer = checkpoint['optimizer']
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            return

    # create new model
    else:
        # define model
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           in_channels=in_channels,
                           out_channels=out_channels,
                           pretrained=args.pretrained)
        else:
            raise ValueError("unsupported architecture: {}".format(args.arch))
        print("=> model created.")

        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # create new csv files with only header
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    # model = torch.nn.DataParallel(model).cuda()
    model = model.to(device)
    print(model)
    print("=> model transferred to GPU.")

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = best_result is None or result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = os.path.join(output_directory, 'comparison_best.png')
                utils.save_image(img_merge, img_filename)

        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch)
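`adjust_learning_rate` is called above but not defined in the snippet. A typical step-decay implementation (the decay factor and step size here are assumptions) looks like:

def adjust_learning_rate(optimizer, epoch, init_lr=0.01, decay=0.1, step=5):
    # hypothetical schedule: scale the learning rate by `decay` every `step` epochs
    lr = init_lr * (decay ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr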
Example 4
def train(k, epochs):

    model = ResNet(k=k)
    opt = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = nn.CrossEntropyLoss()

    if use_gpu:
        model.to('cuda')
        if use_horovod:
            # broadcast parameters and optimizer state from root device to other devices
            hvd.broadcast_parameters(model.state_dict(), root_rank=0)
            hvd.broadcast_optimizer_state(opt, root_rank=0)

            # wrap the optimizer for multi-GPU operation
            opt = hvd.DistributedOptimizer(
                opt, named_parameters=model.named_parameters(), op=hvd.Adasum)

    loss_dict = {'epoch': [], 'train': [], 'val': []}

    for epoch in range(epochs):
        train_loss = 0
        val_loss = 0

        # train block
        model.train()
        for img_batch, labels_batch in train_loader:
            if use_gpu:
                img_batch = img_batch.to('cuda')
                labels_batch = labels_batch.to('cuda')

            pred = model(img_batch)

            opt.zero_grad()
            loss = criterion(pred, labels_batch)
            loss.backward()
            opt.step()
            train_loss += loss.item()

        # validation block: switch to eval mode so BatchNorm/Dropout behave deterministically
        model.eval()
        with torch.no_grad():
            for img_batch, labels_batch in val_loader:
                if use_gpu:
                    img_batch = img_batch.to('cuda')
                    labels_batch = labels_batch.to('cuda')

                pred = model(img_batch)
                loss = criterion(pred, labels_batch)
                val_loss += loss.item()

        if use_horovod:
            train_loss = average_loss(train_loss, 'avg_train_loss')
            val_loss = average_loss(val_loss, 'avg_val_loss')

        loss_dict['epoch'].append(epoch + 1)
        loss_dict['train'].append(train_loss)
        loss_dict['val'].append(val_loss)

        print(",".join([
            "{}:{:.2f}".format(key, val[epoch])
            for key, val in loss_dict.items()
        ]))

    torch.save(model.state_dict(),
               "models/modelsdata/ResNet18_Cifar10_d{}.ckpt".format(k))
    save_obj(loss_dict,
             "models/modelsdata/losses/ResNet18_Cifar10_d{}".format(k))
    return loss_dict
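`average_loss` is used in the Horovod branch above but not shown. A reasonable sketch is an allreduce that averages the scalar across workers (assuming the standard `hvd.allreduce` API, which averages by default):

def average_loss(loss_value, name):
    # hypothetical helper: allreduce averages the value across all Horovod workers
    avg = hvd.allreduce(torch.tensor(loss_value), name=name)
    return avg.item()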
Example 5
def main():
    global args, best_result, output_directory, train_csv, test_csv, eval_csv, pnp
    pnp = args.pnp

    # evaluation mode
    start_epoch = 0
    best_result = None  # set when a checkpoint is loaded or after the first validation pass
    if args.evaluate:
        args_new = args
        assert os.path.isfile(args.evaluate), \
        "=> no best model found at '{}'".format(args.evaluate)
        print("=> loading best model '{}'".format(args.evaluate))
        checkpoint = torch.load(args.evaluate)
        output_directory = os.path.dirname(args.evaluate)
        eval_csv = os.path.join(output_directory, 'eval.csv')

        with open(eval_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=eval_fieldnames)
            writer.writeheader()

        args = checkpoint['args']
        args.pnp = args_new.pnp
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
        args.evaluate = True
        for num_samples in range(2, 9):
            args.num_samples = int(10**(num_samples / 2))
            _, val_loader = create_data_loaders(args)
            validate(val_loader,
                     model,
                     checkpoint['epoch'],
                     write_to_file=True)

        return

    # optionally resume from a checkpoint
    elif args.resume:
        chkpt_path = args.resume
        args_new = args
        assert os.path.isfile(chkpt_path), \
            "=> no checkpoint found at '{}'".format(chkpt_path)
        print("=> loading checkpoint '{}'".format(chkpt_path))
        checkpoint = torch.load(chkpt_path)
        args = checkpoint['args']
        args.pnp = args_new.pnp
        start_epoch = checkpoint['epoch'] + 1
        best_result = checkpoint['best_result']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']
        output_directory = os.path.dirname(os.path.abspath(chkpt_path))
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        train_loader, val_loader = create_data_loaders(args)
        args.resume = True

    # create new model
    else:
        train_loader, val_loader = create_data_loaders(args)
        print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
        in_channels = len(args.modality)
        if args.arch == 'resnet50':
            model = ResNet(layers=50,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'resnet18':
            model = ResNet(layers=18,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'vgg16':
            model = VGGNet(layers=16,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        elif args.arch == 'vgg19':
            model = VGGNet(layers=19,
                           decoder=args.decoder,
                           output_size=train_loader.dataset.output_size,
                           in_channels=in_channels,
                           pretrained=args.pretrained)
        else:
            raise ValueError("unsupported architecture: {}".format(args.arch))
        print("=> model created.")
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
        model = model.to(device)

    # define loss function (criterion)
    if args.criterion == 'l2':
        criterion = criteria.MaskedMSELoss().to(device)
    elif args.criterion == 'l1':
        criterion = criteria.MaskedL1Loss().to(device)
    else:
        raise ValueError("unsupported criterion: {}".format(args.criterion))

    # create results folder, if not already exists
    output_directory = utils.get_output_directory(args)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    train_csv = os.path.join(output_directory, 'train.csv')
    test_csv = os.path.join(output_directory, 'test.csv')
    best_txt = os.path.join(output_directory, 'best.txt')

    # create new csv files with only header
    if not args.resume:
        with open(train_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        with open(test_csv, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

    for epoch in range(start_epoch, args.epochs):
        utils.adjust_learning_rate(optimizer, epoch, args.lr)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        result, img_merge = validate(val_loader, model, epoch)

        # remember best rmse and save checkpoint
        is_best = best_result is None or result.rmse < best_result.rmse
        if is_best:
            best_result = result
            with open(best_txt, 'w') as txtfile:
                txtfile.write(
                    "epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n"
                    .format(epoch, result.mse, result.rmse, result.absrel,
                            result.lg10, result.mae, result.delta1,
                            result.gpu_time))
            if img_merge is not None:
                img_filename = os.path.join(output_directory, 'comparison_best.png')
                utils.save_image(img_merge, img_filename)

        utils.save_checkpoint(
            {
                'args': args,
                'epoch': epoch,
                'arch': args.arch,
                'model': model,
                'best_result': best_result,
                'optimizer': optimizer,
            }, is_best, epoch, output_directory)
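`create_data_loaders` is referenced throughout this example but never defined. Based on the loaders built explicitly in Example 3, a plausible sketch (the dataset class and argument names are carried over from that example, not from this one) would be:

def create_data_loaders(args):
    # hypothetical helper mirroring the explicit loader setup in Example 3
    traindir = os.path.join('data', args.data, 'train')
    valdir = os.path.join('data', args.data, 'val')
    train_dataset = NYUDataset(traindir, type='train', modality=args.modality)
    val_dataset = NYUDataset(valdir, type='val', modality=args.modality)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)
    # batch size 1 for validation, as in Example 3
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    return train_loader, val_loader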