Example #1
0
def train(args):
    Arguments.save_args(args, args.args_path)
    train_loader, val_loader, _ = get_dataloaders(args)
    model = UNetVgg16(n_classes=args.n_classes).to(args.device)
    optimizer = get_optimizer(args.optimizer, model)
    lr_scheduler = LRScheduler(args.lr_scheduler, optimizer)
    criterion = get_loss_fn(args.loss_type, args.ignore_index).to(args.device)
    model_saver = ModelSaver(args.model_path)
    recorder = Recorder(['train_miou', 'train_acc', 'train_loss',
                         'val_miou', 'val_acc', 'val_loss'])
    for epoch in range(args.n_epochs):
        print(f"{args.experim_name} Epoch {epoch+1}:")
        train_loss, train_acc, train_miou, train_ious = train_epoch(
            model=model,
            dataloader=train_loader,
            n_classes=args.n_classes,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
            criterion=criterion,
            device=args.device,
        )
        print(f"train | mIoU: {train_miou:.3f} | accuracy: {train_acc:.3f} | loss: {train_loss:.3f}")
        val_loss, val_scores = eval_epoch(
            model=model,
            dataloader=val_loader,
            n_classes=args.n_classes,
            criterion=criterion,
            device=args.device,
        )
        val_miou, val_ious, val_acc = val_scores['mIoU'], val_scores['IoUs'], val_scores['accuracy']
        print(f"valid | mIoU: {val_miou:.3f} | accuracy: {val_acc:.3f} | loss: {val_loss:.3f}")
        recorder.update([train_miou, train_acc, train_loss, val_miou, val_acc, val_loss])
        recorder.save(args.record_path)
        if args.metric.startswith("IoU"):
            metric = val_ious[int(args.metric.split('_')[1])]
        else:
            metric = val_miou
        model_saver.save_models(metric, epoch+1, model,
                                ious={'train': train_ious, 'val': val_ious})

    print(f"best model at epoch {model_saver.best_epoch} with miou {model_saver.best_score:.5f}")
Example #2
0
def main():
    global args, best_prec1
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    #print("=> creating model '{}'".format(args.model))
    #if 'se_resnext50_32x4d_v1_sn' in args.model:
    #    model = models.__dict__[args.model](using_moving_average = args.using_moving_average, last_gamma=args.last_gamma)
    #else:
    #    model = models.__dict__[args.model](using_moving_average=args.using_moving_average)
    #model = resnet18()
    model = ResNet18()
    #model = SENet18()

    if not args.distributed:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # auto resume from a checkpoint
    model_dir = args.model_dir
    start_epoch = 0
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if args.evaluate:
        utils.load_state_ckpt(args.checkpoint_path, model)
    else:
        best_prec1, start_epoch = utils.load_state(model_dir, model, optimizer=optimizer)
    writer = SummaryWriter(model_dir)

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
      datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
      ])),
      batch_size=args.batch_size, shuffle=False,
      num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, writer)
        return

    train_dataset_multi_scale = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            ColorAugmentation(),
            normalize,
        ]))

    train_dataset = datasets.ImageFolder(
      traindir,
      transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        ColorAugmentation(),
        normalize,
      ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader_multi_scale = torch.utils.data.DataLoader(
        train_dataset_multi_scale, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    train_loader = torch.utils.data.DataLoader(
      train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
      num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    if not args.using_moving_average:
        train_dataset_snhelper = datasets.ImageFolder(
          traindir,
          transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
          ]))
        train_loader_snhelper = torch.utils.data.DataLoader(
          train_dataset_snhelper, batch_size=args.batch_size * torch.cuda.device_count(), shuffle=(train_sampler is None),
          #train_dataset_snhelper, batch_size=1, shuffle=(train_sampler is None),
          num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    niters = len(train_loader)

    lr_scheduler = LRScheduler(optimizer, niters, args)

    for epoch in range(start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        if epoch < args.epochs - 5:
            train(train_loader_multi_scale, model, criterion, optimizer, lr_scheduler, epoch, writer)
        else:
            train(train_loader, model, criterion, optimizer, lr_scheduler, epoch, writer)

        if not args.using_moving_average:
            sn_helper(train_loader_snhelper, model)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, writer)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        utils.save_checkpoint(model_dir, {
          'epoch': epoch + 1,
          'model': args.model,
          'state_dict': model.state_dict(),
          'best_prec1': best_prec1,
          'optimizer': optimizer.state_dict(),
        }, is_best)
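
# Several of these scripts construct LRScheduler(optimizer, niters, args), a
# scheduler that knows how many iterations make up one epoch. The class and
# the method train() calls on it are not shown; the sketch below assumes a
# per-iteration linear-warmup plus cosine-decay policy with a hypothetical
# update(i, epoch) method and hypothetical args fields (base_lr, warmup_epochs,
# epochs).
import math

class LRScheduler:
    def __init__(self, optimizer, niters, args):
        self.optimizer = optimizer
        self.niters = niters  # iterations per epoch
        self.base_lr = args.base_lr
        self.warmup_iters = getattr(args, 'warmup_epochs', 0) * niters
        self.total_iters = args.epochs * niters

    def update(self, i, epoch):
        t = epoch * self.niters + i  # global iteration index
        if t < self.warmup_iters:
            lr = self.base_lr * (t + 1) / self.warmup_iters
        else:
            progress = (t - self.warmup_iters) / max(1, self.total_iters - self.warmup_iters)
            lr = 0.5 * self.base_lr * (1.0 + math.cos(math.pi * progress))
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        return lr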
Example #3
0
def open_sesemi():
    args = parse_args()
    network = args.network
    nb_extra = args.nb_extra
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    arg2var = {
        'convnet': convnet,
        'wrn': wrn,
    }

    # Load Tiny Images.
    # Code adapted from https://github.com/smlaine2/tempens
    with open('./datasets/tiny-images/tiny_index.pkl', 'rb') as f:
        tinyimg_index = pickle.load(f, encoding='latin1')

    if nb_extra == 237203:
        print("Using all classes common with CIFAR-100.")
        with open('./datasets/cifar-100/meta', 'rb') as f:
            cifar_labels = pickle.load(f,
                                       encoding='latin1')['fine_label_names']
        cifar_to_tinyimg = {'maple_tree': 'maple', 'aquarium_fish': 'fish'}
        cifar_labels = [
            l if l not in cifar_to_tinyimg else cifar_to_tinyimg[l]
            for l in cifar_labels
        ]
        load_indices = sum(
            [list(range(*tinyimg_index[label])) for label in cifar_labels], [])
    elif nb_extra == 500000:
        print("Using %d random images." % nb_extra)
        nb_tinyimages = max(e for s, e in tinyimg_index.values())
        load_indices = np.arange(nb_tinyimages)
        rng.shuffle(load_indices)
        load_indices = load_indices[:nb_extra]
        load_indices.sort()  # sorted for faster seeks.
    else:
        raise ValueError('`--extra` must be integer 237203 or 500000.')

    print("Loading %d auxiliary unlabeled Tiny Images." % len(load_indices))
    z_train = load_tinyimages(load_indices)

    # Load CIFAR-100.
    (x_train, y_train), (x_test, y_test) = cifar100.load_data()

    x_test = global_contrast_normalize(x_test)
    x_train = global_contrast_normalize(x_train)
    z_train = global_contrast_normalize(z_train)

    zca_whiten = zca_whitener(np.concatenate([x_train, z_train], axis=0))
    x_test = zca_whiten(x_test)
    x_train = zca_whiten(x_train)
    z_train = zca_whiten(z_train)

    x_test = x_test.reshape((len(x_test), 32, 32, 3))
    x_train = x_train.reshape((len(x_train), 32, 32, 3))
    z_train = z_train.reshape((len(z_train), 32, 32, 3))

    y_train = to_categorical(y_train)

    # Shared training parameters.
    zca = True
    hflip = True
    epochs = 50
    base_lr = 0.05
    batch_size = 8
    nb_classes = 100
    lr_decay_power = 0.5
    super_dropout = 0.2
    in_network_dropout = 0.0
    input_shape = (32, 32, 3)
    max_iter = (len(x_train) // batch_size) * epochs

    # Compile the SESEMI model.
    sesemi_model, inference_model = compile_sesemi(arg2var[network],
                                                   input_shape, nb_classes,
                                                   base_lr, in_network_dropout,
                                                   super_dropout)
    sesemi_model.summary()

    lr_poly_decay = LRScheduler(base_lr, max_iter, lr_decay_power)
    evaluate = DenseEvaluator(inference_model, (x_test, y_test),
                              hflip,
                              oversample=True)

    super_datagen = ImageDataGenerator(
        width_shift_range=[-2, -1, 0, 1, 2],
        height_shift_range=[-2, -1, 0, 1, 2],
        horizontal_flip=hflip,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )
    self_datagen = ImageDataGenerator(
        width_shift_range=[-2, -1, 0, 1, 2],
        height_shift_range=[-2, -1, 0, 1, 2],
        horizontal_flip=False,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )

    super_data = super_datagen.flow(x_train,
                                    y_train,
                                    shuffle=True,
                                    batch_size=1,
                                    seed=None)
    self_data = self_datagen.flow(x_train,
                                  shuffle=True,
                                  batch_size=1,
                                  seed=None)
    extra_data = self_datagen.flow(z_train,
                                   shuffle=True,
                                   batch_size=1,
                                   seed=None)
    train_data_loader = datagen_tinyimages(super_data, self_data, extra_data,
                                           batch_size)

    # Fit the SESEMI model on mini-batches with data augmentation.
    print('Run configuration:')
    print('network=%s,' % network, 'ZCA=%s,' % zca, 'nb_epochs=%d,' % epochs, \
          'horizontal_flip=%s,' % hflip, 'nb_extra=%d,' % len(z_train), \
          'batch_size=%d,' % batch_size, 'gpu_id=%s' % args.gpu_id)
    sesemi_model.fit_generator(
        train_data_loader,
        epochs=epochs,
        verbose=1,
        steps_per_epoch=len(x_train) // batch_size,
        callbacks=[lr_poly_decay, evaluate],
    )
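
# fit_generator above draws batches from datagen_tinyimages(super_data,
# self_data, extra_data, batch_size), which is not included. The sketch below
# shows one plausible implementation that merges the three single-sample Keras
# generators into joint two-branch batches; the self-supervised rotation
# targets and the output ordering are assumptions, not the original code.
import numpy as np

def datagen_tinyimages(super_data, self_data, extra_data, batch_size):
    while True:
        sup_x, sup_y, self_x, self_y = [], [], [], []
        for _ in range(batch_size):
            x, y = next(super_data)
            sup_x.append(x[0])
            sup_y.append(y[0])
            # Draw the unlabeled image from either the CIFAR-100 stream or the
            # extra Tiny Images stream.
            u = next(self_data)[0] if np.random.rand() < 0.5 else next(extra_data)[0]
            k = np.random.randint(4)  # rotation class 0..3
            self_x.append(np.rot90(u, k))
            self_y.append(k)
        yield ([np.stack(self_x), np.stack(sup_x)],
               [np.eye(4)[self_y], np.stack(sup_y)])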
Example #4
0
def main():
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Load config
    print(args.config, flush=True)
    with open(args.config, 'r') as f:
        opt = yaml.safe_load(f)
    for k, v in opt.items():
        print('{} : {}'.format(k, v), flush=True)
        setattr(args, k, v)
    if not os.path.exists(args.model_save_path):
        os.mkdir(args.model_save_path)

    # Train
    if not args.evaluate and not args.val:
        # Dataset
        train_dataset = HFDataset(args.data_root,
                                  args.train_csv_path,
                                  args.data_lens,
                                  train=True)
        val_dataset = HFDataset(args.data_root,
                                args.validation_csv_path,
                                args.data_lens,
                                train=False)
        # Data loader
        train_loader = torch.utils.data.DataLoader(
            dataset=train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers)
        val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                                 batch_size=32,
                                                 shuffle=False,
                                                 num_workers=1)
        # Model
        model = getattr(models, args.model)(args.num_classes).to(device)
        if args.load_model_path:
            model.load(args.load_model_path)

        # Loss and optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.base_lr,
                                     weight_decay=args.weight_decay)
        if args.loss == 'weighted_binary_crossentropy':
            weight = train_dataset.weight
            weight = torch.Tensor(weight).unsqueeze(0).to(device)
            criterion = getattr(loss, args.loss)(weight)
        else:
            criterion = getattr(loss, args.loss)

        niters = len(train_loader)
        lr_scheduler = LRScheduler(optimizer, niters, args)

        # Run
        train(train_loader, val_loader, model, optimizer, criterion,
              lr_scheduler, device, args)
    # Val
    elif args.val:
        val_dataset = HFDataset(args.data_root,
                                args.validation_csv_path,
                                args.data_lens,
                                train=False)
        val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=1)
        model = getattr(models, args.model)(args.num_classes).to(device)
        assert args.load_model_path
        model.load(args.load_model_path)
        acc_val = val(val_loader, model, device, flip=args.flip)
        print('Validation Accuracy: {} %'.format(acc_val), flush=True)
    # Test
    elif args.evaluate:
        model = getattr(models, args.model)(args.num_classes).to(device)
        assert args.load_model_path
        model.load(args.load_model_path)
        test(model, args, device)
    return

def evaluate(net, epoch, dataloader):
    # Hedged reconstruction: only the accumulation survived here; header and loop inferred from the call below.
    total_acc = 0
    total_num = 0
    for points, labels in dataloader:
        output = net(points)
        pred = np.argmax(output.data, axis=1)
        acc = np.sum(pred == labels.data)
        total_acc += acc
        total_num += labels.shape[0]

    acc = total_acc / total_num
    return acc


if __name__ == '__main__':
    freeze_random_seed()

    net = PointNet(n_classes=40)
    optimizer = nn.Adam(net.parameters(), lr=1e-3)

    lr_scheduler = LRScheduler(optimizer)

    batch_size = 32
    train_dataloader = ModelNet40(n_points=4096, batch_size=batch_size, train=True, shuffle=True)
    val_dataloader = ModelNet40(n_points=4096, batch_size=batch_size, train=False, shuffle=False)

    step = 0
    best_acc = 0
    for epoch in range(1000):
        lr_scheduler.step(len(train_dataloader) * batch_size)

        train(net, optimizer, epoch, train_dataloader)
        acc = evaluate(net, epoch, val_dataloader)

        best_acc = max(best_acc, acc)
        print(f'val acc={acc:.4f}, best={best_acc:.4f}')
Example #6
0
def open_sesemi():
    args = parse_args()
    network = args.network
    dataset = args.dataset
    nb_labels = args.nb_labels
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id

    arg2var = {
        'convnet': convnet,
        'wrn': wrn,
        'nin': nin,
        'svhn': svhn,
        'cifar10': cifar10,
        'cifar100': cifar100,
    }

    # Experiment- and dataset-dependent parameters.
    zca = True
    hflip = True
    epochs = 50
    if dataset in {'svhn', 'cifar10'}:
        if dataset == 'svhn':
            zca = False
            hflip = False
            epochs = 30
        nb_classes = 10
    elif dataset == 'cifar100':
        nb_classes = 100
    else:
        raise ValueError('`dataset` must be "svhn", "cifar10", "cifar100".')
    super_dropout = 0.2
    in_network_dropout = 0.0
    if network == 'convnet' and dataset == 'svhn':
        super_dropout = 0.5
        in_network_dropout = 0.5
    elif network == 'wrn' and dataset == 'svhn':
        super_dropout = 0.5

    # Prepare the dataset.
    (x_train, y_train), (x_test, y_test) = arg2var[dataset].load_data()

    x_test = global_contrast_normalize(x_test)
    x_train = global_contrast_normalize(x_train)

    if zca:
        zca_whiten = zca_whitener(x_train)
        x_train = zca_whiten(x_train)
        x_test = zca_whiten(x_test)

    x_test = x_test.reshape((len(x_test), 32, 32, 3))
    x_train = x_train.reshape((len(x_train), 32, 32, 3))

    if nb_labels in {50000, 73257}:
        x_labeled = x_train
        y_labeled = y_train
    else:
        labels_per_class = nb_labels // nb_classes
        sample_inds = stratified_sample(y_train, labels_per_class)
        x_labeled = x_train[sample_inds]
        y_labeled = y_train[sample_inds]

    y_labeled = to_categorical(y_labeled)

    # Shared training parameters.
    base_lr = 0.05
    batch_size = 16
    lr_decay_power = 0.5
    input_shape = (32, 32, 3)
    max_iter = (len(x_train) // batch_size) * epochs

    # Compile the SESEMI model.
    sesemi_model, inference_model = compile_sesemi(arg2var[network],
                                                   input_shape, nb_classes,
                                                   base_lr, in_network_dropout,
                                                   super_dropout)
    sesemi_model.summary()

    lr_poly_decay = LRScheduler(base_lr, max_iter, lr_decay_power)
    evaluate = DenseEvaluator(inference_model, (x_test, y_test),
                              hflip,
                              oversample=True)

    super_datagen = ImageDataGenerator(
        width_shift_range=[-2, -1, 0, 1, 2],
        height_shift_range=[-2, -1, 0, 1, 2],
        horizontal_flip=hflip,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )
    self_datagen = ImageDataGenerator(
        width_shift_range=[-2, -1, 0, 1, 2],
        height_shift_range=[-2, -1, 0, 1, 2],
        horizontal_flip=False,
        preprocessing_function=gaussian_noise,
        fill_mode='reflect',
    )

    super_data = super_datagen.flow(x_labeled,
                                    y_labeled,
                                    shuffle=True,
                                    batch_size=1,
                                    seed=None)
    self_data = self_datagen.flow(x_train,
                                  shuffle=True,
                                  batch_size=1,
                                  seed=None)
    train_data_loader = datagen(super_data, self_data, batch_size)

    # Fit the SESEMI model on mini-batches with data augmentation.
    print('Run configuration:')
    print('network=%s,' % network, 'dataset=%s,' % dataset, \
          'horizontal_flip=%s,' % hflip, 'ZCA=%s,' % zca, \
          'nb_epochs=%d,' % epochs, 'batch_size=%d,' % batch_size, \
          'nb_labels=%d,' % len(y_labeled), 'gpu_id=%s' % args.gpu_id)
    sesemi_model.fit_generator(
        train_data_loader,
        epochs=epochs,
        verbose=1,
        steps_per_epoch=len(x_train) // batch_size,
        callbacks=[lr_poly_decay, evaluate],
    )
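
# Both SESEMI variants pass preprocessing_function=gaussian_noise to their
# ImageDataGenerators; the function itself is not included in these snippets.
# A minimal sketch (the noise scale is an assumption):
import numpy as np

def gaussian_noise(x, std=0.15):
    # Additive zero-mean Gaussian noise, applied per augmented image.
    return x + np.random.normal(0.0, std, size=x.shape)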
def main():
    args = parse_args()
    network = args.network
    dataset = args.dataset
    nb_labels = args.nb_labels
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_id)
    
    arg2var = {'convnet': convnet,
               'wrn': wrn,
               'resnet50v2': resnet50v2,
               'svhn': svhn,
               'cifar10': cifar10,
               'cifar100': cifar100,}
    
    # Dataset-specific parameters
    hflip = True
    zca = True
    epochs = 10
    if dataset in ['svhn', 'cifar10']:
        if dataset == 'svhn':
            hflip = False
            zca = False
            epochs = 30
        nb_classes = 10
    elif dataset == 'cifar100':
        nb_classes = 100
    else:
        raise ValueError('`dataset` must be "svhn", "cifar10", "cifar100".')

    (x_train, y_train), (x_test, y_test) = arg2var[dataset].load_data()

    x_train = global_contrast_normalize(x_train)
    x_test = global_contrast_normalize(x_test)
    
    if zca:
        zca_whiten = zca_whitener(x_train)
        x_train = zca_whiten(x_train)
        x_test = zca_whiten(x_test)

    x_train = x_train.reshape((len(x_train), 32, 32, 3))
    x_test = x_test.reshape((len(x_test), 32, 32, 3))
    
    labels_per_class = nb_labels // nb_classes
    if nb_labels == 73257:
        labels_per_class = 1000000
    sample_inds = stratified_sample(y_train, labels_per_class)
    x_labeled = x_train[sample_inds]
    y_labeled = y_train[sample_inds]
    y_labeled = to_categorical(y_labeled)
    
    # Training parameters
    input_shape = (32, 32, 3)
    batch_size = 32
    base_lr = 0.05
    lr_decay_power = 0.5
    dropout_rate = 0.2
    max_iter = (len(x_train) // batch_size) * epochs

    sesemi_model, inference_model = open_sesemi(
        arg2var[network], input_shape, nb_classes, base_lr, dropout_rate)
    sesemi_model.summary()

    super_datagen = ImageDataGenerator(
            width_shift_range=3,
            height_shift_range=3,
            horizontal_flip=hflip,
            preprocessing_function=gaussian_noise,
            fill_mode='reflect',
        )
    self_datagen = ImageDataGenerator(
            width_shift_range=3,
            height_shift_range=3,
            horizontal_flip=False,
            preprocessing_function=gaussian_noise,
            fill_mode='reflect',
        )

    super_data = super_datagen.flow(
            x_labeled, y_labeled, shuffle=True, batch_size=1, seed=None)
    self_data = self_datagen.flow(
            x_train, shuffle=True, batch_size=1, seed=None)
    train_data_loader = datagen(super_data, self_data, batch_size)

    lr_poly_decay = LRScheduler(base_lr, max_iter, lr_decay_power)
    evaluate = DenseEvaluator(inference_model, (x_test, y_test), hflip)
    
    # Fit the SESEMI model on mini-batches with data augmentation
    print('Run configuration:')
    print('network=%s,' % network, 'dataset=%s,' % dataset, \
          'horizontal_flip=%s,' % hflip, 'ZCA=%s,' % zca, \
          'nb_epochs=%d,' % epochs, 'batch_size=%d,' % batch_size, \
          'nb_labels=%d,' % len(x_labeled), 'gpu_id=%d' % args.gpu_id)
    sesemi_model.fit_generator(train_data_loader,
                               epochs=epochs, verbose=1,
                               steps_per_epoch=len(x_train) // batch_size,
                               callbacks=[lr_poly_decay, evaluate],)
    return
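
# Both SESEMI entry points call stratified_sample(y_train, labels_per_class) to
# pick the same number of labeled examples from every class. The helper is not
# shown; this is a hedged sketch (the seed handling is an assumption).
import numpy as np

def stratified_sample(y, labels_per_class, seed=0):
    rng = np.random.RandomState(seed)
    y = np.asarray(y).ravel()
    indices = []
    for c in np.unique(y):
        class_inds = np.where(y == c)[0]
        rng.shuffle(class_inds)
        indices.append(class_inds[:labels_per_class])
    return np.sort(np.concatenate(indices))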
Example #8
0
def main():
    global args, best_acc

    with open(args.config) as f:
        try:
            config = yaml.load(f, Loader=yaml.FullLoader)
        except AttributeError:
            print('###################################################')
            print('Please update pyyaml >= 5.1')
            print('###################################################')
            config = yaml.load(f)

    for k, v in config['common'].items():
        setattr(args, k, v)

    if args.val_freq is None:
        args.val_freq = args.save_freq

    if not os.path.exists(args.save_path):
        print('Create {}.'.format(args.save_path))
        os.makedirs(args.save_path)

    print('###################################################')
    print('Parameters')
    print(args)
    print('###################################################')

    # Data loading code
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = CIFAR10
    else:
        dataloader = CIFAR100

    train_dataset = dataloader(root=args.train_root,
                               train=True,
                               transform=transform_train)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.train_batch,
                                   shuffle=True,
                                   num_workers=args.workers)

    testset = dataloader(root=args.val_root,
                         train=False,
                         transform=transform_test)
    val_loader = data.DataLoader(testset,
                                 batch_size=args.test_batch,
                                 shuffle=False,
                                 num_workers=args.workers)
    if '18' in args.arch:
        model = ResNet18()
        print('Using ResNet18 for training')
    elif '50' in args.arch:
        model = ResNet50()
        print('Using ResNet50 for training')
    else:
        model = resnet20()
        print('Using resnet20 for training')
    model.cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = AdaXW(model.parameters(),
                      lr=args.base_lr,
                      weight_decay=args.weight_decay)
    # Resume
    ckpt_path = os.path.join(args.save_path, 'ckpt')
    if not os.path.exists(ckpt_path):
        print('Create {}.'.format(ckpt_path))
        os.makedirs(ckpt_path)

    weight_file = get_last_weights(ckpt_path + '/ckpt_step*.pth')
    last_step = 0
    if len(weight_file) == 0:
        print('No ckpt for resuming.')
    else:
        # Load checkpoint.
        print('==> Resuming from {}.'.format(weight_file))
        assert os.path.isfile(weight_file), 'Error: checkpoint file not found!'
        # checkpoint = torch.load(weight_file)
        checkpoint = load_state(weight_file, model)
        model.load_state_dict(checkpoint['state_dict'])
        if not args.evaluate:
            last_step = checkpoint['step']
            optimizer.load_state_dict(checkpoint['optimizer'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_top1, test_top5 = validate(val_loader, model,
                                                   criterion)
        print(' Test Loss:  %.8f, Test Acc.top1:  %.2f, Test Acc.top5:  %.2f' %
              (test_loss, test_top1, test_top5))
        return

    # Tensorboard
    tb_logger = SummaryWriter(args.save_path + '/tf_log')
    if not os.path.exists(args.save_path + '/tf_log'):
        print('Create {}.'.format(args.save_path + '/tf_log'))
        os.makedirs(args.save_path + '/tf_log')

    T_max = int(len(train_dataset) * args.epochs / args.train_batch)
    if getattr(args, 'max_iter', None):
        T_max = max(T_max, args.max_iter)
    if args.lr_multi:
        lr_multi = float(8) * args.train_batch / (8. * 32)
    else:
        lr_multi = 1.0
    print('Totally train {} steps.'.format(T_max))
    lr_scheduler = LRScheduler(max_steps=T_max,
                               optimizer=optimizer,
                               lr_mult=lr_multi,
                               args=args)
    # Train and val
    _ = train(train_loader, val_loader, model, criterion, optimizer, last_step,
              T_max, lr_scheduler, tb_logger)
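
# The auto-resume block above calls get_last_weights(ckpt_path + '/ckpt_step*.pth')
# and treats an empty return value as "no checkpoint to resume from". Only the
# name and that contract come from the call site; the body below is a sketch
# assuming checkpoints are named ckpt_step<NUMBER>.pth.
import glob
import os
import re

def get_last_weights(pattern):
    files = glob.glob(pattern)
    if not files:
        return ''
    def step_of(path):
        match = re.search(r'(\d+)', os.path.basename(path))
        return int(match.group(1)) if match else -1
    # Return the checkpoint with the largest step number.
    return max(files, key=step_of)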
def main(**kwargs):
    # 1. Parse command line arguments.
    opt._parse(kwargs)

    # 2. Visdom
    # vis = Visualizer(env=opt.env)

    # 3. GPU settings
    # n_gpu = utils.set_gpu('0,1')

    # 4. Configure model
    logging.info('==> Training model for clothing type: {}'.format(opt.category))
    cudnn.benchmark = True
    net = getattr(models, opt.model)(opt)

    # 5. Initialize logger
    cur_time = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime())
    initialize_logger(f'{opt.category}_{opt.model}_{cur_time}')

    # 6. Initialize checkpoints directory
    lr = opt.lr
    start_epoch = 1
    best_val_loss = float('inf')

    if opt.load_checkpoint_path:
        logging.info('==> Resuming from checkpoint...')
        checkpoint = torch.load(opt.load_checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        lr = checkpoint['lr']
        best_val_loss = checkpoint['best_val_loss']
        net.load_state_dict(checkpoint['state_dict'])

    # 7. Data setup
    train_dataset = FashionAIKeypoints(opt, phase='train')
    logging.info('Train sample number: {}'.format(len(train_dataset)))
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              num_workers=opt.num_workers,
                              collate_fn=train_dataset.collate_fn,
                              pin_memory=True)

    val_dataset = FashionAIKeypoints(opt, phase='val')
    logging.info('Val sample number: {}'.format(len(val_dataset)))
    val_loader = DataLoader(val_dataset,
                            batch_size=opt.batch_size,
                            shuffle=False,
                            num_workers=opt.num_workers,
                            collate_fn=val_dataset.collate_fn,
                            pin_memory=True)

    net = net.cuda()
    # net = DataParallel(net)
    loss = CPNLoss()
    loss = loss.cuda()

    # 8. Loss, optimizer and LR scheduler
    optimizer = torch.optim.SGD(net.parameters(),
                                lr,
                                momentum=0.9,
                                weight_decay=1e-4)
    lrs = LRScheduler(lr,
                      patience=3,
                      factor=0.1,
                      min_lr=0.01 * lr,
                      best_loss=best_val_loss)

    # 9. Training loop
    for epoch in range(start_epoch, opt.max_epochs + 1):
        # Training
        logging.info("Start training loop...")
        train_metrics, train_time = train(train_loader, net, loss, optimizer,
                                          lr)

        # Validating
        logging.info("Start validating loop...")
        with torch.no_grad():
            val_metrics, val_time = validate(val_loader, net, loss)

        log_model(epoch, lr, train_metrics, train_time, val_metrics, val_time)

        val_loss = np.mean(val_metrics[:, 0])
        lr = lrs.update_by_rule(val_loss)

        # Save checkpoints
        if val_loss < best_val_loss or epoch % 10 == 0 or lr is None:
            if val_loss < best_val_loss:
                best_val_loss = val_loss

            state_dict = (net.module.state_dict()
                          if hasattr(net, 'module') else net.state_dict())

            for key in state_dict.keys():
                state_dict[key] = state_dict[key].cpu()

            torch.save(
                {
                    'epoch': epoch,
                    'save_dir': opt.checkpoint_path,
                    'state_dict': state_dict,
                    'lr': lr,
                    'best_val_loss': best_val_loss
                }, opt.checkpoint_path /
                'kpt_{}_{:03d}.ckpt'.format(opt.category, epoch))

        if lr is None:
            logging.info('Training is early-stopped')
            break
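
# The keypoint-training loop above drives its LRScheduler through
# update_by_rule(), lowering the learning rate when validation loss plateaus
# and returning None to request early stopping. Only the constructor arguments
# and that contract are visible at the call site; the decay rule below is an
# assumption.
class LRScheduler:
    def __init__(self, lr, patience=3, factor=0.1, min_lr=0.0, best_loss=float('inf')):
        self.lr = lr
        self.patience = patience
        self.factor = factor
        self.min_lr = min_lr
        self.best_loss = best_loss
        self.num_bad_epochs = 0

    def update_by_rule(self, val_loss):
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            self.num_bad_epochs = 0
        else:
            self.num_bad_epochs += 1
            if self.num_bad_epochs > self.patience:
                new_lr = self.lr * self.factor
                if new_lr < self.min_lr:
                    return None  # signal the caller to stop training
                self.lr = new_lr
                self.num_bad_epochs = 0
        return self.lr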
Example #10
0
def main():
    global args, best_prec1
    args = parser.parse_args()

    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)

    proc = subprocess.Popen( 'mmdownload -f '+args.f+' -n '+args.n+' -o ./models/'+args.f+'_'+args.n+'/' , shell=True, executable='/bin/bash')
    proc.communicate()
    proc = subprocess.Popen( 'mmconvert -sf '+args.srcFramework+' --inputShape 224,224,3 -in models/'+args.f+'_'+args.n+'/imagenet_'+args.n+'.ckpt.meta -iw models/'+args.f+'_'+args.n+'/imagenet_'+args.n+'.ckpt --dstNodeName MMdnn_Output -df '+args.dstFramework+' -om models/'+args.n+'.pth' , shell=True, executable='/bin/bash')
    proc.communicate()

    MainModel = imp.load_source('MainModel', "models/"+args.model+".py")
    model = torch.load("models/"+args.n+".pth")



    if args.cuda:
        model = torch.nn.DataParallel(model).cuda()
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        model=model.cpu()
        criterion = nn.CrossEntropyLoss().cpu()


    # define the optimizer (the criterion is defined above)
    optimizer = torch.optim.SGD(model.parameters(), args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # auto resume from a checkpoint
    model_dir = args.model_dir
    start_epoch = 0
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not args.evaluate:
        best_prec1, start_epoch = utils.load_state(model_dir, model, optimizer=optimizer)
    writer = SummaryWriter(model_dir)

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    val_loader = torch.utils.data.DataLoader(
      datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
      ])),
      batch_size=args.batch_size, shuffle=False,
      num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, writer)
        return

    train_dataset_multi_scale = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        #    ColorAugmentation(),
            normalize,
        ]))

    train_dataset = datasets.ImageFolder(
      traindir,
      transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        #ColorAugmentation(),
        normalize,
      ]))


    train_sampler = None

    train_loader_multi_scale = torch.utils.data.DataLoader(
        train_dataset_multi_scale, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    train_loader = torch.utils.data.DataLoader(
      train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
      num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    niters = len(train_loader)

    lr_scheduler = LRScheduler(optimizer, niters, args)

    for epoch in range(start_epoch, args.epochs):


        # train for one epoch
        if epoch < args.epochs - 5:
            train(train_loader_multi_scale, model, criterion, optimizer, lr_scheduler, epoch, writer)
        else:
            train(train_loader, model, criterion, optimizer, lr_scheduler, epoch, writer)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, writer)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        utils.save_checkpoint(model_dir, {
          'epoch': epoch + 1,
          'model': args.model,
          'state_dict': model.state_dict(),
          'best_prec1': best_prec1,
          'optimizer': optimizer.state_dict(),
        }, is_best)
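
# main() above builds the mmdownload/mmconvert commands via string
# concatenation and shell=True. The sketch below shows the same two calls with
# subprocess.run and argument lists, which avoids shell quoting issues; the
# flags are copied from the commands above, while the helper name and directory
# layout are only illustrative.
import subprocess

def convert_pretrained_model(args):
    model_dir = './models/{}_{}/'.format(args.f, args.n)
    # Download the pretrained checkpoint with MMdnn.
    subprocess.run(['mmdownload', '-f', args.f, '-n', args.n, '-o', model_dir],
                   check=True)
    # Convert the TensorFlow checkpoint to a PyTorch .pth model.
    subprocess.run(['mmconvert',
                    '-sf', args.srcFramework,
                    '--inputShape', '224,224,3',
                    '-in', model_dir + 'imagenet_' + args.n + '.ckpt.meta',
                    '-iw', model_dir + 'imagenet_' + args.n + '.ckpt',
                    '--dstNodeName', 'MMdnn_Output',
                    '-df', args.dstFramework,
                    '-om', 'models/' + args.n + '.pth'],
                   check=True)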