Exemple #1
0
def main():
    """Build a network, train it, then run validation on it.

    Training uses cross-entropy loss and an RMSprop optimizer (lr=0.001)
    for ``NUM_EPOCHS`` epochs.
    """
    model = Network()
    loss_fn = nn.CrossEntropyLoss()
    rmsprop = optim.RMSprop(model.parameters(), lr=0.001)

    train(net=model, criterion=loss_fn, optimizer=rmsprop, num_epochs=NUM_EPOCHS)
    validate(model)
Exemple #2
0
def load_state(T, chkpt_path, device, network_temp=2):
    """Create the network and optimizer, restoring them from a checkpoint if one exists.

    Args:
        T: First positional argument forwarded to ``Network``.
        chkpt_path: Path to a checkpoint file, or ``None`` to start fresh.
        device: Device the network is moved to and the checkpoint is mapped onto.
        network_temp: Value passed to ``Network`` as ``temp``.

    Returns:
        A tuple ``(net, optimizer, games_trained, replay_mem)``. When no
        checkpoint file is found, ``games_trained`` is 0 and ``replay_mem``
        is a new ``ReplayMemory``.
    """
    net = Network(T, temp=network_temp).to(device)
    optimizer = Adam(net.parameters(), lr=1e-4, weight_decay=1e-4)

    has_checkpoint = chkpt_path is not None and os.path.exists(chkpt_path)
    if not has_checkpoint:
        # Nothing to restore: start from an empty training history.
        return net, optimizer, 0, ReplayMemory()

    saved = torch.load(chkpt_path, map_location=torch.device(device))
    net.load_state_dict(saved[MODEL_KEY])
    optimizer.load_state_dict(saved[OPTIMIZER_KEY])
    return net, optimizer, saved[GAMES_TRAINED_KEY], saved[REPLAY_MEM_KEY]
Exemple #3
0
def main():
    """Train (or only evaluate) the network, then set up a model for finetuning.

    Configuration comes from the module-level ``args`` and ``save_root``.

    Steps performed:
      1. Create the save directory and select GPU/CPU mode.
      2. Build training and validation data loaders from
         ``args.data_path/args.domain``.
      3. If ``args.evaluate`` is set, run ``test`` once and return.
      4. Otherwise train for ``args.epochs`` epochs with SGD, evaluating every
         10 epochs (skipping epoch 0) and saving ``best_model.pth`` whenever
         the validation accuracy improves; a numbered checkpoint is also saved
         every 1000 steps.
      5. Create a 65-class ``Network`` and copy into it every trained
         parameter whose name and shape match.
    """
    save_dir = join(save_root, args.save_dir)
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(save_dir, exist_ok=True)

    if args.gpu is not None:
        print('Using GPU %d' % args.gpu)
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        print('CPU mode')

    print('Process number: %d' % (os.getpid()))

    # Data loaders: train and validation splits live under the same directory.
    trainpath = join(args.data_path, args.domain)
    train_data = DataLoader(trainpath,
                            split='train',
                            classes=args.classes,
                            ssl=True)
    train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                               batch_size=args.batch,
                                               shuffle=True,
                                               num_workers=args.cores)

    valpath = join(args.data_path, args.domain)
    val_data = DataLoader(valpath, split='validation', classes=args.classes)
    val_loader = torch.utils.data.DataLoader(dataset=val_data,
                                             batch_size=args.batch,
                                             shuffle=True,
                                             num_workers=args.cores)

    # NOTE(review): true division yields a float; the value is not used in the
    # visible part of this function.
    iter_per_epoch = train_data.N / args.batch
    print('Images: train %d, validation %d' % (train_data.N, val_data.N))

    # Network, loss and optimizer.
    net = Network(args.classes)
    if args.gpu is not None:
        net.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=5e-4)

    logger = Logger(join(save_dir, 'train'))
    logger_test = Logger(join(save_dir, 'test'))

    ############## TESTING ###############
    if args.evaluate:
        test(net, criterion, None, val_loader, 0)
        return

    ############## TRAINING ###############
    print('Start training: lr %f, batch size %d, classes %d' %
          (args.lr, args.batch, args.classes))
    print('Checkpoint: ' + args.save_dir)

    # Rolling windows (last 100 entries) of data-loading and forward-pass times.
    batch_time, net_time = [], []
    steps = args.iter_start
    best_acc = -1
    for epoch in range(args.epochs):
        if epoch % 10 == 0 and epoch > 0:
            net, acc = test(net, criterion, logger_test, val_loader, steps)
            if acc > best_acc:
                # Remember the new best so a later, worse evaluation does not
                # overwrite the best checkpoint.
                best_acc = acc
                net.save(join(save_dir, 'best_model.pth'))
        lr = adjust_learning_rate(optimizer,
                                  epoch,
                                  init_lr=args.lr,
                                  step=20,
                                  decay=0.1)

        end = time()
        for i, (images, labels, original) in enumerate(train_loader):
            # Time spent waiting for this batch since the previous step ended.
            batch_time.append(time() - end)
            if len(batch_time) > 100:
                del batch_time[0]

            images = Variable(images)
            labels = Variable(labels)
            if args.gpu is not None:
                images = images.cuda()
                labels = labels.cuda()

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            t = time()
            outputs = net(images)
            net_time.append(time() - t)
            if len(net_time) > 100:
                del net_time[0]

            prec1, prec5 = compute_accuracy(outputs.cpu().data,
                                            labels.cpu().data,
                                            topk=(1, 5))
            acc = prec1

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Convert to a plain Python float so the graph is not kept alive.
            loss = float(loss.cpu().data.numpy())

            steps += 1

            if steps % 1000 == 0:
                filename = join(save_dir, ('%06d.pth.tar' % (steps)))
                net.save(filename)
                print('Saved: ' + args.save_dir)

            end = time()

    ###########################################################################################################
    #                                   classifier finetune                                                   #
    ###########################################################################################################
    finetune_model = Network(65)
    # Build the target state dict once; state_dict() returns a fresh mapping on
    # every call, so updating a temporary would never reach the model.
    finetune_state = finetune_model.state_dict()
    pretrained_dict = {
        k: v
        for k, v in net.state_dict().items()
        # Skip entries whose shape differs (e.g. a classifier head sized for a
        # different number of classes) so load_state_dict does not raise.
        if k in finetune_state and v.shape == finetune_state[k].shape
    }
    finetune_state.update(pretrained_dict)
    finetune_model.load_state_dict(finetune_state)