def main():
    """Build a fresh ``Network``, train it, then run validation on it.

    Training uses cross-entropy loss and an RMSprop optimizer (lr=0.001)
    for ``NUM_EPOCHS`` epochs.
    """
    model = Network()
    loss_fn = nn.CrossEntropyLoss()
    rmsprop = optim.RMSprop(model.parameters(), lr=0.001)
    train(
        net=model,
        criterion=loss_fn,
        optimizer=rmsprop,
        num_epochs=NUM_EPOCHS,
    )
    validate(model)
def load_state(T, chkpt_path, device, network_temp=2):
    """Create the network and optimizer, restoring them from a checkpoint if present.

    Args:
        T: First positional argument forwarded to ``Network``.
        chkpt_path: Path of a checkpoint file, or ``None`` to start fresh.
        device: Device the network is moved to and the checkpoint is mapped to.
        network_temp: Forwarded to ``Network`` as ``temp``.

    Returns:
        A ``(net, optimizer, games_trained, replay_mem)`` tuple. Without a
        usable checkpoint, ``games_trained`` is 0 and ``replay_mem`` is a new
        ``ReplayMemory``.
    """
    model = Network(T, temp=network_temp).to(device)
    adam = Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

    has_checkpoint = chkpt_path is not None and os.path.exists(chkpt_path)
    if not has_checkpoint:
        # Nothing to restore: start from scratch.
        return model, adam, 0, ReplayMemory()

    saved = torch.load(chkpt_path, map_location=torch.device(device))
    model.load_state_dict(saved[MODEL_KEY])
    adam.load_state_dict(saved[OPTIMIZER_KEY])
    return model, adam, saved[GAMES_TRAINED_KEY], saved[REPLAY_MEM_KEY]
def main():
    """Train ``Network`` on the configured data, then seed a finetune model.

    Configuration is read from the module-level ``args`` and ``save_root``.
    The function:

    1. creates the checkpoint directory and selects GPU/CPU mode,
    2. builds the train/validation loaders, the network, loss and optimizer,
    3. if ``args.evaluate`` is set, runs a single ``test`` pass and returns,
    4. otherwise trains for ``args.epochs`` epochs, evaluating every 10th
       epoch (except epoch 0) and saving ``best_model.pth`` whenever the
       validation accuracy improves, plus a periodic checkpoint every
       1000 steps,
    5. finally builds a 65-class ``Network`` and copies every compatible
       tensor of the trained network into it.
    """
    save_dir = join(save_root, args.save_dir)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    if args.gpu is not None:
        print(('Using GPU %d' % args.gpu))
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        print('CPU mode')

    print('Process number: %d' % (os.getpid()))

    # DataLoader initialize ILSVRC2012_train_processed
    # NOTE: the bare `DataLoader` name is used as a dataset here; the batching
    # loader is torch.utils.data.DataLoader.
    trainpath = join(args.data_path, args.domain)
    train_data = DataLoader(trainpath, split='train',
                            classes=args.classes, ssl=True)
    train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                               batch_size=args.batch,
                                               shuffle=True,
                                               num_workers=args.cores)

    valpath = join(args.data_path, args.domain)
    val_data = DataLoader(valpath, split='validation', classes=args.classes)
    val_loader = torch.utils.data.DataLoader(dataset=val_data,
                                             batch_size=args.batch,
                                             shuffle=True,
                                             num_workers=args.cores)

    print('Images: train %d, validation %d' % (train_data.N, val_data.N))

    # Network initialize
    net = Network(args.classes)
    if args.gpu is not None:
        net.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr,
                                momentum=0.9, weight_decay=5e-4)

    # Constructed for its side effects and API parity with logger_test;
    # nothing in this function writes to it.
    logger = Logger(join(save_dir, 'train'))
    logger_test = Logger(join(save_dir, 'test'))

    ############## TESTING ###############
    if args.evaluate:
        test(net, criterion, None, val_loader, 0)
        return

    ############## TRAINING ###############
    print(('Start training: lr %f, batch size %d, classes %d'
           % (args.lr, args.batch, args.classes)))
    print(('Checkpoint: ' + args.save_dir))

    # Rolling windows (last 100 entries) of data-loading and forward times.
    batch_time, net_time = [], []
    steps = args.iter_start
    best_acc = -1

    for epoch in range(args.epochs):
        if epoch % 10 == 0 and epoch > 0:
            net, acc = test(net, criterion, logger_test, val_loader, steps)
            if best_acc < acc:
                # Remember the new best; without this every evaluation would
                # overwrite best_model.pth regardless of accuracy.
                best_acc = acc
                net.save(join(save_dir, 'best_model.pth'))

        # Called for its effect on the optimizer; the returned value is unused.
        adjust_learning_rate(optimizer, epoch, init_lr=args.lr,
                             step=20, decay=0.1)

        end = time()
        # The third item of each batch is not needed for training.
        for images, labels, _ in train_loader:
            batch_time.append(time() - end)
            if len(batch_time) > 100:
                del batch_time[0]

            images = Variable(images)
            labels = Variable(labels)
            if args.gpu is not None:
                images = images.cuda()
                labels = labels.cuda()

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            t = time()
            outputs = net(images)
            net_time.append(time() - t)
            if len(net_time) > 100:
                del net_time[0]

            # NOTE(review): prec1/prec5 and the scalar loss below are computed
            # but not consumed in this function -- presumably meant for
            # logging; confirm before removing.
            prec1, prec5 = compute_accuracy(outputs.cpu().data,
                                            labels.cpu().data, topk=(1, 5))

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            loss = float(loss.cpu().data.numpy())

            steps += 1

            if steps % 1000 == 0:
                filename = join(save_dir, ('%06d.pth.tar' % (steps)))
                net.save(filename)
                print('Saved: ' + args.save_dir)

            end = time()

    ###########################################################################
    #                           classifier finetune                           #
    ###########################################################################
    finetune_model = Network(65)
    # state_dict() returns a new mapping on every call, so updating it in
    # place never touches the model; merge into one copy and load it back.
    finetune_state = finetune_model.state_dict()
    pretrained_dict = {
        k: v
        for k, v in net.state_dict().items()
        # Skip tensors whose size differs (e.g. a classifier head sized for a
        # different number of classes); load_state_dict would reject them.
        if k in finetune_state and v.size() == finetune_state[k].size()
    }
    finetune_state.update(pretrained_dict)
    finetune_model.load_state_dict(finetune_state)