def validation_func():
    """Evaluate the model on the test set and return the validation accuracy."""
    model.eval()
    # Attach to the device only when the inference executable is already built.
    if inference_model._executable:
        inference_model.attachToDevice()
    accuracy = test(inference_model, test_data, opts)
    # Release the device once validation is finished.
    inference_model.detachFromDevice()
    return accuracy
__author__ = 'Augustyn'

import sharedtools
import compileaddress
import fulltextsearch
import geocode
import nearbyaddresses
import validate
import validateaddressid

if __name__ == '__main__':
    sharedtools.setupUTF()
    # "Všechny testy" is Czech for "All tests". The original literal was
    # mojibake ("VÅ¡echny testy": UTF-8 bytes re-decoded as Latin-1); restored here.
    tester = sharedtools.FormalTester("Všechny testy")
    # Run every test suite against the shared result collector.
    compileaddress.test(tester)
    fulltextsearch.test(tester)
    geocode.test(tester)
    nearbyaddresses.test(tester)
    validate.test(tester)
    validateaddressid.test(tester)
    tester.saveToHTML("TestResults.html")
    # Removed the trailing duplicate no-argument test() calls: they re-ran every
    # suite after the report was already saved, without a result collector
    # (raising TypeError if test() requires the tester argument).
if args.mode == 'train': if not os.path.exists('checkpoint/' + args.exp_name): os.makedirs('checkpoint/' + args.exp_name) if args.resume: args.model_path = os.path.join('checkpoint', args.exp_name, '{}_{}_{}.pth.tar'.format(\ args.arch, args.resume_episode, args.resume_epoch)) model, optimizer, dataset = setup_model(args) train(args, model, optimizer, dataset=dataset) elif args.mode == 'test': args.model_path = os.path.join('checkpoint', args.exp_name, '{}_{}_{}.pth.tar'.format(\ args.arch, args.ckpt_episode, args.ckpt_epoch)) model = setup_model(args) # Setup output directories if args.save_output: setup_output(args) # run validate gts, preds, uncts = test(args, model, split=args.split, verbose=True) # eval metrics eval_metrics(args, gts, preds) if args.save_output: analysis(args) else: print 'please choose mode [train, test]'
def train(args, model, optimizer, dataset, episode=0):
    """Train a segmentation model for args.n_epoch epochs, periodically
    validating, tracking the best mean IoU, and saving checkpoints.

    Returns the (model, optimizer) pair after training.
    NOTE: Python 2 code (print statements, `async=True` cuda transfer).
    """
    trainloader = data.DataLoader(dataset, batch_size=args.batch_size,
                                  num_workers=8, shuffle=True, drop_last=True)
    # Per-class weights for the loss, kept on the GPU.
    class_weight = Variable(dataset.class_weight.cuda())
    lr = args.l_rate  # NOTE(review): unused local; the optimizer is set directly below.
    n_epoch = args.n_epoch
    optimizer.param_groups[0]['lr'] = args.l_rate
    model.train()
    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom(port=args.visdom)
        loss_window = vis.line(X=np.column_stack((np.zeros((1,)))),
                               Y=np.column_stack((np.zeros((1)))),
                               opts=dict(xlabel='epoch',
                                         ylabel='Loss',
                                         title=args.mode + '_' + args.exp_name + '_Episode_' + str(episode),
                                         legend=['Train Loss']))
    t1 = time.time()
    # Resume epoch numbering only when continuing the same episode.
    start_epoch = args.start_epoch if episode == args.start_episode else 0
    best_iou = -100.0
    # Checkpoint every save_percent fraction of the total epochs.
    # NOTE(review): if floor(n_epoch*save_percent) == 0 the modulo below
    # raises ZeroDivisionError — confirm save_percent is large enough.
    save_interval = int(floor(n_epoch*args.save_percent))
    for epoch in range(1 + start_epoch, n_epoch + 1):
        utils.adjust_learning_rate(optimizer, args.l_rate, args.lr_decay, epoch - 1, 1)
        for i, (images, labels, image_name) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda(async=True))
            optimizer.zero_grad()
            outputs = model(images)
            # Weighted 2D cross-entropy over the segmentation map.
            loss = cross_entropy2d(outputs, labels, class_weight)
            loss.backward()
            optimizer.step()
        # Periodic validation: every eval_interval-th checkpoint interval.
        if epoch % (save_interval*args.eval_interval) == 0:
            gts, preds, uncts = test(args, model=model, split='val')
            model.train()  # test() flips the model to eval mode; restore train mode.
            _, score = eval_metrics(args, gts, preds, verbose=False)
            print 'val Mean IoU: ', score['Mean IoU : \t']
            # Keep the checkpoint with the best validation mean IoU so far.
            if score['Mean IoU : \t'] >= best_iou:
                best_iou = score['Mean IoU : \t']
                state = {'episode': episode,
                         'epoch': epoch,
                         'model_state': model.state_dict(),
                         'optimizer_state' : optimizer.state_dict(),}
                print "update best model {}".format(best_iou)
                torch.save(state, "checkpoint/{}/{}_{}_{}_best_model.pkl".format(\
                    args.exp_name, args.arch, 'camvid', episode))
        # NOTE(review): the learning rate is adjusted a second time per epoch
        # with identical arguments (also called at the top of the loop) —
        # possibly an unintentional double decay; confirm before changing.
        utils.adjust_learning_rate(optimizer, args.l_rate, args.lr_decay, epoch - 1, 1)
        # Periodic checkpoint of the latest state.
        if epoch % save_interval == 0:
            print 'data_size : ', len(dataset)
            state = {
                'episode' : episode,
                'epoch': epoch,
                'arch': args.arch,
                'loss': loss.data[0],  # loss from the last batch of this epoch
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, 'checkpoint/{}/{}_{}_{}.pth.tar'.format(\
                args.exp_name, args.arch, episode, epoch))
        print("Epoch [%d/%d] Loss: %.4f lr:%.4f" % (epoch, n_epoch, loss.data[0],
                                                    optimizer.param_groups[0]['lr'] ))
        t2 = time.time()
        print save_interval, 'epoch time :', t2 - t1
        t1 = time.time()
        if args.visdom:
            # Append this epoch's training loss to the visdom line plot.
            vis.line(
                X=np.column_stack((np.ones((1,)) * epoch)),
                Y=np.column_stack((np.array([loss.data[0]]))),
                win=loss_window,
                update='append')
    return model, optimizer
# Build the LR scheduler, wrap the model for IPU execution, and run the
# full training loop over epochs 1..run_opts.epoch.
lr_scheduler = get_lr_scheduler(run_opts, optimizer)
training_model = convert_to_ipu_model(model, run_opts, optimizer)
train(training_model, train_data, run_opts, lr_scheduler, range(1, run_opts.epoch + 1), optimizer)

# Optionally average weights across the saved checkpoints; the exact
# strategy is selected by run_opts.weight_avg_strategy.
if run_opts.weight_avg_strategy != 'none':
    average_fn = weight_avg.create_average_fn(run_opts)
    weight_avg.average_model_weights(run_opts.checkpoint_path, average_fn)

if not run_opts.no_validation:
    if run_opts.checkpoint_path == "":
        # No checkpoints were written: validate the in-memory weights.
        # destroy() releases the training model's IPU before the inference
        # model is compiled and run.
        training_model.destroy()
        model.eval()
        inference_model = poptorch.inferenceModel(model, inference_model_opts)
        acc = test(inference_model, test_data, run_opts)
        result_dict = {
            "validation_epoch": run_opts.epoch,
            "validation_iteration": run_opts.logs_per_epoch * run_opts.epoch,
            "validation_accuracy": acc
        }
        utils.Logger.log_validate_results(result_dict)
        test_data.terminate()
    else:
        # Checkpoints exist: free the training IPU and collect every saved
        # ".pt" checkpoint file — presumably each is validated by code that
        # follows this chunk (not visible here).
        training_model.destroy()
        checkpoint_files = [
            os.path.join(run_opts.checkpoint_path, file_name)
            for file_name in os.listdir(run_opts.checkpoint_path)
            if file_name.endswith(".pt")
        ]
def train(config):
    """Train the UNet generator against a discriminator (pix2pix-style GAN).

    Builds train/val data loaders, optionally loads pretrained generator and
    discriminator weights, then alternates discriminator and generator updates
    (softplus GAN loss + L1 reconstruction weighted by config.lamb), logging
    every 100 iterations, validating every epoch, and snapshotting every
    config.snapshot_interval epochs.
    """
    gpu_manage(config)

    # --- data ---------------------------------------------------------------
    train_dataset = Dataset(config.train_dir)
    val_dataset = Dataset(config.val_dir)
    training_data_loader = DataLoader(dataset=train_dataset, num_workers=config.threads,
                                      batch_size=config.batchsize, shuffle=True)
    val_data_loader = DataLoader(dataset=val_dataset, num_workers=config.threads,
                                 batch_size=config.test_batchsize, shuffle=False)

    # --- models -------------------------------------------------------------
    gen = UNet(in_ch=config.in_ch, out_ch=config.out_ch, gpu_ids=config.gpu_ids)
    if config.gen_init is not None:
        gen.load_state_dict(torch.load(config.gen_init))
        print('load {} as pretrained model'.format(config.gen_init))

    dis = Discriminator(in_ch=config.in_ch, out_ch=config.out_ch, gpu_ids=config.gpu_ids)
    if config.dis_init is not None:
        dis.load_state_dict(torch.load(config.dis_init))
        print('load {} as pretrained model'.format(config.dis_init))

    opt_gen = optim.Adam(gen.parameters(), lr=config.lr,
                         betas=(config.beta1, 0.999), weight_decay=0.00001)
    opt_dis = optim.Adam(dis.parameters(), lr=config.lr,
                         betas=(config.beta1, 0.999), weight_decay=0.00001)

    criterionL1 = nn.L1Loss()
    criterionMSE = nn.MSELoss()
    criterionSoftplus = nn.Softplus()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if config.cuda:
        gen = gen.cuda(0)
        dis = dis.cuda(0)
        criterionL1 = criterionL1.cuda(0)
        criterionMSE = criterionMSE.cuda(0)
        criterionSoftplus = criterionSoftplus.cuda(0)
    # Removed the pre-allocated real_a/real_b FloatTensors and their
    # Variable wrappers: they were dead code, overwritten by the first
    # statement of the batch loop before any use.

    logreport = LogReport(log_dir=config.out_dir)
    testreport = TestReport(log_dir=config.out_dir)

    for epoch in range(1, config.epoch + 1):
        print('Epoch', epoch, datetime.now())
        for iteration, batch in enumerate(tqdm(training_data_loader)):
            real_a, real_b = batch[0], batch[1]
            real_a = F.interpolate(real_a, size=256).to(device)
            real_b = F.interpolate(real_b, size=256).to(device)

            # Calling the module (not .forward) keeps registered hooks working.
            fake_b = gen(real_a)

            # --- update D ----------------------------------------------------
            opt_dis.zero_grad()
            fake_ab = torch.cat((real_a, fake_b), 1)
            # detach() stops discriminator gradients from reaching the generator.
            pred_fake = dis(fake_ab.detach())
            batchsize, _, w, h = pred_fake.size()

            real_ab = torch.cat((real_a, real_b), 1)
            pred_real = dis(real_ab)
            # Softplus GAN loss, normalized per prediction element.
            loss_d_fake = torch.sum(criterionSoftplus(pred_fake)) / batchsize / w / h
            loss_d_real = torch.sum(criterionSoftplus(-pred_real)) / batchsize / w / h
            loss_d = loss_d_fake + loss_d_real
            loss_d.backward()
            # Step the discriminator only every config.minimax-th epoch.
            if epoch % config.minimax == 0:
                opt_dis.step()

            # --- update G ----------------------------------------------------
            opt_gen.zero_grad()
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis(fake_ab)
            loss_g_gan = torch.sum(criterionSoftplus(-pred_fake)) / batchsize / w / h
            # Adversarial term plus L1 reconstruction weighted by config.lamb.
            loss_g = loss_g_gan + criterionL1(fake_b, real_b) * config.lamb
            loss_g.backward()
            opt_gen.step()

            if iteration % 100 == 0:
                logreport({
                    'epoch': epoch,
                    'iteration': len(training_data_loader) * (epoch - 1) + iteration,
                    'gen/loss': loss_g.item(),
                    'dis/loss': loss_d.item(),
                })

        # Validation once per epoch, without building autograd graphs.
        with torch.no_grad():
            log_test = test(config, val_data_loader, gen, criterionMSE, epoch)
            testreport(log_test)

        if epoch % config.snapshot_interval == 0:
            checkpoint(config, epoch, gen, dis)

        logreport.save_lossgraph()
        testreport.save_lossgraph()
    print('Done', datetime.now())