                     val_acc)
        plotter.plot('attention_auc', 'val', 'Attention AUC', epoch, val_auc)
        plotter.plot('attention_f1', 'val', 'Attention F1', epoch, val_f1)
        plotter.save(['Tutorial Plots Attention'])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return model


# In[24]:


if __name__ == "__main__":
    # model_ft = Resnet_Classifier()
    model_ft = Attention(path="model34")
    model_ft = model_ft.to(device)

    # for param in model_ft.parameters():
    #     print(param.requires_grad)
    #     print(param.size())

    # weight each class by the inverse of its sample count
    criterion = nn.CrossEntropyLoss(
        weight=torch.Tensor([1.0 / 165.0, 1.0 / 122.0]).to(device))
    optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.0001)
    # decay the learning rate by 10x every 10 epochs
    scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=10, gamma=0.1)
    # model_ft = train_model(model_ft, criterion, optimizer_ft, scheduler,
    #                        num_epochs=200)

    global plotter
    plotter = utils.VisdomLinePlotter(env_name='Tutorial Plots Resnet')


# In[ ]:
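
# Side note (added sketch, not part of the original script): the
# CrossEntropyLoss weights above look like inverse class frequencies,
# presumably for 165 samples of class 0 and 122 of class 1. A small helper
# that derives such weights from raw counts could look like this; the counts
# themselves are an assumption.

def inverse_frequency_weights(class_counts):
    """Return per-class loss weights proportional to 1 / count."""
    return torch.Tensor([1.0 / c for c in class_counts])

# inverse_frequency_weights([165, 122]) reproduces the tensor used above, so
# the under-represented class contributes more to the loss.
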
def run(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_worker = 2
    n_epoch = args.epochs

    # seed CPU and GPU RNGs for reproducibility
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    print('Load Train and Test Set')
    train_loader = DataLoader(MnistBags(target_number=args.target_number,
                                        min_target_count=args.min_target_count,
                                        mean_bag_length=args.mean_bag_length,
                                        var_bag_length=args.var_bag_length,
                                        scale=args.scale,
                                        num_bag=args.num_bags_train,
                                        seed=args.seed,
                                        train=True),
                              batch_size=args.batchsize,
                              shuffle=True,
                              num_workers=n_worker,
                              pin_memory=torch.cuda.is_available())
    test_loader = DataLoader(MnistBags(target_number=args.target_number,
                                       min_target_count=args.min_target_count,
                                       mean_bag_length=args.mean_bag_length,
                                       var_bag_length=args.var_bag_length,
                                       scale=args.scale,
                                       num_bag=args.num_bags_test,
                                       seed=args.seed,
                                       train=False),
                             batch_size=args.batchsize,
                             shuffle=False,
                             num_workers=n_worker,
                             pin_memory=torch.cuda.is_available())

    # resume from the latest checkpoint if one exists
    checkpoint = load_ckpt()
    if checkpoint:
        print('Resume training ...')
        start_epoch = checkpoint.epoch
        model = checkpoint.model
    else:
        print('Brand new training ...')
        start_epoch = 0
        model = Attention()

    # spread the model across multiple GPUs if available
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           betas=(0.9, 0.999), weight_decay=args.reg)
    if checkpoint:
        try:
            optimizer.load_state_dict(checkpoint.optimizer)
        except Exception:
            print('[WARNING] optimizer not restored from last checkpoint, '
                  'continuing without previous state')
    # free the checkpoint reference
    del checkpoint

    log_dir = os.path.join('logs', args.logname)
    n_cv_epoch = 1  # evaluate on the test set every n epochs
    with SummaryWriter(log_dir) as writer:
        print('\nTraining started ...')
        # epoch numbering is 1-based
        for epoch in range(start_epoch + 1, n_epoch + start_epoch + 1):
            train(model, optimizer, train_loader, epoch, writer)
            if epoch % n_cv_epoch == 0:
                with torch.no_grad():
                    test(model, optimizer, test_loader, epoch, writer)
            save_ckpt(model, optimizer, epoch)
        print('\nTraining finished ...')
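
# Sketch (an assumption, not from the original source): run() expects an
# argparse-style namespace carrying every attribute read above. A minimal
# parser that supplies them could look like this; all defaults are
# illustrative placeholders, not the project's actual settings.

def parse_args():
    import argparse
    parser = argparse.ArgumentParser(description='Attention MIL on MNIST bags')
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--reg', type=float, default=1e-5,
                        help='weight decay passed to Adam')
    parser.add_argument('--target_number', type=int, default=9)
    parser.add_argument('--min_target_count', type=int, default=1)
    parser.add_argument('--mean_bag_length', type=int, default=10)
    parser.add_argument('--var_bag_length', type=int, default=2)
    parser.add_argument('--scale', type=float, default=1.0)
    parser.add_argument('--num_bags_train', type=int, default=200)
    parser.add_argument('--num_bags_test', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=1)
    parser.add_argument('--logname', type=str, default='attention_mil')
    return parser.parse_args()

# usage: run(parse_args())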