def evaluate_model(model, device, data_loader):
    """Evaluate the loss on a single batch (subsample) of data_loader."""
    model.eval()
    with torch.no_grad():
        for data, targets in data_loader:
            # Rotate a copy of the batch and keep the rotation angles
            targets, angles = rotate_tensor(data.numpy())
            targets = torch.from_numpy(targets).to(device)
            angles = torch.from_numpy(angles).to(device)
            angles = angles.view(angles.size(0), 1)

            # Forward passes
            data = data.to(device)
            f_data = model(data)        # [N, 2, 1, 1]
            f_targets = model(targets)  # [N, 2, 1, 1]

            # Apply rotation matrix to f_data with the feature transformer
            f_data_transformed = feature_transformer(f_data, angles, device)

            # Squared Frobenius (pairwise Euclidean) distance between features
            frob_distance = torch.nn.PairwiseDistance()
            loss = (frob_distance(f_data_transformed.view(-1, 2),
                                  f_targets.view(-1, 2))**2).mean()
            break  # only evaluate on the first batch
    return loss
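
# feature_transformer is used throughout this file but not defined here. Below
# is a minimal sketch of what it is assumed to do, based on the comments above:
# treat each [N, 2, 1, 1] feature as a 2-vector and rotate it by the per-sample
# angle with a 2x2 rotation matrix. The repo's actual implementation may differ.
def feature_transformer_sketch(x, angles, device):
    """Rotate 2D feature vectors x [N, 2, 1, 1] by angles [N, 1] (radians)."""
    N = x.size(0)
    x = x.view(N, 2, 1)  # column vectors
    cos = torch.cos(angles).view(N, 1, 1)
    sin = torch.sin(angles).view(N, 1, 1)
    # Build per-sample 2x2 rotation matrices and apply them batched
    row0 = torch.cat([cos, -sin], dim=2)           # [N, 1, 2]
    row1 = torch.cat([sin, cos], dim=2)            # [N, 1, 2]
    R = torch.cat([row0, row1], dim=1).to(device)  # [N, 2, 2]
    return torch.matmul(R, x).view(N, 2, 1, 1)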
def evaluate_model(args, model, device, data_loader):
    """Evaluate the loss on a single batch (subsample) of data_loader."""
    model.eval()
    with torch.no_grad():
        for data, targets in data_loader:
            # Rotate the batch and keep the relative rotation angles
            data, targets, angles = rotate_tensor(data.numpy(),
                                                  args.init_rot_range,
                                                  args.relative_rot_range)
            data = torch.from_numpy(data).to(device)
            targets = torch.from_numpy(targets).to(device)
            angles = torch.from_numpy(angles).to(device)
            angles = angles.view(angles.size(0), 1)

            # Forward passes
            f_data = model(data)        # [N, 2, 1, 1]
            f_targets = model(targets)  # [N, 2, 1, 1]

            # Apply rotation matrix to f_data with the feature transformer
            f_data_transformed = feature_transformer(f_data, angles, device)

            # Loss selected by args.loss
            loss = define_loss(args, f_data_transformed, f_targets)
            break  # only evaluate on the first batch
    return loss.cpu()
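
# define_loss is referenced above but not defined in this file. A plausible
# sketch, based only on the argparse choices ('forbenius', 'cosine_squared',
# 'cosine_abs') declared in main() below: the 'forbenius' branch mirrors the
# squared pairwise-distance loss of the older training loop; the two cosine
# branches are assumptions about the intended variants.
def define_loss_sketch(args, f_transformed, f_targets):
    x = f_transformed.view(-1, 2)
    y = f_targets.view(-1, 2)
    if args.loss == 'forbenius':
        # Squared Euclidean (Frobenius) distance between feature vectors
        return (torch.nn.PairwiseDistance()(x, y)**2).sum()
    cos = torch.nn.functional.cosine_similarity(x, y)  # in [-1, 1]
    if args.loss == 'cosine_squared':
        return ((1 - cos)**2).sum()
    return torch.abs(1 - cos).sum()  # 'cosine_abs'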
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=10000, metavar='N',
                        help='input batch size for reconstruction testing (default: 10,000)')
    parser.add_argument('--epochs', type=int, default=20, metavar='N',
                        help='number of epochs to train (default: 20)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--store-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before storing training loss')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # Set up dataloaders
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False,
                       transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    train_loader_eval = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True,
                       transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=args.test_batch_size, shuffle=True)

    # Init model and optimizer
    model = Encoder(device).to(device)
    path = "./output"

    # Initialise weights
    model.apply(weights_init)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    rotation_test_loss = []
    train_loss = []
    test_loss = []

    # Where the magic happens
    for epoch in range(1, args.epochs + 1):
        for batch_idx, (data, targets) in enumerate(train_loader):
            model.train()
            # Rotate a copy of the batch and keep the rotation angles
            targets, angles = rotate_tensor(data.numpy())
            targets = torch.from_numpy(targets).to(device)
            angles = torch.from_numpy(angles).to(device)
            angles = angles.view(angles.size(0), 1)

            # Forward passes
            data = data.to(device)
            optimizer.zero_grad()
            f_data = model(data)        # [N, 2, 1, 1]
            f_targets = model(targets)  # [N, 2, 1, 1]

            # Apply rotation matrix to f_data with the feature transformer
            f_data_transformed = feature_transformer(f_data, angles, device)

            # Squared Frobenius (pairwise Euclidean) distance between features
            frob_distance = torch.nn.PairwiseDistance()
            loss = (frob_distance(f_data_transformed.view(-1, 2),
                                  f_targets.view(-1, 2))**2).sum()

            # Backprop
            loss.backward()
            optimizer.step()

            # Log progress
            if batch_idx % args.log_interval == 0:
                sys.stdout.write(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\r'.format(
                        epoch, batch_idx * len(data), len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss))
                sys.stdout.flush()

            # Store training and test loss
            if batch_idx % args.store_interval == 0:
                # Train loss
                train_loss.append(evaluate_model(model, device, train_loader_eval))
                # Test loss
                test_loss.append(evaluate_model(model, device, test_loader))
                # Rotation loss
                rotation_test_loss.append(rotation_test(model, device, test_loader))

    # Save model
    save_model(args, model)

    # Save losses
    train_loss = np.array(train_loss)
    test_loss = np.array(test_loss)
    rotation_test_loss = np.array(rotation_test_loss)

    np.save(path + '/training_loss', train_loss)
    np.save(path + '/test_loss', test_loss)
    np.save(path + '/rotation_test_loss', rotation_test_loss)

    plot_learning_curve(args, train_loss, test_loss, rotation_test_loss)
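
# rotate_tensor is assumed to produce rotated copies of an [N, 1, H, W] batch.
# A minimal sketch of the three-argument version used by the newer main() below:
# each digit gets a random "initial" rotation in [0, init_rot_range); the target
# is the same digit rotated further by a relative angle in (-rot_range, rot_range),
# and those relative angles are what the feature transformer must reproduce.
# Angles here are radians (main() converts the CLI degrees); scipy wants degrees.
# Imports are repeated for self-containment.
import numpy as np
from scipy.ndimage import rotate as nd_rotate

def rotate_tensor_sketch(inputs, init_rot_range, rot_range):
    """Return (data, targets, angles); angles are the relative rotations."""
    N = inputs.shape[0]
    init = np.random.uniform(0, init_rot_range, N)
    rel = np.random.uniform(-rot_range, rot_range, N)
    data, targets = [], []
    for i in range(N):
        img = inputs[i, 0]
        data.append(nd_rotate(img, np.degrees(init[i]), reshape=False))
        targets.append(nd_rotate(img, np.degrees(init[i] + rel[i]), reshape=False))
    data = np.stack(data)[:, np.newaxis].astype(np.float32)
    targets = np.stack(targets)[:, np.newaxis].astype(np.float32)
    return data, targets, rel.astype(np.float32)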
def main():
    # Training settings
    list_of_choices = ['forbenius', 'cosine_squared', 'cosine_abs']

    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for the rotation test (default: 1000)')
    parser.add_argument('--epochs', type=int, default=20, metavar='N',
                        help='number of epochs to train (default: 20)')
    parser.add_argument('--lr', type=float, default=0.0001, metavar='LR',
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--store-interval', type=int, default=50, metavar='N',
                        help='how many batches to wait before storing training loss')
    parser.add_argument('--name', type=str, default='',
                        help='name of the run that is added to the output directory')
    parser.add_argument('--loss', dest='loss', default='forbenius', choices=list_of_choices,
                        help='type of loss: (forbenius) norm or cosine difference (default: forbenius)')
    parser.add_argument('--init-rot-range', type=float, default=360,
                        help='upper bound in degrees of the initial random rotation of digits (default: 360)')
    parser.add_argument('--relative-rot-range', type=float, default=90, metavar='theta',
                        help='relative rotation range (-theta, theta)')
    parser.add_argument('--eval-batch-size', type=int, default=200, metavar='N',
                        help='batch size for evaluation')
    args = parser.parse_args()

    # Print arguments
    for arg in vars(args):
        sys.stdout.write('{} = {} \n'.format(arg, getattr(args, arg)))
        sys.stdout.flush()
    sys.stdout.write('Random torch seed:{}\n'.format(torch.initial_seed()))
    sys.stdout.flush()

    # Convert rotation ranges from degrees to radians
    args.init_rot_range = args.init_rot_range * np.pi / 180
    args.relative_rot_range = args.relative_rot_range * np.pi / 180

    # Create save path
    path = "./output_" + args.name
    if not os.path.exists(path):
        os.makedirs(path)

    sys.stdout.write('Start training\n')
    sys.stdout.flush()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    writer = SummaryWriter(path, comment='Encoder atan2 MNIST')

    # Set up dataloaders
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    train_loader_eval = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True,
                       transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    # Init model and optimizer
    model = Encoder(device).to(device)

    # Initialise weights
    model.apply(weights_init)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Init loss logs
    prediction_mean_error = []  # average rotation-prediction error in degrees
    prediction_error_std = []   # std of the rotation-prediction error
    train_loss = []

    n_iter = 0
    for epoch in range(1, args.epochs + 1):
        sys.stdout.write('Epoch {}/{} \n '.format(epoch, args.epochs))
        sys.stdout.flush()
        for batch_idx, (data, targets) in enumerate(train_loader):
            model.train()
            # Rotate the batch and keep the relative rotation angles
            data, targets, angles = rotate_tensor(data.numpy(),
                                                  args.init_rot_range,
                                                  args.relative_rot_range)
            data = torch.from_numpy(data).to(device)
            targets = torch.from_numpy(targets).to(device)
            angles = torch.from_numpy(angles).to(device)
            angles = angles.view(angles.size(0), 1)

            # Forward passes
            optimizer.zero_grad()
            f_data = model(data)        # [N, 2, 1, 1]
            f_targets = model(targets)  # [N, 2, 1, 1]

            # Apply rotation matrix to f_data with the feature transformer
            f_data_transformed = feature_transformer(f_data, angles, device)

            # Loss selected by args.loss
            loss = define_loss(args, f_data_transformed, f_targets)

            # Backprop
            loss.backward()
            optimizer.step()

            # Log progress
            if batch_idx % args.log_interval == 0:
                sys.stdout.write(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\r'.format(
                        epoch, batch_idx * len(data), len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), loss))
                sys.stdout.flush()
                writer.add_scalar('Training Loss', loss, n_iter)

            # Store training loss and rotation-prediction error
            if batch_idx % args.store_interval == 0:
                # Train loss
                train_loss.append(evaluate_model(args, model, device, train_loader_eval))

                # Rotation-prediction error on the training set
                mean, std = rotation_test(args, model, device, train_loader_eval)
                prediction_mean_error.append(mean)
                writer.add_scalar('Mean test error', mean, n_iter)
                prediction_error_std.append(std)

            n_iter += 1

    # Save model
    save_model(args, model)

    # Save losses
    train_loss = np.array(train_loss)
    prediction_mean_error = np.array(prediction_mean_error)
    prediction_error_std = np.array(prediction_error_std)

    np.save(path + '/training_loss', train_loss)
    np.save(path + '/prediction_mean_error', prediction_mean_error)
    np.save(path + '/prediction_error_std', prediction_error_std)

    plot_learning_curve(args, train_loss, prediction_mean_error,
                        prediction_error_std, path)

    # Get diagnostics per digit
    get_error_per_digit(args, model, device)
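
# rotation_test is assumed to measure how accurately the relative rotation can
# be read back out of the 2D feature space: the predicted angle between f(x)
# and f(x_rotated) is recovered with atan2 on the 2D features (consistent with
# the 'Encoder atan2 MNIST' SummaryWriter comment) and compared to the true
# relative angle, returning (mean, std) of the error in degrees. A sketch:
def rotation_test_sketch(args, model, device, data_loader):
    model.eval()
    with torch.no_grad():
        for data, _ in data_loader:
            data, targets, angles = rotate_tensor(data.numpy(),
                                                  args.init_rot_range,
                                                  args.relative_rot_range)
            f_data = model(torch.from_numpy(data).to(device)).view(-1, 2)
            f_targets = model(torch.from_numpy(targets).to(device)).view(-1, 2)
            # Angle of each feature vector, then the wrapped difference
            theta = torch.atan2(f_targets[:, 1], f_targets[:, 0]) \
                  - torch.atan2(f_data[:, 1], f_data[:, 0])
            theta = torch.atan2(torch.sin(theta), torch.cos(theta))  # wrap to (-pi, pi]
            error = (theta.cpu().numpy() - angles) * 180 / np.pi
            break  # one batch, as in evaluate_model
    return np.abs(error).mean(), error.std()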