# Training script for the eyebrow-sequence regression model.
# Assumes Net, weights_init, EyeBrowDataset, and the argparse `parser`
# are defined elsewhere in this module.
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter


def main():
    args = parser.parse_args()

    # Fix seeds and disable cuDNN autotuning for reproducibility.
    torch.manual_seed(999)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    D_xs = Net(args.window_size)
    D_xs.apply(weights_init)
    if torch.cuda.is_available():
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        D_xs = torch.nn.DataParallel(D_xs).cuda()
    else:
        D_xs = D_xs.to(device)

    D_xs_solver = optim.Adam(D_xs.parameters(), lr=args.learning_rate)
    l1Loss = nn.L1Loss().to(device)

    e_shift = 0
    min_val_loss = float('inf')
    no_improve_epoch = 0

    now = datetime.now()
    log_path = 'log/lr_{}_time_{}'.format(args.learning_rate,
                                          now.strftime("%Y%m%d-%H%M%S"))
    writer = SummaryWriter(log_path)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    for epoch in range(args.epochs):
        train_loader = torch.utils.data.DataLoader(
            EyeBrowDataset('dataset/train_all.mat', '', args.window_size,
                           transform=transform),
            batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True)
        print('len of train_loader', len(train_loader))

        # Training pass.
        D_xs.train()
        for i, (data, value) in enumerate(train_loader):
            if len(data) < args.batch_size:
                continue
            D_xs.zero_grad()
            x = data.to(device)
            vv = value.float().to(device)
            score = D_xs(x)
            v_loss = l1Loss(score, vv)
            v_loss.backward()
            D_xs_solver.step()
            # Log with a global step so curves from different epochs don't overlap.
            writer.add_scalar('train/loss', v_loss.item(),
                              epoch * len(train_loader) + i)
            print('epoch:[%2d] [%4d/%4d] loss: %.4f'
                  % (epoch + e_shift, i, len(train_loader), v_loss.item()))
        print('epoch:', epoch)

        # Validation pass.
        vali_loader = torch.utils.data.DataLoader(
            EyeBrowDataset('dataset/validation_all.mat', '', args.window_size,
                           transform=transform),
            batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True)
        print('len of vali_loader', len(vali_loader))

        mse_sum = 0
        D_xs.eval()
        with torch.no_grad():
            for i, (data, value) in enumerate(vali_loader):
                x = data.to(device)
                vv = value.float().to(device)
                score = D_xs(x)
                v_loss = l1Loss(score, vv)
                mse_sum += v_loss.item()
                print('epoch:[%2d] [%4d/%4d] loss: %.4f'
                      % (epoch + e_shift, i, len(vali_loader), v_loss.item()))
        val_loss = mse_sum / float(i + 1)
        print("*** Epoch: [%2d], val_mse: %.6f ***" % (epoch + e_shift, val_loss))

        # If performance improves, save the new model; if it does not improve
        # for `patiences` epochs, stop training (early stopping).
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            no_improve_epoch = 0
            val_loss = round(val_loss, 2)
            torch.save(D_xs.state_dict(),
                       '{}/netD_xs_epoch_{}_val_loss_{}.pth'.format(
                           args.outf, epoch + e_shift, val_loss))
            print("performance improved, saved the new model......")
        else:
            no_improve_epoch += 1
            if no_improve_epoch > args.patiences:
                print("stop training....")
                break
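
# `main()` above reads its hyperparameters from a module-level argparse
# parser that is not shown. A minimal sketch of what that parser might look
# like: the flag names are taken from the `args.*` accesses above, but all
# default values here are hypothetical.
import argparse

parser = argparse.ArgumentParser(description='Eyebrow sequence regression')
parser.add_argument('--window_size', type=int, default=16)
parser.add_argument('--batch_size', type=int, default=8)
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--workers', type=int, default=4)
parser.add_argument('--learning_rate', type=float, default=1e-4)
parser.add_argument('--patiences', type=int, default=10,
                    help='epochs without improvement before early stop')
parser.add_argument('--outf', default='checkpoints',
                    help='directory for saved model weights')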
# Keypoint-regression training loop. Assumes Net, eviz, and the module-level
# n_epochs / train_loader / valid_loader / save_location_path are defined
# elsewhere in this module.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.init as I
import torch.optim as optim


def train(n_epochs=n_epochs, train_loader=train_loader,
          valid_loader=valid_loader, save_location_path=save_location_path):
    train_on_gpu = torch.cuda.is_available()

    def init_weights(m):
        if isinstance(m, nn.Linear):
            I.xavier_uniform_(m.weight)

    model = Net()
    model.apply(init_weights)
    if train_on_gpu:
        model.cuda()

    criterion = nn.MSELoss()
    optimizer = optim.Adam(params=model.parameters(), lr=0.001)

    valid_loss_min = np.inf

    for epoch in range(1, n_epochs + 1):
        # Keep track of training and validation loss
        train_loss = 0.0
        valid_loss = 0.0

        model.train()
        for data in train_loader:
            # Grab the image and its corresponding keypoints
            images = data['image']
            key_pts = data['keypoints']
            if train_on_gpu:
                images, key_pts = images.cuda(), key_pts.cuda()
            # Flatten keypoints & convert data to float for the regression loss
            key_pts = key_pts.view(key_pts.size(0), -1).float()
            images = images.float()

            optimizer.zero_grad()              # Clear the gradients
            output = model(images)             # Forward pass
            loss = criterion(output, key_pts)  # Compute the loss
            loss.backward()                    # Compute the gradients
            optimizer.step()                   # Update using the gradients
            train_loss += loss.item() * images.size(0)

        # Validation
        model.eval()
        with torch.no_grad():
            for data in valid_loader:
                images = data['image']
                key_pts = data['keypoints']
                if train_on_gpu:
                    images, key_pts = images.cuda(), key_pts.cuda()
                key_pts = key_pts.view(key_pts.size(0), -1).float()
                images = images.float()
                output = model(images)
                loss = criterion(output, key_pts)
                valid_loss += loss.item() * images.size(0)

        # Average the per-sample losses over the datasets
        train_loss = train_loss / len(train_loader.dataset)
        valid_loss = valid_loss / len(valid_loader.dataset)
        print(f"epoch: {epoch} \t trainLoss: {train_loss} \t valLoss: {valid_loss}")
        eviz.send_data(current_epoch=epoch,
                       current_train_loss=train_loss,
                       current_val_loss=valid_loss)

        # Checkpoint whenever validation loss improves
        if valid_loss < valid_loss_min:
            valid_loss_min = valid_loss
            torch.save(model.state_dict(), save_location_path)
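
# `train()` above expects each loader batch to be a dict with 'image' and
# 'keypoints' entries. A minimal sketch of a compatible Dataset, assuming
# images and keypoints are already loaded as NumPy arrays; the class and
# attribute names here are hypothetical, only the two dict keys come from
# the code above.
import numpy as np
import torch
from torch.utils.data import Dataset


class KeypointDataset(Dataset):
    def __init__(self, images, keypoints):
        # images: (N, C, H, W) float array; keypoints: (N, K, 2) float array
        self.images = images
        self.keypoints = keypoints

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return {'image': torch.from_numpy(self.images[idx]),
                'keypoints': torch.from_numpy(self.keypoints[idx])}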
# EMNIST classifier training harness. Assumes Net and the argparse `args`
# namespace are defined elsewhere in this module.
import logging

import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets, transforms


class Training:
    def __init__(self):
        self.net = Net()
        self.net.apply(self.init_weights)

        # Basic logging
        logging.basicConfig(filename="cnn2.log", level=logging.DEBUG)
        logging.info(self.net)
        logging.info("Number of parameters: {}".format(
            self.count_parameters(self.net)))

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.optimizer = torch.optim.SGD(self.net.parameters(),
                                         lr=args.lr, momentum=0.9)
        self.criterion = nn.CrossEntropyLoss().to(self.device)
        self.best_acc = 0
        self.net.to(self.device)

    def loader(self):
        # Define the transforms to apply to the input data
        transform_train = transforms.Compose([
            transforms.RandomCrop(28, padding=4),
            transforms.ToTensor(),
            # transforms.Normalize((mean,), (std,)),
        ])
        transform_valid = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize((mean,), (std,)),
        ])
        train = datasets.EMNIST(args.root, split="balanced", train=True,
                                download=True, transform=transform_train)
        test = datasets.EMNIST(args.root, split="balanced", train=False,
                               download=True, transform=transform_valid)
        self.train_loader = torch.utils.data.DataLoader(
            train, batch_size=args.batch_size, shuffle=True,
            num_workers=2, drop_last=True)
        self.test_loader = torch.utils.data.DataLoader(
            test, batch_size=args.test_batch_size, shuffle=False,
            num_workers=2, drop_last=True)

    def init_weights(self, m):
        if isinstance(m, nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    def count_parameters(self, model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    def inf_generator(self, iterable):
        """Allows training with DataLoaders in a single infinite loop:
        for i, (x, y) in enumerate(inf_generator(train_loader)):
        """
        iterator = iter(iterable)
        while True:
            try:
                yield next(iterator)
            except StopIteration:
                iterator = iter(iterable)

    def train(self):
        self.loader()
        data_gen = self.inf_generator(self.train_loader)
        batches_per_epoch = len(self.train_loader)

        for itr in range(args.EPOCHS * batches_per_epoch):
            self.optimizer.zero_grad()
            x, y = next(data_gen)
            # EMNIST images are stored transposed; reshape to NHWC and swap
            # the spatial axes before feeding the network.
            x = x.view(-1, 28, 28, 1)
            x = torch.transpose(x, 1, 2)
            x = x.to(self.device)
            y = y.to(self.device)

            logits = self.net(x)
            loss = self.criterion(logits, y)
            loss.backward()
            self.optimizer.step()

            # Evaluate once per epoch and checkpoint on improvement
            if itr % batches_per_epoch == 0:
                with torch.no_grad():
                    train_acc = self.accuracy(self.net, self.train_loader)
                    val_acc = self.accuracy(self.net, self.test_loader)
                    if val_acc > self.best_acc:
                        torch.save({"state_dict": self.net.state_dict()},
                                   "alpha_weights.pth")
                        self.best_acc = val_acc
                    msg = ("Epoch {:04d} | "
                           "Train Acc {:.4f} | Test Acc {:.4f}".format(
                               itr // batches_per_epoch, train_acc, val_acc))
                    logging.info(msg)
                    print(msg)

    def one_hot(self, x, K):
        return np.array(x[:, None] == np.arange(K)[None, :], dtype=int)

    def accuracy(self, model, dataset_loader):
        total_correct = 0
        for x, y in dataset_loader:
            x = x.view(-1, 28, 28, 1)
            x = torch.transpose(x, 1, 2)
            x = x.to(self.device)
            # One-hot encode the 47 balanced-split classes, then argmax back
            # to class indices for comparison with the predictions
            y = self.one_hot(np.array(y.numpy()), 47)
            target_class = np.argmax(y, axis=1)
            predicted_class = np.argmax(model(x).cpu().detach().numpy(), axis=1)
            total_correct += np.sum(predicted_class == target_class)
        return total_correct / len(dataset_loader.dataset)
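
# A hypothetical driver for the Training class above. The class reads a
# module-level `args` namespace; the attribute names come from the `args.*`
# accesses in the class, but the values here are placeholders.
import argparse

args = argparse.Namespace(lr=0.01, root='./data', batch_size=128,
                          test_batch_size=256, EPOCHS=20)

trainer = Training()
trainer.train()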