import csv
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR


def train_net(train_loader=None, net=None, batch_size=128, n_epochs=500,
              learning_rate=0.01, opt=0, saved_model=None):
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    n_batches = len(train_loader)

    #Create our loss and optimizer functions
    loss, optimizer = get_loss_optimizer(net, learning_rate)

    training_start_time = time.time()

    #Training results written to a csv file - a different file for each optimizer
    f_out = open("tr_" + str(opt) + ".csv", "w")
    wrt = csv.writer(f_out)
    #net = net.to('cuda')

    #Loop for n_epochs
    for epoch in range(n_epochs):
        running_loss = 0.0
        print_every = max(1, n_batches // 10)
        start_time = time.time()
        total_train_loss = 0

        #Save a checkpoint every 250 epochs
        if (epoch + 1) % 250 == 0:
            checkpoint = {
                'model': net,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(checkpoint, 'checkpoint.pt')

        i = 0
        for data in train_loader:
            #Get the inputs and wrap them in Variable objects
            [inputs, labels, snr] = data
            #inputs, labels, snr = Variable(inputs).to('cuda'), Variable(labels).to('cuda'), Variable(snr)
            inputs, labels, snr = Variable(inputs), Variable(labels), Variable(snr)

            #Set the parameter gradients to zero
            optimizer.zero_grad()

            #Forward pass, backward pass, optimize
            outputs = net(inputs)
            #Convert one-hot labels to class indices for the loss
            loss_size = loss(outputs, torch.argmax(labels, dim=1))
            loss_size.backward()
            optimizer.step()

            #Accumulate statistics
            running_loss += loss_size.item()
            total_train_loss += loss_size.item()

            #Print statistics roughly every 10% of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                    epoch + 1, int(100 * (i + 1) / n_batches),
                    total_train_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            i += 1

        #Log the total training loss for this epoch
        wrt.writerow([epoch, total_train_loss])

    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))
    final = {
        'model': net,
        'state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    torch.save(final, saved_model)
    f_out.close()
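Both versions of train_net call a get_loss_optimizer helper that is not shown in this listing. The sketch below is only an assumption of what such a helper could look like, pairing a cross-entropy criterion with an SGD/Adam choice keyed off an opt flag; it is not the original implementation.

# Hypothetical sketch of the get_loss_optimizer helper; the opt argument and the
# Adam branch are assumptions, not part of the original code.
import torch.nn as nn
import torch.optim as optim

def get_loss_optimizer(net, learning_rate, opt=0):
    loss = nn.CrossEntropyLoss()  # criterion expecting class-index targets
    if opt == 0:
        optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    return loss, optimizer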
def train_net(train_loader=None, net=None, batch_size=128, n_epochs=5,
              learning_rate=0.001, saved_model=None, fname=None):
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    #Get the number of training batches
    n_batches = len(train_loader)

    #Create our loss and optimizer functions
    loss, optimizer = get_loss_optimizer(net, learning_rate)

    #Time for printing
    training_start_time = time.time()

    #Training results written to a csv file
    f_out = open(fname, "w")
    wrt = csv.writer(f_out)

    #Decay the learning rate by 10x every 250 epochs
    scheduler = StepLR(optimizer, step_size=250, gamma=0.1)

    net = net.float()
    net = net.to('cuda')

    #Loop for n_epochs
    for epoch in range(n_epochs):
        running_loss = 0.0
        print_every = max(1, n_batches // 10)
        start_time = time.time()
        total_train_loss = 0

        #Save a checkpoint every 250 epochs
        if (epoch + 1) % 250 == 0:
            checkpoint = {
                'model': net,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(checkpoint, 'checkpoint.pt')

        i = 0
        for data in train_loader:
            [inputs, labels, snr] = data
            #print(inputs.shape)

            #Wrap them in Variable objects and move them to the GPU
            inputs, labels, snr = Variable(inputs).to('cuda'), Variable(labels).to('cuda'), Variable(snr).to('cuda')
            #inputs, labels, snr = Variable(inputs), Variable(labels), Variable(snr)

            #Set the parameter gradients to zero
            optimizer.zero_grad()

            #Forward pass, backward pass, optimize
            outputs = net(inputs.float())
            #Convert one-hot labels to class indices for the loss
            targets = torch.argmax(labels.squeeze(), dim=1)
            loss_size = loss(outputs, targets)
            loss_size.backward()
            optimizer.step()

            #Accumulate statistics
            running_loss += loss_size.item()
            total_train_loss += loss_size.item()

            #Print statistics roughly every 10% of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.4f} took: {:.2f}s".format(
                    epoch + 1, int(100 * (i + 1) / n_batches),
                    total_train_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                running_loss = 0.0
                start_time = time.time()
            i += 1

        #Step the learning-rate scheduler and log the total loss for this epoch
        scheduler.step()
        wrt.writerow([epoch, total_train_loss])

    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))
    final = {
        'model': net,
        'state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    torch.save(final, saved_model)
    f_out.close()
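A minimal driver for the GPU version of train_net might look like the following; RadioDataset and MyNet are placeholder names for a dataset that yields (inputs, labels, snr) tuples and for the network class, neither of which appears in this listing.

# Hypothetical usage; RadioDataset and MyNet are placeholders, not names from the original code.
from torch.utils.data import DataLoader

train_set = RadioDataset(split='train')  # assumed to yield (inputs, labels, snr)
train_loader = DataLoader(train_set, batch_size=128, shuffle=True)

net = MyNet()
train_net(train_loader=train_loader, net=net, batch_size=128, n_epochs=5,
          learning_rate=0.001, saved_model='final_model.pt', fname='tr_log.csv')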
# net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(4):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(dataload.train_dataloader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 200 == 199:  # print the average loss every 200 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')
torch.save(net.state_dict(), './try/model_trained.pth')
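For reference, the artifacts saved above can be restored as sketched below; Net() stands in for whatever model class was trained, and the checkpoint layout follows the dictionaries written by train_net.

# Restore the plain state_dict saved by the loop above (Net is a placeholder for the model class).
net = Net()
net.load_state_dict(torch.load('./try/model_trained.pth'))
net.eval()  # switch to inference mode

# Restore a full checkpoint dictionary as written by train_net()
checkpoint = torch.load('checkpoint.pt')
net = checkpoint['model']
net.load_state_dict(checkpoint['state_dict'])
net.eval()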