Example #1

import csv
import time

import torch
def train_net(train_loader=None,
              net=None,
              batch_size=128,
              n_epochs=500,
              learning_rate=0.01,
              opt=0,
              saved_model=None):
    # Print the hyperparameters for this training run:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    n_batches = len(train_loader)

    # Create the loss function and optimizer
    loss, optimizer = get_loss_optimizer(net, learning_rate)

    training_start_time = time.time()

    # Training results are written to a CSV file, one per optimizer
    f_out = open("tr_" + str(opt) + ".csv", "w")
    wrt = csv.writer(f_out)

    total_train_loss = 0

    # net = net.to('cuda')  # uncomment for GPU
    # Loop over n_epochs
    for epoch in range(n_epochs):

        running_loss = 0.0
        print_every = max(1, n_batches // 10)
        start_time = time.time()

        # Log the previous epoch's total loss, then reset the accumulator
        wrt.writerow([epoch, total_train_loss])
        total_train_loss = 0

        # Save a checkpoint every 250 epochs
        if (epoch + 1) % 250 == 0:
            checkpoint = {
                'model': net,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(checkpoint, 'checkpoint.pt')

        for i, data in enumerate(train_loader):

            # Unpack the batch: inputs, one-hot labels, and per-sample SNR
            inputs, labels, snr = data
            # inputs, labels = inputs.to('cuda'), labels.to('cuda')  # uncomment for GPU

            # Set the parameter gradients to zero
            optimizer.zero_grad()

            # Forward pass, backward pass, optimize
            outputs = net(inputs)
            # CrossEntropyLoss expects class indices, so convert one-hot labels
            loss_size = loss(outputs, torch.argmax(labels, dim=1))
            loss_size.backward()
            optimizer.step()

            # Accumulate statistics (.item() detaches the scalar from the graph)
            running_loss += loss_size.item()
            total_train_loss += loss_size.item()

            # Print the running loss roughly every 10% of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".
                      format(epoch + 1, int(100 * (i + 1) / n_batches),
                             running_loss / print_every,
                             time.time() - start_time))
                # Reset running loss and time
                running_loss = 0.0
                start_time = time.time()

    print("Training finished, took {:.2f}s".format(time.time() -
                                                   training_start_time))
    final = {
        'model': net,
        'state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict()
    }
    torch.save(final, saved_model)
    f_out.close()
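
Both examples rely on a get_loss_optimizer() helper that is not shown here. A minimal sketch of what it might look like, assuming cross-entropy loss and plain SGD (the actual loss and optimizer in the original helper are not known from this code):

import torch.nn as nn
import torch.optim as optim

def get_loss_optimizer(net, learning_rate):
    # Assumed implementation: the original helper may use a different
    # loss or optimizer (e.g. Adam). CrossEntropyLoss pairs with the
    # class-index targets produced by torch.argmax(labels, dim=1) above.
    loss = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=learning_rate)
    return loss, optimizer
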
Example #2

import csv
import time

import torch
from torch.optim.lr_scheduler import StepLR
def train_net(train_loader=None,
              net=None,
              batch_size=128,
              n_epochs=5,
              learning_rate=0.001,
              saved_model=None,
              fname=None):
    # Print the hyperparameters for this training run:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    # Number of batches per epoch
    n_batches = len(train_loader)

    # Create the loss function and optimizer
    loss, optimizer = get_loss_optimizer(net, learning_rate)

    # Time the whole training run
    training_start_time = time.time()

    f_out = open(fname, "w")
    wrt = csv.writer(f_out)

    total_train_loss = 0

    scheduler = StepLR(optimizer, step_size=250, gamma=0.1)
    net = net.float()
    net = net.to('cuda')
    # Loop over n_epochs
    for epoch in range(n_epochs):

        running_loss = 0.0
        print_every = max(1, n_batches // 10)
        start_time = time.time()

        # Log the previous epoch's total loss, then reset the accumulator
        wrt.writerow([epoch, total_train_loss])
        total_train_loss = 0

        # Save a checkpoint every 250 epochs
        if (epoch + 1) % 250 == 0:
            checkpoint = {
                'model': net,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(checkpoint, 'checkpoint.pt')

        for i, data in enumerate(train_loader):

            # Unpack the batch and move everything to the GPU
            inputs, labels, snr = data
            inputs, labels, snr = inputs.to('cuda'), labels.to('cuda'), snr.to('cuda')

            # Set the parameter gradients to zero
            optimizer.zero_grad()

            # Forward pass, backward pass, optimize
            outputs = net(inputs.float())
            # CrossEntropyLoss expects class indices, so convert one-hot labels
            labels = labels.squeeze().cpu()
            loss_size = loss(outputs.cpu(), torch.argmax(labels, dim=1))
            loss_size.backward()
            optimizer.step()

            # Accumulate statistics (.item() detaches the scalar from the graph)
            running_loss += loss_size.item()
            total_train_loss += loss_size.item()

            # Print the running loss roughly every 10% of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.4f} took: {:.2f}s".
                      format(epoch + 1, int(100 * (i + 1) / n_batches),
                             running_loss / print_every,
                             time.time() - start_time))
                # Reset running loss and time
                running_loss = 0.0
                start_time = time.time()

        # Decay the learning rate once per epoch
        scheduler.step()

    print("Training finished, took {:.2f}s".format(time.time() -
                                                   training_start_time))
    final = {
        'model': net,
        'state_dict': net.state_dict(),
        'optimizer': optimizer.state_dict()
    }

    torch.save(final, saved_model)
    f_out.close()
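
Both examples checkpoint a dict holding the pickled model object, its state_dict, and the optimizer state. A minimal sketch of resuming from such a checkpoint (the map_location and learning rate here are assumptions, and loading a pickled model requires its class definition to be importable):

import torch

checkpoint = torch.load('checkpoint.pt', map_location='cpu')
net = checkpoint['model']                      # full pickled model object
net.load_state_dict(checkpoint['state_dict'])  # restore the trained weights
loss, optimizer = get_loss_optimizer(net, learning_rate=0.001)
optimizer.load_state_dict(checkpoint['optimizer'])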
Example #3

import torch
import torch.nn as nn
import torch.optim as optim

# net = net.to(device)  # uncomment after defining device

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(4):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(dataload.train_dataloader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 200 == 199:    # print every 200 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

print('Finished Training')
torch.save(net.state_dict(), './try/model_trained.pth')
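
This last example saves only the state_dict, so restoring it requires instantiating the network class first. A minimal sketch, assuming the model class is named Net (the class name is an assumption; the path comes from the snippet):

net = Net()  # hypothetical: substitute the actual class of `net` above
net.load_state_dict(torch.load('./try/model_trained.pth'))
net.eval()   # inference mode: disables dropout, uses running batch-norm stats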