#test_counter lists how many training images the network will have seen at each point the test set
#is evaluated: once before training starts (num = 0) and then once after every epoch.
test_counter = [num * training_dataset.num_images for num in range(num_epochs + 1)]

#Create the loss function, which measures how far the network's predictions are from the correct labels.
#We use Cross Entropy Loss because it punishes the model more heavily for being confident in a wrong answer.
loss_function = nn.CrossEntropyLoss()
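#A quick, optional illustration of that behavior (the scores below are made-up example logits, not
#outputs of the network, and torch is assumed to be imported earlier in this notebook): a confident
#wrong prediction is penalized far more heavily than an unsure one.
demo_scores_confident_wrong = torch.tensor([[8.0, 0.0, 0.0]])  #very confident in class 0
demo_scores_unsure = torch.tensor([[0.5, 0.4, 0.3]])           #roughly even across the classes
demo_true_label = torch.tensor([1])                            #the correct class is actually class 1
print(loss_function(demo_scores_confident_wrong, demo_true_label))  #large loss, roughly 8.0
print(loss_function(demo_scores_unsure, demo_true_label))           #modest loss, roughly 1.1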
#The learning rate controls how quickly the weights of the neural net change.  Too high a learning rate may make
#the model skip past the optimal value, while too low a learning rate may leave the model stuck in a local minimum.
learning_rate = 0.2
#Momentum lets stochastic gradient descent keep a fraction of the previous update's direction, which
#smooths out noisy gradients and helps the optimizer push through small local minima.
momentum = 0.9
#The optimizer tweaks the weights of the network in order to minimize the loss function, making the
#model as accurate as possible.  Here we are using Stochastic Gradient Descent, which computes the
#gradient on one mini-batch of the training data at a time rather than on the whole dataset.
#neural_net.parameters() - Look for the "Parameters In-Depth" heading here: 
#https://www.deeplearningwizard.com/deep_learning/practical_pytorch/pytorch_feedforward_neuralnetwork/#parameters-in-depth
optimizer = torch.optim.SGD(neural_net.parameters(), lr=learning_rate, momentum=momentum)
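#A simplified sketch of the update applied to every weight on each optimizer.step() when momentum is
#set (see the torch.optim.SGD documentation for the exact rule, which also covers dampening and Nesterov):
#    velocity = momentum * velocity + gradient      (the velocity starts out as the first gradient)
#    weight   = weight - learning_rate * velocity
#Because the velocity carries a fraction of the previous updates, the optimizer can coast through flat
#regions and small bumps instead of relying only on the current batch's gradient.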

#Set the random seed manually so that results can be reproduced during testing.  Disabling cuDNN
#below also removes a source of nondeterminism when running on the GPU.
random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)


#This is the training function.  It performs one full pass (epoch) over the training data.
def train(epoch):
    #Loop over the training loader one batch at a time.  enumerate() gives us the batch index along
    #with that batch's images and labels.
    for batch_idx, (images, labels) in enumerate(training_loader):
        #If you do not call this, the gradients will accumulate over time.  Gradients are computed during
        #loss.backward(), but they are not applied to the weights until optimizer.step(), so we have to
        #manually zero the gradients at the start of each loop iteration.
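        #For example (made-up numbers): if a weight's gradient were 0.5 after one batch and 0.3 after
        #the next, skipping zero_grad() would leave .grad at 0.8 instead of 0.3, and the next
        #optimizer.step() would move that weight by the wrong amount.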
        optimizer.zero_grad()