def testNonLinear(X, Y):
    """Train a Linear(2, 5) -> Tanh -> Linear(5, 1) -> Sigmoide network by hand.

    Runs ``maxIter`` full-batch iterations of forward pass, MSE loss on the
    thresholded predictions, and manual back-propagation with a gradient
    step of 0.05.

    Args:
        X: input samples fed to the first linear layer.
        Y: targets; reshaped to a column with ``Y.reshape(-1, 1)``.

    Returns:
        Tuple ``(loss, "Nonlinear", maxIter)`` where ``loss`` is the list of
        summed MSE values, one per iteration.
    """
    sigmoide = Non_linear.Sigmoide()
    tanh = Non_linear.Tanh()
    coche1 = Linear(2, 5)
    coche2 = Linear(5, 1)
    mse = MSE()

    loss = []
    maxIter = 100
    for _ in range(maxIter):
        # forward
        res_lin1 = coche1.forward(X)
        res_tanh = tanh.forward(res_lin1)
        res_lin2 = coche2.forward(res_tanh)
        res_sigmoide = sigmoide.forward(res_lin2)

        # loss, computed on the 0/1 decisions rather than on the raw sigmoid
        res = np.array([[1 if proba > 0.5 else 0] for proba in res_sigmoide])
        loss.append(sum(mse.forward(Y.reshape(-1, 1), res)))

        # back-propagation
        res_mse = mse.backward(Y.reshape(-1, 1), res)
        delta_sig = sigmoide.backward_delta(res_lin2, res_mse)

        coche2.zero_grad()
        coche2.backward_update_gradient(res_tanh, delta_sig)
        # The input of the second linear layer is res_tanh (not X), and its
        # delta must be propagated with the weights used in the forward pass,
        # i.e. before the parameters are updated.
        delta_lin2 = coche2.backward_delta(res_tanh, delta_sig)
        coche2.update_parameters(0.05)

        delta_tanh = tanh.backward_delta(res_lin1, delta_lin2)
        coche1.zero_grad()
        coche1.backward_update_gradient(X, delta_tanh)
        coche1.update_parameters(0.05)

    return loss, "Nonlinear", maxIter
def readCommand(argv):
    """Process the command line, train the fixed MLP and save its weights.

    Options:
        -m / --modelName: directory in which ``model.bin`` is written (required).
        -d / --data:      torchfile with the input data (default ``data.bin``).
        -t / --target:    torchfile with the labels (default ``labels.bin``).

    Args:
        argv: list of command-line arguments (without the program name).

    Raises:
        Exception: on unrecognised positional arguments, or when
            ``--modelName`` is missing.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-m', '--modelName', help=default('modelConfig'))
    parser.add_option('-d', '--data', help=default('input'),
                      default='data.bin', type="string")
    parser.add_option('-t', '--target', help=default('gradoutput'),
                      default='labels.bin', type="string")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))

    model_name = options.modelName
    # Fail before the expensive training run instead of crashing when the
    # output path is built at the very end.
    if not model_name:
        raise Exception('Missing required option: -m/--modelName')

    Data = torchfile.load(options.data)
    Labels = torchfile.load(options.target)
    Data = torch.tensor(normalize(Data)).double()
    # Each sample is flattened to a vector of 108 * 108 values.
    Data = Data.reshape(Data.shape[0], 108 * 108)
    Labels = torch.tensor(Labels).long()

    my_model = Model.Model()
    my_model.addLayer(Linear(108 * 108, 1024))
    my_model.addLayer(ReLu())
    my_model.addLayer(Linear(1024, 256))
    my_model.addLayer(ReLu())
    my_model.addLayer(Linear(256, 6))
    my_model.addLayer(ReLu())

    # NOTE(review): the meaning of the numeric arguments is defined by
    # train_and_test, which is not visible here.
    train_and_test(my_model, Data, Labels, 1, 432, 0.01, 0.001)

    # Create the output directory if needed; real failures (permissions, a
    # file in the way) are no longer silently swallowed.
    os.makedirs(model_name, exist_ok=True)

    # Only the linear layers (indices 0, 2, 4) carry parameters.
    weights = []
    for layer_index in (0, 2, 4):
        layer = my_model.layers[layer_index]
        weights.append(layer.W)
        weights.append(layer.B)
    torch.save(weights, os.path.join(model_name, "model.bin"))
def __init__(self, hidden_layers):
    """Assemble the network and store it in ``self.model`` as a ``Sequential``.

    The stack starts with ``Linear(2, 25)`` + ``ReLu``, adds
    ``hidden_layers - 1`` further ``Linear(25, 25)`` + ``ReLu`` pairs, and
    ends with ``tanh`` followed by ``Linear(25, 2)``.

    :param hidden_layers: number of Linear/ReLu blocks, counting the first one.
    """
    Items = []
    linear = Linear(2, 25)
    Items.append(linear)
    Items.append(ReLu())
    for i in range(hidden_layers - 1):
        Items.append(Linear(25, 25))
        Items.append(ReLu())
    # NOTE(review): the original indentation was lost; tanh() is assumed to be
    # appended once, after the loop, rather than after every hidden block —
    # confirm against the upstream file.
    Items.append(tanh())
    Items.append(Linear(25, 2))
    self.model = Sequential(Items)
def testSGD(X, Y):
    """Train the 2-5-1 Tanh/Sigmoide network with the ``SGD`` driver.

    Returns:
        Tuple ``(loss_history, "SGD", maxIter)``; the history is read from
        ``moduleList.histLoss`` of the object returned by ``SGD``.
    """
    def fctSig(res):
        # Threshold every output at 0.5 and return a column of 0/1 labels.
        return np.array([[1 if value > 0.5 else 0] for value in res])

    # Build the network layer by layer.
    network = Sequential()
    network.add_module(Linear(2, 5))
    network.add_module(Non_linear.Tanh())
    network.add_module(Linear(5, 1))
    network.add_module(Non_linear.Sigmoide())

    # Run the optimisation.
    maxIter = 300
    trained = SGD(network, X, Y, 50, MSE, fctSig, maxIter)
    return trained.moduleList.histLoss, "SGD", maxIter
def testAutoEncoder():
    """Train a 256-100-10-100-256 auto-encoder on the USPS digits 9 and 6.

    Every 10 iterations the reconstruction of sample ``-10`` is saved as a
    16x16 grayscale image under ``plot/num/``.

    Returns:
        Tuple ``(encodage.histLoss, "AutoEncoder", maxIter)``.
    """
    # Prepare the data.
    uspsdatatrain = "data/USPS_train.txt"
    uspsdatatest = "data/USPS_test.txt"
    alltrainx, alltrainy = load_usps(uspsdatatrain)
    alltestx, alltesty = load_usps(uspsdatatest)
    neg, pos = 9, 6
    datax, datay = get_usps([neg, pos], alltrainx, alltrainy)
    datay = np.array([1 if label == 6 else 0 for label in datay])
    testx, testy = get_usps([neg, pos], alltestx, alltesty)

    maxIter = 100

    # Encoder (256 -> 100 -> 10) and decoder (10 -> 100 -> 256) share one
    # Sequential so a single backward call trains both halves.
    encodage = Sequential()
    encodage.add_module(Linear(256, 100))
    encodage.add_module(Non_linear.Tanh())
    encodage.add_module(Linear(100, 10))
    encodage.add_module(Non_linear.Tanh())
    encodage.add_module(Linear(10, 100))
    encodage.add_module(Non_linear.Tanh())
    encodage.add_module(Linear(100, 256))
    encodage.add_module(Non_linear.Sigmoide())

    for i in range(maxIter):
        print(i)
        encodage.forward(datax)
        # The input is its own target: the network learns to reconstruct it.
        encodage.backward(datax, datax, loss=BCE, gradient_step=0.1)
        if i % 10 != 0:
            continue
        reconstruction = encodage.forwards[-1][-10]
        plt.figure()
        plt.imshow(reconstruction.reshape(16, 16), cmap="gray")
        plt.title("Image apres autoEncoder de 6".format(datay[0]))
        plt.savefig("plot/num/6_iter" + str(i) + ".png")
        plt.close()

    return encodage.histLoss, "AutoEncoder", maxIter
def main():
    """Prompt for a data file and print the chi-squared of a linear model.

    The file is read with ``np.loadtxt``; columns 0, 1 and 2 are used as the
    x values, y values and errors. An empty answer selects ``testData.txt``.
    """
    # Fall back to the bundled test data when the user just presses Enter.
    filename = str(raw_input("Enter filename (Test data used if blank): ")) or "testData.txt"

    input_data = np.loadtxt(filename)

    # Turn each of the three columns into a column vector.
    xValues, yValues, errorValues = (
        np.matrix(input_data[:, column]).transpose() for column in range(3)
    )

    chi_sq = ChiSq(xValues, yValues, errorValues, function=Linear())
    chi_sq.setParameters([0.0, 1.0])
    chi_sq_value = chi_sq.evaluateChiSq()

    print('Chi squared is: ' + str(chi_sq_value))
def testOptim(X, Y):
    """Train the 2-5-1 Tanh/Sigmoide network through the ``Optim`` wrapper.

    Returns:
        Tuple ``(optim.moduleList.histLoss, "Optim", maxIter)``.
    """
    def fctSig(res):
        # Threshold every output at 0.5 and return a column of 0/1 labels.
        return np.array([[1 if value > 0.5 else 0] for value in res])

    # Build the network layer by layer.
    network = Sequential()
    network.add_module(Linear(2, 5))
    network.add_module(Non_linear.Tanh())
    network.add_module(Linear(5, 1))
    network.add_module(Non_linear.Sigmoide())

    # One optimiser step per iteration on the full data set.
    maxIter = 100
    optim = Optim(network, fctsort=fctSig)
    for _ in range(maxIter):
        optim.step(X, Y)

    return optim.moduleList.histLoss, "Optim", maxIter
def testSequential(X, Y):
    """Train the 2-5-1 Tanh/Sigmoide network by calling ``Sequential`` directly.

    Returns:
        Tuple ``(seq.histLoss, "Sequential", maxIter)``.
    """
    def fctSig(res):
        # Threshold every output at 0.5 and return a column of 0/1 labels.
        return np.array([[1 if value > 0.5 else 0] for value in res])

    # Build the network layer by layer.
    network = Sequential()
    network.add_module(Linear(2, 5))
    network.add_module(Non_linear.Tanh())
    network.add_module(Linear(5, 1))
    network.add_module(Non_linear.Sigmoide())

    # Alternate a forward and a backward pass on the full data set.
    maxIter = 100
    for _ in range(maxIter):
        network.forward(X)
        network.backward(X, Y, fctsort=fctSig)

    return network.histLoss, "Sequential", maxIter
def setUp(self):
    """Create random features plus a linear and a one-hidden-layer model.

    Both models are built for ``feature_count`` features and get randomly
    initialised weights, stored in ``linear_w`` and ``hidden_w``.
    """
    n_features, n_docs = 50, 1000

    self.feature_count = n_features
    self.number_docs = n_docs
    self.docs = range(n_docs)
    # One row of random features per document.
    self.features = np.random.rand(n_docs, n_features)

    linear_model = Linear(n_features)
    self.linear_model = linear_model
    self.linear_w = linear_model.initialize_weights("random")

    hidden_model = OneHiddenLayer(n_features)
    self.hidden_model = hidden_model
    self.hidden_w = hidden_model.initialize_weights("random")
def Solve(self):
    """Interpolate with the scheme named by ``self.method``.

    Supported names are ``'Constant'``, ``'Linear'``, ``'Polynomial'`` and
    ``'Splines'``. The matching class is imported lazily, called with
    ``(self.x0, self.y0, self.xn)``, and the result is stored in ``self.yn``.
    Any other name leaves ``self.yn`` untouched.
    """
    # Compare by value: `is` only matches when both strings happen to be the
    # same interned object, which fails for names built or read at runtime.
    method = self.method
    if method == 'Constant':
        from Constant import Constant
        self.yn = Constant(self.x0, self.y0, self.xn)
    elif method == 'Linear':
        from Linear import Linear
        self.yn = Linear(self.x0, self.y0, self.xn)
    elif method == 'Polynomial':
        from Polynomial import Polynomial
        self.yn = Polynomial(self.x0, self.y0, self.xn)
    elif method == 'Splines':
        from Splines import Splines
        self.yn = Splines(self.x0, self.y0, self.xn)
def testSoftmax():
    """Train a 256-50-10 softmax classifier on the USPS training set.

    Labels are one-hot encoded over 10 classes and the network is optimised
    with ``Optim`` using the ``CrossEntropy`` loss.

    Returns:
        Tuple ``(optim.moduleList.histLoss, "Softmax", maxIter)``.
    """
    uspsdatatrain = "data/USPS_train.txt"
    uspsdatatest = "data/USPS_test.txt"
    X, Y = load_usps(uspsdatatrain)
    # The test split is loaded as in the original but not used here.
    Xtest, Ytest = load_usps(uspsdatatest)

    # One-hot encode the labels. `np.int` was removed from NumPy, so the
    # builtin `int` is used as dtype instead.
    onehot = np.zeros((Y.size, 10), dtype=int)
    onehot[np.arange(Y.size), Y] = 1

    seq = Sequential()
    seq.add_module(Linear(256, 50))
    seq.add_module(Non_linear.Tanh())
    seq.add_module(Linear(50, 10))
    seq.add_module(Softmax())

    maxIter = 100
    optim = Optim(seq, loss=CrossEntropy, eps=0.01)
    for _ in range(maxIter):
        optim.step(X, onehot)

    return optim.moduleList.histLoss, "Softmax", maxIter
def testlinear(datax, datay):
    """Fit a single ``Linear(10, 1)`` layer with an MSE loss for 100 steps.

    Returns:
        Tuple ``(losses, "Linear", 100)`` where ``losses`` holds the summed
        MSE of every iteration.
    """
    n_iter = 100
    layer = Linear(10, 1)
    layer.zero_grad()
    criterion = MSE()

    history = []
    for _ in range(n_iter):
        prediction = layer.forward(datax)
        per_sample_loss = criterion.forward(datay, prediction)
        delta = criterion.backward(datay, prediction)
        history.append(sum(per_sample_loss))

        # Reset the accumulated gradient before adding this step's one.
        layer.zero_grad()
        layer.backward_update_gradient(datax, delta)
        layer.update_parameters()

    return history, "Linear", n_iter
def __init__(self, vac_size: int, hidden_size: int, seq_size: int):
    """Build an auto-encoder with one LSTM in the encoder and one in the decoder.

    :param vac_size: int, input size of the first LSTM and size of the output
    :param hidden_size: int, hidden size of the encoder LSTM
    :param seq_size: int, value passed to ``RepeatVector``
    """
    self.vac_size, self.hidden_size, self.seq_size = vac_size, hidden_size, seq_size

    # Encoder: one LSTM that does not return the sequence, then RepeatVector.
    self.lstm1 = LSTMCell(vac_size=vac_size,
                          hidden_size=hidden_size,
                          return_seq=False)
    self.repeat = RepeatVector(seq_size)

    # Decoder: an LSTM returning the whole sequence, then a linear layer.
    self.lstm2 = LSTMCell(hidden_size, vac_size, return_seq=True)
    self.linear = Linear(vac_size, vac_size)
def __init__(self, vac_size: int, hidden_sizes: Tuple[int, int], seq_size: int):
    """
    Build the RNNAutoencoder layers.

    The encoder stacks 2 LSTM layers; the decoder stacks 2 LSTM layers
    followed by a linear layer.

    :param vac_size: int
    :param hidden_sizes: Tuple[int, int], hidden sizes of the two encoder LSTMs
    :param seq_size: int
    """
    first_hidden = hidden_sizes[0]
    second_hidden = hidden_sizes[1]

    self.vac_size = vac_size
    self.hidden_size_1 = first_hidden
    self.hidden_size_2 = second_hidden
    self.seq_size = seq_size

    # Encoder: only the second LSTM drops the sequence dimension.
    self.lstm1 = LSTMCell(vac_size=vac_size,
                          hidden_size=first_hidden,
                          return_seq=True)
    self.lstm2 = LSTMCell(vac_size=first_hidden,
                          hidden_size=second_hidden,
                          return_seq=False)
    self.repeat = RepeatVector(seq_size)

    # Decoder: mirrors the encoder sizes, then a linear layer.
    self.lstm3 = LSTMCell(second_hidden, first_hidden, return_seq=True)
    self.lstm4 = LSTMCell(first_hidden, vac_size, return_seq=True)
    self.linear = Linear(vac_size, vac_size)
def buildModel(config):
    """Build a ``Model`` from a text configuration file and load its weights.

    The file layout read here is:
        line 1:        number of layers ``n``
        next n lines:  ``linear <in> <out>`` or ``relu``
        next line:     path of the torchfile holding the weights
        next line:     path of the torchfile holding the biases

    Args:
        config: path of the configuration file.

    Returns:
        The model, with ``W`` and ``B`` of each layer whose ``type`` is 0 set
        from the loaded files.
    """
    # `with` guarantees the file is closed even if parsing fails half-way.
    with open(config, "r") as f:
        n = int(f.readline())
        testModel = Model()
        for _ in range(n):
            tokens = f.readline().split()
            if tokens[0] == "linear":
                inpNodes = int(tokens[1])
                outNodes = int(tokens[2])
                testModel.addLayer(Linear(inpNodes, outNodes))
            elif tokens[0] == "relu":
                testModel.addLayer(ReLU())
        weightsPath = f.readline().split()[0]
        biasPath = f.readline().split()[0]

    weights = torchfile.load(weightsPath)
    biases = torchfile.load(biasPath)

    # The stored arrays are consumed in order, one pair per type-0 layer.
    cnt = 0
    for i in range(n):
        layer = testModel.Layers[i]
        if layer.type == 0:
            layer.W = torch.from_numpy(weights[cnt].T).double()
            layer.B = torch.from_numpy(biases[cnt].T).double().unsqueeze(1)
            cnt += 1

    return testModel
# Load the labels and turn them into a column tensor of integer class ids.
Ytrain = torchfile.load(args.ytrain)
Ytrain = torch.from_numpy(Ytrain).long().unsqueeze(1)
# Split with the index array `test` (defined earlier in the script): the
# first 5000 indices form the held-out labels, the rest the training labels.
Ytest = Ytrain[test[0:5000], :]
Ytrain = Ytrain[test[5000:], :]
noTrain = Xtrain.shape[0]

# Hyper-parameters taken from the command line.
batchSize = args.b
epochs = args.e
alpha = args.a
moment = 0.9

# Network: Flatten -> 3 x (Linear, batchNorm, sigactiv) -> Linear(10, 6),
# with a Dropout(0.7) after the first activation.
myModel = Model(moment)
myModel.addLayer(Flatten())
myModel.addLayer(Linear(108 * 108, 80))
myModel.addLayer(batchNorm())
myModel.addLayer(sigactiv())
myModel.addLayer(Dropout(0.7))
myModel.addLayer(Linear(80, 20))
myModel.addLayer(batchNorm())
myModel.addLayer(sigactiv())
myModel.addLayer(Linear(20, 10))
myModel.addLayer(batchNorm())
myModel.addLayer(sigactiv())
myModel.addLayer(Linear(10, 6))
criterion = Criterion()
# NOTE(review): the checkpoint is bound to `model`, not `myModel` — confirm
# which name the rest of the script uses.
if args.loadModel:
    model = torch.load("modelParams.txt")
# NOTE(review): the original indentation was lost; `k = 3` is assumed to be a
# top-level statement rather than part of the `if` body — confirm upstream.
k = 3
def train_model():
    """Train a 3072-1024-512-2 MLP on the CIFAR-10 images labelled 0 or 1.

    Images are fed one at a time (both loaders use ``batch_size=1``). Each
    time ``train_count + 1`` is a multiple of 500, the model is evaluated on
    the test images with label 0 or 1, the averaged losses are recorded and
    printed, and the running counters are reset. Both loss curves are plotted
    once all epochs are done.

    Relies on the module-level names ``train_on_gpu`` and ``device``.

    Returns:
        The trained ``Model`` instance.
    """
    # Convert images to tensors and normalise each channel with mean 0.5, std 0.5.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # Download the training data.
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    # Download the test data.
    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
    # Training loader: one image per batch, shuffled.
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=1, shuffle=True)
    # Test loader: one image per batch.
    testloader = torch.utils.data.DataLoader(testset, batch_size=1)
    # Class names of the data set (not used below).
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    # The bare string below is disabled helper code kept from the original file.
    """ # Function to display Image # def imshow(img): # img = img / 2 + 0.5 # unnormalize # npimg = img.numpy() # plt.imshow(np.transpose(npimg, (1, 2, 0))) # plt.show() # # dataiter = iter(trainloader) # images, labels = dataiter.next() """
    # Model: two hidden ReLu layers and a 2-unit output layer.
    my_model = Model()
    my_model.addLayer(Linear(3072, 1024))
    my_model.addLayer(ReLu())
    # my_model.addLayer(Linear(2048, 1024))
    # my_model.addLayer(ReLu())
    my_model.addLayer(Linear(1024, 512))
    my_model.addLayer(ReLu())
    my_model.addLayer(Linear(512, 2))
    # my_model.addLayer(Softmax())
    # my_model.addLayer(CrossEntropy())
    # Running statistics, reset after every evaluation.
    running_loss = 0
    # Number of epochs
    epochs = 7
    train_count = 0
    train_losses, test_losses = [], []
    train_correct = 0
    # Number of evaluations done so far (printed as "Epoch").
    i = 0
    for epoch in range(epochs):
        for images, labels in trainloader:
            # Move the sample to the GPU when enabled.
            if train_on_gpu:
                images, labels = images.to(device), labels.to(device)
            # Keep only classes 0 and 1; the comparison yields a single
            # boolean because each batch holds exactly one label.
            if labels == 0 or labels == 1:
                # Count the training images used since the last reset.
                train_count += 1
                # Flatten the image to (batch_size, 32*32*3).
                images = images.view(images.size(0), -1)
                # Forward the image through the model.
                final_prob = my_model.forward(images)
                # Gradient of the cross-entropy w.r.t. the model output.
                backward_grad = CrossEntropy().backward(final_prob, labels)
                # Exponentiate the scores.
                ps = torch.exp(final_prob)
                # Pick the top-scoring class.
                top_p, top_class = ps.topk(1, dim=1)
                if top_class == labels:
                    train_correct += 1
                # Back-propagate through the model.
                my_model.backward(images, backward_grad, alpha=0.001)
                # Accumulate the running loss.
                running_loss += (CrossEntropy().forward(final_prob, labels))
                # Periodic evaluation on the test data.
                # NOTE(review): indentation was lost in the source; nesting
                # this check under the label filter is behaviour-equivalent to
                # placing it at loop level because train_count is reset below.
                if (train_count + 1) % 500 == 0:
                    i = i + 1
                    test_loss = 0
                    correct_class = 0
                    test_count = 0
                    # This loop rebinds `images` and `labels`.
                    for images, labels in testloader:
                        if train_on_gpu:
                            images, labels = images.to(device), labels.to(device)
                        if labels == 0 or labels == 1:
                            test_count += 1
                            # Flatten the image.
                            images = images.view(images.size(0), -1)
                            # Forward the image through the trained model.
                            score = my_model.forward(images)
                            # Accumulate the loss.
                            test_loss += CrossEntropy().forward(score, labels)
                            # Select the class with the highest score.
                            ps = torch.exp(score)
                            top_p, top_class = ps.topk(1, dim=1)
                            # Count the prediction when it matches the target.
                            if top_class == labels:
                                correct_class += 1
                    # Record the averaged losses for the final plot.
                    # NOTE(review): these averages divide by count + 1 while
                    # the printed values divide by count — confirm intended.
                    train_losses.append(running_loss / (train_count + 1))
                    test_losses.append(test_loss / (test_count + 1))
                    print(f"Epoch {i}.. " f"Train loss: {running_loss/(train_count):.3f} .." f"Test loss: {test_loss/(test_count):.3f} .." f"Train accuracy: {train_correct/(train_count):.3f}.." f"Test accuracy: {correct_class/(test_count):.3f}")
                    train_correct = 0
                    train_count = 0
                    running_loss = 0
    plt.plot(train_losses, label='Training loss')
    plt.plot(test_losses, label='Validation loss')
    plt.legend(frameon=False)
    #plt.savefig('mlp2.png', dpi=100)
    return my_model
# Make the modules under dl/ importable before importing them.
sys.path.append('dl/')
from Sequential import Sequential
from Linear import Linear
from Functionnals import Relu
import Optimizer
import Criterion
from helpers import train, generate_disc_data, compute_accuracy

# Use float32 as the default tensor dtype.
torch.set_default_dtype(torch.float32)
# Disable torch autograd globally.
torch.set_grad_enabled(False)
# Create the model: 2 inputs, three hidden layers of 25 units, 2 outputs.
model = Sequential(Linear(2, 25), Relu(), Linear(25, 25), Relu(), Linear(25, 25), Relu(), Linear(25, 2))
# Create the data sets with one-hot labels, as used with the MSE criterion.
train_input, train_target = generate_disc_data(one_hot_labels=True)
test_input, test_target = generate_disc_data(one_hot_labels=True)
# Normalise both sets in place with the training mean and std.
mean, std = train_input.mean(), train_input.std()
train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)
# Define the loss.
criterion = Criterion.MSE()
# Define the optimizer (the statement follows outside this excerpt).
def train_model():
    """Train a 3072-1024-512 MLP on the CIFAR-10 images with label <= 4.

    Images are fed one at a time (both loaders use ``batch_size=1``). Each
    time ``train_count + 1`` is a multiple of 500, the model is evaluated on
    the test images with label <= 4, the averaged losses are recorded and
    printed, and the counters are reset. Both loss curves are plotted and
    saved to ``final.png`` once all epochs are done.

    Relies on the module-level names ``train_on_gpu`` and ``device``.

    Returns:
        The trained ``Model`` instance.
    """
    # Convert images to tensors and normalise each channel with mean 0.5, std 0.5.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=1, shuffle=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=1)
    # Class names of the data set (not used below).
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    # Disabled helper kept from the original file:
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # dataiter = iter(trainloader)
    # images, labels = dataiter.next()
    my_model = Model()
    my_model.addLayer(Linear(3072, 1024))
    my_model.addLayer(ReLu())
    # my_model.addLayer(Linear(2048, 1024))
    # my_model.addLayer(ReLu())
    my_model.addLayer(Linear(1024, 512))
    my_model.addLayer(ReLu())
    # NOTE(review): the 5-unit output layer below is disabled, so the model
    # ends on the 512-unit ReLu layer — confirm this is intended.
    # my_model.addLayer(Linear(512, 5))
    # my_model.addLayer(Softmax())
    # my_model.addLayer(CrossEntropy())
    # Running statistics, reset after every evaluation.
    running_loss = 0
    epochs = 7
    train_count = 0
    train_losses, test_losses = [], []
    # Number of evaluations done so far (printed as "Epoch").
    i = 0
    for epoch in range(epochs):
        for images, labels in trainloader:
            if train_on_gpu:
                images, labels = images.to(device), labels.to(device)
            # print(labels)
            # Keep only classes 0..4; the comparison yields a single boolean
            # because each batch holds exactly one label.
            if labels <= 4:
                train_count += 1
                # Flatten the image to (batch_size, features).
                images = images.view(images.size(0), -1)
                final_prob = my_model.forward(images)
                backward_grad = CrossEntropy().backward(final_prob, labels)
                # print(backward_grad)
                my_model.backward(images, backward_grad, alpha=0.001)
                running_loss += (CrossEntropy().forward(final_prob, labels))
                # Periodic evaluation on the test data.
                # NOTE(review): indentation was lost in the source; nesting
                # this check under the label filter is behaviour-equivalent to
                # placing it at loop level because train_count is reset below.
                if (train_count + 1) % 500 == 0:
                    i = i + 1
                    test_loss = 0
                    accuracy = 0
                    correct_class = 0
                    test_count = 0
                    # This loop rebinds `images` and `labels`.
                    for images, labels in testloader:
                        if train_on_gpu:
                            images, labels = images.to(device), labels.to(device)
                        if labels <= 4:
                            test_count += 1
                            images = images.view(images.size(0), -1)
                            score = my_model.forward(images)
                            test_loss += CrossEntropy().forward(score, labels)
                            # Select the class with the highest score.
                            ps = torch.exp(score)
                            top_p, top_class = ps.topk(1, dim=1)
                            if top_class == labels:
                                correct_class += 1
                    # Record the averaged losses for the final plot.
                    train_losses.append(running_loss / (train_count + 1))
                    test_losses.append(test_loss / (test_count + 1))
                    # plt.plot(train_losses, label='Training loss')
                    # plt.plot(test_losses, label='Validation loss')
                    # plt.savefig('myfilename.png', dpi=100)
                    # NOTE(review): the printed train loss divides by
                    # train_count while the recorded one divides by
                    # train_count + 1 — confirm intended.
                    print(f"Epoch {i}.. " f"Train loss: {running_loss/(train_count):.3f} .." f"Test loss: {test_loss/(test_count + 1):.3f} .." f"Test accuracy: {correct_class/(test_count + 1):.3f}")
                    test_count = 0
                    train_count = 0
                    running_loss = 0
    plt.plot(train_losses, label='Training loss')
    plt.plot(test_losses, label='Validation loss')
    plt.legend(frameon=False)
    plt.savefig('final.png', dpi=100)
    return my_model
loss = -np.log(prob[0, target]) return loss # def softmax(input): # return np.exp(input)/np.sum(np.exp(input), axis=1) ################################################################################ """Training and Testing of Regressor Function""" import pickle from struct import unpack import gzip np.random.seed(10) Linear = Linear() img_num = 60000 def train(x_train, y_train, alpha=0.001, weight_decay=0.0, epochs=40): for i in range(epochs): train_loss = 0 count = 0 for j in range(img_num): input = x_train[j].copy().reshape(1, 784) / 255.0 target = y_train[j].copy() #Forward linear = Linear.forward(input) loss = cross_entropy(linear, target) train_loss += loss
def _build_mlp(hidden_nb, activation, loss, dropout=None):
    """Build the 2 -> hidden -> hidden -> hidden -> 2 network used by every experiment.

    ``activation`` and ``loss`` are zero-argument factories. Objects are
    created in the order: layers from input to output, then the loss.
    ``dropout``, when given, is applied to the two hidden-to-hidden layers.
    """
    hidden_kwargs = {} if dropout is None else {'dropout': dropout}
    linear_1 = Linear(2, hidden_nb)
    act_1 = activation()
    linear_2 = Linear(hidden_nb, hidden_nb, **hidden_kwargs)
    act_2 = activation()
    linear_3 = Linear(hidden_nb, hidden_nb, **hidden_kwargs)
    act_3 = activation()
    linear_4 = Linear(hidden_nb, 2)
    return Sequential(linear_1, act_1, linear_2, act_2, linear_3, act_3,
                      linear_4, loss=loss())


def _save_loss_comparison(curves, legend, path):
    """Plot several loss curves on one figure and save it to ``path``.

    ``curves`` is a sequence of ``(n_epochs, losses, plot_kwargs)`` tuples.
    """
    fig = plt.figure(figsize=(10, 4))
    for n_epochs, losses, plot_kwargs in curves:
        plt.plot(range(0, n_epochs), losses, **plot_kwargs)
    plt.legend(legend)
    plt.title("Loss")
    plt.xlabel("Epochs")
    plt.savefig(path, bbox_inches='tight')
    plt.close(fig)


def run_all_model(train_input, train_target, test_input, test_target,
                  Sample_number, save_plot=False):
    """Train and evaluate the eight reference models and collect their scores.

    The models share one architecture and differ in optimizer, dropout,
    activation, weight initialisation and loss. When ``save_plot`` is true,
    three comparison figures are written under ``output/``.

    Args:
        train_input, train_target: training data, passed to ``train_model``
            and ``evaluate_model``.
        test_input, test_target: test data, passed to ``evaluate_model``.
        Sample_number: forwarded unchanged to ``train_model``.
        save_plot: forwarded to ``evaluate_model``; also enables the
            comparison figures.

    Returns:
        Four tensors ``(train_loss, train_error, test_loss, test_error)``
        with one entry per model, built from items 0 to 3 of each
        ``evaluate_model`` result.
    """
    # Constants shared by all experiments.
    hidden_nb = 25
    std = 0.1
    eta = 3e-1
    batch_size = 200
    epochs_number = 1000

    def fit_and_score(model, optimizer, epochs, mname):
        # Train the model, then evaluate it (and plot, when save_plot is set).
        losses = train_model(model, train_input, train_target, optimizer,
                             epochs, Sample_number, batch_size)
        perf = evaluate_model(model, train_input, train_target, test_input,
                              test_target, losses, save_plot, mname=mname)
        return losses, perf

    # Model 1. No dropout; constant learning rate (SGD)
    print('\nModel 1: Optimizer: SGD; No dropout; ReLU; CrossEntropy')
    model_1 = _build_mlp(hidden_nb, Relu, CrossEntropy)
    model_1.normalize_parameters(mean=0, std=std)
    my_loss_1, model_1_perf = fit_and_score(model_1, Sgd(eta), epochs_number, 'Model1')

    # Model 2. No dropout; decreasing learning rate (DecreaseSGD)
    print('\nModel 2: Optimizer: DecreaseSGD; No dropout; ReLU; CrossEntropy')
    model_2 = _build_mlp(hidden_nb, Relu, CrossEntropy)
    model_2.normalize_parameters(mean=0, std=std)
    my_loss_2, model_2_perf = fit_and_score(model_2, DecreaseSGD(eta), epochs_number, 'Model2')

    # Model 3. No dropout; Adam optimizer with its own hyper-parameters
    print('\nModel 3: Optimizer: Adam; No dropout; ReLU; CrossEntropy')
    eta_adam = 1e-3
    epochs_number_adam = 500
    model_3 = _build_mlp(hidden_nb, Relu, CrossEntropy)
    model_3.normalize_parameters(mean=0, std=std)
    my_loss_3, model_3_perf = fit_and_score(
        model_3, Adam(eta_adam, 0.9, 0.99, 1e-8), epochs_number_adam, 'Model3')

    # Plot to compare optimizers
    if save_plot:
        _save_loss_comparison(
            [(epochs_number, my_loss_1, {'linewidth': 1}),
             (epochs_number, my_loss_2, {'linewidth': 1}),
             (epochs_number_adam, my_loss_3, {'linewidth': 1})],
            ["SGD", "Decreasing SGD", "Adam"],
            'output/compare_optimizers.pdf')

    # Model 4. Dropout on the hidden-to-hidden layers; SGD
    print('\nModel 4: Optimizer: SGD; Dropout; ReLU; CrossEntropy')
    dropout = 0.15
    model_4 = _build_mlp(hidden_nb, Relu, CrossEntropy, dropout=dropout)
    model_4.normalize_parameters(mean=0, std=std)
    my_loss_4, model_4_perf = fit_and_score(model_4, Sgd(eta), epochs_number, 'Model4')

    # Plot to compare dropout and no dropout
    if save_plot:
        _save_loss_comparison(
            [(epochs_number, my_loss_1, {'linewidth': 1}),
             (epochs_number, my_loss_4, {'linewidth': 1})],
            ["Without Dropout", "With Dropout"],
            'output/compare_dropout.pdf')

    print('\nEvaluation of different activation functions\n')

    # Model 5. No dropout; SGD; Tanh
    print('\nModel 5: Optimizer: SGD; No dropout; Tanh; CrossEntropy')
    model_5 = _build_mlp(hidden_nb, Tanh, CrossEntropy)
    model_5.normalize_parameters(mean=0, std=std)
    my_loss_5, model_5_perf = fit_and_score(model_5, Sgd(eta), epochs_number, 'Model5')

    # Model 6. Tanh with Xavier initialization; Sgd with its default step
    print(
        '\nModel 6: Optimizer: SGD; No dropout; Tanh; Xavier initialization; CrossEntropy'
    )
    model_6 = _build_mlp(hidden_nb, Tanh, CrossEntropy)
    model_6.xavier_parameters()
    my_loss_6, model_6_perf = fit_and_score(model_6, Sgd(), epochs_number, 'Model6')

    # Model 7. Sigmoid
    print('\nModel 7: Optimizer: SGD; No dropout; Sigmoid; CrossEntropy')
    p_lambda = 0.1  # parameter passed to the sigmoid activation
    model_7 = _build_mlp(hidden_nb, lambda: Sigmoid(p_lambda), CrossEntropy)
    model_7.normalize_parameters(mean=0.5, std=1)
    my_loss_7, model_7_perf = fit_and_score(model_7, Sgd(eta=0.5), epochs_number, 'Model7')

    # Plot to compare the effect of different activations
    if save_plot:
        _save_loss_comparison(
            [(epochs_number, my_loss_1, {'linewidth': 0.5}),
             (epochs_number, my_loss_5, {'linewidth': 0.5, 'alpha': 0.8}),
             (epochs_number, my_loss_6, {'linewidth': 0.5, 'alpha': 0.8}),
             (epochs_number, my_loss_7, {'linewidth': 0.5})],
            ["Relu", "Tanh", "Tanh (Xavier)", "Sigmoid"],
            'output/compare_activations.pdf')

    print('\nEvaluation of base model with MSE loss\n')

    # Model 8. MSE loss
    print('\nModel 8: Optimizer: SGD; No dropout; Relu; MSE')
    model_8 = _build_mlp(hidden_nb, Relu, LossMSE)
    model_8.normalize_parameters(mean=0, std=std)
    my_loss_8, model_8_perf = fit_and_score(model_8, Sgd(eta), epochs_number, 'Model8')

    print('Evaluation done! ')

    all_perf = [model_1_perf, model_2_perf, model_3_perf, model_4_perf,
                model_5_perf, model_6_perf, model_7_perf, model_8_perf]
    train_loss = torch.tensor([perf[0] for perf in all_perf])
    train_error = torch.tensor([perf[1] for perf in all_perf])
    test_loss = torch.tensor([perf[2] for perf in all_perf])
    test_error = torch.tensor([perf[3] for perf in all_perf])

    return train_loss, train_error, test_loss, test_error
def model_tanh():
    """Return a fresh 2-25-25-25-2 ``Sequential`` network with Tanh activations."""
    layers = (
        Linear(2, 25), Tanh(),
        Linear(25, 25), Tanh(),
        Linear(25, 25), Tanh(),
        Linear(25, 2),
    )
    return Sequential(*layers)
def check_all_gradients(num_checks: int = 5):
    """Check the gradients of every layer, layer parameter and full model.

    Each check is repeated ``num_checks`` times with randomly drawn sizes and
    fails with ``AssertionError`` as soon as one checker returns a falsy value.

    :param num_checks: int, number of random configurations per check
    """
    def draw(low, high):
        # Random integer size in [low, high).
        return np.random.randint(low=low, high=high)

    print('Checking Layers Only')

    print('Checking Linear Layer')
    for _ in range(num_checks):
        seq_size = draw(1, 128)
        n_in = draw(1, 128)
        n_out = draw(1, 128)
        assert check_layer_gradient(Linear(n_in=n_in, n_out=n_out),
                                    np.random.randn(seq_size, n_in))

    for param in ('W', 'b'):
        print('Checking Linear Layer Paramter ' + param)
        for _ in range(num_checks):
            seq_size = draw(1, 128)
            n_in = draw(1, 128)
            n_out = draw(1, 128)
            assert check_layer_param_gradient(Linear(n_in=n_in, n_out=n_out),
                                              np.random.randn(seq_size, n_in),
                                              param)

    print('Checking RepeatVector Layer')
    for _ in range(num_checks):
        seq_size = draw(1, 128)
        n_in = draw(1, 128)
        assert check_layer_gradient(RepeatVector(seq_size=seq_size),
                                    np.random.randn(n_in, 1))

    print('Checking LSTM Layer')
    for _ in range(num_checks):
        vac_size = draw(10, 32)
        hidden_size = draw(1, 128)
        seq_size = draw(1, 32)
        assert check_layer_gradient(
            LSTMCell.LSTMCell(vac_size=vac_size,
                              hidden_size=hidden_size,
                              return_seq=True),
            np.random.randn(seq_size, vac_size, 1))

    # (parameter name, exclusive upper bound used when drawing hidden_size)
    lstm_params = (
        ('W_forget', 32),
        ('W_input', 32),
        ('W_cell_state', 32),
        ('W_output', 32),
        ('b_forget', 32),
        ('b_input', 128),
        ('b_cell_state', 32),
        ('b_output', 32),
    )
    for param, hidden_high in lstm_params:
        print('Checking LSTM Parameter ' + param)
        for _ in range(num_checks):
            vac_size = draw(10, 32)
            hidden_size = draw(1, hidden_high)
            seq_size = draw(1, 32)
            assert check_layer_param_gradient(
                LSTMCell.LSTMCell(vac_size=vac_size,
                                  hidden_size=hidden_size,
                                  return_seq=True),
                np.random.randn(seq_size, vac_size, 1), param)

    print('Checking All Two Layer Model Paramters')
    for _ in range(num_checks):
        vac_size = draw(10, 32)
        seq_size = draw(1, 32)
        ds = Dataset(vac_size=vac_size, seq_size=seq_size)
        X, y = ds.generate_seq()
        assert check_model_gradient(model=RNNAutoencoder(vac_size=vac_size,
                                                         hidden_sizes=(12, 12),
                                                         seq_size=seq_size),
                                    X=X,
                                    y=y)

    print('Checking All One Layer Model Paramters')
    for _ in range(num_checks):
        vac_size = draw(10, 32)
        seq_size = draw(1, 32)
        ds = Dataset(vac_size=vac_size, seq_size=seq_size)
        X, y = ds.generate_seq()
        assert check_model_gradient(model=RNNAutoencoderOneLayer(
            vac_size=vac_size, hidden_size=12, seq_size=seq_size),
                                    X=X,
                                    y=y)

    print('All Gradients Are Fine! Lets Train Model!')
def model_relu():
    """Build the ReLU multilayer perceptron used by this module.

    The network is a ``Sequential`` of four ``Linear`` layers
    (2 -> 25 -> 25 -> 25 -> 2) with a ``Relu`` after each of the first
    three linear layers.

    Returns:
        The assembled ``Sequential`` model.
    """
    layers = (
        Linear(2, 25),
        Relu(),
        Linear(25, 25),
        Relu(),
        Linear(25, 25),
        Relu(),
        Linear(25, 2),
    )
    return Sequential(*layers)
def readCommand(argv):
    """Process the command line, run the configured model and save the results.

    The model configuration file is expected to contain, in order: the
    number of layers, one line per layer (a line with more than one token
    is read as a ``Linear`` layer whose 2nd and 3rd tokens are its sizes,
    any other line adds a ``ReLu``), the path of the sample weights file and
    the path of the sample biases file.

    Args:
        argv: Command-line arguments (without the program name), handed to
            ``OptionParser.parse_args``.

    Raises:
        Exception: If any positional argument is left over after parsing.

    Side effects:
        Saves the forward outputs and the weight, bias and input gradients
        with ``torch.save`` to the paths given by ``-o``, ``-w``, ``-b`` and
        ``-d`` respectively.
    """
    from optparse import OptionParser

    parser = OptionParser(USAGE_STRING)
    parser.add_option('-c', '--config', help=default('modelConfig'),
                      default='CS 763 Deep Learning HW/modelConfig_1.txt')
    parser.add_option('-i', '--i', help=default('input'),
                      default='CS 763 Deep Learning HW/input_sample_1.bin',
                      type="string")
    parser.add_option('-g', '--og', help=default('gradoutput'),
                      default='CS 763 Deep Learning HW/gradOutput_sample_1.bin',
                      type="string")
    parser.add_option('-o', '--o', help=default('output'), type="string")
    parser.add_option('-w', '--ow', help=default('gradweights'), type="string")
    parser.add_option('-b', '--ob', help=default('gradb'), type="string")
    parser.add_option('-d', '--ig', help=default('gradinput'), type="string")

    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))

    input_path = options.i
    output_path = options.o
    gradweights_path = options.ow
    gradb_path = options.ob
    gradinput_path = options.ig

    # Read the whole configuration up front so the handle is closed promptly.
    with open(options.config, "r") as model_config_file:
        config_lines = model_config_file.readlines()

    my_model = Model.Model()
    my_criterion = Criterion.Criterion()

    number_layers = int(config_lines[0])
    for layer_line in config_lines[1:1 + number_layers]:
        layer = layer_line.split()
        if len(layer) > 1:
            my_model.addLayer(Linear(int(layer[1]), int(layer[2])))
        else:
            my_model.addLayer(ReLu())

    # Strip only the line terminator: slicing off the last character would
    # corrupt the path when the line has no trailing newline and would leave
    # a stray '\r' behind on CRLF files.
    weights_path = config_lines[number_layers + 1].rstrip('\r\n')
    biases_path = config_lines[number_layers + 2].rstrip('\r\n')

    raw_batch = torchfile.load(input_path)
    # Flatten every sample to a single row, keeping the leading dimension.
    batch = torch.tensor(raw_batch).double().reshape((raw_batch.shape[0], -1))

    sample_weights = [
        torch.tensor(weight).double()
        for weight in torchfile.load(weights_path)
    ]
    sample_biases = [
        torch.tensor(bias).double().reshape((-1, 1))
        for bias in torchfile.load(biases_path)
    ]

    outputs = my_model.forward2(batch, sample_weights, sample_biases, True)
    # NOTE(review): `trLabels` is not defined inside this function, and the
    # `--og` (gradoutput) path is parsed but never loaded. Confirm whether the
    # upstream gradient is meant to come from that file instead.
    dl_do = my_criterion.backward(outputs, trLabels)
    grad_input, grad_weights, grad_bias = my_model.backward2(batch, dl_do, 0, 0)

    torch.save(outputs, output_path)
    torch.save(grad_weights, gradweights_path)
    torch.save(grad_bias, gradb_path)
    torch.save(grad_input, gradinput_path)
target_loc = sys.argv[i + 1]
# NOTE(review): `i`, `model_name` and `data_loc` are bound by code that lies
# outside this section; the line above is the tail of that argument handling.

# Make sure the directory that will receive the saved model exists.
if not os.path.exists(model_name):
    os.makedirs(model_name)

batch_size = 12
criterion = Criterion()
dataset = Dataset(batch_size)
model = Model(2, 128, 153, 153, True)

dataset.read_data(data_loc, 'X_train')
dataset.read_data(target_loc, 'Y_train')
train_data_len = len(dataset.X_train)

model.add_layer(RNN(153, 128, 20))
model.add_layer(Linear(128, 2))

# Staged training schedule; accuracy over the full training range is
# evaluated after every stage except the first.
# NOTE(review): the two `train` arguments are presumably (epochs, learning
# rate) -- confirm against the definition of `train`.
train(8, 1)
train(3, 1e-1)
accuracy(0, train_data_len)
train(6, 1e-2)
accuracy(0, train_data_len)
train(3, 1e-3)
accuracy(0, train_data_len)
train(8, 1e-6)
accuracy(0, train_data_len)
train(3, 1e-7)
accuracy(0, train_data_len)

# Use a context manager so the model file is flushed and closed even if
# serialization fails part-way through.
with open(model_name + '/model.bin', 'wb') as file:
    torch.save(model, file)
return Other # ---------------------------------------------------------------------- # Module Tests # ---------------------------------------------------------------------- if __name__ == '__main__': import numpy as np from Linear import Linear S = ScalingBunch() S.X = Linear(10.0, 0.0) S.Y = Linear(2.0, 1.0) data = OrderedBunch() data.X = 10.0 data.Y = np.array([1, 2, 3.]) print data data = data / S print data data = data * S print data
# Load the raw samples and labels, then convert them to tensors.
Data = torchfile.load("data.bin")
Labels = torchfile.load("labels.bin")
Data = torch.tensor(normalize(Data)).double()
# One flat row of 108 * 108 values per sample.
Data = Data.reshape(Data.shape[0], 108 * 108)
Labels = torch.tensor(Labels).long()

# First 90% of the samples for training, the remainder for validation.
_sample_count = Data.shape[0]
_split_at = int(_sample_count * 0.9)
trainingData = Data[0:_split_at, :]
trainingLabels = Labels[0:_split_at]
validationData = Data[_split_at:_sample_count, :]
validationLabels = Labels[_split_at:_sample_count]

# Three Linear layers, each followed by a ReLu.
my_model = Model.Model()
for _fan_in, _fan_out in ((108 * 108, 1024), (1024, 256), (256, 6)):
    my_model.addLayer(Linear(_fan_in, _fan_out))
    my_model.addLayer(ReLu())

my_criterion = Criterion.Criterion()


def train_and_test(trainingData, trainingLabels, validationData,
                   validationLabels, noIters, batchSize, alpha, lr):
    """Train and evaluate the module-level ``my_model``.

    The portion of the body visible here only derives the number of batches
    from the number of training labels and ``batchSize``; nothing is
    returned.

    Args:
        trainingData: Training samples (not used in the visible portion).
        trainingLabels: Training labels; ``shape[0]`` gives the sample count.
        validationData: Validation samples (not used in the visible portion).
        validationLabels: Validation labels (not used in the visible portion).
        noIters: Not used in the visible portion.
        batchSize: Number of samples per batch.
        alpha: Not used in the visible portion.
        lr: Not used in the visible portion.
    """
    # can add lambda
    global my_model
    label_count = trainingLabels.shape[0]
    noBatches = int(label_count / batchSize)
def cross_entropy(input, target):
    """Return the cross-entropy loss of the first row of `input` for `target`.

    A softmax is taken over axis 1 of `input` and the negative log of the
    probability found at ``[0, target]`` is returned.

    The computation is done in log space with each row's maximum subtracted
    first, so large scores no longer overflow ``exp`` into ``inf / inf``
    (``nan``). The reductions keep their axis, so inputs with more than one
    row broadcast correctly.

    Args:
        input: 2-D array-like of raw scores, indexed as ``[row, class]``.
        target: Class index (or indices) looked up in the first row.

    Returns:
        ``-log(softmax(input)[0, target])``.
    """
    scores = np.asarray(input)
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    log_norm = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    log_prob = shifted - log_norm
    loss = -log_prob[0, target]
    return loss


################################################################################
"""Training and Testing of Regressor Function"""
import pickle
from struct import unpack
import gzip

np.random.seed(2)
# NOTE: this rebinds the name `Linear` from the class to an instance, so the
# class is no longer reachable under that name after this line.
Linear = Linear(in_size=144, out_size=10)

# Model used for creating features.
model_test = RBM(n_visible=28 * 28, n_hidden=144)

# NOTE: pickle.load can execute arbitrary code from the file it reads; only
# load model files that come from a trusted source.

# Load the weights (the context manager closes the handle after reading).
with open("model/rbm_weight.npy", 'rb') as rbm_weight_file:
    model_test.weight = pickle.load(rbm_weight_file)

# Load the visible layer bias.
with open("model/rbm_v_bias.npy", 'rb') as rbm_v_bias_file:
    model_test.v_bias = pickle.load(rbm_v_bias_file)

# Load the hidden layer bias.
with open("model/rbm_h_bias.npy", 'rb') as rbm_h_bias_file:
    model_test.h_bias = pickle.load(rbm_h_bias_file)