def test(self, name):
    '''
    Takes a supposedly unseen dataset and finds the top-1 predicted labels.

    Stores those values in a csv file called name.csv, where name is the
    value set for the name parameter for this function.  The csv has two
    columns: y_true (ground-truth label) and y_pred (argmax prediction).

    name: (str) basename (no extension) of the output csv under
          ../model_train_results/
    '''
    loader, iteration = data_util.load_data(partition='test')
    data_iter = data_util.inf_generator(loader)
    results = []
    for i in range(iteration):
        X, y = next(data_iter)  # idiomatic next() instead of __next__()
        X = [x.numpy()[0] for x in X]
        predX = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
        y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)
        y_pred = self.model(predX)
        # record ground truth and the class with the max score (top-1)
        results.append(
            [y.cpu().numpy()[0], y_pred.max(-1)[1].cpu().numpy()[0]])
    pd.DataFrame(results, columns=['y_true', 'y_pred']).to_csv(
        f'../model_train_results/{name}.csv', index=False)
def test(self, name):
    '''
    Takes an unseen test dataset and stores the top-1 predicted labels.

    NOTE: the previous docstring claimed this returns an accuracy; it does
    not — it writes a csv of (y_true, y_pred) pairs and returns None.

    name: (str) basename (no extension) of the output csv under
          ../model_train_results/
    '''
    loader, iteration = data_util.load_data(partition='test')
    data_iter = data_util.inf_generator(loader)
    results = []
    for i in range(iteration):
        X, y = next(data_iter)  # idiomatic next() instead of __next__()
        X = [x.numpy()[0] for x in X]
        predX = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
        y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)
        y_pred = self.model(predX)
        # record ground truth and the class with the max score (top-1)
        results.append(
            [y.cpu().numpy()[0], y_pred.max(-1)[1].cpu().numpy()[0]])
    pd.DataFrame(results, columns=['y_true', 'y_pred']).to_csv(
        f'../model_train_results/{name}.csv', index=False)
def score(self):
    '''
    Returns the top-1 accuracy on the unseen test partition.

    Iterates the whole test loader once (batch size 1, per the [0]
    indexing) and returns correct / iteration as a float in [0, 1].
    '''
    loader, iteration = data_util.load_data(partition='test')
    data_iter = data_util.inf_generator(loader)
    correct = 0
    for i in range(iteration):
        X, y = next(data_iter)  # idiomatic next() instead of __next__()
        X = [x.numpy()[0] for x in X]
        predX = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
        y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)
        y_pred = self.model(predX)
        # count a hit when the argmax class equals the ground truth
        if y_pred.max(-1)[1] == y:
            correct += 1
    return correct / iteration
def score(self):
    '''
    Returns the top-1 accuracy on an unseen test dataset.

    Iterates the whole test loader once and returns correct / iteration
    as a float in [0, 1].
    '''
    loader, iteration = data_util.load_data(partition='test')
    data_iter = data_util.inf_generator(loader)
    correct = 0
    for i in range(iteration):
        X, y = next(data_iter)  # idiomatic next() instead of __next__()
        X = [x.numpy()[0] for x in X]
        predX = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
        y = Variable(torch.LongTensor([y]), requires_grad=False).to(device)
        y_pred = self.model(predX)
        # if class corresponding to max log softmax is the ground truth class
        if y_pred.max(-1)[1] == y:
            correct += 1
    return correct / iteration
def fit(self, name, save_weights=False):
    '''
    Trains model using predefined number of epochs, learning rate and
    number of neurons in each hidden layer.  Saves epoch results to a file
    name.csv, where name is replaced with the value put in for the name
    parameter.

    name: (str) The name of the file to save the results of this run
    save_weights: (bool) Saves the weights of the best iteration based on
                  validation accuracy
    '''
    print(name)
    optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
    loss = nn.CrossEntropyLoss()  # cross-entropy loss
    lossVal = []  # per-epoch rows for the results dataframe
    bestValAcc = 0  # best validation accuracy seen so far
    for i in range(self.epochs):
        start = time.time()
        loader, iteration = data_util.load_data()
        # inf generator taken from rtqichen implementation of NODE
        data_iter = data_util.inf_generator(loader)
        train_size = int(iteration * 0.8)  # 80% train / 20% validation
        val_size = int(iteration * 0.2)
        epoch_train_loss = []  # collected for post-processing / logging
        epoch_val_loss = []
        train_correct = 0
        val_correct = 0
        # training pass over the first 80% of the loader
        for j in range(train_size):
            X, y = next(data_iter)
            X = [x.numpy()[0] for x in X]
            X = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]),
                         requires_grad=False).to(device)
            optimizer.zero_grad()
            y_pred = self.model(X)
            output = loss(y_pred, y)
            epoch_train_loss.append(output.cpu().detach().numpy())
            # if max log softmax corresponds to correct class add 1
            if y_pred.max(-1)[1] == y:
                train_correct += 1
            output.backward()
            optimizer.step()
        # validation pass over the remaining 20%
        for k in range(val_size):
            X, y = next(data_iter)
            X = [x.numpy()[0] for x in X]
            X = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]),
                         requires_grad=False).to(device)
            optimizer.zero_grad()
            y_pred = self.model(X)
            output = loss(y_pred, y)
            epoch_val_loss.append(output.cpu().detach().numpy())
            if y_pred.max(-1)[1] == y:
                val_correct += 1
        valAcc = val_correct / val_size
        if save_weights and valAcc > bestValAcc:
            # BUG FIX: bestValAcc was never updated, so weights were
            # overwritten every epoch with any correct prediction — now
            # only genuine improvements are saved.
            bestValAcc = valAcc
            if 'model_weights' not in os.listdir('../'):
                os.mkdir('../model_weights')
            torch.save(self.model.state_dict(),
                       f'../model_weights/{name}.pt')
        end = time.time()
        lossVal.append([(end - start) / 60,
                        np.mean(epoch_train_loss),
                        np.mean(epoch_val_loss),
                        train_correct / train_size,
                        val_correct / val_size])  # save values for reporting
        print('epoch time:', (end - start) / 60, 'min',
              'epoch:', '{0}/{1}'.format(i, self.epochs),
              'train accuracy:', train_correct / train_size,
              ', val accuracy:', val_correct / val_size)
        print(
            f'Train loss: {np.mean(epoch_train_loss)} Val loss: {np.mean(epoch_val_loss)}'
        )
    if 'model_train_results' not in os.listdir('../'):
        os.mkdir('../model_train_results')
    pd.DataFrame(lossVal,
                 columns=['epoch_time', 'mean_train_loss', 'mean_val_loss',
                          'train_acc', 'val_acc'
                          ]).to_csv('../model_train_results/' + name + '.csv',
                                    index=False)  # add epoch length
def fit(self, name):
    '''
    Trains the model with SGD (momentum 0.4) for self.epochs epochs,
    logging mean train/val loss and accuracies per epoch to
    ../model_train_results/{name}.csv and saving the final weights to
    ../model_weights/VDCNN.pt.

    name: (str) The name of the csv file to save the results of this run
    '''
    optimizer = optim.SGD(self.model.parameters(), lr=self.lr, momentum=0.4)
    loss = nn.CrossEntropyLoss()  # cross-entropy loss
    lossVal = []
    for i in range(self.epochs):
        start = time.time()
        loader, iteration = data_util.load_data()
        data_iter = data_util.inf_generator(loader)
        train_size = int(iteration * 0.8)  # 80/20 train/val split
        val_size = int(iteration * 0.2)
        epoch_train_loss = []
        epoch_val_loss = []
        train_correct = 0
        val_correct = 0
        for j in range(train_size):
            X, y = next(data_iter)
            X = [x.numpy()[0] for x in X]
            # have to convert to tensor
            X = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]),
                         requires_grad=False).to(device)
            # reshape to (1, time, features) for the conv model
            X = X.squeeze().t().unsqueeze(0)
            # BUG FIX: removed leftover per-sample debug print(X.shape)
            # that flooded stdout and slowed every training step
            optimizer.zero_grad()
            y_pred = self.model(X)
            output = loss(y_pred, y)
            epoch_train_loss.append(output.cpu().detach().numpy())
            if y_pred.max(-1)[1] == y:
                train_correct += 1
            output.backward()
            optimizer.step()
        for k in range(val_size):
            X, y = next(data_iter)
            X = [x.numpy()[0] for x in X]
            X = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]),
                         requires_grad=False).to(device)
            optimizer.zero_grad()
            y_pred = self.model(X)
            output = loss(y_pred, y)
            epoch_val_loss.append(output.cpu().detach().numpy())
            if y_pred.max(-1)[1] == y:
                val_correct += 1
        lossVal.append([np.mean(epoch_train_loss),
                        np.mean(epoch_val_loss),
                        train_correct / train_size,
                        val_correct / val_size])
        print('epoch time:', time.time() - start, 'seconds',
              'epoch:', '{0}/{1}'.format(i, self.epochs),
              'train accuracy:', train_correct / train_size,
              ', val accuracy:', val_correct / val_size)
    if 'model_train_results' not in os.listdir('../'):
        os.mkdir('../model_train_results')
    pd.DataFrame(lossVal,
                 columns=['mean_train_loss', 'mean_val_loss', 'train_acc',
                          'val_acc']).to_csv(
                              '../model_train_results/' + name + '.csv',
                              index=False)
    # guard the weights directory the same way as the results directory
    if 'model_weights' not in os.listdir('../'):
        os.mkdir('../model_weights')
    torch.save(self.model.state_dict(), '../model_weights/VDCNN.pt')
def fit(self, name, save_weights=False):
    '''
    Trains model using predefined number of epochs, learning rate and
    number of neurons in each hidden layer.  Saves epoch results to a file
    name.csv, where name is replaced with the value put in for the name
    parameter.

    name: (str) The name of the file to save the results of this run
    save_weights: (bool) Saves the weights of the best iteration based on
                  validation accuracy
    '''
    optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
    loss = nn.CrossEntropyLoss()
    lossVal = []
    bestValAcc = 0  # best validation accuracy seen so far
    for i in range(self.epochs):
        start = time.time()
        loader, iteration = data_util.load_data()
        data_iter = data_util.inf_generator(loader)
        train_size = int(iteration * 0.8)  # 80/20 train/val split
        val_size = int(iteration * 0.2)
        epoch_train_loss = []
        epoch_val_loss = []
        train_correct = 0
        val_correct = 0
        c = 0  # training-loop counter for periodic progress reports
        for j in range(train_size):
            X, y = next(data_iter)
            X = [x.numpy()[0] for x in X]
            X = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]),
                         requires_grad=False).to(device)
            optimizer.zero_grad()
            y_pred = self.model(X)
            output = loss(y_pred, y)
            epoch_train_loss.append(output.cpu().detach().numpy())
            if y_pred.max(-1)[1] == y:
                train_correct += 1
            output.backward()
            optimizer.step()
            # progress report every 1000 training samples
            if not c % 1000:
                print('train time:', (time.time() - start) / 60, 'min',
                      c, 'loops')
            c += 1
        for k in range(val_size):
            X, y = next(data_iter)
            X = [x.numpy()[0] for x in X]
            # have to convert to tensor
            X = Variable(torch.FloatTensor([X]),
                         requires_grad=True).to(device)
            y = Variable(torch.LongTensor([y]),
                         requires_grad=False).to(device)
            optimizer.zero_grad()
            y_pred = self.model(X)
            output = loss(y_pred, y)
            epoch_val_loss.append(output.cpu().detach().numpy())
            if y_pred.max(-1)[1] == y:
                val_correct += 1
        valAcc = val_correct / val_size
        if save_weights and valAcc > bestValAcc:
            # BUG FIX: bestValAcc was never updated, so weights were
            # overwritten every epoch with any correct prediction — now
            # only genuine improvements are saved.
            bestValAcc = valAcc
            if 'model_weights' not in os.listdir('../'):
                os.mkdir('../model_weights')
            torch.save(self.model.state_dict(),
                       f'../model_weights/{name}.pt')
        end = time.time()
        lossVal.append([(end - start) / 60,
                        np.mean(epoch_train_loss),
                        np.mean(epoch_val_loss),
                        train_correct / train_size,
                        val_correct / val_size])  # save values for reporting
        print('epoch time:', (end - start) / 60, 'min',
              'epoch:', '{0}/{1}'.format(i, self.epochs),
              'train accuracy:', train_correct / train_size,
              ', val accuracy:', val_correct / val_size)
        print(
            f'Train loss: {np.mean(epoch_train_loss)} Val loss: {np.mean(epoch_val_loss)}'
        )
    if 'model_train_results' not in os.listdir('../'):
        os.mkdir('../model_train_results')
    pd.DataFrame(lossVal,
                 columns=['epoch_time', 'mean_train_loss', 'mean_val_loss',
                          'train_acc', 'val_acc'
                          ]).to_csv('../model_train_results/' + name + '.csv',
                                    index=False)  # add epoch length