def translate():
    """Load the saved baseline RNN translation model and translate one
    hard-coded English sentence, printing the intermediate vector and the
    final translated sentence.
    """
    # NOTE(review): `data` is only created in the commented-out line below;
    # this function raises NameError unless a module-level `data` object
    # exists elsewhere in the file — confirm before use.
    #data = LanguageLoader(en_path, fr_path, vocab_size, max_length)
    #rnn = RNN(data.input_size, data.output_size)
    model = RNN(data.input_size, data.output_size)
    # Restore pretrained weights from disk (CPU/GPU placement left to default).
    model.load_state_dict(torch.load('models/baseline.module'))
    vecs = data.sentence_to_vec("Madam president<EOS>")
    print("in translate-- ",vecs)
    # NOTE(review): torch.nn.Module.eval() accepts no input tensor; this call
    # only works if the project's RNN class overrides eval() as an inference
    # method — otherwise this should be `model.eval()` then `model(vecs)`.
    translation = model.eval(vecs)
    print("final result ",data.vec_to_sentence(translation))
def load_model(path, hyper, inference=True, dictionary_path=args.dictionary_path, LSTM=False):
    """Reconstruct an RNN from the hyper-parameter dict and load its weights.

    Args:
        path: directory containing the saved 'model.pt' state dict.
        hyper: dict of CLI-style hyper-parameters (keys like '--embed_size').
        inference: when True, switch the model to eval mode before returning.
        dictionary_path: location of the vocabulary/writer dictionaries.
        LSTM: forwarded to the RNN constructor to select the recurrent cell.

    Returns:
        The RNN module with restored weights.
    """
    assert os.path.exists(
        path), 'directory for model {} could not be found'.format(path)

    # Vocabulary size and writer count drive the embedding table shapes.
    vocab_to_index, _, writers = load_dictionaries(dictionary_path)

    model = RNN(
        hyper['--embed_size'],
        hyper['--hidden_size'],
        len(vocab_to_index),
        hyper['--num_layers'],
        add_writer=hyper['--writer_codes'],
        writer_number=len(writers),
        writer_embed_size=hyper['--writers_embeddings'],
        add_writer_as_hidden=hyper['--initialise_hidden'],
        LSTM=LSTM,
    )

    # lod = torch.load(os.path.join(path,'model.pt'))
    weights_file = os.path.join(path, 'model.pt')
    model.load_state_dict(torch.load(weights_file))

    if inference:
        model.eval()
    return model
def main(args): """ Evaluate SNLI model on MNLI data set """ # Use CUDA use_cuda = args.use_cuda and torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") # Fix random seed torch.manual_seed(args.seed) # Generate token-to-index and index-to-token mapping tok2id, id2tok = data_loader.build_or_load_vocab(args.train, overwrite=False) print("*" * 5) print(args) # Create DataLoader() objects params = { "batch_size": args.batch_size, "collate_fn": data_loader.collate_fn, "shuffle": args.shuffle, "num_workers": args.num_workers, } # train_dataset = data_loader.SNLIDataSet(args.train, tok2id) # train_loader = torch.utils.data.DataLoader(train_dataset, **params) val_dataset = data_loader.SNLIDataSet(args.val, tok2id) val_loader = torch.utils.data.DataLoader(val_dataset, **params) # Initialize model if args.model == "rnn": # RNN model model = RNN( vocab_size=const.MAX_VOCAB_SIZE, # Vocabulary size emb_dim=const.EMB_DIM, # Embedding dimensions hidden_dim=args.hidden_dim, # Hidden dimensions dropout_prob=args.dropout_prob, # Dropout probability padding_idx=const.PAD_IDX, # Padding token index num_classes=const.NUM_CLASSES, # Number of class labels id2tok=id2tok, # Vocabulary ).to(device) # Load model weights from disk model.load_state_dict(torch.load(const.MODELS + "rnn.pt")) model.eval() elif args.model == "cnn": # CNN model model = CNN( vocab_size=const.MAX_VOCAB_SIZE, # Vocabulary size emb_dim=const.EMB_DIM, # Embedding dimensions hidden_dim=args.hidden_dim, # Hidden dimensions kernel_size=args.kernel_size, # Kernel size dropout_prob=args.dropout_prob, # Dropout probability padding_idx=const.PAD_IDX, # Padding token index num_classes=const.NUM_CLASSES, # Number of class labels id2tok=id2tok, # Vocabulary ).to(device) # Load model weights from disk model.load_state_dict(torch.load(const.MODELS + "cnn.pt")) model.eval() else: print("Invalid model specification, exiting") exit() # Criterion criterion = torch.nn.CrossEntropyLoss() # Model parameters 
params = [p for p in model.parameters() if p.requires_grad] # Inspect correct/incorrect predictions if args.inspect: right, wrong = eval_model(val_loader, model, device, criterion, inspect=True) print("\nValidation premises with correct predictions:\n") for i, item in enumerate(right): text = " ".join([id2tok[idx] for idx in item if idx > 0]) print("#{}\n {}".format(i + 1, text)) print("\nValidation premises with incorrect predictions:\n") for i, item in enumerate(wrong): text = " ".join([id2tok[idx] for idx in item if idx > 0]) print("#{}\n {}".format(i + 1, text)) return # Validation val_acc, _ = eval_model(val_loader, model, device, criterion) print("\n Validation accuracy: {}".format(val_acc)) print("*" * 5 + "\n")
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) img = transform(Image.open(img)) image = img.unsqueeze(0) #image = Variable(image).cuda() image = Variable(image) cnn = alexnet(embedding_dim=embedding_size) rnn = RNN(embedding_dim=embedding_size, hidden_dim=hidden_size, vocab_size=vocab.index) #cnn.cuda() #rnn.cuda() #cnn_file = str(train_time) + '_iter_' + str(epoch) + '_cnn.pkl' #rnn_file = str(train_time) + '_iter_' + str(epoch) + '_rnn.pkl' cnn_file = 'alex_iter_' + str(epoch) + '_cnn.pkl' rnn_file = 'alex_iter_' + str(epoch) + '_rnn.pkl' cnn.load_state_dict( torch.load(os.path.join('train_file', cnn_file), map_location='cpu')) rnn.load_state_dict( torch.load(os.path.join('train_file', rnn_file), map_location='cpu')) cnn_out = cnn(image) word_id = rnn.search(cnn_out) sentence = vocab.get_sentence(word_id) print(sentence) showimage = Image.open(args.img) plt.imshow(np.asarray(showimage))
class TENG:
    """Training/evaluation harness for an RNN classifier over 9-feature
    rows with 25 output classes.

    Handles checkpoint resume, TensorBoard logging, loss/accuracy plotting,
    and two inference entry points (forward_EM / forward_ni).
    """

    def __init__(self):
        # Build model, optimizer, scheduler and data loaders; resume from
        # 'better_RNN_model_checkpoint.pth.tar' when it exists on disk.
        ############visualize################
        self.log_dir = './tf'
        self.writer = SummaryWriter(self.log_dir)
        # Hyper-parameters (9 input features -> 25 classes).
        self.input_size = 9
        self.output_size = 25
        self.hidden_size = 300
        self.num_layers = 5
        self.learning_rate = 0.01  # 0.1
        self.sequence_length = 1
        self.batch_size = 100  # 400
        self.epochs = 50
        self.model = RNN(input_size=self.input_size,
                         hidden_size=self.hidden_size,
                         num_layers=self.num_layers,
                         output_size=self.output_size)
        # print('\nModel Info: ', self.model)
        '''
        (rnn): RNN(9, 25, num_layers=2, batch_first=True, dropout=0.1)
        (fc): Linear(in_features=25, out_features=25, bias=True)
        (relu): ReLU()
        '''
        print(self.model.rnn)
        self.model.to(device)
        self.loss_function = nn.CrossEntropyLoss()
        # self.updateParams = optim.SGD(
        #     self.model.parameters(), lr=self.learning_rate, momentum=0.9, weight_decay=5e-4, nesterov=True)
        self.updateParams = optim.Adam(self.model.parameters(),
                                       weight_decay=5e-4,
                                       lr=self.learning_rate)
        # LR drops 10x at epochs 10, 20 and 30.
        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.updateParams, milestones=[10, 20, 30], gamma=0.1)
        #######get the data########
        train_datasets, test_datasets = get_data()
        self.train_loader = torch.utils.data.DataLoader(
            train_datasets, batch_size=self.batch_size, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(
            test_datasets, batch_size=self.batch_size,
            shuffle=True)  # mini-batch
        print(
            '#####Model Initialization is completed and ready for the training process.#####'
        )
        print('\n')
        time.sleep(0.1)
        model_file = "better_RNN_model_checkpoint.pth.tar"
        if os.path.isfile(model_file):
            # Resume: restore model/optimizer state plus the history lists
            # used for plotting.
            print("#############Loading the pre-trained model#############")
            checkpoint = torch.load(model_file)
            self.start_epoch = checkpoint['epoch']
            self.best_accuracy = checkpoint['best_accuracy']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.updateParams.load_state_dict(checkpoint['optimizer'])
            self.training_accuracy = checkpoint['training_accuracy']
            self.validation_accuracy = checkpoint['validation_accuracy']
            self.training_loss = checkpoint['training_loss']
            self.validation_loss = checkpoint['validation_loss']
            self.time_list = checkpoint['time']
            print('\n')
            print('preivous model accuracy:', self.best_accuracy)
            print('\n')
        else:
            # Fresh run: empty history, start from epoch 0.
            self.start_epoch = 0
            self.best_accuracy = 0
            self.training_accuracy = []
            self.validation_accuracy = []
            self.training_loss = []
            self.validation_loss = []
            self.time_list = []
            print('NEW model accuracy:', self.best_accuracy)

    def train(self):
        """Run the full training/validation loop with checkpointing,
        TensorBoard scalars and matplotlib summary plots.
        """

        def save_checkpoint(state, better, file='RNN_model_checkpoint.pth.tar'):
            # Always persist the latest state; additionally copy it to the
            # "better_" file when `better` is truthy.
            torch.save(state, file)
            if better:
                shutil.copyfile(file, 'better_RNN_model_checkpoint.pth.tar')

        def training(epochs):
            # One training epoch; returns (mean batch loss, accuracy %).
            step = 0
            self.model.train()  # initializing the training
            # NOTE(review): message says "CNN" but this trains the RNN.
            print("CNN training starts__epoch: {}, LR= {}".format(
                epochs, self.scheduler.get_lr()))
            training_loss = 0
            total = 0
            final_score = 0
            # NOTE(review): stepping the scheduler before any optimizer.step()
            # in the epoch is the legacy (pre-PyTorch-1.1) ordering — confirm
            # it matches the installed torch version's expectations.
            self.scheduler.step()  # dynamically change the learning rate
            self.loss = 0
            for batch_id, (X_batch, y_batch) in enumerate(self.train_loader):
                # Reshape flat rows to (batch, seq_len=1, features=9).
                X_batch = X_batch.view(-1, self.sequence_length,
                                       self.input_size)
                X_batch = X_batch.float().to(device)
                y_batch = y_batch.to(device)
                y_batch = y_batch.to(device).detach()
                if X_batch.requires_grad:
                    pass
                else:
                    print('AutoGrad is OFF!')
                self.updateParams.zero_grad(
                )  # zero gradient before the backward
                result = self.model(X_batch)
                batch_loss = self.loss_function(result, y_batch)
                # wihout .item(),in gpu model, not enough memory
                training_loss += batch_loss.item()
                batch_loss.backward()
                self.updateParams.step(
                )  # performs a parameter update based on the current gradient
                _, predict = torch.max((result), 1)  # dim=1->each row
                final_score += predict.eq(y_batch).cpu().sum().type(
                    torch.DoubleTensor).item()
                # check the gradient
                # print('ID', batch_id)
                # print('after back prop--parameter: ', list(self.model.parameters())
                # [0].grad)  # the gradient is so very small
            training_loss_mean = training_loss / \
                (len(self.train_loader.dataset)/(self.batch_size))
            training_accuracy = 100*final_score / \
                (len(self.train_loader.dataset))
            print("Training-epoch-{}-training_loss_mean: {:.4f}".format(
                epochs, training_loss_mean))
            print("Training-epoch-{}-training_accuracy: {:.4f}%".format(
                epochs, training_accuracy))
            # self.writer.add_image('Output', vutils.make_grid(output.data, normalize=True, scale_each=True), niter)
            return (training_loss_mean, training_accuracy)

        def validation(epochs):
            # One validation pass; returns (mean batch loss, accuracy %) and
            # records the accuracy in self.model_accuracy_cur_epoch.
            self.model.eval()
            validation_loss = 0
            total = 0
            final_score = 0
            with torch.no_grad(
            ):  # temporarily set all the requires_grad flag to false
                for batch_id, (test_data,
                               target_test) in enumerate(self.test_loader):
                    test_data = test_data.view(-1, self.sequence_length,
                                               self.input_size)
                    test_data = test_data.float().to(device)
                    target_test = target_test.to(device)
                    result = self.model(test_data)
                    batch_loss = self.loss_function(result, target_test)
                    # NOTE(review): unlike training(), this accumulates the
                    # loss tensor itself (no .item()) — works under no_grad
                    # but the mean below stays a tensor.
                    validation_loss += batch_loss
                    _, predict = torch.max((result), 1)  # dim=1->each row
                    final_score += predict.eq(target_test).cpu().sum().type(
                        torch.DoubleTensor).item()
            validation_loss_mean = validation_loss / \
                (len(self.test_loader.dataset)/(self.batch_size))
            validation_accuracy = 100*final_score / \
                (len(self.test_loader.dataset))
            print("Validation-epoch-{}-Validation_loss_mean: {:.4f}".format(
                epochs, validation_loss_mean))
            print('Validation Accuracy: {:.4f}%'.format(validation_accuracy))
            self.model_accuracy_cur_epoch = validation_accuracy
            return (validation_loss_mean, validation_accuracy)

        # NOTE(review): a __main__ guard inside a method is unusual — the body
        # only runs when the module is executed as a script, so calling
        # train() from an import does nothing. Confirm this is intentional.
        if __name__ == "__main__":
            print("######CIFAR100 Training-Validation Starts######")
            # NOTE(review): epoch_iter always has self.epochs-1 entries, but
            # the history lists only grow by (epochs - start_epoch - 1) per
            # run; the plt.plot calls below will fail on a resumed run due to
            # length mismatch — confirm.
            epoch_iter = range(1, self.epochs)
            self.model_accuracy_cur_epoch = 0
            if self.start_epoch == self.epochs:
                pass
            else:
                for i in range(self.start_epoch + 1, self.epochs):
                    time_begin = time.time()
                    training_result = training(i)
                    self.training_loss.append(training_result[0])
                    self.training_accuracy.append(training_result[1])
                    vali_result = validation(i)
                    self.validation_loss.append(vali_result[0])
                    self.validation_accuracy.append(vali_result[1])
                    time_end = time.time() - time_begin
                    self.time_list.append(time_end)
                    progress = float(i * 100 // len(epoch_iter))
                    print('Progress: {:.4f}%'.format(progress))
                    print('\n')
                    #######################################
                    # Tensorboard Visualization
                    niter = i
                    # tensorboard --logdir=tf --port 6066
                    self.writer.add_scalars(
                        'Loss Curve', {
                            'Training Loss': training_result[0],
                            'Validation Loss': vali_result[0]
                        }, niter)  # attention->add_scalarS
                    self.writer.add_scalars(
                        'Accuracy Curve', {
                            'Training Accuracy': training_result[1],
                            'Validation Accuracy': vali_result[1]
                        }, niter)
                    #######################################
                    better = self.model_accuracy_cur_epoch > self.best_accuracy
                    self.best_accuracy = max(self.best_accuracy,
                                             self.model_accuracy_cur_epoch)
                    # if better:
                    #     torch.save(self.model.state_dict(), 'CNN_MODEL.pt')
                    # NOTE(review): `better - 1` inverts the flag (True-1 == 0
                    # is falsy, False-1 == -1 is truthy), so the "better_"
                    # checkpoint is copied exactly when the model did NOT
                    # improve — this almost certainly should be `better`.
                    save_checkpoint(
                        {
                            'epoch': i,
                            'best_accuracy': self.best_accuracy,
                            'state_dict': self.model.state_dict(),
                            'optimizer': self.updateParams.state_dict(),
                            'training_loss': self.training_loss,
                            'training_accuracy': self.training_accuracy,
                            'validation_loss': self.validation_loss,
                            'validation_accuracy': self.validation_accuracy,
                            'time': self.time_list,
                        }, better - 1)
                    print(
                        'Model Updated, proceeding to next epoch, best accuracy= {}'
                        .format(self.best_accuracy))
            # save the model after training
            torch.save(self.model.state_dict(), 'CNN_MODEL.pt')
            # ploting
            # loss function
            plt.figure(1)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter,
                     self.training_loss,
                     color='red',
                     linestyle='solid',
                     linewidth='3.0',
                     marker='p',
                     markerfacecolor='red',
                     markersize='10',
                     label='Training Loss')
            plt.plot(epoch_iter,
                     self.validation_loss,
                     color='green',
                     linestyle='solid',
                     linewidth='3.0',
                     marker='o',
                     markerfacecolor='green',
                     markersize='10',
                     label='Validation Loss')
            plt.ylabel('Loss', fontsize=18)
            plt.xlabel('Epochs', fontsize=18)
            title = "RNN Result-loss"
            plt.title(title, fontsize=12)
            plt.legend(fontsize=14)
            plt.grid(True)
            plt.show()
            # Training accuracy
            plt.figure(2)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter,
                     self.training_accuracy,
                     color='blue',
                     linestyle='solid',
                     linewidth='3.0',
                     marker='s',
                     markerfacecolor='blue',
                     markersize='10',
                     label='Training Accuracy')
            plt.plot(epoch_iter,
                     self.validation_accuracy,
                     color='green',
                     linestyle='solid',
                     linewidth='3.0',
                     marker='s',
                     markerfacecolor='green',
                     markersize='10',
                     label='Validation Accuracy')
            title = "RNN Result-accuracy"
            plt.title(title, fontsize=12)
            plt.xlabel('Epochs', fontsize=18)
            plt.title("Model Accuracy", fontsize=14)
            plt.legend(fontsize=14)
            plt.show()
            # Per-epoch wall-clock time
            plt.figure(3)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter,
                     self.time_list,
                     color='blue',
                     linestyle='solid',
                     linewidth='3.0',
                     marker='s',
                     markerfacecolor='blue',
                     markersize='10',
                     label='Validation Loss')
            plt.ylabel('Time (s)', fontsize=18)
            plt.xlabel('Epochs', fontsize=18)
            plt.title("Speed", fontsize=14)
            plt.legend(fontsize=14)
            plt.show()

    def forward_EM(self, filepath, target):
        """Run inference on a CSV file and return (predictions, fraction of
        predictions equal to `target`).

        The CSV is expected to have 9 feature columns followed by a label
        column; features are row-normalized then standardized.
        """
        # data processing
        df = pd.read_csv(filepath, header=None)  # no column names!!!
        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # normalize
        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length,
                                     self.input_size)
        y_new = df.iloc[:, -1]
        # Labels in the file are 1-based; shift to 0-based class indices.
        y_new -= 1
        input_data = input_data.float().to(device)
        ##############
        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu()
        # Count how many predictions match the single `target` class.
        i = 0
        for elem in predict:
            if elem == target:
                i += 1
        # for i in range(len(predict)):
        #     # print(predict)
        #     if predict[i] == y_new[i]:
        #         count += 1
        acc = float(i / len(predict))
        # print('Accuracy: {}%'.format(acc*100))
        # from sklearn.metrics import confusion_matrix
        # confusion_matrix = confusion_matrix(
        #     y_true=y_new, y_pred=predict)
        # # #Normalize CM
        # confusion_matrix = cm = confusion_matrix.astype(
        #     'float') / confusion_matrix.sum(axis=1)[:, np.newaxis]
        # df_cm = pd.DataFrame(confusion_matrix)
        # # plot confusion matrix
        # fig, ax = plt.subplots()
        # sns.heatmap(df_cm, cmap="coolwarm", annot=False)
        # fig.set_size_inches(8, 6)
        # ax.set_title("Confusion Matrix of RNN, Data: {}".format(filepath))
        # ax.set_xlabel('Perdicted Label', fontsize=12)
        # ax.set_ylabel('Actual Label', fontsize=12)
        # plt.show()
        return predict, acc

    def forward_ni(self, filepath):
        """Run inference on a CSV file, plot a normalized confusion matrix
        against the file's label column, and return (predictions, accuracy).

        NOTE(review): unlike forward_EM, labels here are NOT shifted to
        0-based — confirm the two files use different label conventions.
        """
        # data processing
        df = pd.read_csv(filepath, header=None)  # no column names!!!
        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # normalize
        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length,
                                     self.input_size)
        y_new = df.iloc[:, -1]
        input_data = input_data.float().to(device)
        ##############
        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu()
        predict = predict.numpy()
        i = 0
        print(predict)
        print(y_new.head(10))
        count = 0
        for i in range(len(predict)):
            # print(predict)
            if predict[i] == y_new[i]:
                count += 1
        acc = float(count / len(predict))
        # print('Accuracy: {}%'.format(acc*100))
        # NOTE(review): the local name rebinds the imported function on the
        # next line, so confusion_matrix() can only be called once here.
        from sklearn.metrics import confusion_matrix
        confusion_matrix = confusion_matrix(y_true=y_new, y_pred=predict)
        # #Normalize CM
        confusion_matrix = cm = confusion_matrix.astype(
            'float') / confusion_matrix.sum(axis=1)[:, np.newaxis]
        df_cm = pd.DataFrame(confusion_matrix)
        # plot confusion matrix
        fig, ax = plt.subplots()
        sns.heatmap(df_cm, cmap="coolwarm", annot=False)
        fig.set_size_inches(8, 6)
        ax.set_title("Confusion Matrix of RNN, Data: {}".format(filepath))
        ax.set_xlabel('Perdicted Label', fontsize=12)
        ax.set_ylabel('Actual Label', fontsize=12)
        plt.show()
        return predict, acc
def main():
    """Train the RNN text classifier, tracking a moving-average (last 5000
    examples) accuracy on both training and validation data, and saving the
    model whenever validation accuracy reaches a new best above 65%.
    """
    start = time.time()

    # Initialize Utils class and load data
    U = Utils()
    n_words, n_categories, Word2Index, df_train, df_val = U.load_data(
        data_size=config["data_size"], data_dir=config["data_dir"])

    # Initialize RNN model:
    rnn = RNN(n_words, config["n_hidden"], n_categories)

    # The save location depends only on the config; build it once instead of
    # re-concatenating the same pieces at every use site.
    save_dir = "saves/save_hn_" + str(config["n_hidden"]) + "_lr_" + str(
        config["learning_rate"])
    saved_model_dir = save_dir + "/saved_model.pth"

    # If training from checkpoint, load model state_dict here:
    if config["train_from_savedmodel"]:
        rnn.load_state_dict(torch.load(saved_model_dir))

    # Moving-average (last 5000) right/wrong tallies for train and validation
    train_scores_right, train_scores_wrong, val_scores_right, val_scores_wrong = [], [], [], []
    best_acc_val = 0

    # If not training from saved checkpoint, create new save folder
    if not config["train_from_savedmodel"]:
        try:
            os.makedirs(save_dir)
        except OSError as e:
            # BUG FIX: the original `raise ("...")` raised a plain string,
            # which itself fails with "TypeError: exceptions must derive from
            # BaseException" and hides the real message. Raise a proper
            # exception chained to the makedirs failure instead.
            raise RuntimeError(
                "Save directory: save_hn_XXX_lr_x.xxxx_already exists. "
                "delete it to start training from scratch") from e
        # Persist the vocabulary next to the model so inference can reload it.
        with open(save_dir + "/vocab.txt", "wb") as f:
            pickle.dump(Word2Index, f)

    # Main training loop (renamed `iter` -> `step`: `iter` shadows a builtin)
    for step in range(config["n_iters"]):
        category, line, category_tensor, line_tensor = U.randomTrainingExample(
            df_train)
        output, rnn = U.train(category_tensor, line_tensor, step, rnn,
                              config['learning_rate'],
                              config['learning_rate_decay'])

        # Training iteration, save last 5000 scores to evaluate training accuracy
        guess, _ = U.categoryFromOutput(output)
        if guess == category:
            train_scores_right.append(1)
            train_scores_wrong.append(0)
        else:
            train_scores_wrong.append(1)
            train_scores_right.append(0)
        acc_train = round(
            100 * sum(train_scores_right) /
            (sum(train_scores_right) + sum(train_scores_wrong)), 1)

        # validation evaluation, save last 5000 scores to evaluate validation accuracy
        category_val, _, category_tensor_val, line_tensor_val = U.randomTrainingExample(
            df_val)
        output_val, rnn = U.evaluate(line_tensor_val, rnn)
        guess_val, _ = U.categoryFromOutput(output_val)
        if guess_val == category_val:
            val_scores_right.append(1)
            val_scores_wrong.append(0)
        else:
            val_scores_right.append(0)
            val_scores_wrong.append(1)
        acc_val = round(
            100 * sum(val_scores_right) /
            (sum(val_scores_right) + sum(val_scores_wrong)), 1)

        # Print details and show the example of a training prediction
        if step % config["print_every"] == 0 and step >= 100:
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('%d %d%% (%s) | %s / %s %s' %
                  (step, step / config["n_iters"] * 100, U.timeSince(start),
                   line, guess, correct))
            print('Train Accuracy: ' + str(acc_train) +
                  '% | Validation Accuracy: ' + str(acc_val) + '%')

        # Save model parameters for best model, minimum val accuracy of 65%
        # allowed before starting to save
        if step > 5000 and (acc_val > best_acc_val) and acc_val > 65:
            best_acc_val = acc_val
            torch.save(rnn.state_dict(), saved_model_dir)
            print("New Best Model with validation accuracy = " +
                  str(best_acc_val) + "! Saving model parameters...")

        # Maintain last 5000 scores of training and validation to calculate
        # accuracy
        train_scores_right = train_scores_right[-5000:]
        train_scores_wrong = train_scores_wrong[-5000:]
        val_scores_right = val_scores_right[-5000:]
        val_scores_wrong = val_scores_wrong[-5000:]