def translate():
    """Load the baseline RNN translation model and translate one sample sentence.

    Side effects: reads ``models/baseline.module`` from disk and prints the
    input vector and the final translation to stdout.
    """
    # BUG FIX: this initialization was commented out, but ``data`` is used on
    # every following line, so the function raised NameError immediately.
    # (Assumes en_path, fr_path, vocab_size, max_length are module-level
    # settings, as the original commented-out line implies — TODO confirm.)
    data = LanguageLoader(en_path, fr_path, vocab_size, max_length)
    model = RNN(data.input_size, data.output_size)
    model.load_state_dict(torch.load('models/baseline.module'))
    # BUG FIX: the original did ``translation = model.eval(vecs)`` —
    # nn.Module.eval() takes no arguments and returns the module itself,
    # not a translation. Switch to eval mode first, then run the forward pass.
    model.eval()
    vecs = data.sentence_to_vec("Madam president<EOS>")
    print("in translate-- ", vecs)
    translation = model(vecs)
    print("final result ", data.vec_to_sentence(translation))
def load_model(path, hyper, inference=True, dictionary_path=None, LSTM=False):
    """Reconstruct an RNN from a checkpoint directory and load its weights.

    Args:
        path: directory containing ``model.pt``.
        hyper: dict of hyper-parameters keyed by CLI-style names
            (``'--embed_size'``, ``'--hidden_size'``, ...).
        inference: when True, switch the model to eval mode before returning.
        dictionary_path: location of the vocab/writer dictionaries; defaults
            to ``args.dictionary_path`` resolved at call time.
        LSTM: forwarded to the RNN constructor to select an LSTM cell.

    Returns:
        The RNN with weights restored from ``<path>/model.pt``.
    """
    # BUG FIX: the original signature used ``dictionary_path=args.dictionary_path``,
    # which is evaluated once at import time — it freezes a stale value and
    # crashes at import if ``args`` has not been parsed yet. Resolve lazily.
    if dictionary_path is None:
        dictionary_path = args.dictionary_path
    assert os.path.exists(
        path), 'directory for model {} could not be found'.format(path)
    voc_2_index, _, writer = load_dictionaries(dictionary_path)
    model = RNN(hyper['--embed_size'],
                hyper['--hidden_size'],
                len(voc_2_index),
                hyper['--num_layers'],
                add_writer=hyper['--writer_codes'],
                writer_number=len(writer),
                writer_embed_size=hyper['--writers_embeddings'],
                add_writer_as_hidden=hyper['--initialise_hidden'],
                LSTM=LSTM)
    model.load_state_dict(torch.load(os.path.join(path, 'model.pt')))
    if inference:
        model.eval()
    return model
def main(args):
    """Evaluate a saved SNLI model (RNN or CNN) on the MNLI validation set.

    Args:
        args: parsed CLI namespace; reads use_cuda, seed, train, val, model,
            batch_size, shuffle, num_workers, hidden_dim, dropout_prob,
            kernel_size, and inspect.

    Side effects: loads model weights from ``const.MODELS``, prints the
    validation accuracy (or, with ``--inspect``, the premises that were
    classified correctly/incorrectly).
    """
    # Use CUDA when requested and available.
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # Fix random seed for reproducibility.
    torch.manual_seed(args.seed)
    # Generate token-to-index and index-to-token mappings.
    tok2id, id2tok = data_loader.build_or_load_vocab(args.train, overwrite=False)
    print("*" * 5)
    print(args)
    # DataLoader() keyword arguments.
    params = {
        "batch_size": args.batch_size,
        "collate_fn": data_loader.collate_fn,
        "shuffle": args.shuffle,
        "num_workers": args.num_workers,
    }
    # train_dataset = data_loader.SNLIDataSet(args.train, tok2id)
    # train_loader = torch.utils.data.DataLoader(train_dataset, **params)
    val_dataset = data_loader.SNLIDataSet(args.val, tok2id)
    val_loader = torch.utils.data.DataLoader(val_dataset, **params)
    # Initialize model and restore its trained weights.
    if args.model == "rnn":  # RNN model
        model = RNN(
            vocab_size=const.MAX_VOCAB_SIZE,    # Vocabulary size
            emb_dim=const.EMB_DIM,              # Embedding dimensions
            hidden_dim=args.hidden_dim,         # Hidden dimensions
            dropout_prob=args.dropout_prob,     # Dropout probability
            padding_idx=const.PAD_IDX,          # Padding token index
            num_classes=const.NUM_CLASSES,      # Number of class labels
            id2tok=id2tok,                      # Vocabulary
        ).to(device)
        model.load_state_dict(torch.load(const.MODELS + "rnn.pt"))
        model.eval()
    elif args.model == "cnn":  # CNN model
        model = CNN(
            vocab_size=const.MAX_VOCAB_SIZE,    # Vocabulary size
            emb_dim=const.EMB_DIM,              # Embedding dimensions
            hidden_dim=args.hidden_dim,         # Hidden dimensions
            kernel_size=args.kernel_size,       # Kernel size
            dropout_prob=args.dropout_prob,     # Dropout probability
            padding_idx=const.PAD_IDX,          # Padding token index
            num_classes=const.NUM_CLASSES,      # Number of class labels
            id2tok=id2tok,                      # Vocabulary
        ).to(device)
        model.load_state_dict(torch.load(const.MODELS + "cnn.pt"))
        model.eval()
    else:
        # NOTE: exit() comes from the site module; fine for a script entry point.
        print("Invalid model specification, exiting")
        exit()
    # Criterion
    criterion = torch.nn.CrossEntropyLoss()
    # (Removed dead code: the original rebound ``params`` to a list of
    # trainable parameters here, shadowing the DataLoader kwargs dict; the
    # list was never used — this is evaluation only, no optimizer is built.)
    # Inspect correct/incorrect predictions.
    if args.inspect:
        right, wrong = eval_model(val_loader, model, device, criterion,
                                  inspect=True)
        print("\nValidation premises with correct predictions:\n")
        for i, item in enumerate(right):
            text = " ".join([id2tok[idx] for idx in item if idx > 0])
            print("#{}\n {}".format(i + 1, text))
        print("\nValidation premises with incorrect predictions:\n")
        for i, item in enumerate(wrong):
            text = " ".join([id2tok[idx] for idx in item if idx > 0])
            print("#{}\n {}".format(i + 1, text))
        return
    # Validation
    val_acc, _ = eval_model(val_loader, model, device, criterion)
    print("\n Validation accuracy: {}".format(val_acc))
    print("*" * 5 + "\n")
# NOTE(review): this span continues a model-constructor call whose opening
# (``model = ...RNN(...``) is not present in this file — the fragment starts
# mid-argument-list. Left byte-identical apart from comments/formatting.
num_layers=num_layers, nonlinearity=nonlinearity, dropout=dropout,
        bidirectional=bidirectional).to(device)
criterion = nn.CrossEntropyLoss().to(device)  # loss function
optimizer = optim.Adam(model.parameters(), lr=lr)  # optimizer
# Training
train(model, epoch, trainDataLoader, criterion, optimizer)
# Test set and preprocessing
test_sentences = ["i hate me", "you love me"]
test_labels = [0, 1]
testInput, testTarget = make_data(test_sentences, word2idx, test_labels)
testInput = torch.LongTensor(testInput).to(device)
testTarget = torch.LongTensor(testTarget).to(device)
# Wrap into a dataset and loader
testDataSet = Data.TensorDataset(testInput, testTarget)
testDataLoader = Data.DataLoader(testDataSet, 2, shuffle=False)  # do not shuffle
# Predict
model = model.eval()
for testInput, _ in testDataLoader:
    num = testInput.shape[0]
    # .max(1, keepdim=True)[1] keeps the argmax indices along dim 1
    # (original comment: "look up what .max() does")
    predict = model(testInput).data.max(1, keepdim=True)[1]
    for i in range(num):
        if predict[i][0] == 0:
            print(test_sentences[i], "is Bad Mean...")
        else:
            print(test_sentences[i], "is Good Mean!!")
class TENG:
    """Train and evaluate an RNN classifier (9 features -> 25 classes).

    Handles checkpoint resume, per-epoch TensorBoard logging, matplotlib
    result plots, and batch inference from CSV files.
    """

    def __init__(self):
        # ---------------- visualization ----------------
        self.log_dir = './tf'
        self.writer = SummaryWriter(self.log_dir)
        # ---------------- hyper-parameters ----------------
        self.input_size = 9
        self.output_size = 25
        self.hidden_size = 300
        self.num_layers = 5
        self.learning_rate = 0.01  # 0.1
        self.sequence_length = 1
        self.batch_size = 100  # 400
        self.epochs = 50
        self.model = RNN(input_size=self.input_size,
                         hidden_size=self.hidden_size,
                         num_layers=self.num_layers,
                         output_size=self.output_size)
        # print('\nModel Info: ', self.model)
        # Example structure:
        #   (rnn): RNN(9, 25, num_layers=2, batch_first=True, dropout=0.1)
        #   (fc): Linear(in_features=25, out_features=25, bias=True)
        #   (relu): ReLU()
        print(self.model.rnn)
        self.model.to(device)
        self.loss_function = nn.CrossEntropyLoss()
        # self.updateParams = optim.SGD(
        #     self.model.parameters(), lr=self.learning_rate, momentum=0.9,
        #     weight_decay=5e-4, nesterov=True)
        self.updateParams = optim.Adam(self.model.parameters(),
                                       weight_decay=5e-4,
                                       lr=self.learning_rate)
        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.updateParams, milestones=[10, 20, 30], gamma=0.1)
        # ---------------- data ----------------
        train_datasets, test_datasets = get_data()
        self.train_loader = torch.utils.data.DataLoader(
            train_datasets, batch_size=self.batch_size, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(
            test_datasets, batch_size=self.batch_size, shuffle=True)  # mini-batch
        print(
            '#####Model Initialization is completed and ready for the training process.#####'
        )
        print('\n')
        time.sleep(0.1)
        # Resume from the "better" checkpoint when one exists on disk;
        # otherwise start fresh.
        model_file = "better_RNN_model_checkpoint.pth.tar"
        if os.path.isfile(model_file):
            print("#############Loading the pre-trained model#############")
            checkpoint = torch.load(model_file)
            self.start_epoch = checkpoint['epoch']
            self.best_accuracy = checkpoint['best_accuracy']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.updateParams.load_state_dict(checkpoint['optimizer'])
            self.training_accuracy = checkpoint['training_accuracy']
            self.validation_accuracy = checkpoint['validation_accuracy']
            self.training_loss = checkpoint['training_loss']
            self.validation_loss = checkpoint['validation_loss']
            self.time_list = checkpoint['time']
            print('\n')
            print('preivous model accuracy:', self.best_accuracy)
            print('\n')
        else:
            self.start_epoch = 0
            self.best_accuracy = 0
            self.training_accuracy = []
            self.validation_accuracy = []
            self.training_loss = []
            self.validation_loss = []
            self.time_list = []
            print('NEW model accuracy:', self.best_accuracy)

    def train(self):
        """Run the train/validation loop, checkpoint every epoch, plot results."""

        def save_checkpoint(state, better, file='RNN_model_checkpoint.pth.tar'):
            # Always persist the latest state; additionally copy it to the
            # "better_" file when this epoch improved validation accuracy.
            torch.save(state, file)
            if better:
                shutil.copyfile(file, 'better_RNN_model_checkpoint.pth.tar')

        def training(epochs):
            # One training epoch; returns (mean loss, accuracy %).
            self.model.train()
            # NOTE(review): scheduler.get_lr() is deprecated in newer PyTorch
            # in favor of get_last_lr() — confirm against the installed version.
            print("CNN training starts__epoch: {}, LR= {}".format(
                epochs, self.scheduler.get_lr()))
            training_loss = 0
            final_score = 0
            # NOTE(review): PyTorch recommends calling scheduler.step() after
            # the optimizer steps of the epoch; kept here to preserve the
            # original LR schedule.
            self.scheduler.step()  # dynamically change the learning rate
            self.loss = 0
            for batch_id, (X_batch, y_batch) in enumerate(self.train_loader):
                X_batch = X_batch.view(-1, self.sequence_length,
                                       self.input_size)
                X_batch = X_batch.float().to(device)
                y_batch = y_batch.to(device)
                y_batch = y_batch.to(device).detach()
                if not X_batch.requires_grad:
                    print('AutoGrad is OFF!')
                self.updateParams.zero_grad()  # zero gradient before backward
                result = self.model(X_batch)
                batch_loss = self.loss_function(result, y_batch)
                # .item() detaches the scalar — without it GPU memory grows.
                training_loss += batch_loss.item()
                batch_loss.backward()
                self.updateParams.step()  # parameter update from current gradient
                _, predict = torch.max((result), 1)  # dim=1 -> argmax per row
                final_score += predict.eq(y_batch).cpu().sum().type(
                    torch.DoubleTensor).item()
            training_loss_mean = training_loss / \
                (len(self.train_loader.dataset)/(self.batch_size))
            training_accuracy = 100*final_score / \
                (len(self.train_loader.dataset))
            print("Training-epoch-{}-training_loss_mean: {:.4f}".format(
                epochs, training_loss_mean))
            print("Training-epoch-{}-training_accuracy: {:.4f}%".format(
                epochs, training_accuracy))
            return (training_loss_mean, training_accuracy)

        def validation(epochs):
            # One validation pass; returns (mean loss, accuracy %) and
            # records the epoch accuracy on self.model_accuracy_cur_epoch.
            self.model.eval()
            validation_loss = 0
            final_score = 0
            with torch.no_grad():  # disable autograd for inference
                for batch_id, (test_data,
                               target_test) in enumerate(self.test_loader):
                    test_data = test_data.view(-1, self.sequence_length,
                                               self.input_size)
                    test_data = test_data.float().to(device)
                    target_test = target_test.to(device)
                    result = self.model(test_data)
                    batch_loss = self.loss_function(result, target_test)
                    # BUG FIX: the original accumulated the tensor itself
                    # (``+= batch_loss``); the stored mean then stayed a
                    # (possibly CUDA) tensor, which breaks the later
                    # matplotlib plotting and is inconsistent with the
                    # training loop. Accumulate the Python float.
                    validation_loss += batch_loss.item()
                    _, predict = torch.max((result), 1)  # dim=1 -> argmax per row
                    final_score += predict.eq(target_test).cpu().sum().type(
                        torch.DoubleTensor).item()
            validation_loss_mean = validation_loss / \
                (len(self.test_loader.dataset)/(self.batch_size))
            validation_accuracy = 100*final_score / \
                (len(self.test_loader.dataset))
            print("Validation-epoch-{}-Validation_loss_mean: {:.4f}".format(
                epochs, validation_loss_mean))
            print('Validation Accuracy: {:.4f}%'.format(validation_accuracy))
            self.model_accuracy_cur_epoch = validation_accuracy
            return (validation_loss_mean, validation_accuracy)

        # NOTE(review): a __main__ guard inside a method is unusual — it makes
        # train() a no-op when the module is imported rather than executed as
        # a script. Kept to preserve behavior.
        if __name__ == "__main__":
            print("######CIFAR100 Training-Validation Starts######")
            epoch_iter = range(1, self.epochs)
            self.model_accuracy_cur_epoch = 0
            if self.start_epoch == self.epochs:
                pass  # already fully trained; skip straight to saving/plotting
            else:
                for i in range(self.start_epoch + 1, self.epochs):
                    time_begin = time.time()
                    training_result = training(i)
                    self.training_loss.append(training_result[0])
                    self.training_accuracy.append(training_result[1])
                    vali_result = validation(i)
                    self.validation_loss.append(vali_result[0])
                    self.validation_accuracy.append(vali_result[1])
                    time_end = time.time() - time_begin
                    self.time_list.append(time_end)
                    progress = float(i * 100 // len(epoch_iter))
                    print('Progress: {:.4f}%'.format(progress))
                    print('\n')
                    #######################################
                    # TensorBoard visualization
                    niter = i
                    # tensorboard --logdir=tf --port 6066
                    self.writer.add_scalars(
                        'Loss Curve', {
                            'Training Loss': training_result[0],
                            'Validation Loss': vali_result[0]
                        }, niter)  # attention -> add_scalarS
                    self.writer.add_scalars(
                        'Accuracy Curve', {
                            'Training Accuracy': training_result[1],
                            'Validation Accuracy': vali_result[1]
                        }, niter)
                    #######################################
                    better = self.model_accuracy_cur_epoch > self.best_accuracy
                    self.best_accuracy = max(self.best_accuracy,
                                             self.model_accuracy_cur_epoch)
                    # BUG FIX: the original passed ``better - 1`` which is
                    # -1 (truthy) when the epoch was NOT better and 0 (falsy)
                    # when it was — exactly inverting the best-checkpoint
                    # copy. Pass the boolean itself.
                    save_checkpoint(
                        {
                            'epoch': i,
                            'best_accuracy': self.best_accuracy,
                            'state_dict': self.model.state_dict(),
                            'optimizer': self.updateParams.state_dict(),
                            'training_loss': self.training_loss,
                            'training_accuracy': self.training_accuracy,
                            'validation_loss': self.validation_loss,
                            'validation_accuracy': self.validation_accuracy,
                            'time': self.time_list,
                        }, better)
                    print(
                        'Model Updated, proceeding to next epoch, best accuracy= {}'
                        .format(self.best_accuracy))
            # Save the final model after training.
            torch.save(self.model.state_dict(), 'CNN_MODEL.pt')
            # ---------------- plotting ----------------
            # Loss curves
            plt.figure(1)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter, self.training_loss, color='red',
                     linestyle='solid', linewidth='3.0', marker='p',
                     markerfacecolor='red', markersize='10',
                     label='Training Loss')
            plt.plot(epoch_iter, self.validation_loss, color='green',
                     linestyle='solid', linewidth='3.0', marker='o',
                     markerfacecolor='green', markersize='10',
                     label='Validation Loss')
            plt.ylabel('Loss', fontsize=18)
            plt.xlabel('Epochs', fontsize=18)
            title = "RNN Result-loss"
            plt.title(title, fontsize=12)
            plt.legend(fontsize=14)
            plt.grid(True)
            plt.show()
            # Accuracy curves
            plt.figure(2)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter, self.training_accuracy, color='blue',
                     linestyle='solid', linewidth='3.0', marker='s',
                     markerfacecolor='blue', markersize='10',
                     label='Training Accuracy')
            plt.plot(epoch_iter, self.validation_accuracy, color='green',
                     linestyle='solid', linewidth='3.0', marker='s',
                     markerfacecolor='green', markersize='10',
                     label='Validation Accuracy')
            title = "RNN Result-accuracy"
            plt.title(title, fontsize=12)
            plt.xlabel('Epochs', fontsize=18)
            plt.title("Model Accuracy", fontsize=14)
            plt.legend(fontsize=14)
            plt.show()
            # Per-epoch wall-clock time
            plt.figure(3)
            sns.set_style('whitegrid')
            plt.plot(epoch_iter, self.time_list, color='blue',
                     linestyle='solid', linewidth='3.0', marker='s',
                     markerfacecolor='blue', markersize='10',
                     label='Validation Loss')
            plt.ylabel('Time (s)', fontsize=18)
            plt.xlabel('Epochs', fontsize=18)
            plt.title("Speed", fontsize=14)
            plt.legend(fontsize=14)
            plt.show()

    def forward_EM(self, filepath, target):
        """Run inference on a CSV and report the fraction predicted as *target*.

        Returns:
            (predict, acc): predicted class indices (CPU tensor) and the
            fraction of rows whose prediction equals ``target``.
        """
        # Data processing: first 9 columns are features (CSV has no header).
        df = pd.read_csv(filepath, header=None)  # no column names!!!
        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # row-normalize
        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length,
                                     self.input_size)
        # Last column holds 1-based labels; shift to 0-based (kept for parity
        # with forward_ni even though unused below).
        y_new = df.iloc[:, -1]
        y_new -= 1
        input_data = input_data.float().to(device)
        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu()
        hits = 0
        for elem in predict:
            if elem == target:
                hits += 1
        acc = float(hits / len(predict))
        return predict, acc

    def forward_ni(self, filepath):
        """Run inference on a labeled CSV, plot a confusion matrix, return accuracy.

        Returns:
            (predict, acc): predicted class indices (numpy array) and the
            fraction of rows whose prediction matches the CSV's last column.
        """
        # Data processing: first 9 columns are features (CSV has no header).
        df = pd.read_csv(filepath, header=None)  # no column names!!!
        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # row-normalize
        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length,
                                     self.input_size)
        y_new = df.iloc[:, -1]
        input_data = input_data.float().to(device)
        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu()
        predict = predict.numpy()
        print(predict)
        print(y_new.head(10))
        count = 0
        for i in range(len(predict)):
            if predict[i] == y_new[i]:
                count += 1
        acc = float(count / len(predict))
        # Confusion matrix, row-normalized, rendered as a heatmap.
        from sklearn.metrics import confusion_matrix
        confusion_matrix = confusion_matrix(y_true=y_new, y_pred=predict)
        confusion_matrix = cm = confusion_matrix.astype(
            'float') / confusion_matrix.sum(axis=1)[:, np.newaxis]
        df_cm = pd.DataFrame(confusion_matrix)
        fig, ax = plt.subplots()
        sns.heatmap(df_cm, cmap="coolwarm", annot=False)
        fig.set_size_inches(8, 6)
        ax.set_title("Confusion Matrix of RNN, Data: {}".format(filepath))
        ax.set_xlabel('Perdicted Label', fontsize=12)
        ax.set_ylabel('Actual Label', fontsize=12)
        plt.show()
        return predict, acc