train_loader = torch.utils.data.DataLoader(DataUtils.ECGDataset(train_path, test_path),
                                           batch_size=args.train_batch_size,
                                           drop_last=True,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(DataUtils.ECGDataset(train_path, test_path, test=True),
                                          batch_size=args.test_batch_size,
                                          drop_last=True,
                                          shuffle=True)

# Alternative model configurations:
# model = LSTM(28 * 28, 64, 10)  # MNIST dataset
# model = LSTM(140, 64, 5)
# model = FC(28 * 28, 300, 100, 10)
# model = TTRNN([4, 7, 4, 7], [4, 2, 4, 4], [1, 3, 4, 2, 1], 1, 0.8, 'ttgru')
# model = RNN([2, 5, 2, 7], [4, 4, 2, 4], [1, 2, 5, 3, 1], 0.8, 5)
model = RNN([1, 5, 2, 1], [2, 2, 2, 2], [1, 2, 2, 2, 1], 0.8, 5)
if args.cuda:
    model.cuda()
optimizer = TorchOptim.Adam(model.parameters(), lr=args.lr)


def train(epoch):
    # model.train()
    for step, batch in enumerate(train_loader):
        inputs = batch[0]
        target = batch[1].type(torch.LongTensor)
        sequence_length = batch[2] / args.feature_size
        if args.cuda:
            inputs, target = inputs.cuda(), target.cuda()
        output = model(inputs.view(args.train_batch_size, -1, args.feature_size).float(),
                       lengths=sequence_length)
        optimizer.zero_grad()
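        # --- Sketch continuation: the source truncates after zero_grad(). The
        # loss term below is an assumption (cross-entropy is typical for this
        # 5-class ECG setup); the original objective is not shown. ---
        loss = torch.nn.functional.cross_entropy(output, target)
        loss.backward()
        optimizer.step()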
def main(args):
    """Main function"""
    # Use CUDA if requested and available
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # Fix random seed
    torch.manual_seed(args.seed)
    # Generate token-to-index and index-to-token mappings
    tok2id, id2tok = data_loader.build_or_load_vocab(args.train, overwrite=True)
    print("*" * 5)
    print(args)

    # Create DataLoader() objects
    params = {
        "batch_size": args.batch_size,
        "collate_fn": data_loader.collate_fn,
        "shuffle": args.shuffle,
        "num_workers": args.num_workers,
    }
    train_dataset = data_loader.SNLIDataSet(args.train, tok2id)
    train_loader = torch.utils.data.DataLoader(train_dataset, **params)
    val_dataset = data_loader.SNLIDataSet(args.val, tok2id)
    val_loader = torch.utils.data.DataLoader(val_dataset, **params)

    # Initialize model
    if args.model == "rnn":  # RNN model
        model = RNN(
            vocab_size=const.MAX_VOCAB_SIZE,  # Vocabulary size
            emb_dim=const.EMB_DIM,            # Embedding dimensions
            hidden_dim=args.hidden_dim,       # Hidden dimensions
            dropout_prob=args.dropout_prob,   # Dropout probability
            padding_idx=const.PAD_IDX,        # Padding token index
            num_classes=const.NUM_CLASSES,    # Number of class labels
            id2tok=id2tok,                    # Vocabulary
        ).to(device)
    elif args.model == "cnn":  # CNN model
        model = CNN(
            vocab_size=const.MAX_VOCAB_SIZE,  # Vocabulary size
            emb_dim=const.EMB_DIM,            # Embedding dimensions
            hidden_dim=args.hidden_dim,       # Hidden dimensions
            kernel_size=args.kernel_size,     # Kernel size
            dropout_prob=args.dropout_prob,   # Dropout probability
            padding_idx=const.PAD_IDX,        # Padding token index
            num_classes=const.NUM_CLASSES,    # Number of class labels
            id2tok=id2tok,                    # Vocabulary
        ).to(device)
    else:
        print("Invalid model specification, exiting")
        exit()

    # Criterion
    criterion = torch.nn.CrossEntropyLoss()
    # Trainable model parameters
    params = [p for p in model.parameters() if p.requires_grad]
    global num_params
    num_params = sum([np.prod(p.size()) for p in params])
    # Optimizer
    optimizer = torch.optim.Adam(params, lr=args.lr)
    # Logging
    global logging
    logging = {
        "train_accs": [],
        "train_loss": [],
        "val_accs": [],
        "val_loss": [],
        "num_params": int(num_params),
    }

    # Main training loop
    for epoch in range(1, args.epochs + 1):
        # Log epoch
        print("\n{} epoch: {} {}".format("=" * 20, epoch, "=" * 20))
        # Train model
        train(args, model, device, train_loader, val_loader, optimizer, criterion, epoch)
    print("*" * 5 + "\n")
def main(args):
    """Evaluate an SNLI model on the MNLI data set"""
    # Use CUDA if requested and available
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # Fix random seed
    torch.manual_seed(args.seed)
    # Generate token-to-index and index-to-token mappings
    tok2id, id2tok = data_loader.build_or_load_vocab(args.train, overwrite=False)
    print("*" * 5)
    print(args)

    # Create DataLoader() objects
    params = {
        "batch_size": args.batch_size,
        "collate_fn": data_loader.collate_fn,
        "shuffle": args.shuffle,
        "num_workers": args.num_workers,
    }
    # train_dataset = data_loader.SNLIDataSet(args.train, tok2id)
    # train_loader = torch.utils.data.DataLoader(train_dataset, **params)
    val_dataset = data_loader.SNLIDataSet(args.val, tok2id)
    val_loader = torch.utils.data.DataLoader(val_dataset, **params)

    # Initialize model
    if args.model == "rnn":  # RNN model
        model = RNN(
            vocab_size=const.MAX_VOCAB_SIZE,  # Vocabulary size
            emb_dim=const.EMB_DIM,            # Embedding dimensions
            hidden_dim=args.hidden_dim,       # Hidden dimensions
            dropout_prob=args.dropout_prob,   # Dropout probability
            padding_idx=const.PAD_IDX,        # Padding token index
            num_classes=const.NUM_CLASSES,    # Number of class labels
            id2tok=id2tok,                    # Vocabulary
        ).to(device)
        # Load model weights from disk
        model.load_state_dict(torch.load(const.MODELS + "rnn.pt"))
        model.eval()
    elif args.model == "cnn":  # CNN model
        model = CNN(
            vocab_size=const.MAX_VOCAB_SIZE,  # Vocabulary size
            emb_dim=const.EMB_DIM,            # Embedding dimensions
            hidden_dim=args.hidden_dim,       # Hidden dimensions
            kernel_size=args.kernel_size,     # Kernel size
            dropout_prob=args.dropout_prob,   # Dropout probability
            padding_idx=const.PAD_IDX,        # Padding token index
            num_classes=const.NUM_CLASSES,    # Number of class labels
            id2tok=id2tok,                    # Vocabulary
        ).to(device)
        # Load model weights from disk
        model.load_state_dict(torch.load(const.MODELS + "cnn.pt"))
        model.eval()
    else:
        print("Invalid model specification, exiting")
        exit()

    # Criterion
    criterion = torch.nn.CrossEntropyLoss()
    # Trainable model parameters
    params = [p for p in model.parameters() if p.requires_grad]

    # Inspect correct/incorrect predictions
    if args.inspect:
        right, wrong = eval_model(val_loader, model, device, criterion, inspect=True)
        print("\nValidation premises with correct predictions:\n")
        for i, item in enumerate(right):
            text = " ".join([id2tok[idx] for idx in item if idx > 0])
            print("#{}\n {}".format(i + 1, text))
        print("\nValidation premises with incorrect predictions:\n")
        for i, item in enumerate(wrong):
            text = " ".join([id2tok[idx] for idx in item if idx > 0])
            print("#{}\n {}".format(i + 1, text))
        return

    # Validation
    val_acc, _ = eval_model(val_loader, model, device, criterion)
    print("\n Validation accuracy: {}".format(val_acc))
    print("*" * 5 + "\n")
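# eval_model() is defined elsewhere in the project; below is a minimal sketch
# consistent with how it is called above. The batch layout, the model's forward
# signature, and the exact return values are assumptions, not taken from the
# source:
def eval_model(loader, model, device, criterion, inspect=False):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    right, wrong = [], []
    with torch.no_grad():
        for premise, hypothesis, label in loader:  # batch layout is an assumption
            premise, hypothesis, label = premise.to(device), hypothesis.to(device), label.to(device)
            logits = model(premise, hypothesis)    # forward signature is an assumption
            total_loss += criterion(logits, label).item()
            pred = logits.argmax(dim=1)
            correct += (pred == label).sum().item()
            total += label.size(0)
            if inspect:
                # collect premise token ids for correct/incorrect predictions
                for i in range(label.size(0)):
                    (right if pred[i] == label[i] else wrong).append(premise[i].tolist())
    if inspect:
        return right, wrong
    return 100.0 * correct / total, total_loss / max(len(loader), 1)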
trainInput, trainTarget = torch.LongTensor(trainInput).to(device), torch.LongTensor(trainTarget).to(device)
trainDataSet = Data.TensorDataset(trainInput, trainTarget)
trainDataLoader = Data.DataLoader(trainDataSet, batch_size, shuffle=True)  # shuffle the training data

# RNN
model = RNN(vocab_size, emb_size, hidden_size, num_classes,
            num_layers=num_layers, nonlinearity=nonlinearity,
            dropout=dropout, bidirectional=bidirectional).to(device)
criterion = nn.CrossEntropyLoss().to(device)  # loss function
optimizer = optim.Adam(model.parameters(), lr=lr)  # optimizer

# Training
train(model, epoch, trainDataLoader, criterion, optimizer)

# Build and preprocess the test set
test_sentences = ["i hate me", "you love me"]
test_labels = [0, 1]
testInput, testTarget = make_data(test_sentences, word2idx, test_labels)
testInput = torch.LongTensor(testInput).to(device)
testTarget = torch.LongTensor(testTarget).to(device)
# Wrap into a dataset and loader
testDataSet = Data.TensorDataset(testInput, testTarget)
testDataLoader = Data.DataLoader(testDataSet, 2, shuffle=False)  # no shuffling for evaluation
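# The snippet stops after building testDataLoader; the evaluation pass is not
# shown in the source. A minimal sketch of how such a loader is typically
# consumed (the accuracy bookkeeping below is an assumption):
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for x, y in testDataLoader:
        pred = model(x).argmax(dim=1)   # predicted class per sentence
        correct += (pred == y).sum().item()
        total += y.size(0)
print("test accuracy: {:.2f}%".format(100.0 * correct / total))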
            ans2_pros.append(output2.data[0][0])
        # the candidate with the higher top score wins the pair
        if max(ans1_pros) > max(ans2_pros):
            predicty[index][0] = 1
            predicty[index + 1][0] = 0
        else:
            predicty[index][0] = 0
            predicty[index + 1][0] = 1
        index += 2
    print("changed num", count)
    return predicty


# train
rnn = RNN(100, 128, len(vocab))
optimizer = optim.SGD(rnn.parameters(), lr=0.1)
loss_function = nn.BCELoss()
losses, acc = rnn_train(data.trainset, rnn, optimizer, loss_function, data.testset)

# plt.xlabel("Train epoch")
# plt.ylabel("loss")
# plt.plot([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], losses)
# plt.show()
plt.xlabel("Train epoch")
plt.ylabel("accuracy")
plt.plot(range(1, len(acc) + 1), acc)  # one point per training epoch
plt.show()

# test
y, predicty = rnn_test(data.testset, rnn)
evaluator = Evaluation()  # avoid shadowing the builtin eval()
evaluator.accuracy(y, predicty, data)
# set up CNN and RNN
cnn = CNN(embedding_size=embedding_size)
rnn = RNN(embedding_dim=embedding_size, hidden_dim=hidden_size, vocab_size=vocab.index)

# run with CUDA if available
if torch.cuda.is_available():
    with torch.cuda.device(gpu_device):
        cnn.cuda()
        rnn.cuda()

# set up loss function and optimizer
criterion = nn.CrossEntropyLoss()
params = list(cnn.linear.parameters()) + list(rnn.parameters())
optimizer = torch.optim.Adam(params, lr=learning_rate)
losses, iters = [], []  # avoid shadowing the builtin iter()
n = 0

# training
print('start training')
for epoch in range(epochs):
    # img_sf, cap_sf = shuffle_data(data, seed=epoch)
    img_sf, cap_sf = data.shuffle(seed=epoch)
    cap_len = len(cap_sf)
    loss_tot = []
    tic = time.time()
    for i in range(cap_len):
        img_id = img_sf[i]
        image = data.get_img(img_id)
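        # --- Sketch continuation: the source truncates mid-iteration. Below is
        # the usual encoder/decoder captioning step; data.get_cap(), the caption
        # tensor layout, and the teacher-forcing target shift are assumptions: ---
        caption = data.get_cap(img_id)         # hypothetical helper mirroring get_img()
        if torch.cuda.is_available():
            image, caption = image.cuda(), caption.cuda()
        features = cnn(image)                  # encode image to an embedding
        outputs = rnn(features, caption[:-1])  # decode with teacher forcing
        loss = criterion(outputs, caption[1:]) # predict the next token at each step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_tot.append(loss.item())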
class TENG:
    def __init__(self):
        # -------- visualization --------
        self.log_dir = './tf'
        self.writer = SummaryWriter(self.log_dir)

        self.input_size = 9
        self.output_size = 25
        self.hidden_size = 300
        self.num_layers = 5
        self.learning_rate = 0.01  # 0.1
        self.sequence_length = 1
        self.batch_size = 100  # 400
        self.epochs = 50

        self.model = RNN(input_size=self.input_size,
                         hidden_size=self.hidden_size,
                         num_layers=self.num_layers,
                         output_size=self.output_size)
        # print('\nModel Info: ', self.model)
        # Sample of what that print shows for a smaller configuration:
        #   (rnn): RNN(9, 25, num_layers=2, batch_first=True, dropout=0.1)
        #   (fc): Linear(in_features=25, out_features=25, bias=True)
        #   (relu): ReLU()
        print(self.model.rnn)
        self.model.to(device)

        self.loss_function = nn.CrossEntropyLoss()
        # self.updateParams = optim.SGD(self.model.parameters(), lr=self.learning_rate,
        #                               momentum=0.9, weight_decay=5e-4, nesterov=True)
        self.updateParams = optim.Adam(self.model.parameters(),
                                       weight_decay=5e-4,
                                       lr=self.learning_rate)
        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.updateParams, milestones=[10, 20, 30], gamma=0.1)

        # -------- get the data --------
        train_datasets, test_datasets = get_data()
        self.train_loader = torch.utils.data.DataLoader(
            train_datasets, batch_size=self.batch_size, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(
            test_datasets, batch_size=self.batch_size, shuffle=True)  # mini-batch

        print('#####Model initialization is complete; ready for training.#####')
        print('\n')
        time.sleep(0.1)

        model_file = "better_RNN_model_checkpoint.pth.tar"
        if os.path.isfile(model_file):
            print("#############Loading the pre-trained model#############")
            checkpoint = torch.load(model_file)
            self.start_epoch = checkpoint['epoch']
            self.best_accuracy = checkpoint['best_accuracy']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.updateParams.load_state_dict(checkpoint['optimizer'])
            self.training_accuracy = checkpoint['training_accuracy']
            self.validation_accuracy = checkpoint['validation_accuracy']
            self.training_loss = checkpoint['training_loss']
            self.validation_loss = checkpoint['validation_loss']
            self.time_list = checkpoint['time']
            print('\n')
            print('previous model accuracy:', self.best_accuracy)
            print('\n')
        else:
            self.start_epoch = 0
            self.best_accuracy = 0
            self.training_accuracy = []
            self.validation_accuracy = []
            self.training_loss = []
            self.validation_loss = []
            self.time_list = []
            print('NEW model accuracy:', self.best_accuracy)

    def train(self):
        def save_checkpoint(state, better, file='RNN_model_checkpoint.pth.tar'):
            torch.save(state, file)
            if better:
                shutil.copyfile(file, 'better_RNN_model_checkpoint.pth.tar')

        def training(epochs):
            step = 0
            self.model.train()  # put the model in training mode
            print("RNN training starts__epoch: {}, LR= {}".format(
                epochs, self.scheduler.get_lr()))
            training_loss = 0
            total = 0
            final_score = 0
            self.scheduler.step()  # dynamically change the learning rate
            self.loss = 0
            for batch_id, (X_batch, y_batch) in enumerate(self.train_loader):
                X_batch = X_batch.view(-1, self.sequence_length, self.input_size)
                X_batch = X_batch.float().to(device)
                y_batch = y_batch.to(device).detach()
                if not X_batch.requires_grad:
                    print('AutoGrad is OFF!')
                self.updateParams.zero_grad()  # zero gradients before the backward pass
                result = self.model(X_batch)
                batch_loss = self.loss_function(result, y_batch)
                # without .item(), GPU memory fills up
                training_loss += batch_loss.item()
                batch_loss.backward()
                self.updateParams.step()  # parameter update based on the current gradient
                _, predict = torch.max(result, 1)  # dim=1 -> argmax over each row
                final_score += predict.eq(y_batch).cpu().sum().type(
                    torch.DoubleTensor).item()
                # check the gradient
                # print('ID', batch_id)
                # print('after back prop--parameter: ',
                #       list(self.model.parameters())[0].grad)  # the gradient is very small

            training_loss_mean = training_loss / (len(self.train_loader.dataset) / self.batch_size)
            training_accuracy = 100 * final_score / len(self.train_loader.dataset)
            print("Training-epoch-{}-training_loss_mean: {:.4f}".format(epochs, training_loss_mean))
            print("Training-epoch-{}-training_accuracy: {:.4f}%".format(epochs, training_accuracy))
            # self.writer.add_image('Output', vutils.make_grid(output.data, normalize=True,
            #                                                  scale_each=True), niter)
            return (training_loss_mean, training_accuracy)

        def validation(epochs):
            self.model.eval()
            validation_loss = 0
            total = 0
            final_score = 0
            with torch.no_grad():  # temporarily set all requires_grad flags to False
                for batch_id, (test_data, target_test) in enumerate(self.test_loader):
                    test_data = test_data.view(-1, self.sequence_length, self.input_size)
                    test_data = test_data.float().to(device)
                    target_test = target_test.to(device)
                    result = self.model(test_data)
                    batch_loss = self.loss_function(result, target_test)
                    validation_loss += batch_loss.item()
                    _, predict = torch.max(result, 1)  # dim=1 -> argmax over each row
                    final_score += predict.eq(target_test).cpu().sum().type(
                        torch.DoubleTensor).item()
            validation_loss_mean = validation_loss / (len(self.test_loader.dataset) / self.batch_size)
            validation_accuracy = 100 * final_score / len(self.test_loader.dataset)
            print("Validation-epoch-{}-Validation_loss_mean: {:.4f}".format(epochs, validation_loss_mean))
            print('Validation Accuracy: {:.4f}%'.format(validation_accuracy))
            self.model_accuracy_cur_epoch = validation_accuracy
            return (validation_loss_mean, validation_accuracy)

        print("######Training-Validation Starts######")
        epoch_iter = range(1, self.epochs)
        self.model_accuracy_cur_epoch = 0
        if self.start_epoch != self.epochs:
            for i in range(self.start_epoch + 1, self.epochs):
                time_begin = time.time()
                training_result = training(i)
                self.training_loss.append(training_result[0])
                self.training_accuracy.append(training_result[1])
                vali_result = validation(i)
                self.validation_loss.append(vali_result[0])
                self.validation_accuracy.append(vali_result[1])
                time_end = time.time() - time_begin
                self.time_list.append(time_end)
                progress = float(i * 100 // len(epoch_iter))
                print('Progress: {:.4f}%'.format(progress))
                print('\n')

                #######################################
                # TensorBoard visualization
                niter = i  # tensorboard --logdir=tf --port 6066
                self.writer.add_scalars(  # note: add_scalars (plural)
                    'Loss Curve', {
                        'Training Loss': training_result[0],
                        'Validation Loss': vali_result[0]
                    }, niter)
                self.writer.add_scalars(
                    'Accuracy Curve', {
                        'Training Accuracy': training_result[1],
                        'Validation Accuracy': vali_result[1]
                    }, niter)
                #######################################

                better = self.model_accuracy_cur_epoch > self.best_accuracy
                self.best_accuracy = max(self.best_accuracy,
                                         self.model_accuracy_cur_epoch)
                # if better:
                #     torch.save(self.model.state_dict(), 'CNN_MODEL.pt')
                save_checkpoint(
                    {
                        'epoch': i,
                        'best_accuracy': self.best_accuracy,
                        'state_dict': self.model.state_dict(),
                        'optimizer': self.updateParams.state_dict(),
                        'training_loss': self.training_loss,
                        'training_accuracy': self.training_accuracy,
                        'validation_loss': self.validation_loss,
                        'validation_accuracy': self.validation_accuracy,
                        'time': self.time_list,
                    },
                    better)  # copy the checkpoint only when accuracy improved
                print('Model updated, proceeding to next epoch, best accuracy= {}'
                      .format(self.best_accuracy))

        # save the model after training
        torch.save(self.model.state_dict(), 'RNN_MODEL.pt')

        # -------- plotting --------
        # loss curves
        plt.figure(1)
        sns.set_style('whitegrid')
        plt.plot(epoch_iter, self.training_loss, color='red', linestyle='solid',
                 linewidth=3.0, marker='p', markerfacecolor='red', markersize=10,
                 label='Training Loss')
        plt.plot(epoch_iter, self.validation_loss, color='green', linestyle='solid',
                 linewidth=3.0, marker='o', markerfacecolor='green', markersize=10,
                 label='Validation Loss')
        plt.ylabel('Loss', fontsize=18)
        plt.xlabel('Epochs', fontsize=18)
        plt.title("RNN Result-loss", fontsize=12)
        plt.legend(fontsize=14)
        plt.grid(True)
        plt.show()

        # accuracy curves
        plt.figure(2)
        sns.set_style('whitegrid')
        plt.plot(epoch_iter, self.training_accuracy, color='blue', linestyle='solid',
                 linewidth=3.0, marker='s', markerfacecolor='blue', markersize=10,
                 label='Training Accuracy')
        plt.plot(epoch_iter, self.validation_accuracy, color='green', linestyle='solid',
                 linewidth=3.0, marker='s', markerfacecolor='green', markersize=10,
                 label='Validation Accuracy')
        plt.xlabel('Epochs', fontsize=18)
        plt.title("RNN Result-accuracy", fontsize=14)
        plt.legend(fontsize=14)
        plt.show()

        # per-epoch wall-clock time
        plt.figure(3)
        sns.set_style('whitegrid')
        plt.plot(epoch_iter, self.time_list, color='blue', linestyle='solid',
                 linewidth=3.0, marker='s', markerfacecolor='blue', markersize=10,
                 label='Epoch Time')
        plt.ylabel('Time (s)', fontsize=18)
        plt.xlabel('Epochs', fontsize=18)
        plt.title("Speed", fontsize=14)
        plt.legend(fontsize=14)
        plt.show()

    def forward_EM(self, filepath, target):
        # data processing
        df = pd.read_csv(filepath, header=None)  # the file has no column names
        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # row-normalize
        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length, self.input_size)
        y_new = df.iloc[:, -1]
        y_new -= 1  # shift labels to start at 0 (unused below)
        input_data = input_data.float().to(device)

        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu()
        n_match = 0
        for elem in predict:
            if elem == target:
                n_match += 1
        acc = float(n_match / len(predict))
        # print('Accuracy: {}%'.format(acc * 100))
        # (optional) confusion-matrix plot, as in forward_ni() below
        return predict, acc

    def forward_ni(self, filepath):
        # data processing
        df = pd.read_csv(filepath, header=None)  # the file has no column names
        df_x = df.iloc[:, :9]
        df_x = df_x.div(df_x.sum(axis=1), axis=0)  # row-normalize
        X = df_x
        X_scaling = StandardScaler().fit_transform(X)  # numpy.array
        input_data = torch.tensor(X_scaling, requires_grad=True)
        input_data = input_data.view(-1, self.sequence_length, self.input_size)
        y_new = df.iloc[:, -1]
        input_data = input_data.float().to(device)

        self.model.eval()
        result = self.model(input_data)
        _, predict = torch.max(result, 1)
        predict = predict.cpu().numpy()
        print(predict)
        print(y_new.head(10))

        count = 0
        for i in range(len(predict)):
            if predict[i] == y_new[i]:
                count += 1
        acc = float(count / len(predict))
        # print('Accuracy: {}%'.format(acc * 100))

        from sklearn.metrics import confusion_matrix
        cm = confusion_matrix(y_true=y_new, y_pred=predict)
        # normalize the confusion matrix row-wise
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        df_cm = pd.DataFrame(cm)
        # plot confusion matrix
        fig, ax = plt.subplots()
        sns.heatmap(df_cm, cmap="coolwarm", annot=False)
        fig.set_size_inches(8, 6)
        ax.set_title("Confusion Matrix of RNN, Data: {}".format(filepath))
        ax.set_xlabel('Predicted Label', fontsize=12)
        ax.set_ylabel('Actual Label', fontsize=12)
        plt.show()
        return predict, acc
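# A short usage sketch for the class above; the CSV path is hypothetical and,
# as forward_ni() expects, the file must hold nine feature columns plus a
# trailing label column:
if __name__ == "__main__":
    teng = TENG()           # builds model, loaders, resumes from checkpoint if present
    teng.train()            # train/validate, checkpoint, and plot curves
    predictions, accuracy = teng.forward_ni("new_measurements.csv")  # hypothetical file
    print("accuracy on new data:", accuracy)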
class Trainer:
    def __init__(self, TRAIN_CONFIGS, GRU_CONFIGS, FFN_CONFIGS=None):
        self.TRAIN_CONFIGS = TRAIN_CONFIGS
        self.GRU_CONFIGS = self._process_gru_configs(GRU_CONFIGS)
        self.model = RNN(target=TRAIN_CONFIGS['target'],
                         **self.GRU_CONFIGS,
                         FFN_CONFIGS=FFN_CONFIGS)
        self.epochs_trained = 0
        self.trained = False
        # Storage for later
        self.loss = self.val_loss = None
        self.train_y_hat = self.train_y_true = None
        self.val_y_hat = self.val_y_true = None

    def _process_gru_configs(self, GRU_CONFIGS):
        lti = self._load_data_source
        # hidden/input sizes are implied by the LTI system matrices
        GRU_IMPLIED_CONFIGS = {
            "hidden_size": lti.A.shape[-1],
            "input_size": 1 if lti.B is None else lti.U.shape[-1],
        }
        GRU_CONFIGS.update(GRU_IMPLIED_CONFIGS)
        return GRU_CONFIGS

    @property
    def _load_data_source(self):
        data_dir = self.TRAIN_CONFIGS.get("data_dir")
        lti_file = self.TRAIN_CONFIGS.get("lti_file")
        with open(path.join(data_dir, lti_file), "rb") as f:
            lti = pickle.load(f)
        return lti

    @property
    def _load_train_data(self):
        def unsqueeze(*args):
            return (_unsqueeze(M) for M in args)

        def _unsqueeze(M):
            if M is not None:
                M = M.unsqueeze(-2)
            return M

        lti = self._load_data_source
        Y, H, X, h0 = lti.torch
        _Y, _H, _X = unsqueeze(Y, H, X)
        _h0 = None if self.TRAIN_CONFIGS.get("init_h") is False else h0.reshape(
            self.GRU_CONFIGS["num_layers"], 1, self.GRU_CONFIGS["hidden_size"])
        return _Y, _H, _X, _h0

    @property
    def fit(self):
        if not self.trained:
            # get configs (for readability)
            nEpochs = self.TRAIN_CONFIGS['epochs']
            train_steps = self.TRAIN_CONFIGS['train_steps']
            init_h = self.TRAIN_CONFIGS['init_h']
            base = self.TRAIN_CONFIGS['base']

            # load data
            Y, H, X, h0 = tensor_to_cuda(*self._load_train_data)

            # split data
            if self.TRAIN_CONFIGS['target'] == 'states':
                y_train, y_val = H[:train_steps], H[train_steps:]
            elif self.TRAIN_CONFIGS['target'] == 'outputs':
                y_train, y_val = Y[:train_steps], Y[train_steps:]
            x_train, x_val = X[:train_steps], X[train_steps:]

            # prep model and optimizers
            self.model.cuda()
            optimizer = optim.Adam(self.model.parameters(), lr=1e-3)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2)

            # train
            loss = [None] * nEpochs
            val_loss = [None] * nEpochs
            pbar = tqdm(total=nEpochs, leave=False)
            for i in range(nEpochs):
                # reset gradient
                optimizer.zero_grad()
                # generate prediction
                y_hat, h_plus1 = self.model(x_train) if not init_h else self.model(x_train, h0)
                y_hat = y_hat.squeeze()
                # calculate loss
                l = loss_func(y_hat, y_train, base=base, epoch=i)
                loss[i] = l.item()
                # learn from loss
                l.backward()
                optimizer.step()
                scheduler.step(l.item())
                # validate
                with torch.no_grad():
                    val_y_hat, _ = self.model(x_val) if not init_h else self.model(x_val, h_plus1)
                    val_y_hat = val_y_hat.squeeze()
                    l = loss_func(val_y_hat, y_val, base=base, epoch=i)
                    val_loss[i] = l.item()
                # progress bar
                pbar.set_description(f"Loss={loss[i]:.3f}. Val={val_loss[i]:.3f}")
                pbar.update(1)
            pbar.close()

            self.epochs_trained += nEpochs
            self.loss, self.val_loss = loss, val_loss
            self.train_y_hat = y_hat.detach().cpu().squeeze()
            self.train_y_true = y_train.detach().cpu().squeeze()
            self.val_y_hat = val_y_hat.detach().cpu().squeeze()
            self.val_y_true = y_val.detach().cpu().squeeze()
            self.trained = True
        else:
            # this should never be reached; it is a safety check
raise ValueError("Model has already been trained.") return (self.loss, self.val_loss), \ (self.train_y_hat, self.train_y_true), \ (self.val_y_hat, self.val_y_true) def pickle_save(self, trial_num): p = Trainer._pickle_path(self.TRAIN_CONFIGS, trial_num) with open(p, "wb") as f: pickle.dump(self, f) # TODO remove # def _gen_relative_graphs(self, hat, true, dimH, val_begins, trial_num=0, fname_prefix=None, freq=10): # Trainer.gen_relative_graphs(hat, true, dimH, val_begins, trial_num, self.TRAIN_CONFIGS.get("fig_dir"), fname_prefix, freq) @staticmethod def pickled_exists(TRAIN_CONFIGS, trial_num): p = Trainer._pickle_path(TRAIN_CONFIGS, trial_num) return path.exists(p) @staticmethod def _pickle_path(TRAIN_CONFIGS, trial_num): name = Trainer.model_name(TRAIN_CONFIGS, trial_num) if not np.char.endswith(name, ".pickle"): name += ".pickle" model_dir = TRAIN_CONFIGS.get("model_dir") return path.join(model_dir, name) @staticmethod def _gen_relative_graphs(hat, true, dimOut, val_begins, trial_num, isState, fig_dir=None, fname_prefix=None, freq=10, pause=False): val_ends = hat.shape[0] palette = { "H1": "C0", "H2": "C1", "H3": "C2", "Y1": "C0", "Y2": "C1", "Y3": "C2" } for _base in range(dimOut): _dif = rel_space_dif(hat, true, _base) df = pd.DataFrame(_dif) df = df.drop(_base, axis=1) _pre = "H" if isState else "Y" df.columns = _pre + (df.columns + 1).astype(str) df.columns.name = "Hidden States" if isState else "Output Indices" df.index.name = "Itteration" df = df.stack() df.name = "Error" df = df.reset_index() _df = df[df['Itteration'] % freq == 0] plt.axhline(0, color="k", alpha=0.5) _hue = "Hidden States" if isState else "Output Indices" sns.lineplot(data=_df, x="Itteration", y="Error", hue=_hue, alpha=1, palette=palette) plt.title(f"Relative Difference (Base: {_pre}{_base+1})") plt.axvspan(val_begins, val_ends, facecolor="0.1", alpha=0.25) if not fname_prefix is None and not fig_dir is None: fname = fname_prefix + f"-relgraph-{_pre}{_base+1}-trial{trial_num}" f = path.join(fig_dir, fname) plt.savefig(path.join(fig_dir, fname)) else: print(f"fname_prefix='{fname_prefix}'; fig_dir='{fig_dir}'") if pause: plt.show() else: plt.show(block=False) plt.clf() @staticmethod def model_name(TRAIN_CONFIGS, trial_num): fprefix = TRAIN_CONFIGS.get("lti_file").split(".")[0] name = fprefix + f"-trial{trial_num}" return name @staticmethod def load_trained(TRAIN_CONFIGS, trial_num): model_dir = TRAIN_CONFIGS.get("model_dir") name = Trainer.model_name(TRAIN_CONFIGS, trial_num) if not np.char.endswith(name, ".pickle"): name += ".pickle" with open(path.join(model_dir, name), "rb") as f: trainer = pickle.load(f) return trainer def gen_relative_graphs(self, trial_num, freq=10, pause=False): train_hat, train_true, val_hat, val_true = self.train_y_hat, self.train_y_true, self.val_y_hat, self.val_y_true # derived dimOut = train_hat.shape[-1] val_begins = train_hat.shape[0] # combine predictions and true values hat = np.concatenate([train_hat, val_hat]) true = np.concatenate([train_true, val_true]) # graph fprefix = self.TRAIN_CONFIGS.get("lti_file").split(".pickle")[0] isState = self.TRAIN_CONFIGS.get("target") == "state" Trainer._gen_relative_graphs(hat, true, dimOut, val_begins, trial_num, isState, self.TRAIN_CONFIGS.get("fig_dir"), fprefix, freq=10, pause=False) @property def get_train_test_metrics(self): isState = self.TRAIN_CONFIGS.get("target") == "state" state_tups = [(self.train_y_hat, self.train_y_true), (self.val_y_hat, self.val_y_true)] train, test = [ all_state_metrics(state_hat, state_true, isState) for 
    def save_train_test_metrics(self, trial_num):
        metrics_dir = self.TRAIN_CONFIGS.get("metrics_dir")
        train, test = self.get_train_test_metrics
        _name = Trainer.model_name(self.TRAIN_CONFIGS, trial_num)
        train.to_csv(path.join(metrics_dir, _name + "-train.csv"))
        test.to_csv(path.join(metrics_dir, _name + "-val.csv"))
        return train, test
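# A short usage sketch for Trainer; the config keys mirror the ones read above,
# but every concrete value (paths, epoch counts, GRU settings) is a hypothetical
# example, not taken from the source:
if __name__ == "__main__":
    TRAIN_CONFIGS = {
        "target": "states",            # or "outputs"
        "epochs": 200,
        "train_steps": 800,
        "init_h": True,
        "base": 0,
        "data_dir": "data",
        "lti_file": "lti_3state.pickle",
        "model_dir": "models",
        "metrics_dir": "metrics",
        "fig_dir": "figs",
    }
    GRU_CONFIGS = {"num_layers": 1}    # hidden_size/input_size are inferred from the LTI file
    trainer = Trainer(TRAIN_CONFIGS, GRU_CONFIGS)
    (loss, val_loss), _, _ = trainer.fit   # fit is a property; accessing it runs training
    trainer.pickle_save(trial_num=0)
    trainer.save_train_test_metrics(trial_num=0)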