def main(): print("extracting corpus... ") # 导入词典 C = Corpus(conf) word2id, vocab_size = C.word2id, len(C.word2id) id2word = C.id2word # 导入数据 print("extracting data... ") train_data, valid_data, test_data = C.build_dataset(conf) train_size = train_data.size(1) # 实例化模型 model = RNN(vocab_size, conf.embed_size, conf.hidden_size, conf.num_layers).to(device) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=conf.learning_rate) # 训练开始 loss_count = [] for epoch in range(conf.num_epochs): print("="*20,"epoch: %d" % epoch, "="*20) states = (torch.zeros(conf.num_layers, conf.batch_size, conf.hidden_size).to(device), torch.zeros(conf.num_layers, conf.batch_size, conf.hidden_size).to(device)) for i in range(0, train_size-conf.seq_length, conf.seq_length): batch_x = train_data[:, i:(i+conf.seq_length)].to(device) batch_y = train_data[:, (i+1) : ((i+1+conf.seq_length)%train_size)].to(device) # 前传 states = detach(states) outputs,states = model(batch_x, states) loss = criterion(outputs, batch_y.reshape(-1)) # BP optimizer.zero_grad() loss.backward() clip_grad_norm_(model.parameters(), 0.5) optimizer.step() step = (i+1) // conf.seq_length if step % conf.print_per_batch == 0: loss_count.append(loss.item()) valid_acc = eval_model(valid_data, conf, states, model) print("step: %d,\t Loss: %.3f,\t train Perplextity: %.3f,\t validation Perplextity: %.3f." % ( step, loss.item(), np.exp(loss.item()), valid_acc*100 )) # 展示loss曲线 save_results(loss_count, conf.result_fig_path, show=conf.show_loss) # 保存模型 if conf.save_model: print("save model: %s" % conf.model_path) torch.save(model, conf.model_path)
import time

import torch
import torch.nn as nn
from tqdm import tqdm


def main(dl: DataLoader, model: RNN):
    prev_best = 0
    patience = 0
    decay = 0
    lr = config.lr
    # optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()

    for epoch in tqdm(range(config.max_epochs)):
        start_time = time.time()
        train_loss, train_acc = train(dl.train_examples, model, optimizer, criterion)
        dev_loss, dev_acc = evaluate(dl.dev_examples, model, criterion)
        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        print(f'Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc:.2f}')
        print(f'\t Dev Loss: {dev_loss:.3f} | Dev Acc: {dev_acc:.2f}')

        if dev_acc <= prev_best:
            patience += 1
            if patience == 3:
                # Three epochs without improvement: halve the learning rate
                # and fall back to plain SGD.
                lr *= 0.5
                optimizer = torch.optim.SGD(model.parameters(), lr=lr)
                tqdm.write('Dev accuracy did not increase in 3 epochs, halving the learning rate')
                patience = 0
                decay += 1
        else:
            prev_best = dev_acc
            print('Save the best model')
            model.save()

        if decay >= 3:
            print('Evaluating model on test set')
            model.load()
            print('Load the best model')
            test_loss, test_acc = evaluate(dl.test_examples, model, criterion)
            print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')
            break
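# A minimal sketch of the epoch_time() helper used above (the standard
# tutorial formulation): split an elapsed interval into minutes and seconds.
def epoch_time(start_time, end_time):
    elapsed = end_time - start_time
    mins = int(elapsed / 60)
    secs = int(elapsed - mins * 60)
    return mins, secs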
import torch as th
import torch.nn as nn
import torch.utils.data as data


def train(args):
    common.make_dir(args.checkout_dir)
    # nnet: the input is the current frame plus its left/right context,
    # each of dimension feat_dim.
    nnet = RNN((args.left_context + args.right_context + 1) * args.feat_dim,
               hidden_layer, hidden_size, args.num_classes, dropout=dropout)
    print(nnet)
    nnet.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)

    train_dataset = THCHS30(root=args.data_dir, data_type='train')
    train_loader = data.DataLoader(dataset=train_dataset,
                                   batch_size=args.min_batch,
                                   shuffle=True)
    test_dataset = THCHS30(root=args.data_dir, data_type='test')
    test_loader = data.DataLoader(dataset=test_dataset,
                                  batch_size=args.min_batch,
                                  shuffle=True)

    # Cross-validate once before training to get a baseline (epoch -1).
    cross_validate(-1, nnet, test_loader, test_dataset.num_frames)
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet, criterion, optimizer, train_loader, is_rnn=True)
        cross_validate(epoch, nnet, test_loader, test_dataset.num_frames)
        th.save(nnet,
                common.join_path(args.checkout_dir, 'rnn.{}.pkl'.format(epoch + 1)))
import numpy as np
import torch


def main():
    config = ConfigRNN.instance()
    loader = ACLIMDB(batch_size=config.BATCH_SIZE,
                     embed_method=config.EMBED_METHOD,
                     is_eval=config.EVAL_MODE,
                     debug=config.CONSOLE_LOGGING)
    embedding_model = loader.data.embedding_model

    # TODO(hyungsun): This code is temporary. Erase this later.
    if config.SAVE_EMBED_MODEL:
        embedding_model.save("embed_model.wv")
        return

    if embedding_model == "DEFAULT":
        model = RNN()
    else:
        vectors = loader.data.embedding_model.wv.vectors
        # Prepend a zero vector as padding for masking.
        vectors = np.append(np.array([100 * [0]]), vectors, axis=0)
        model = RNN(torch.from_numpy(vectors).float())

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.LEARNING_RATE,
                                weight_decay=config.WEIGHT_DECAY)
    trainer = RNNTrainer(model, loader, optimizer)
    if config.EVAL_MODE:
        trainer.evaluate()
    else:
        trainer.train(config.MAX_EPOCH)
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm


def main():
    prepare()
    print(print_str.format("Begin to load data"))
    net = RNN(90, 256, 2, 2, 0.1)
    if use_cuda():
        net = net.cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
    cross_entropy = nn.CrossEntropyLoss()

    if mode == "train":
        train_data, train_label, train_wav_ids, train_lengths = load_rnn_data(
            "train", train_protocol, mode=mode, feature_type=feature_type)
        train_dataset = ASVDataSet(train_data, train_label,
                                   wav_ids=train_wav_ids, mode=mode,
                                   lengths=train_lengths)
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                      num_workers=4, shuffle=True)

        for epoch in range(num_epochs):
            correct = 0
            total = 0
            total_loss = 0
            for tmp in tqdm(train_dataloader, desc="Epoch {}".format(epoch + 1)):
                data = tmp['data']
                label = tmp['label']
                length = tmp['length']

                # Trim every sequence in the batch to the longest real length,
                # then sort by length (required for packing in the RNN).
                max_len = int(torch.max(length))
                data = data[:, :max_len, :]
                label = label[:, :max_len]
                sorted_length, indices = torch.sort(length.view(-1), dim=0, descending=True)
                sorted_length = sorted_length.long().numpy()
                data, label = data[indices], label[indices]

                data, label = Variable(data), Variable(label).view(-1)
                if use_cuda():
                    data, label = data.cuda(), label.cuda()

                optimizer.zero_grad()
                outputs, out_length = net(data, sorted_length)
                loss = cross_entropy(outputs, label)
                loss.backward()
                optimizer.step()

                total_loss += loss.item()
                _, predict = torch.max(outputs, 1)
                correct += (predict.data == label.data).sum().item()
                total += label.size(0)

            print("Loss: {} \t Acc: {}".format(total_loss / len(train_dataloader),
                                               correct / total))
import os

import torch
import torch.nn as nn


class Model:
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, gpu=-1):
        self.decoder = RNN(input_size, hidden_size, output_size, n_layers, gpu)
        if gpu >= 0:
            print("Use GPU %d" % torch.cuda.current_device())
            self.decoder.cuda()
        self.optimizer = torch.optim.Adam(self.decoder.parameters(), lr=0.01)
        self.criterion = nn.CrossEntropyLoss()

    def train(self, inp, target, chunk_len=200):
        hidden = self.decoder.init_hidden()
        self.decoder.zero_grad()
        loss = 0
        # Accumulate the loss over one chunk, one character at a time.
        for c in range(chunk_len):
            out, hidden = self.decoder(inp[c], hidden)
            loss += self.criterion(out, target[c])
        loss.backward()
        self.optimizer.step()
        return loss.item() / chunk_len

    def generate(self, prime_str, predict_len=100, temperature=0.8):
        predicted = prime_str
        hidden = self.decoder.init_hidden()
        prime_input = char_tensor(prime_str, self.decoder.gpu)

        # Use the prime string to build up the hidden state.
        for p in range(len(prime_str) - 1):
            _, hidden = self.decoder(prime_input[p], hidden)
        inp = prime_input[-1]

        for p in range(predict_len):
            out, hidden = self.decoder(inp, hidden)

            # Sample from the network output as a multinomial distribution.
            out_dist = out.data.view(-1).div(temperature).exp()
            top_i = torch.multinomial(out_dist, 1)[0].item()

            # Add the predicted character to the string and use it as the next input.
            predicted_char = all_characters[top_i]
            predicted += predicted_char
            inp = char_tensor(predicted_char, self.decoder.gpu)
        return predicted

    def save(self):
        model_name = "char-rnn-gru.pt"
        if not os.path.exists("save"):
            os.mkdir("save")
        torch.save(self.decoder, "save/%s" % model_name)
        print("--------------> [Checkpoint] Save model into save/%s" % model_name)

    def load(self, model_path="save/char-rnn-gru.pt"):
        self.decoder = torch.load(model_path)
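# A minimal sketch of the char_tensor() helper Model assumes: map a string
# to a LongTensor of indices into the all_characters alphabet. Using
# string.printable as the alphabet is an assumption.
import string

all_characters = string.printable


def char_tensor(s, gpu=-1):
    tensor = torch.tensor([all_characters.index(c) for c in s], dtype=torch.long)
    return tensor.cuda() if gpu >= 0 else tensor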
def load_model(args, train_len):
    model = RNN(args.emb_dim, args.hidden_dim)
    if torch.cuda.is_available():
        model.cuda()
    loss_fnc = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    return model, loss_fnc, optimizer
def main():
    config = ConfigRNN.instance()
    loader = ACLIMDB(batch_size=config.BATCH_SIZE,
                     word_embedding=config.WORD_EMBEDDING,
                     is_eval=False,
                     debug=config.DEBUG_MODE)
    vectors = loader.data.embedding_model.wv.vectors
    model = RNN(torch.from_numpy(vectors).float())
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.LEARNING_RATE,
                                weight_decay=config.WEIGHT_DECAY)
    trainer = RNNTrainer(model, loader, optimizer)
    trainer.train(config.MAX_EPOCH, config.BATCH_SIZE)
import os
import pickle

import torch
import torch.nn as nn


def main():
    path = './runtime/data.pkl'
    # Build the dataset once and cache it as a pickle for later runs.
    if not os.path.exists(path):
        with open(path, 'wb') as f:
            dataset = Dataset('./data/lyrics.txt')
            pickle.dump(dataset, f)
    else:
        with open(path, 'rb') as f:
            dataset = pickle.load(f)

    model = RNN(dataset.lang.n_words, HIDDEN_SIZE, dataset.lang.n_words, N_LAYERS)
    model, start_epoch = load_previous_model(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss()
    train(model, optimizer, loss_fn, dataset, start_epoch=start_epoch)
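# A minimal sketch of the load_previous_model() helper assumed above: resume
# from the newest checkpoint if one exists, otherwise start at epoch 0. The
# checkpoint directory and file naming here are assumptions.
import glob
import re


def load_previous_model(model, ckpt_dir='./runtime'):
    ckpts = glob.glob(os.path.join(ckpt_dir, 'model_epoch_*.pth'))
    if not ckpts:
        return model, 0
    latest = max(ckpts, key=lambda p: int(re.findall(r'\d+', p)[-1]))
    model.load_state_dict(torch.load(latest))
    return model, int(re.findall(r'\d+', latest)[-1]) + 1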
def main(you='train'):
    path = 'F:/PYTHON/TF_song/ha.txt'
    dataset = Dataset(path)

    HIDDEN_SIZE = 128
    N_LAYERS = 1
    LEARNING_RATE = 0.001

    model = RNN(dataset.lang.n_words, HIDDEN_SIZE, dataset.lang.n_words, N_LAYERS)
    # model, start_epoch = load_previous_model(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss()

    if you == 'train':
        train(model, optimizer, loss_fn, dataset, dataset.lang.length)
    else:
        generate(dataset, '如', predict_len=150, youwant=3)
import time

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader


def main():
    batch_size = 1
    epochs = 50
    current_loss = 0
    all_losses = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cuda = torch.cuda.is_available()
    Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
    learning_rate = 0.005  # If you set this too high, it might explode. If too low, it might not learn.
    criterion = nn.NLLLoss()

    name_dataset = NameDataset()
    dataloader = DataLoader(name_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    n_hidden = 128
    rnn = RNN(name_dataset.n_letters, n_hidden, name_dataset.n_categories, batch_size).to(device)
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    start = time.time()
    print('Start Training')
    for epoch in range(epochs):
        for i, data in enumerate(dataloader):
            # output, _ = rnn(Variable(data[0][0, 0:1].type(Tensor)), Variable(torch.zeros(1, n_hidden).type(Tensor)))
            # Time dimension first: [seq_len, batch, n_letters].
            name_tensor = Variable(data[0].transpose(0, 1).type(Tensor))
            category_tensor = Variable(data[1].type(Tensor))
            output, loss = train(rnn, optimizer, category_tensor, name_tensor,
                                 device, learning_rate, criterion)
            current_loss += loss

            # Print epoch number, loss, name and prediction.
            avg_loss = current_loss / (len(name_dataset) / batch_size)
            category = name_dataset.all_categories[int(category_tensor.detach().cpu().numpy()[0])]
            guess, guess_i = category_from_output(output[0], name_dataset.all_categories)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('Epoch: %d (%s) %.4f %s / %s %s' % (
                epoch, time_since(start), avg_loss,
                tensor_to_letter(data[0][0], name_dataset.all_letters), guess, correct))

        # Add the average loss for this epoch to the list of losses.
        all_losses.append(avg_loss)
        current_loss = 0
        torch.save(rnn.state_dict(), "epoch_%d.pth" % epoch)
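# A minimal sketch of the category_from_output() helper used above (as in
# the PyTorch name-classification tutorial): take the argmax of the output
# vector and map it back to a category name.
def category_from_output(output, all_categories):
    top_i = output.argmax(dim=-1).item()
    return all_categories[top_i], top_i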
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms


class TrainModel:
    def __init__(self):
        self.model_2048 = RNN(rnn_size)

    def trainModel(self):
        trainDataset = DealDataset_enhanced(
            root=trainfilertoread,
            transform=transforms.Compose([transforms.ToTensor()]))
        train_loader = DataLoader(dataset=trainDataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=0)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.model_2048.parameters(), lr=LR)

        for epoch in range(NUM_EPOCHS):
            for index, (board, direc) in enumerate(train_loader):
                board, direc = Variable(board), Variable(direc)
                if torch.cuda.is_available():
                    board, direc = board.cuda(), direc.cuda()
                    self.model_2048.cuda()
                board = board.view(-1, 4, 4)

                out = self.model_2048(board)
                loss = criterion(out, direc)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if index % 50 == 0:
                    out = self.model_2048(board)
                    pred = torch.max(out, 1)[1]
                    train_correct = (pred == direc).sum().item()
                    # Accuracy is measured on the current training batch.
                    print('Epoch: ', epoch,
                          '| train loss: %.4f' % loss,
                          '| train accuracy: %.4f' % (train_correct / (BATCH_SIZE * 1.0)))
            torch.save(self.model_2048, 'rnn_model_' + str(epoch) + '.pkl')
        torch.save(self.model_2048, 'rnn_model_final.pkl')
def main():
    path = './runtime/data.pkl'
    if not os.path.exists(path):
        with open(path, 'wb') as f:
            dataset = Dataset('./data/lyrics.txt')
            pickle.dump(dataset, f)
    else:
        with open(path, 'rb') as f:
            dataset = pickle.load(f)

    model = RNN(dataset.lang.n_words, HIDDEN_SIZE, dataset.lang.n_words, N_LAYERS)
    model, start_epoch = load_previous_model(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss()

    while True:
        word = input("input> ")
        if len(word) < 2:
            print("Please enter at least two characters!")
            continue
        gen_len = input("length> ")
        generate(model, dataset, word, int(gen_len))
def get_model(self):
    print("Creating RNN model...")
    model = RNN(embed_size=self.embedding_size,
                num_output=1,
                rnn_model=self.rnn,
                use_last=(not self.mean_seq),
                hidden_size=self.hid_size,
                num_layers=self.layers,
                batch_first=True)
    print(model)

    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=self.lr, weight_decay=self.wd)
    # For a classification variant, use BCE loss with target = 1 when the
    # return is greater than the median; see prices / prices.shift(1) returns.
    criterion = nn.MSELoss()
    print(optimizer)
    print(criterion)

    if self.cuda:
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True
        model.cuda()
        criterion = criterion.cuda()  # moving the loss module to GPU may be unnecessary

    return model, criterion, optimizer
def main():
    config = ConfigRNN.instance()
    loader = ACLIMDB(batch_size=config.BATCH_SIZE,
                     embed_method=config.EMBED_METHOD,
                     is_eval=False,
                     debug=config.DEBUG_MODE)
    embedding_model = loader.data.embedding_model

    # "DEFAULT" means no pre-trained embedding model is available.
    if embedding_model == "DEFAULT":
        model = RNN()
    else:
        vectors = loader.data.embedding_model.wv.vectors
        # Prepend a zero vector as padding for masking.
        vectors = np.append(np.array([100 * [0]]), vectors, axis=0)
        model = RNN(torch.from_numpy(vectors).float())

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.LEARNING_RATE,
                                weight_decay=config.WEIGHT_DECAY)
    trainer = RNNTrainer(model, loader, optimizer)
    trainer.train(config.MAX_EPOCH, config.BATCH_SIZE)
import os

import torch


def main():
    model = RNN()
    model = model.to('cuda:0')
    # Note: Adam has no momentum argument (its betas play that role).
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[30, 50, 60],
                                                     gamma=0.1)

    train_dataset = HoleDataset(batch_size=128, dataset_size=128, steps=5)
    val_dataset = HoleDataset(batch_size=512, dataset_size=5120, steps=20)

    ckpt_path = os.path.abspath('./checkpoints')
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)

    trainer = Trainer(model, optimizer, scheduler, train_dataset, val_dataset,
                      checkpoint_path='./checkpoints')
    history = trainer.fit(num_epochs=70, num_train_batch=1000, num_val_batch=10)
import sys

import numpy as np
import torch


def main():
    config = ConfigRNN.instance()
    embed = Embed()
    # TODO(kyungsoo): Make this work.
    embedding_model = embed.get_embedding_model()
    if embedding_model == "DEFAULT":
        model = RNN()
    else:
        vectors = embedding_model.wv.vectors
        # Prepend a zero vector as padding for masking.
        vectors = np.append(np.array([100 * [0]]), vectors, axis=0)
        model = RNN(torch.from_numpy(vectors).float())

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=config.LEARNING_RATE,
                                weight_decay=config.WEIGHT_DECAY)
    trainer = RNNEvaluator(model, optimizer)

    # TODO(kyungsoo): Make this work.
    # sys.argv[1] is the review to evaluate (argv[0] is the script itself).
    review_vector = embed.review2vec(sys.argv[1])
    print(trainer.evaluate(review_vector=review_vector))
import os
import random
from collections import Counter

import torch
import torch.nn as nn
from torchtext import vocab


def main(args):
    # Prepare the data.
    train_texts, train_labels = read_data(os.path.join(args.data_dir, 'train'))
    test_texts, test_labels = read_data(os.path.join(args.data_dir, 'test'))
    training_set = list(zip(train_texts, train_labels))
    test_set = list(zip(test_texts, test_labels))
    random.shuffle(training_set)
    random.shuffle(test_set)

    # Build the vocabulary with pre-trained GloVe vectors.
    vocab_counter = Counter(flatten([get_words(text) for text in train_texts]))
    word2vec = vocab.Vocab(vocab_counter, max_size=20000, min_freq=3,
                           vectors='glove.6B.100d')

    model = RNN(args.input_size, args.hidden_size, args.nb_class)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    train(training_set, model, criterion, optimizer, args.batch_size,
          args.nb_epoch, word2vec)
    evaluate(test_set, model, args.batch_size, word2vec)

    torch.save(model.state_dict(), args.weights_file)
    writer.close()
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")
    iterator, TEXT, LABEL, tabular_dataset = load_data(stage="train",
                                                       args=args,
                                                       indices=labeled)
    print("Created the iterators")

    INPUT_DIM = len(TEXT.vocab)
    OUTPUT_DIM = 1
    BIDIRECTIONAL = True
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = RNN(
        INPUT_DIM,
        args["EMBEDDING_DIM"],
        args["HIDDEN_DIM"],
        OUTPUT_DIM,
        args["N_LAYERS"],
        BIDIRECTIONAL,
        args["DROPOUT"],
        PAD_IDX,
    )
    model = model.to(device=device)

    # Initialize the embedding layer from the pre-trained vectors, then zero
    # the <unk> and <pad> rows.
    pretrained_embeddings = TEXT.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)
    unk_idx = TEXT.vocab.stoi["<unk>"]
    pad_idx = TEXT.vocab.stoi["<pad>"]
    model.embedding.weight.data[unk_idx] = torch.zeros(args["EMBEDDING_DIM"])
    model.embedding.weight.data[pad_idx] = torch.zeros(args["EMBEDDING_DIM"])

    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()
    criterion = criterion.to(device)

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        model.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    model.train()  # turn on dropout, etc.
    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):
        running_loss = 0
        i = 0
        for batch in iterator:
            text, text_length = batch.review
            labels = batch.sentiment
            text = text.to(device)
            text_length = text_length.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            output = model(text, text_length)
            loss = criterion(torch.squeeze(output).float(), labels.float())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # Report the loss averaged over the last 10 batches.
            if i % 10 == 0:
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 10),
                    end="\r",
                )
                running_loss = 0
            i += 1

    print("Finished Training. Saving the model as {}".format(ckpt_file))
    ckpt = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))
    return
dataloader_train = data.DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
dataloader_val = data.DataLoader(dataset_val, batch_size=1, shuffle=False, num_workers=4)

print(dataset.n_categories)
categories = dataset.all_categories

# Initialize the network. Hidden size: 1024.
# 57 is the length of the one-hot-encoded input at each timestep.
model = RNN(57, 1024, dataset.n_categories)

# criterion = nn.NLLLoss()
criterion = nn.CrossEntropyLoss()

# Comment out the next two lines if not using a GPU.
model = model.cuda()
criterion = criterion.cuda()

optimizer = torch.optim.SGD(model.parameters(), 0.005)  # momentum=0.9

n_epochs = 10
for i in range(n_epochs):
    train(i, dataloader_train, model, criterion, optimizer, categories, 'train')
    # Validate every other epoch.
    if i % 2 == 1:
        train(i, dataloader_val, model, criterion, optimizer, categories, 'val')
def update_js(text):
    # Count tokens that mention "Jew"/"jew" (the plural checks were redundant).
    ls = text.split()
    result = 0
    for word in ls:
        if 'Jew' in word or 'jew' in word:
            result += 1
    return result


X, Y, X_val, Y_val, n_chars, char2int, int2char, num_batches = read_data(
    filename, batch_size, seq_size)

# Make the network and optimizer.
net = RNN(n_chars).to(device)
opt = torch.optim.Adam(net.parameters(), lr=0.005)

'''
TRAINING
'''

# Get the average loss over all validation batches.
def validation_loss():
    with torch.no_grad():
        val_losses = []
        val_h = net.blank_hidden(batch_size)
        for x, y in batches(X_val, Y_val, batch_size, seq_size):
            out_val, val_h = net(x, val_h)
            val_loss = F.cross_entropy(out_val.transpose(1, 2), y)
            val_losses.append(val_loss)
        val_losses = torch.stack(val_losses)
        return val_losses.mean().item()
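# A minimal sketch of the batches() generator the validation loop assumes:
# yield (input, target) LongTensor pairs of shape [batch_size, seq_size],
# drawn sequentially from flat index arrays X and Y. The flat layout of
# X and Y is an assumption.
def batches(X, Y, batch_size, seq_size):
    step = batch_size * seq_size
    for i in range(0, len(X) - step + 1, step):
        x = torch.tensor(X[i:i + step]).view(batch_size, seq_size).to(device)
        y = torch.tensor(Y[i:i + step]).view(batch_size, seq_size).to(device)
        yield x, y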
import os

import torch
import torch.utils.data as data
from torch.utils.data import DataLoader, Dataset
from arguments import *
from dataloader import AudioDataset
from model import RNN

loss_F = torch.nn.CrossEntropyLoss()
dataset = AudioDataset('data')
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)
rnn = RNN(input_size, hidden_size, num_layers, num_classes)
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)

for epoch in range(5):
    for audio, label in train_loader:
        # Reshape to [seq_len, batch=1, n_features] and cast to float.
        audio = audio.reshape((audio.shape[1], 1, 13))
        audio = audio.type(torch.float32)

        output = rnn(audio)
        output = output[0].unsqueeze(0)
        # Assumed completion of the truncated snippet: the usual
        # loss / backward / step update.
        loss = loss_F(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
import argparse
import time

import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def main():
    parser = argparse.ArgumentParser(description="==========[RNN]==========")
    parser.add_argument("--mode", default="train", help="available modes: train, test, eval")
    parser.add_argument("--model", default="rnn", help="available models: rnn, lstm")
    parser.add_argument("--dataset", default="all", help="available datasets: all, MA, MI, TN")
    parser.add_argument("--rnn_layers", default=3, type=int, help="number of stacked rnn layers")
    parser.add_argument("--hidden_dim", default=16, type=int, help="number of hidden dimensions")
    parser.add_argument("--lin_layers", default=1, type=int, help="number of linear layers before output")
    parser.add_argument("--epochs", default=100, type=int, help="number of max training epochs")
    parser.add_argument("--dropout", default=0.0, type=float, help="dropout probability")
    parser.add_argument("--learning_rate", default=0.01, type=float, help="learning rate")
    parser.add_argument("--verbose", default=2, type=int, help="how much training output?")
    options = parser.parse_args()
    verbose = options.verbose

    if torch.cuda.is_available():
        device = torch.device("cuda")
        if verbose > 0:
            print("GPU available, using cuda...\n")
    else:
        device = torch.device("cpu")
        if verbose > 0:
            print("No available GPU, using CPU...\n")

    params = {
        "MODE": options.mode,
        "MODEL": options.model,
        "DATASET": options.dataset,
        "RNN_LAYERS": options.rnn_layers,
        "HIDDEN_DIM": options.hidden_dim,
        "LIN_LAYERS": options.lin_layers,
        "EPOCHS": options.epochs,
        "DROPOUT_PROB": options.dropout,
        "LEARNING_RATE": options.learning_rate,
        "DEVICE": device,
        "OUTPUT_SIZE": 1
    }
    params["PATH"] = ("models/{MODEL}_{DATASET}_{RNN_LAYERS}_{HIDDEN_DIM}_"
                      "{LIN_LAYERS}_{LEARNING_RATE}_{DROPOUT_PROB}_{EPOCHS}_model.pt"
                      ).format(**params)

    train_data = utils.DistrictData(params["DATASET"], "train")
    val_data = utils.DistrictData(params["DATASET"], "val")
    params["INPUT_SIZE"] = train_data[0]['sequence'].size()[1]

    if params["MODEL"] == "rnn":
        model = RNN(params)
    elif params["MODEL"] == "lstm":
        model = LSTM(params)
    model.to(params["DEVICE"])

    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=params["LEARNING_RATE"])

    if verbose == 0:
        print(params["PATH"])
    else:
        utils.print_params(params)
        print("Beginning training...\n")

    since = time.time()
    best_val_loss = 10.0
    train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
    for e in range(params["EPOCHS"]):
        running_loss = 0.0
        model.train()
        for batch in train_loader:
            x = batch['sequence'].to(device)
            y = batch['target'].to(device)
            seq_len = batch['size'].to(device)
            optimizer.zero_grad()
            y_hat, hidden = model(x, seq_len)
            loss = criterion(y_hat, y)
            # Accumulate a float, not the graph-carrying tensor.
            running_loss += loss.item()
            loss.backward()
            optimizer.step()

        mean_loss = running_loss / len(train_data)
        val_loss = evaluate(val_data, model, params, criterion, validation=True)
        if verbose == 2 or (verbose == 1 and (e + 1) % 100 == 0):
            print('=' * 25 + ' EPOCH {}/{} '.format(e + 1, params["EPOCHS"]) + '=' * 25)
            print('Training Loss: {}'.format(mean_loss))
            print('Validation Loss: {}\n'.format(val_loss))

        # Only start checkpointing after the first third of training.
        if e > params["EPOCHS"] / 3:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = model.state_dict()
                torch.save(best_model, params["PATH"])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Training Loss: {:.4f}'.format(mean_loss))
    print('Best Validation Loss: {:.4f}'.format(best_val_loss))

    test_data = utils.DistrictData(params["DATASET"], "test")
    test_loss = evaluate(test_data, model, params, criterion)
    print('Test Loss: {}\n'.format(test_loss))
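# A minimal sketch of the evaluate() helper used for validation and test
# above; the batch field names ('sequence', 'target', 'size') mirror the
# training loop, and the batch size is an assumption.
def evaluate(dataset, model, params, criterion, validation=False):
    model.eval()
    loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=4)
    total_loss = 0.0
    with torch.no_grad():
        for batch in loader:
            x = batch['sequence'].to(params["DEVICE"])
            y = batch['target'].to(params["DEVICE"])
            seq_len = batch['size'].to(params["DEVICE"])
            y_hat, _ = model(x, seq_len)
            total_loss += criterion(y_hat, y).item()
    # Same normalization as the training loss: summed loss / dataset size.
    return total_loss / len(dataset)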
import glob
import os
import pickle

import numpy as np
import torch as th
from torch.utils.tensorboard import SummaryWriter


def main():
    config = Settings()
    # |TODO| move these into Settings()
    train_filename = config.train_file
    test_filename = config.test_file
    dataset_path = os.path.join(os.getcwd(), config.path)

    if not os.path.exists(config.exp_dir):
        os.mkdir(config.exp_dir)
    model_dir = os.path.join(config.exp_dir, config.model_name)
    logger = SummaryWriter(model_dir)

    if config.data_type == 'success':
        dataset = glob.glob(f'{dataset_path}/{train_filename}/*.pickle')
        train_dataset = dataset[:-20000]
        test_dataset = dataset[-20000:]
        print('#trajectories of train_dataset:', len(train_dataset))
        print('#trajectories of test_dataset:', len(test_dataset))

    elif config.data_type == 'mcts':
        dataset = glob.glob(f'{dataset_path}/{train_filename}/*.pickle')
        train_dataset = dataset[:-20000]
        test_dataset = dataset[-20000:]

        if config.filter:
            # Keep only trajectories whose total reward exceeds the filter
            # threshold; report reward statistics before and after filtering.
            filtered_data_train = []
            filtered_data_test = []
            total_reward_filt = []
            total_reward_not_filt = []
            avg_total_reward_not_filt = 0
            avg_total_reward_filt = 0
            for data in train_dataset:
                with open(data, 'rb') as f:
                    traj = pickle.load(f)
                avg_total_reward_not_filt += traj[-1]
                total_reward_not_filt.append(traj[-1])
                if traj[-1] > config.filter:
                    filtered_data_train.append(data)
                    avg_total_reward_filt += traj[-1]
                    total_reward_filt.append(traj[-1])
            for data in test_dataset:
                with open(data, 'rb') as f:
                    traj = pickle.load(f)
                if traj[-1] > config.filter:
                    filtered_data_test.append(data)

            total_reward_not_filt_std = np.std(np.asarray(total_reward_not_filt))
            total_reward_filt_std = np.std(np.asarray(total_reward_filt))
            print('Average of total reward(not filtered):',
                  avg_total_reward_not_filt / len(train_dataset))
            print('std of total reward(not filtered):', total_reward_not_filt_std)
            print('Average of total reward(filtered):',
                  avg_total_reward_filt / len(filtered_data_train))
            print('std of total reward(filtered):', total_reward_filt_std)

            train_dataset = filtered_data_train
            test_dataset = filtered_data_test
            print('#trajectories of train_dataset:', len(train_dataset))
            print('#trajectories of test_dataset:', len(test_dataset))

    # For a mixed dataset, concatenate a first dataset with the filtered
    # portion of a second one (same filtering as above) before building
    # the loaders.

    # Generate dataloaders.
    train_loader = get_loader(config, train_dataset)
    test_loader = get_loader(config, test_dataset)

    # Model.
    device = th.device(config.device)
    if config.model == 'GPT':
        model = GPT2(config).to(device)
    elif config.model == 'RNN':
        model = RNN(config).to(device)
    elif config.model == 'LSTM':
        model = LSTM(config).to(device)
    elif config.model == 'CVAE' or config.model == 'PolicyValueNet':
        model = CVAE(config).to(device)
    elif config.model == 'ValueNet':
        model = ValueNet(config).to(device)
    else:
        raise Exception(
            f'"{config.model}" is not supported! You should select "GPT", "RNN", '
            f'"LSTM", "CVAE", "ValueNet", or "PolicyValueNet".')

    # Optimizer.
    optimizer = th.optim.AdamW(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)

    # Learning-rate scheduler.
    if config.optimizer == 'AdamW':
        # Linear warmup to the base learning rate.
        scheduler = th.optim.lr_scheduler.LambdaLR(
            optimizer, lambda step: min((step + 1) / config.warmup_step, 1))
    elif config.optimizer == 'AdamWR':
        scheduler = CosineAnnealingWarmUpRestarts(optimizer=optimizer,
                                                  T_0=config.T_0,
                                                  T_mult=config.T_mult,
                                                  eta_max=config.lr_max,
                                                  T_up=config.warmup_step,
                                                  gamma=config.lr_mult)
    else:
        raise Exception(
            f'"{config.optimizer}" is not supported! You should select "AdamW" or "AdamWR".')

    # Metric. |TODO| implement Chamfer distance.
    if config.model == 'CVAE':
        loss_fn = ELBOLoss(config)
        eval_fn = ELBOLoss(config)
    elif config.model == 'ValueNet':
        loss_fn = RegressionLossValue(config)
        eval_fn = RegressionLossValue(config)
    elif config.model == 'PolicyValueNet':
        loss_fn = None
        eval_fn = None
    else:
        loss_fn = RegressionLossPolicy(config)
        eval_fn = RegressionLossPolicy(config)

    # Trainer & Evaluator.
    trainer = Trainer(config=config, loader=train_loader, model=model,
                      optimizer=optimizer, scheduler=scheduler,
                      loss_fn=loss_fn, eval_fn=eval_fn)
    evaluator = Evaluator(config=config, loader=test_loader, model=model,
                          eval_fn=eval_fn)

    # Save the configuration.
    config.save(model_dir + '/config.yaml')

    # Log the model graph.
    dummy = next(iter(test_loader))
    for k in dummy:
        dummy[k] = dummy[k].to(device).detach()
    logger.add_graph(ModelAsTuple(config, model), dummy)

    start_epoch = 1
    best_error = 10000.

    # Load a checkpoint for resuming.
    if config.resume is not None:
        filename = os.path.join(model_dir, config.resume)
        if os.path.isfile(filename):
            start_epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, filename, model, optimizer, scheduler)
            start_epoch += 1
            print("Loaded checkpoint '{}' (epoch {})".format(config.resume, start_epoch))
        else:
            raise Exception("No checkpoint found at '{}'".format(config.resume))

    # Load a pre-trained checkpoint.
    if config.pre_trained is not None:
        pre_trained_path = os.path.join(config.exp_dir, config.pre_trained)
        if os.path.isfile(pre_trained_path):
            start_epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, pre_trained_path, model, optimizer, scheduler)
            start_epoch = 1
            print("Loaded checkpoint '{}'".format(config.pre_trained))
        else:
            raise Exception("No checkpoint found at '{}'".format(config.pre_trained))

    for epoch in range(start_epoch, config.epochs + 1):
        print(f'===== Start {epoch} epoch =====')

        # Train one epoch.
        print("Training...")
        train_loss, train_val = trainer.train(epoch)

        # Logging.
        if config.model == 'CVAE':
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(Reconstruction)/train', train_loss['Recon'], epoch)
            logger.add_scalar('Loss(KL_divergence)/train', train_loss['KL_div'], epoch)
        elif config.model == 'ValueNet':
            logger.add_scalar('Loss/train', train_loss['total'], epoch)
        elif config.model == 'PolicyValueNet':
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(action)/train', train_loss['action'], epoch)
            logger.add_scalar('Loss(accumulated reward)/train',
                              train_loss['accumulated_reward'], epoch)
        else:
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(action)/train', train_loss['action'], epoch)

        # |FIXME| debug for eff_grad: "RuntimeError: Boolean value of Tensor
        # with more than one value is ambiguous"
        log_gradients(model, logger, epoch,
                      log_grad=config.log_grad,
                      log_param=config.log_para,
                      eff_grad=config.eff_grad,
                      print_num_para=config.print_num_para)

        # Evaluate.
        if epoch % config.test_eval_freq == 0:
            print("Validating...")
            test_val = evaluator.eval(epoch)

            # Save the best model.
            # |TODO| change 'action' to 'total' @ trainer.py & evaluator.py
            # -> merge 'CVAE' & others
            if config.model in ('CVAE', 'ValueNet', 'PolicyValueNet'):
                if test_val['total'] < best_error:
                    best_error = test_val['total']
                    save_checkpoint('Saving the best model!',
                                    os.path.join(model_dir, 'best.pth'),
                                    epoch, best_error, model, optimizer, scheduler)
            else:
                if test_val['action'] < best_error:
                    best_error = test_val['action']
                    save_checkpoint('Saving the best model!',
                                    os.path.join(model_dir, 'best.pth'),
                                    epoch, best_error, model, optimizer, scheduler)

            # Logging.
            if config.model == 'CVAE':
                logger.add_scalar('Eval(total)/test', test_val['total'], epoch)
                logger.add_scalar('Eval(Reconstruction)/test', test_val['Recon'], epoch)
                logger.add_scalar('Eval(KL_divergence)/test', test_val['KL_div'], epoch)
            elif config.model == 'ValueNet':
                logger.add_scalar('Eval/test', test_val['total'], epoch)
            elif config.model == 'PolicyValueNet':
                logger.add_scalar('Eval(total)/test', test_val['total'], epoch)
                logger.add_scalar('Eval(action)/test', test_val['action'], epoch)
                logger.add_scalar('Eval(accumulated reward)/test',
                                  test_val['accumulated_reward'], epoch)
            else:
                logger.add_scalar('Eval(action)/test', test_val['action'], epoch)

        # Save a periodic checkpoint.
        if epoch % config.save_freq == 0:
            save_checkpoint('Saving...',
                            os.path.join(model_dir, f'ckpt_epoch_{epoch}.pth'),
                            epoch, best_error, model, optimizer, scheduler)

        print(f'===== End {epoch} epoch =====')
import torch
import torch.nn as nn


class Train(object):
    def __init__(self, epoch, sn=False):
        # Device configuration.
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        # Hyper-parameters.
        self.__sequence_length = 50
        self.__input_size = 78
        self.__hidden_size = 256
        self.__num_layers = 3
        self.__num_classes = 7
        self.__batch_size = 100  # 256
        self.__num_epochs = epoch
        self.__learning_rate = 0.00005
        self.__weight_decay = 0.0001
        self.__vat_alpha = 0.1

        self.model = RNN(self.__input_size, self.__hidden_size,
                         self.__num_layers, self.__num_classes, sn).to(self.device)
        self.vat_loss = VATLoss(xi=0.1, eps=1.0, ip=1)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.__learning_rate,
                                          weight_decay=self.__weight_decay)
        self.data_load()

    def data_load(self):
        # Load the data.
        train_list = dataloader.make_datapath_list(phase="train")
        val_list = dataloader.make_datapath_list(phase="val")
        train_dataset = dataloader.LoadDataset(file_list=train_list, phase='train')
        val_dataset = dataloader.LoadDataset(file_list=val_list, phase='val')
        self.train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=self.__batch_size, shuffle=True)
        self.val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=self.__batch_size, shuffle=True)

    def train(self, save_name, vat=False):
        # Train the model.
        total_step = len(self.train_loader)
        for epoch in range(self.__num_epochs):
            print(f'epoch = {epoch}')
            for i, (images, labels) in enumerate(self.train_loader):
                images = images.to(self.device)
                labels = labels.to(self.device)

                # Forward pass; with VAT, add the virtual adversarial
                # regularization term to the cross-entropy loss.
                if vat:
                    lds = self.vat_loss(self.model, images)
                    outputs = self.model(images, self.device)
                    loss = self.criterion(outputs, labels) + self.__vat_alpha * lds
                else:
                    outputs = self.model(images, self.device)
                    loss = self.criterion(outputs, labels)

                # Backward and optimize.
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                if (i + 1) % self.__num_epochs == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                        epoch + 1, self.__num_epochs, i + 1, total_step, loss.item()))

            # Periodic checkpoints at selected epochs.
            if epoch in (10, 20, 40, 60, 80):
                torch.save(self.model.state_dict(),
                           "{}_{}.ckpt".format(save_name, epoch))

        # Save the final model checkpoint.
        torch.save(self.model.state_dict(), save_name)

    def test(self):
        # Test the model.
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in self.val_loader:
                images = images.reshape(-1, self.__sequence_length,
                                        self.__input_size).to(self.device)
                labels = labels.to(self.device)
                outputs = self.model(images, self.device)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print('Test Accuracy of the model on the 7000 test data: {} %'.format(
                100 * correct / total))
import math

import torch
import torch.nn as nn
import torch.utils.data as data


def main():
    start_epoch = 0
    best_loss = math.inf
    epochs_since_improvement = 0

    dataset = GaitSequenceDataset(root_dir=data_dir,
                                  longest_sequence=85,
                                  shortest_sequence=55)
    train_sampler, validation_sampler = generate_train_validation_samplers(
        dataset, validation_split=0.2)

    print('Building dataloaders..')
    train_dataloader = data.DataLoader(dataset, batch_size=batch_size,
                                       sampler=train_sampler)
    validation_dataloader = data.DataLoader(dataset, batch_size=1,
                                            sampler=validation_sampler,
                                            drop_last=True)

    model = RNN(num_features, hidden_dimension, num_classes, num_layers=2).to(device)

    if load_pretrained is True:
        print('Loading pretrained model..')
        checkpoint = torch.load(checkpoint_path)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer = checkpoint['optimizer']
    else:
        print('Creating model..')
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    criterion = nn.CrossEntropyLoss().to(device)

    if mode == 'train':
        summary = SummaryWriter()
        model.to(device)
        print('########### ', model)
        for epoch in range(start_epoch, start_epoch + num_epochs):
            # Stop early after 20 epochs without improvement; decay the
            # learning rate every 4 epochs without improvement.
            if epochs_since_improvement == 20:
                break
            if epochs_since_improvement > 0 and epochs_since_improvement % 4 == 0:
                adjust_learning_rate(optimizer, 0.8)

            train(model, train_dataloader, optimizer, criterion, clip_gradient,
                  device, epoch, num_epochs, summary, loss_display_interval)
            current_loss = validate(model, validation_dataloader, criterion,
                                    device, epoch, num_epochs, summary,
                                    loss_display_interval)

            is_best = current_loss < best_loss
            best_loss = min(best_loss, current_loss)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" % epochs_since_improvement)
            else:
                epochs_since_improvement = 0

            save_checkpoint(epoch, epochs_since_improvement, model, optimizer, is_best)
            print('Current loss : ', current_loss, ' Best loss : ', best_loss)
    else:
        print('testing...')
        model = RNN(num_features, hidden_dimension, num_classes, num_layers=2)
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.to(device)
        print(model)
        for batch_idx, val_data in enumerate(validation_dataloader):
            sequence = val_data['sequence'].permute(1, 0, 2).to(device)
            piano_roll = val_data['piano_roll'].permute(1, 0, 2).squeeze(1).to('cpu')
            sequence_length = val_data['sequence_length']
            file_name = val_data['file_name']
            frame = val_data['frame']
            leg = val_data['leg']
            sonify_sequence(model, sequence, sequence_length)
            plt.imshow(piano_roll)
            plt.show()
            print(file_name, frame, leg)
            break
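# A minimal sketch of the adjust_learning_rate() helper assumed above:
# shrink every parameter group's learning rate by a constant factor.
def adjust_learning_rate(optimizer, shrink_factor):
    print("\nDecaying learning rate.")
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor
    print("The new learning rate is %f\n" % optimizer.param_groups[0]['lr'])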
# Prepare the data: sliding windows of length look_back, split 70/30.
dataX, dataY = creat_dataset(datas, look_back)
dataX = torch.from_numpy(dataX).to(DEVICE)
dataY = torch.from_numpy(dataY).to(DEVICE)

train_size = int(len(dataX) * 0.7)
x_train = dataX[:train_size]
y_train = dataY[:train_size]
x_train = x_train.view(-1, input_feature_size, look_back)
y_train = y_train.view(-1, input_feature_size, output_size)

model = RNN(look_back, hidden_layer, output_size, num_layers).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.5, 0.99),
                             weight_decay=2.5e-5)
loss_func = nn.MSELoss().to(DEVICE)

total_loss = []
for i in range(epochs):
    # Full-batch training on the training split.
    x = x_train.float()
    y = y_train.float()
    out = model(x)
    loss = loss_func(out, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    total_loss.append(loss.item())
    if (i + 1) % 10 == 0:
        print('Epoch:{}, Loss:{:.5f}'.format(i + 1, loss.item()))
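# A minimal sketch of the creat_dataset() helper assumed above (name kept as
# the script spells it): slice the series into sliding windows of length
# look_back, each paired with the value that follows it.
import numpy as np


def creat_dataset(series, look_back):
    data_x, data_y = [], []
    for i in range(len(series) - look_back):
        data_x.append(series[i:i + look_back])
        data_y.append(series[i + look_back])
    return np.array(data_x), np.array(data_y)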
input_dim = len(TEXT.vocab)
print(f'The number of vocabulary entries is {input_dim}.')

end_time = time.time()
data_loading_time = round(end_time - start_time, 3)
data_prep_mins, data_prep_secs = epoch_time(start_time, end_time)
print(f'Data loading Time: {data_prep_mins}m {data_prep_secs}s')

pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
model = RNN(input_dim, args.embedding_dim, args.hidden_dim, 1,
            args.n_layers, args.bidirectional, args.dropout, pad_idx)
# Zero the embedding row used for padding.
model.embedding.weight.data[pad_idx] = torch.zeros(args.embedding_dim)

optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
model = model.to(device)
criterion = criterion.to(device)

best_test_loss = float('inf')
loss_result = []
acc_result = []
elapsed_time = []

print(f'Training with {tokenizer_name} tokenizer...')
for epoch in range(args.n_epochs):
    start_time = time.time()
            # ...tail of train(): periodic and final checkpointing...
            opt_fname = os.path.join(expt_dir, f"opt_epoch_{ep}")
            torch.save(model.state_dict(), model_fname)
            torch.save(optimizer.state_dict(), opt_fname)

        model_fname = os.path.join(expt_dir, f"model_final_{ep}")
        opt_fname = os.path.join(expt_dir, f"opt_final_{ep}")
        torch.save(model.state_dict(), model_fname)
        torch.save(optimizer.state_dict(), opt_fname)
    finally:
        log()


if __name__ == "__main__":
    fname = "_bios.json"
    bc = ByteCode("byte_values.txt")
    ds = ByteDataset(fname, bc, device=torch.device('cpu'))
    print(f"Loaded {len(ds)} samples")
    dl = ByteDataLoader(ds, batch_size=1)

    rnn = RNN(bc.num_codes)
    rnn.train()

    epochs = 1
    lr = 1e-3
    losses = []
    lossfn = nn.CrossEntropyLoss(reduction='none')
    optimizer = Adam(rnn.parameters(), lr=lr)

    train(dl, rnn, optimizer,
          dict(epochs=epochs, expt_dir="tst", sample_step=1),
          torch.device('cpu'), bc)
import logging
import math
from collections import defaultdict

import numpy as np
import torch
import torch.nn as nn
from sklearn import metrics


def main():
    logging.basicConfig(filename='logs/train.log', level=logging.DEBUG)

    # Saved model path.
    save_path = 'history/trained_model'

    # Input file.
    # filename = 'data/train_and_test.csv'
    filename = 'data/golden_400.csv'

    embedding_size = 300  # 128 for torch embeddings, 300 for pre-trained
    hidden_size = 24
    batch_size = 64
    nb_epochs = 200
    lr = 1e-4
    max_norm = 5
    folds = 3

    # Dataset.
    ds = ClaimsDataset(filename)
    vocab_size = len(ds.vocab)
    pad_id = ds.vocab.token2id.get('<pad>')
    test_len = val_len = math.ceil(len(ds) * .10)
    train_len = len(ds) - (val_len + test_len)
    print("\nTrain size: {}\tValidate size: {}\tTest Size: {}".format(
        train_len, val_len, test_len))

    # Randomly split the dataset into train, validation, and test subsets.
    d_tr, d_val, d_te = torch.utils.data.dataset.random_split(
        ds, [train_len, val_len, test_len])

    # Data loaders.
    dl_tr = torch.utils.data.DataLoader(d_tr, batch_size=batch_size)
    dl_val = torch.utils.data.DataLoader(d_val, batch_size=batch_size)
    dl_test = torch.utils.data.DataLoader(d_te, batch_size=batch_size)

    model = RNN(vocab_size, embedding_size, hidden_size, pad_id, ds)
    model = utils.cuda(model)
    model.zero_grad()

    parameters = [p for p in model.parameters() if p.requires_grad]
    # parameters = list(model.parameters())  # use this without pre-trained embeddings
    optim = torch.optim.Adam(parameters, lr=lr, weight_decay=35e-3, amsgrad=True)
    criterion = nn.NLLLoss(weight=torch.Tensor([1.0, 2.2]).cuda())

    losses = defaultdict(list)
    print("\nTraining started: {}\n".format(utils.get_time()))

    phases, loaders = ['train', 'val'], [dl_tr, dl_val]
    tr_acc, v_acc = [], []
    for epoch in range(nb_epochs):
        for phase, loader in zip(phases, loaders):
            if phase == 'train':
                model.train()
            else:
                model.eval()

            ep_loss, out_list, label_list = [], [], []
            for i, inputs in enumerate(loader):
                optim.zero_grad()
                claim, labels = inputs
                labels = utils.variable(labels)
                out = model(claim)

                out_list.append(utils.normalize_out(out))  # collect outputs from every batch
                label_list.append(labels)

                out = torch.log(out)
                loss = criterion(out, labels)

                # Backpropagate only in the training phase.
                if phase == 'train':
                    loss.backward()
                    # Clip gradients to guard against explosion.
                    torch.nn.utils.clip_grad_norm_(parameters, max_norm=max_norm)
                    optim.step()

                ep_loss.append(loss.item())

            # Record the average loss for this phase at each epoch.
            losses[phase].append(np.mean(ep_loss))

            acc = utils.get_accuracy(label_list, out_list)
            if phase == 'train':
                tr_acc.append(acc)
            else:
                v_acc.append(acc)

            print("Epoch: {} \t Phase: {} \t Loss: {:.4f} \t Accuracy: {:.3f}".format(
                epoch, phase, np.mean(ep_loss), acc))

    print("\nTime finished: {}\n".format(utils.get_time()))
    utils.plot_loss(losses['train'], losses['val'], tr_acc, v_acc, filename, -1)

    logging.info("\nTrain file=> " + filename +
                 "\nParameters=> \nBatch size: " + str(batch_size) +
                 "\nHidden size: " + str(hidden_size) +
                 "\nMax_norm: " + str(max_norm) +
                 "\nL2 Reg/weight decay: " + str(optim.param_groups[0]['weight_decay']) +
                 "\nLoss function: \n" + str(criterion))
    logging.info('Final train accuracy: ' + str(tr_acc[-1]))
    logging.info('Final validation accuracy: ' + str(v_acc[-1]))

    # Save the model.
    torch.save(model.state_dict(), save_path)

    # Predict on the held-out test set.
    f1_test, acc_test = [], []
    for i, inputs in enumerate(dl_test):
        claim, label = inputs
        label = utils.variable(label.float())
        out = model(claim)
        y_pred = utils.normalize_out(out)

        f1_test.append(utils.get_f1(label, y_pred))
        acc_test.append(metrics.accuracy_score(label.cpu(), y_pred))

    print("\t\tF1: {:.3f}\tAccuracy: {:.3f}".format(np.mean(f1_test), np.mean(acc_test)))
    logging.info('\nTest f1: ' + str(np.mean(f1_test)) +
                 '\nTest Accuracy: ' + str(np.mean(acc_test)))