def main():
    batch_size = 1
    epochs = 50
    current_loss = 0
    all_losses = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cuda = torch.cuda.is_available()
    Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
    learning_rate = 0.005  # If you set this too high, it might explode. If too low, it might not learn
    criterion = nn.NLLLoss()
    name_dataset = NameDataset()
    dataloader = DataLoader(name_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    n_hidden = 128
    rnn = RNN(name_dataset.n_letters, n_hidden, name_dataset.n_categories, batch_size).to(device)
    optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

    start = time.time()
    print('Start Training')
    for epoch in range(epochs):
        for i, data in enumerate(dataloader):
            # output, _ = rnn(data[0][0, 0:1].type(Tensor), torch.zeros(1, n_hidden).type(Tensor))
            name_tensor = data[0].transpose(0, 1).type(Tensor)
            # NLLLoss expects integer class indices, so the target must be a LongTensor
            category_tensor = data[1].long().to(device)
            output, loss = train(rnn, optimizer, category_tensor, name_tensor, device,
                                 learning_rate, criterion)
            current_loss += loss

        # Print epoch number, loss, name and prediction
        avg_loss = current_loss / (len(name_dataset) / batch_size)
        category = name_dataset.all_categories[int(category_tensor.detach().cpu().numpy()[0])]
        guess, guess_i = category_from_output(output[0], name_dataset.all_categories)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('Epoch: %d (%s) %.4f %s / %s %s' % (
            epoch, time_since(start), avg_loss,
            tensor_to_letter(data[0][0], name_dataset.all_letters), guess, correct))

        # Add current loss avg to list of losses
        all_losses.append(avg_loss)
        current_loss = 0
        torch.save(rnn.state_dict(), "epoch_%d.pth" % epoch)
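# The helper category_from_output used above is not shown in this excerpt. A
# minimal sketch of what it presumably does, following the standard char-RNN
# tutorial pattern (an assumption, not this repository's actual helper): pick
# the highest-scoring class from the log-softmax output and map it back to a
# category name.
def category_from_output(output, all_categories):
    top_v, top_i = output.topk(1)   # highest log-probability and its index
    category_i = top_i[0].item()    # tensor -> Python int
    return all_categories[category_i], category_i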
def main(args):
    # prepare data
    train_texts, train_labels = read_data(os.path.join(args.data_dir, 'train'))
    test_texts, test_labels = read_data(os.path.join(args.data_dir, 'test'))
    training_set = list(zip(train_texts, train_labels))
    test_set = list(zip(test_texts, test_labels))
    random.shuffle(training_set)
    random.shuffle(test_set)

    vocab_counter = Counter(flatten([get_words(text) for text in train_texts]))
    word2vec = vocab.Vocab(vocab_counter, max_size=20000, min_freq=3, vectors='glove.6B.100d')

    model = RNN(args.input_size, args.hidden_size, args.nb_class)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    train(training_set, model, criterion, optimizer, args.batch_size, args.nb_epoch, word2vec)
    evaluate(test_set, model, args.batch_size, word2vec)

    torch.save(model.state_dict(), args.weights_file)
    writer.close()
def main(args):
    print(sys.argv)
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr
    data_loader = get_data_loader(args.gt_path, args.tensors_path, args.bs,
                                  args.json_labels_path, num_workers=8)

    model = RNN()
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()

    losses = []
    p = 1
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            # anneal the probability p (passed into the model's forward)
            # from 1.0 down to 0.0 over the first nine epochs
            if epoch < 3:
                p = 1.0
            elif epoch < 6:
                p = 0.5
            elif epoch < 9:
                p = 0.25
            else:
                p = 0.0

            loss_epoch = []
            for step, (feat_maps, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    feat_maps = feat_maps.cuda()
                    gt = gt.cuda()

                model.zero_grad()
                out = model(feat_maps, gt, p)
                loss = model_loss(out, gt)
                loss.backward()
                optimizer.step()

                loss_step = loss.cpu().detach().numpy()
                loss_epoch.append(loss_step)
                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' + str(len(data_loader)) +
                      ' - Loss: ' + str(loss_step))

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
def main():
    parser = argparse.ArgumentParser(description="==========[RNN]==========")
    parser.add_argument("--mode", default="train", help="available modes: train, test, eval")
    parser.add_argument("--model", default="rnn", help="available models: rnn, lstm")
    parser.add_argument("--dataset", default="all", help="available datasets: all, MA, MI, TN")
    parser.add_argument("--rnn_layers", default=3, type=int, help="number of stacked rnn layers")
    parser.add_argument("--hidden_dim", default=16, type=int, help="number of hidden dimensions")
    parser.add_argument("--lin_layers", default=1, type=int, help="number of linear layers before output")
    parser.add_argument("--epochs", default=100, type=int, help="number of max training epochs")
    parser.add_argument("--dropout", default=0.0, type=float, help="dropout probability")
    parser.add_argument("--learning_rate", default=0.01, type=float, help="learning rate")
    parser.add_argument("--verbose", default=2, type=int, help="how much training output?")
    options = parser.parse_args()
    verbose = options.verbose

    if torch.cuda.is_available():
        device = torch.device("cuda")
        if verbose > 0:
            print("GPU available, using cuda...")
            print()
    else:
        device = torch.device("cpu")
        if verbose > 0:
            print("No available GPU, using CPU...")
            print()

    params = {
        "MODE": options.mode,
        "MODEL": options.model,
        "DATASET": options.dataset,
        "RNN_LAYERS": options.rnn_layers,
        "HIDDEN_DIM": options.hidden_dim,
        "LIN_LAYERS": options.lin_layers,
        "EPOCHS": options.epochs,
        "DROPOUT_PROB": options.dropout,
        "LEARNING_RATE": options.learning_rate,
        "DEVICE": device,
        "OUTPUT_SIZE": 1
    }
    params["PATH"] = "models/{}_{}_{}_{}_{}_{}_{}_{}_model.pt".format(
        params["MODEL"], params["DATASET"], params["RNN_LAYERS"],
        params["HIDDEN_DIM"], params["LIN_LAYERS"], params["LEARNING_RATE"],
        params["DROPOUT_PROB"], params["EPOCHS"])

    # if options.mode == "train":
    #     print("training placeholder...")
    train_data = utils.DistrictData(params["DATASET"], "train")
    val_data = utils.DistrictData(params["DATASET"], "val")
    params["INPUT_SIZE"] = train_data[0]['sequence'].size()[1]

    if params["MODEL"] == "rnn":
        model = RNN(params)
    elif params["MODEL"] == "lstm":
        model = LSTM(params)
    model.to(params["DEVICE"])

    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=params["LEARNING_RATE"])

    if verbose == 0:
        print(params["PATH"])
    else:
        utils.print_params(params)
        print("Beginning training...")
        print()

    since = time.time()
    best_val_loss = 10.0
    for e in range(params["EPOCHS"]):
        running_loss = 0.0
        # model.zero_grad()
        model.train()
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
        for batch in train_loader:
            x = batch['sequence'].to(device)
            y = batch['target'].to(device)
            seq_len = batch['size'].to(device)

            optimizer.zero_grad()
            y_hat, hidden = model(x, seq_len)
            loss = criterion(y_hat, y)
            # accumulate a plain float, not the graph-carrying tensor
            running_loss += loss.item()
            loss.backward()
            optimizer.step()

        mean_loss = running_loss / len(train_data)
        val_loss = evaluate(val_data, model, params, criterion, validation=True)

        if verbose == 2 or (verbose == 1 and (e + 1) % 100 == 0):
            print('=' * 25 + ' EPOCH {}/{} '.format(e + 1, params["EPOCHS"]) + '=' * 25)
            print('Training Loss: {}'.format(mean_loss))
            print('Validation Loss: {}'.format(val_loss))
            print()

        if e > params["EPOCHS"] / 3:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = model.state_dict()
                torch.save(best_model, params["PATH"])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Training Loss: {:.4f}'.format(mean_loss))
    print('Best Validation Loss: {:.4f}'.format(best_val_loss))

    test_data = utils.DistrictData(params["DATASET"], "test")
    # Note: this evaluates the model in its final state; the best checkpoint
    # was saved to params["PATH"] above.
    test_loss = evaluate(test_data, model, params, criterion)
    print('Test Loss: {}'.format(test_loss))
    print()
        out = model(x)
        loss = loss_func(out, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 10 == 0:
            print('Epoch:{}, Loss:{:.5f}'.format(i + 1, loss.item()))
            total_loss.append(loss.item())

    # create loss csv file
    total_loss = pd.DataFrame(total_loss)
    total_loss.to_csv(os.path.join(ROOT_DIR, 'datasets/total_loss_{}.csv'.format(data_type)))

    # save model
    torch.save(model.state_dict(), './model.pkl')

    # test
    dataX = dataX.view(-1, input_feature_size, look_back).float()
    pred = model(dataX)
    pred_test = pred.view(-1, input_feature_size).data.numpy()

    # result: undo the min-max scaling applied during preprocessing
    for i in range(out_feature_size):
        true_data = pred_test[:, i] * (max - min) + min
        true_real_data = dataY[:, i] * (max - min) + min

    # save final result to csv
    result = {}
    label_loss = criterion_label(label_logits, rel_targets)
    loss = arc_loss + label_loss

    optimizer_sparse.zero_grad()
    optimizer.zero_grad()
    loss.backward()
    nn.utils.clip_grad_value_(model.param_group_dense, args.clip)
    nn.utils.clip_grad_value_(model.param_group_sparse, args.clip)
    optimizer.step()
    optimizer_sparse.step()

    print("Finish training step: %i, Avg batch loss= %.4f, time= %.2fs"
          % (ts, loss.item(), time.time() - start))

    if ts <= 1000:
        # during the first 1000 steps, checkpoint every 100 steps
        if ts % 100 == 0:
            print("Save Model...")
            torch.save({'t_step': ts,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'optimizer_sparse_state_dict': optimizer_sparse.state_dict()},
                       path_save_model)
    else:
        print("Performance on Dev data")
        start = time.time()
        arcs_acc, labls_acc = test(val_loader, model)
        print("Finish predictions on dev data in %.2fs" % (time.time() - start))
        print("---\nUAS accuracy:\t%.2f" % (float(arcs_acc) * 100))
        print("---\nLAS accuracy:\t%.2f" % (float(labls_acc) * 100))
        print("------------------------------------------------------------------")
        score = float(labls_acc) * 100
        if score > highestScore:
            print("Save Model...")
            torch.save({'t_step': ts,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'optimizer_sparse_state_dict': optimizer_sparse.state_dict()},
                       path_save_model)
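# A minimal sketch of resuming from the checkpoint dictionary saved above. The
# file name path_save_model and the dictionary keys come straight from the
# snippet; the surrounding setup (model and optimizer construction) is assumed
# to exist already.
import torch

checkpoint = torch.load(path_save_model)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
optimizer_sparse.load_state_dict(checkpoint['optimizer_sparse_state_dict'])
ts = checkpoint['t_step']  # continue counting training steps from here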
def main(FLAGS):
    data_path = DATA_PATH
    debug = FLAGS.debug
    unrolling_factor = FLAGS.unroll
    batch_size = FLAGS.batch_size

    # Initialize Models
    CNN_model = CNN(FLAGS.batch_size, FLAGS.unroll).to(device)
    RNN_model = RNN(CNN_OUTPUT_SIZE, FLAGS.unroll, FLAGS.batch_size, False).to(device)  # Stateless LSTM for training
    criterion = nn.MSELoss()

    # Only skip connection parameters need to be learned
    skip_conv_params = (list(CNN_model.skip_conv3.parameters()) +
                        list(CNN_model.skip_conv2.parameters()) +
                        list(CNN_model.skip_conv1.parameters()))
    prelu_params = (list(CNN_model.prelu1.parameters()) +
                    list(CNN_model.prelu2.parameters()) +
                    list(CNN_model.prelu3.parameters()))
    # Network parameters that need to be learned by training
    params = list(RNN_model.parameters()) + prelu_params + skip_conv_params

    # Initialize optimizer, added weight decay
    optimizer = torch.optim.Adam(params, lr=FLAGS.learning_rate)

    # Load checkpoint if present
    epoch = 0
    # TODO: Manually copy the checkpoint to this path; it has to be renamed as below
    checkpoint_name = './final_checkpoint/re3_final_checkpoint.pth'
    if debug:
        print("Checkpoint name is %s" % checkpoint_name)
    if os.path.isfile(checkpoint_name):
        checkpoint = torch.load(checkpoint_name)
        CNN_model.load_state_dict(checkpoint['cnn_model_state_dict'])
        RNN_model.load_state_dict(checkpoint['rnn_model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
        print("Checkpoint loaded")

    if debug:
        print("Data folder is present in %s" % data_path)

    # Training data paths
    train_data_path = data_path + '/data/train/Data/'
    train_annot_path = data_path + '/data/train/Annotations/'
    list_id, folder_start_pos = prepare_for_dataset(train_data_path, unrolling_factor)
    train_dataset = TrackerDataset(train_data_path, train_annot_path, list_id,
                                   folder_start_pos, (CROP_SIZE, CROP_SIZE, 3),
                                   unrolling_factor, debug)
    # img, labels = train_dataset.__getitem__(1)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    count = 0
    total_step = len(train_loader)
    loss_sum = 0.0  # Loss accumulator

    # Start training
    # TODO: The checkpoints are saved in one folder and loaded from another folder.
    # This gives the freedom to inspect the model's performance across different checkpoints.
    ckpt_path = 'trained_checkpoints'
    for epoch in range(epoch, FLAGS.num_epochs):
        for minibatch, (images, gt_labels) in enumerate(train_loader):
            try:
                # Converting (batch_size x 2*unroll x C x H x W) to (batch_size*unroll*2 x C x H x W)
                images = images.view(-1, 3, CROP_SIZE, CROP_SIZE).to(device)
                gt_labels = gt_labels.view(-1, 4).to(device)

                # Forward, backward and optimize
                CNN_features = CNN_model(images)
                pred_labels = RNN_model(CNN_features)
                loss = criterion(pred_labels, gt_labels)
                CNN_model.zero_grad()
                RNN_model.zero_grad()
                loss.backward()
                optimizer.step()

                loss_sum += loss.item()
                if minibatch % 20 == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss {:.4f}\n'.format(
                        epoch, FLAGS.num_epochs, minibatch, total_step, loss.item()))
            except Exception as e:
                print(e)
                print(images.size())

        average_loss = loss_sum / total_step
        loss_sum = 0.0
        flog.write('Epoch [{}/{}], Avg Loss {:.4f}\n'.format(epoch, FLAGS.num_epochs, average_loss))
        print('Epoch [{}/{}], Avg Loss {:.4f}\n'.format(epoch, FLAGS.num_epochs, average_loss))

        if epoch % 10 == 0:
            # Save the model checkpoint
            torch.save({
                'epoch': epoch,
                'cnn_model_state_dict': CNN_model.state_dict(),
                'rnn_model_state_dict': RNN_model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, ckpt_path + '/checkpoint_' + str(epoch) + '.pth')
def train(cfg, datasets, dataloaders, device, save_model_path):
    model = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    criterion = torch.nn.CrossEntropyLoss()
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    best_metric = 0.0
    best_epoch = 0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(cfg.num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            y_pred = []
            y_true = []

            # Iterate over data.
            for batch in dataloaders[phase]:
                inputs = batch['inputs'].to(device)
                targets = batch['targets'][cfg.task].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs, hiddens = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, targets)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                y_pred.extend(preds.tolist())
                y_true.extend(targets.tolist())

            # if phase == 'train':
            #     scheduler.step()

            epoch_loss = running_loss / len(datasets[phase])
            f1_ep = f1_score(y_true, y_pred, average='weighted')
            precision_ep = precision_score(y_true, y_pred, average='weighted')
            recall_ep = recall_score(y_true, y_pred, average='weighted')
            accuracy_ep = accuracy_score(y_true, y_pred)
            print(f'({phase} @ {epoch+1}): L: {epoch_loss:.3f}; A: {accuracy_ep:.3f}; R: {recall_ep:.3f}; '
                  + f'P: {precision_ep:.3f}; F1: {f1_ep:.3f}')

            # deep copy the model
            if phase == 'valid' and f1_ep > best_metric:
                best_metric = f1_ep
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())

    print(f'Best val Metric {best_metric:.3f} @ {best_epoch+1}\n')

    # load best model weights and save them
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), save_model_path)
    print(f'model is saved @ {save_model_path}')
    return best_metric
    plot_every = n_iters // 10000
    n_hidden = args.hidden
    learning_rate = args.lr
    current_loss = 0

    rnn = RNN(n_letters, n_hidden, n_categories)
    start = time.time()

    for iter in range(1, n_iters + 1):
        category, line, category_tensor, line_tensor = randomTrainingExample()
        output, loss = train(rnn, category_tensor, line_tensor, learning_rate)
        current_loss += loss

        if iter % print_every == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('%d %d%% (%s) %.4f %s / %s %s' % (
                iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))

        if iter % plot_every == 0:
            if args.output is not None:
                torch.save(rnn.state_dict(), "./" + str(args.output))
            writer.add_scalar('Loss', current_loss / plot_every, iter)
            current_loss = 0
    x = torch.zeros((1, time_len, 3))
    for i in range(time_len):
        x[0][i][train_queue[i]] = 1  # one-hot encode each past move
    y = torch.zeros((time_len), dtype=torch.long)
    for i in range(time_len):
        y[i] = train_queue[i + 1]  # target is the move that followed

    train_pre_y = rnn(x).view((-1, 3))
    optimizer.zero_grad()
    loss = criterion(train_pre_y, y)
    loss.backward()
    optimizer.step()
    print("loss:", loss.item())
    torch.save(rnn.state_dict(), rnn_model_file)

    # Predict the next move
    time_len = len(train_queue)
    x = torch.zeros((1, time_len, 3))
    for i in range(0, time_len):
        x[0][i][train_queue[i]] = 1
    with torch.no_grad():
        rnn_next_pred = rnn(x)
    rnn_next_pred = rnn_next_pred.view((-1, 3))[-1]
    rnn_next_pred_idx = rnn_next_pred.argmax(dim=0).item()

    cv2.imshow('image', image)
    cv2.waitKey(0)
    break
                      num_layers=args.layers,
                      batch_first=True)
    print(model)

    # optimizer and loss
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                 lr=args.lr, weight_decay=args.weight_decay)
    criterion = nn.CrossEntropyLoss()
    print(optimizer)
    print(criterion)

    print("Model's state_dict:")
    for param_tensor in model.state_dict():
        print(param_tensor, "\t", model.state_dict()[param_tensor].size())

    if args.cuda:
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True
        model.cuda()
        criterion = criterion.cuda()


def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
from utils import train, timeSince
from model import RNN
import torch
import torch.nn as nn
import time
from tqdm import tqdm

# get_data, TextDataset and n_letters are assumed to be provided by the
# project's data module alongside the imports above.

n_iters = 10000
print_every = 1000
train_loss = []  # Reset every plot_every iters
start = time.time()
# torch device strings are 'cuda'/'cpu'; 'gpu' is not a valid device name
device = "cuda" if torch.cuda.is_available() else "cpu"

data = get_data(data_path='data/names/*.txt')
rnn = RNN(data['all_categories'], n_letters, 128, n_letters)
dataset = TextDataset(data)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
criterion = nn.NLLLoss()
optimizer = torch.optim.AdamW(rnn.parameters(), lr=0.0005)

for iter in tqdm(range(1, n_iters + 1)):
    output, loss = train(rnn, train_loader, criterion, optimizer, device)
    train_loss.append(loss)
    if iter % print_every == 0:
        print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))
    if iter % 2000 == 0:
        torch.save(rnn.state_dict(), "./weights/text_gen{}.pth".format(loss))
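# timeSince is imported from utils but not shown in this excerpt. A minimal
# sketch of the usual helper carrying this name in the char-RNN tutorials (an
# assumption, not this repository's code): format the elapsed wall-clock time
# since `since` as "Xm Ys".
import math
import time

def timeSince(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)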
def main(args):
    print(sys.argv)
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr
    data_loader = get_data_loader(args.gt_path, args.tensors_path, args.json_labels_path, args.bs)

    model = RNN(lstm_hidden_size=args.hidden_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()
    # model_loss = Loss()

    losses = []
    p = 1
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            # anneal p stepwise at epochs 3, 7 and 15
            if epoch == 3:
                p = 2 / 3
            elif epoch == 7:
                p = 1 / 3
            elif epoch == 15:
                p = 0

            loss_epoch = []
            loss1_epoch = []
            loss2_epoch = []
            for step, (tensors, masks, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    tensors = tensors.cuda()
                    masks = masks.cuda()
                    gt = gt.cuda()

                model.zero_grad()
                out, att = model(tensors, masks, gt, p)
                loss1 = model_loss(out, gt)
                # att[:, :-1, :] -> attention produced (location in the next frame) up to the second-to-last frame (49)
                # gt[:, 1:, :]  -> gt from the second frame until the last frame (49)
                loss2 = model_loss(att[:, :-1, :], gt[:, 1:, :])
                loss = loss1 + loss2
                loss.backward()
                optimizer.step()

                loss_epoch.append(loss.cpu().detach().numpy())
                loss1_epoch.append(loss1.cpu().detach().numpy())
                loss2_epoch.append(loss2.cpu().detach().numpy())
                # print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) + ' - Step ' + str(step + 1) + '/' +
                #       str(len(data_loader)) + ' - Loss: ' + str(float(loss)) + " (Loss1: " + str(float(loss1))
                #       + ", Loss2: " + str(float(loss2)) + ")")

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            # average the per-term histories, not loss_epoch again
            loss1_epoch_mean = np.mean(np.array(loss1_epoch))
            loss2_epoch_mean = np.mean(np.array(loss2_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean) + " (loss1: " + str(loss1_epoch_mean) +
                  ", loss2: " + str(loss2_epoch_mean) + ")")

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
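# The two training loops above pass a probability p (annealed toward 0) into
# the model's forward together with the ground truth. One common way such a
# value is consumed is scheduled sampling: at each timestep, feed the ground
# truth with probability p and the model's own previous output otherwise. The
# sketch below is a hypothetical illustration of that pattern, not this
# repository's actual RNN.forward; the names cell, project, gt and p are
# assumptions.
import random
import torch

def decode_step(cell, project, gt, p, state):
    # gt: (batch, T, feat); cell: e.g. an nn.GRUCell; project: an nn.Linear
    outputs = []
    inp = gt[:, 0, :]  # the first step is always ground truth
    for t in range(gt.size(1)):
        state = cell(inp, state)
        out = project(state)
        outputs.append(out)
        # scheduled sampling: ground truth with probability p, else own output
        next_gt = gt[:, min(t + 1, gt.size(1) - 1), :]
        inp = next_gt if random.random() < p else out.detach()
    return torch.stack(outputs, dim=1), state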
        batch_time.update(time.time() - end)
        end = time.time()

        # if i != 0 and i % args.print_freq == 0:
        #     print('Test: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
        #           'Loss {loss.val:.4f} ({loss.avg:.4f}) Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
        #               i, len(val_loader), batch_time=batch_time, loss=losses, top1=top1))
        #     gc.collect()

    print(' TEST Prec@1 {top1.avg:.3f}'.format(top1=top1))
    return top1.avg


# training and testing
for epoch in range(1, args.epochs + 1):
    adjust_learning_rate(args.lr, optimizer, epoch)
    train(train_loader, model, criterion, optimizer, epoch)
    validate(val_loader, model, criterion)

    # save current model
    if epoch % args.save_freq == 0:
        # name_model = 'rnn_{}.pkl'.format(epoch)
        # path_save_model = os.path.join(model_dir, name_model)
        # joblib.dump(model.float(), path_save_model, compress=2)
        name_model = 'rnn_{}.pth'.format(epoch)
        path_save_model = os.path.join(model_dir, name_model)
        torch.save(model.state_dict(), path_save_model)

print('testing...')
test(test_loader, model, criterion)
def train():
    train_seq, test_seq, inx2word, word2inx, word2vec, batch_size = load_data()

    translators = {'inx2word': inx2word, 'word2inx': word2inx}
    with open('models/translators.p', 'wb') as f:
        pickle.dump(translators, f)
    with open('models/word2vec.p', 'wb') as f:
        pickle.dump(word2vec, f)

    dict_size = len(word2inx)
    word2vec = torch.tensor(word2vec)

    # check for GPU
    is_cuda = torch.cuda.is_available()
    if is_cuda:
        device = torch.device("cuda")
        print("GPU is available")
    else:
        device = torch.device("cpu")
        print("GPU not available, CPU used")

    # Instantiate the model with hyperparameters
    model = RNN(embedding_matrix=word2vec, dict_size=dict_size, hidden_dim=100, n_layers=1)
    model.to(device)

    # Define hyperparameters (overrides the batch_size returned by load_data)
    batch_size = 2000
    n_epochs = 100
    lr = 0.01

    # Define Loss, Optimizer
    lossfunction = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training Run
    for epoch in range(1, n_epochs + 1):
        epoch_loss = 0
        for _, (input_seq, target_seq) in enumerate(train_seq):
            optimizer.zero_grad()  # Clears existing gradients from the previous step
            input_seq = input_seq.to(device)
            target_seq = target_seq.to(device)
            output, h = model(input_seq)
            h = h.to(device)
            loss = lossfunction(output, target_seq.view(-1).long())
            loss.backward()   # Does backpropagation and calculates gradients
            optimizer.step()  # Updates the weights accordingly
            epoch_loss += loss.item()

        if epoch % 10 == 0 or epoch == 1:
            loss_test_total = 0
            for input_test, target_test in test_seq:
                input_test = input_test.to(device)
                target_test = target_test.to(device)
                output_test, _ = model(input_test)
                loss_test = lossfunction(output_test, target_test.view(-1).long())
                loss_test_total += loss_test.item()
            norm_loss = epoch_loss / (len(train_seq) * batch_size)
            norm_loss_test = loss_test_total / (len(test_seq) * batch_size)
            print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
            print("Train loss: {:.4f}".format(norm_loss), end=' | ')
            print("Test loss: {:.4f}".format(norm_loss_test))

    torch.save(model.state_dict(), 'models/rnn')
    print('Training done')
        num = len(train_data)
        print(f'{time.time()} {group} {k} [Train] Epoch: {epoch}/{num_epoch} '
              f'Loss: {running_loss/num} Acc: {running_acc/num}')

        model.eval()
        running_loss, running_acc = 0.0, 0.0
        for i, (data, label, labelsq) in enumerate(test_dataloader):
            data = data.to(device)
            label = label.to(device)
            labelsq = labelsq.to(device).squeeze()
            with torch.no_grad():
                out1, out2 = model(data)
                loss = Loss(criterionsq, criterion, out1, out2, labelsq, label)
            running_loss += loss.data.item() * label.size(0)
            _, pred = torch.max(F.log_softmax(out2, dim=1), 1)
            num_correct = (pred == label).sum()
            running_acc += num_correct.data.item()

        num = len(test_data)
        print(f'{time.time()} {group} {k} [valid] Epoch: {epoch}/{num_epoch} '
              f'Loss: {running_loss/num} Acc: {running_acc/num}\n')

        if running_acc / num >= valid_acc:
            valid_acc = running_acc / num
            os.makedirs(f'./para{k_fold}/{group}', exist_ok=True)
            torch.save(model.state_dict(), f'./para{k_fold}/{group}/{k}_dl.pt')
def main(args):
    device = torch.device('cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
    dataset = MyDataset(batch_size=args.batch_size, use_vector=args.word_vector, pdevice=device)

    # Hyper parameters
    INPUT_DIM = len(dataset.TEXT.vocab)
    OUTPUT_DIM = 1

    # TODO: SVM
    if args.model == 'rnn':
        print("Model: Vanilla RNN")
        model = RNN(INPUT_DIM, args.ed, args.hd, OUTPUT_DIM).to(device)
    elif args.model == 'lstm':
        print("Model: LSTM")
        model = LSTM(INPUT_DIM, args.ed, args.hd, OUTPUT_DIM,
                     n_layers=args.layer,
                     use_bidirectional=args.bidirectional,
                     use_dropout=args.dropout).to(device)
    elif args.model == 'lstm_attn':
        print("Model: LSTM with Attention")
        model = LSTM_with_Attention(INPUT_DIM, args.ed, args.hd, OUTPUT_DIM,
                                    n_layers=args.layer,
                                    use_bidirectional=args.bidirectional,
                                    use_dropout=args.dropout).to(device)
    elif args.model == 'cnn':
        print("Model: CNN")
        model = CNN(INPUT_DIM, args.ed, args.filter, args.filter_size, OUTPUT_DIM,
                    args.dropout).to(device)

    if args.word_vector:
        model.embedding.weight.data.copy_(dataset.TEXT.vocab.vectors)

    if args.optim == 'sgd':
        print("Optim: SGD")
        optimizer = optim.SGD(model.parameters(), lr=args.lr)
    elif args.optim == 'adam':
        print("Optim: Adam")
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    criterion = nn.BCEWithLogitsLoss(reduction='mean').to(device)

    best_acc = 0
    for epoch in range(args.epoch):
        train_loss, train_acc = train(model, dataset.dataloader['train'], optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, dataset.dataloader['dev'], criterion)
        test_loss, test_acc = evaluate(model, dataset.dataloader['test'], criterion)
        print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc * 100:.2f}%, '
              f'Val. Loss: {valid_loss:.3f}, Val. Acc: {valid_acc * 100:.2f}%, '
              f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc * 100:.2f}%')
        writer.add_scalars('data/loss', {
            'train': train_loss,
            'val': valid_loss,
            'test': test_loss,
        }, epoch + 1)
        writer.add_scalars('data/acc', {
            'train': train_acc,
            'val': valid_acc,
            'test': test_acc,
        }, epoch + 1)

        if best_acc <= valid_acc:
            best_acc = valid_acc
            acc_result = test_acc
            pth = model.state_dict()
            # `args.model in 'cnn'` would test substring membership; compare explicitly
            if args.model == 'cnn':
                filename = "checkpoints/{}_{}_bs{}_filter{}_acc{:.03f}.pth".format(
                    args.model, args.optim, args.batch_size, 100, test_acc)
            elif args.model in ['lstm', 'rnn', 'lstm_attn']:
                filename = "checkpoints/{}_{}_bs{}_hd{}_acc{:.03f}.pth".format(
                    args.model, args.optim, args.batch_size, args.hd, test_acc)

    writer.add_text('Test acc', str(acc_result))
    torch.save(pth, filename)
def train_model(args):
    # Hyper Parameters
    sequence_length = args.seq_len
    input_size = args.input_size
    hidden_size = args.hidden_size
    num_layers = args.num_layers
    num_classes = args.num_classes
    batch_size = args.batch_size
    num_epochs = args.num_epochs
    learning_rate = args.learning_rate
    dropout = args.dropout

    # Create the dataset
    train_dataset = create_dataset('data/train/', timesteps=sequence_length)
    train_loader = dataloader(train_dataset, batch_size=batch_size)
    test_dataset = create_dataset('data/test/', timesteps=sequence_length)
    test_loader = dataloader(test_dataset, batch_size=batch_size)

    # Define model and loss
    rnn = RNN('LSTM', input_size, hidden_size, num_layers, num_classes, dropout)
    criterion = nn.CrossEntropyLoss()
    if args.cuda:  # switch to cuda
        rnn, criterion = rnn.cuda(), criterion.cuda()

    # Adam Optimizer
    optimizer = torch.optim.Adam(rnn.parameters(), learning_rate)

    # Train the Model
    i = 0  # updates
    best_test_acc = 0.0
    for epoch in range(num_epochs):
        # Generate random batches every epoch
        train_loader = dataloader(train_dataset, batch_size)
        for batch_X, batch_y in train_loader:
            # points = pack_padded_sequence(torch.from_numpy(batch_X), batch_seq_lens)
            points = torch.from_numpy(batch_X)
            labels = torch.from_numpy(batch_y)
            if args.cuda:
                points, labels = points.cuda(), labels.cuda()

            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = rnn(points)  # final hidden state
            # outputs = pad_packed_sequence(outputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss.data[0] was removed in PyTorch 0.4; use loss.item()
            print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, num_epochs, loss.item()))

            if i % 100 == 0:  # every 100 updates, evaluate on test set
                # print("training accuracy = %.4f" % eval_model(rnn, train_loader))
                test_acc = eval_model(rnn, test_loader)
                print("test accuracy = %.4f" % test_acc)
                if test_acc > best_test_acc:
                    print("best test accuracy found")
                    best_test_acc = test_acc
                    torch.save(rnn.state_dict(), 'rnn_best.pkl')
            i += 1
    return valid_loss / len(test_dataset), valid_acc / len(test_dataset)


if __name__ == "__main__":
    if args.mode == 'train':
        best_valid_acc = 0.0
        for epoch in range(args.epoch):
            start_time = time.time()
            train_loss, train_acc = train(args.train)
            valid_loss, valid_acc = test(args.dev)

            # save best model (and update the running best, or every improving
            # epoch after the first would trigger a save)
            if valid_acc > best_valid_acc:
                best_valid_acc = valid_acc
                save_checkpoint({
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'valid_acc': valid_acc
                }, True)

            secs = int(time.time() - start_time)
            mins = secs // 60
            secs = secs % 60

            writer.add_scalars("Loss", {
                'train': train_loss,
                'valid': valid_loss
            }, epoch)
            writer.add_scalars("Acc", {
                'train': train_acc,
                'valid': valid_acc
            }, epoch)
    lr=1e-3,
)
criterion = nn.BCEWithLogitsLoss()

# training
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')

    print(f'Epoch: {epoch + 1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc * 100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc * 100:.2f}%')

# testing
model.load_state_dict(torch.load('tut1-model.pt'))
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
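# epoch_time is called above but not defined in this excerpt. The usual helper
# carrying this name in the torchtext sentiment tutorials (an assumption, not
# necessarily the author's code) splits elapsed seconds into minutes and seconds:
def epoch_time(start_time, end_time):
    elapsed = end_time - start_time
    mins = int(elapsed / 60)
    secs = int(elapsed - mins * 60)
    return mins, secs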
def main():
    # saved model path
    save_path = 'history/model_fold_'
    test_file = 'data/test120.csv'

    # create dataset
    # filename = 'data/golden_400.csv'
    # filename = 'data/golden_train_and_val.csv'
    filename = 'data/train_val120.csv'
    ds = ClaimsDataset(filename)
    vocab_size = len(ds.vocab)
    pad_id = ds.vocab.token2id.get('<pad>')

    embedding_size = 128  # 128 for torch embeddings, 300 for pre-trained
    hidden_size = 24
    batch_size = 64
    nb_epochs = 150
    lr = 1e-4
    max_norm = 5
    folds = 10
    criterion = nn.NLLLoss(weight=torch.Tensor([1.0, 2.2]).cuda())

    # For testing phase
    fold_scores = {}
    test_set = ClaimsDataset(test_file)
    dl_test = torch_data.DataLoader(test_set, batch_size=batch_size, shuffle=True)

    mean = []  # holds the mean validation accuracy of every fold
    print("\nTraining\n")
    logger.info(utils.get_time())

    for i in range(folds):
        print("\nFold: {}\n".format(i))
        losses = defaultdict(list)
        train, val = utils.split_dataset(ds, i)
        print("Train size: {} \t Validate size: {}".format(len(train), len(val)))
        dl_train = torch_data.DataLoader(train, batch_size=batch_size, shuffle=True)
        dl_val = torch_data.DataLoader(val, batch_size=batch_size, shuffle=True)

        model = RNN(vocab_size, embedding_size, hidden_size, pad_id, ds)
        model = utils.cuda(model)
        model.zero_grad()

        # When using pre-trained embeddings, uncomment below; otherwise, use the second statement
        # parameters = list([parameter for parameter in model.parameters()
        #                    if parameter.requires_grad])
        parameters = list(model.parameters())
        optim = torch.optim.Adam(parameters, lr=lr, weight_decay=35e-3, amsgrad=True)

        phases, loaders = ['train', 'val'], [dl_train, dl_val]
        tr_acc, v_acc = [], []

        for epoch in range(nb_epochs):
            for p, loader in zip(phases, loaders):
                if p == 'train':
                    model.train()
                else:
                    model.eval()

                ep_loss, out_list, label_list = [], [], []
                for _, inputs in enumerate(loader):
                    optim.zero_grad()
                    claim, labels = inputs
                    labels = utils.variable(labels)
                    out = model(claim)
                    out_list.append(utils.normalize_out(out))
                    label_list.append(labels)
                    out = torch.log(out)
                    loss = criterion(out, labels)

                    if p == 'train':
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(parameters, max_norm=max_norm)
                        optim.step()
                    ep_loss.append(loss.item())

                losses[p].append(np.mean(ep_loss))
                acc = utils.get_accuracy(label_list, out_list)
                if p == 'train':
                    tr_acc.append(acc)
                else:
                    v_acc.append(acc)
                print("Epoch: {} \t Phase: {} \t Loss: {:.4f} \t Accuracy: {:.3f}"
                      .format(epoch, p, loss, acc))

        utils.plot_loss(losses['train'], losses['val'], tr_acc, v_acc, filename, i)
        mean.append(np.mean(v_acc))

        logger.info("\n Fold: " + str(i))
        logger.info("Train file=> " + filename + "\nParameters=> \nBatch size: " + str(batch_size) +
                    "\nHidden size: " + str(hidden_size) + "\nMax_norm: " + str(max_norm) +
                    "\nL2 Reg/weight decay: " + str(optim.param_groups[0]['weight_decay']) +
                    "\nLoss function: " + str(criterion))
        logger.info('Final train accuracy: ' + str(tr_acc[-1]))
        logger.info('Final validation accuracy: ' + str(v_acc[-1]))

        # Save model for current fold
        torch.save(model.state_dict(), save_path + str(i))

        test_f1, test_acc = [], []
        for _, inp in enumerate(dl_test):
            claim, label = inp
            label = utils.variable(label)
            model.eval()
            out = model(claim)
            y_pred = utils.normalize_out(out)
            test_f1.append(utils.get_f1(label, y_pred))
            test_acc.append(metrics.accuracy_score(label, y_pred))

        t_f1, t_acc = np.mean(test_f1), np.mean(test_acc)
        fold_scores[i] = dict([('F1', t_f1), ('Accuracy', t_acc)])
        print("\tf1: {:.3f} \t accuracy: {:.3f}".format(t_f1, t_acc))
        # logger.info('\nTest f1: '+str(t_f1)+'\nTest Accuracy: '+str(t_acc))

    logger.info('Mean accuracy over 10 folds: \t' + str(np.mean(mean)))
    logger.info(fold_scores)
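# utils.split_dataset(ds, i) is not shown in this excerpt. A plausible k-fold
# split helper, written here only as a hypothetical sketch (the real
# implementation, including whether it shuffles, is unknown): fold i becomes
# the validation subset and the remaining indices form the training subset.
from torch.utils.data import Subset

def split_dataset(ds, fold, n_folds=10):
    fold_size = len(ds) // n_folds
    val_idx = list(range(fold * fold_size, (fold + 1) * fold_size))
    val_set = set(val_idx)
    train_idx = [j for j in range(len(ds)) if j not in val_set]
    return Subset(ds, train_idx), Subset(ds, val_idx)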
def main(args): print("in main") #creating tensorboard object tb_writer = SummaryWriter(log_dir=os.path.join(args.outdir, "tb/"), purge_step=0) #Loading data train_dl, val_dl, vocab, label_map = fetch_dataset(args.datapath) #Defining loss criterion = nn.CrossEntropyLoss() #Defining optimizer vocab_size = len(vocab) num_classes = len(label_map) model = RNN(vocab_size, num_classes, args.embed_dim, args.hidden_size) optimizer = optim.Adam(model.parameters(), lr=args.lr) #Looping training data for epoch in range(args.epochlen): running_loss, test_loss = 0.0, 0.0 count = 0 correct = 0 total_labels = 0 all_train_loss = [] all_test_loss = [] model.train() best_accuracy = 0 for i, batch in enumerate(train_dl): seqs, labels = batch #names = Vocab.get_string(batch) #zero the parameter gradients optimizer.zero_grad() #forward + backward + optimize pred_outputs = model(seqs) loss = criterion(pred_outputs, labels) loss.backward() optimizer.step() running_loss += loss.item() count += 1 correct += (torch.argmax(pred_outputs, dim=1) == labels).sum().item() total_labels += labels.size(0) total_loss = running_loss / count all_train_loss.append(total_loss) accuracy = (correct * 100) / total_labels tb_writer.add_scalar('Train_Loss', running_loss, epoch) tb_writer.add_scalar('Train_Accuracy', accuracy, epoch) count = 0 model.eval() for batch in val_dl: seqs, labels = batch pred_outputs = model(seqs) loss = criterion(pred_outputs, labels) test_loss += loss.item() count += 1 correct += (torch.argmax(pred_outputs, dim=1) == labels).sum().item() total_labels += labels.size(0) total_test_loss = test_loss / count all_test_loss.append(total_test_loss) test_accuracy = (correct * 100) / total_labels print( f"Epoch : {str(epoch).zfill(2)}, Training Loss : {round(total_loss, 4)}, Training Accuracy : {round(accuracy, 4)}," f" Test Loss : {round(total_test_loss, 4)}, Test Accuracy : {round(test_accuracy, 4)}" ) tb_writer.add_scalar('Test_Loss', test_loss, epoch) tb_writer.add_scalar('Test_Accuracy', test_accuracy, epoch) if best_accuracy < test_accuracy: best_accuracy = test_accuracy torch.save(model.state_dict(), args.outdir + args.modelname + str(epoch)) # Plot confusion matrix y_true = [] y_pred = [] for data in val_dl: seq, labels = data outputs = model(seq) predicted = torch.argmax(outputs, dim=1) y_true += labels.tolist() y_pred += predicted.tolist() cm = confusion_matrix(np.array(y_true), np.array(y_pred)) disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_map.keys()) disp.plot(include_values=True, cmap='viridis', ax=None, xticks_rotation='horizontal', values_format=None) plt.show()
def main():
    logging.basicConfig(filename='logs/train.log', level=logging.DEBUG)

    # saved model path
    save_path = 'history/trained_model'

    # input file
    # filename = 'data/train_and_test.csv'
    filename = 'data/golden_400.csv'

    embedding_size = 300  # 128 for torch embeddings, 300 for pre-trained
    hidden_size = 24
    batch_size = 64
    nb_epochs = 200
    lr = 1e-4
    max_norm = 5
    folds = 3

    # Dataset
    ds = ClaimsDataset(filename)
    vocab_size = len(ds.vocab)
    pad_id = ds.vocab.token2id.get('<pad>')

    test_len = val_len = math.ceil(len(ds) * .10)
    train_len = len(ds) - (val_len + test_len)
    print("\nTrain size: {}\tValidate size: {}\tTest Size: {}".format(train_len, val_len, test_len))

    # randomly split dataset into train, validation & test sets
    d_tr, d_val, d_te = torch.utils.data.dataset.random_split(ds, [train_len, val_len, test_len])

    # data loaders
    dl_tr = torch.utils.data.DataLoader(d_tr, batch_size=batch_size)
    dl_val = torch.utils.data.DataLoader(d_val, batch_size=batch_size)
    dl_test = torch.utils.data.DataLoader(d_te, batch_size=batch_size)

    model = RNN(vocab_size, embedding_size, hidden_size, pad_id, ds)
    model = utils.cuda(model)
    model.zero_grad()

    parameters = list([parameter for parameter in model.parameters()
                       if parameter.requires_grad])
    # parameters = list(model.parameters())  # comment out when using pre-trained embeddings
    optim = torch.optim.Adam(parameters, lr=lr, weight_decay=35e-3, amsgrad=True)  # optimizer
    criterion = nn.NLLLoss(weight=torch.Tensor([1.0, 2.2]).cuda())
    losses = defaultdict(list)

    print("\nTraining started: {}\n".format(utils.get_time()))
    phases, loaders = ['train', 'val'], [dl_tr, dl_val]
    tr_acc, v_acc = [], []

    for epoch in range(nb_epochs):
        for phase, loader in zip(phases, loaders):
            if phase == 'train':
                model.train()
            else:
                model.eval()

            ep_loss, out_list, label_list = [], [], []
            for i, inputs in enumerate(loader):
                optim.zero_grad()
                claim, labels = inputs
                labels = utils.variable(labels)
                out = model(claim)
                out_list.append(utils.normalize_out(out))  # collect output from every batch
                label_list.append(labels)
                out = torch.log(out)
                # criterion.weight = get_weights(labels)
                loss = criterion(out, labels)

                # back propagate, for training only
                if phase == 'train':
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(parameters, max_norm=max_norm)  # exploding gradients? say no more!
                    optim.step()
                ep_loss.append(loss.item())

            losses[phase].append(np.mean(ep_loss))  # record average loss of every phase at each epoch
            acc = utils.get_accuracy(label_list, out_list)
            if phase == 'train':
                tr_acc.append(acc)
            else:
                v_acc.append(acc)
            print("Epoch: {} \t Phase: {} \t Loss: {:.4f} \t Accuracy: {:.3f}"
                  .format(epoch, phase, loss, acc))

    print("\nTime finished: {}\n".format(utils.get_time()))
    utils.plot_loss(losses['train'], losses['val'], tr_acc, v_acc, filename, -1)

    logging.info("\nTrain file=> " + filename + "\nParameters=> \nBatch size: " + str(batch_size) +
                 "\nHidden size: " + str(hidden_size) + "\nMax_norm: " + str(max_norm) +
                 "\nL2 Reg/weight decay: " + str(optim.param_groups[0]['weight_decay']) +
                 "\nLoss function: \n" + str(criterion))
    logging.info('Final train accuracy: ' + str(tr_acc[-1]))
    logging.info('Final validation accuracy: ' + str(v_acc[-1]))

    # Save the model
    torch.save(model.state_dict(), save_path)
    # test(model, batch_size)

    # predict
    f1_test, acc_test = [], []
    for i, inputs in enumerate(dl_test):
        claim, label = inputs
        label = utils.variable(label.float())
        out = model(claim)
        y_pred = utils.normalize_out(out)
        f1_test.append(utils.get_f1(label, y_pred))
        acc_test.append(metrics.accuracy_score(label, y_pred))

    print("\t\tF1: {:.3f}\tAccuracy: {:.3f}".format(np.mean(f1_test), np.mean(acc_test)))
    logging.info('\nTest f1: ' + str(np.mean(f1_test)) +
                 '\nTest Accuracy: ' + str(np.mean(acc_test)))
def main(args):
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr
    data_loader = get_data_loader(args.gt_path, args.descriptors_path, args.json_labels_path, args.bs)

    model = RNN(num_descriptors=args.num_descriptors,
                hidden_size=args.hidden_size,
                lstm_in_size=args.input_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # model_loss = torch.nn.BCEWithLogitsLoss()
    model_loss = Loss()

    losses = []
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            loss_epoch = []
            for step, (descriptors, labels) in enumerate(data_loader):
                if torch.cuda.is_available():
                    descriptors = descriptors.cuda()
                    labels = labels.cuda()

                model.zero_grad()
                attention = model(descriptors)
                loss = model_loss(attention, labels)
                loss.backward()
                optimizer.step()

                loss_epoch.append(loss.cpu().detach().numpy())
                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' + str(len(data_loader)) +
                      ' - Loss: ' + str(float(loss)))

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/models_361_dropout', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")
    iterator, TEXT, LABEL, tabular_dataset = load_data(stage="train", args=args, indices=labeled)
    print("Created the iterators")

    INPUT_DIM = len(TEXT.vocab)
    OUTPUT_DIM = 1
    BIDIRECTIONAL = True
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = RNN(
        INPUT_DIM,
        args["EMBEDDING_DIM"],
        args["HIDDEN_DIM"],
        OUTPUT_DIM,
        args["N_LAYERS"],
        BIDIRECTIONAL,
        args["DROPOUT"],
        PAD_IDX,
    )
    model = model.to(device=device)

    pretrained_embeddings = TEXT.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)
    # zero out the embeddings of the <unk> and <pad> tokens
    unk_idx = TEXT.vocab.stoi["<unk>"]
    pad_idx = TEXT.vocab.stoi["<pad>"]
    model.embedding.weight.data[unk_idx] = torch.zeros(args["EMBEDDING_DIM"])
    model.embedding.weight.data[pad_idx] = torch.zeros(args["EMBEDDING_DIM"])

    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()
    criterion = criterion.to(device)

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        model.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    model.train()  # turn on dropout, etc.
    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):
        running_loss = 0
        i = 0
        for batch in iterator:
            # print("Batch is", batch.review[0])
            text, text_length = batch.review
            labels = batch.sentiment
            text = text.to(device)
            text_length = text_length.to(device)

            optimizer.zero_grad()
            output = model(text, text_length)
            loss = criterion(torch.squeeze(output).float(), labels.float())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # `if i % 10:` would fire on every batch except multiples of 10
            if i % 10 == 0:
                print("epoch: {} batch: {} running-loss: {}".format(
                    epoch + 1, i + 1, running_loss / 1000), end="\r")
                running_loss = 0
            i += 1

    print("Finished Training. Saving the model as {}".format(ckpt_file))
    ckpt = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))
    return
            step += 1
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            # track
            tracker['NLL'].append(loss.item())

            # print statistics
            if itr % print_every == 0 or itr + 1 == len(dataloader):
                print("%s Batch %04d/%04d, NLL-Loss %.4f, "
                      % (split.upper(), itr, len(dataloader), tracker['NLL'][-1]))

        samples = len(datasets[split])
        print("%s Epoch %02d/%02d, NLL %.4f, PPL %.4f"
              % (split.upper(), ep, epoch, totals['NLL'] / samples,
                 math.exp(totals['NLL'] / totals['words'])))

        # save checkpoint
        checkpoint_path = os.path.join(save_path, "E%02d.pkl" % ep)
        torch.save(model.state_dict(), checkpoint_path)
        print("Model saved at %s\n" % checkpoint_path)

end_time = time.time()
print('Total cost time',
      time.strftime("%H hr %M min %S sec", time.gmtime(end_time - start_time)))

# save learning results
sio.savemat("results.mat", tracker)
class Train(object):
    def __init__(self, epoch, sn=False):
        # Device configuration
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        # Hyper-parameters
        self.__sequence_length = 50
        self.__input_size = 78
        self.__hidden_size = 256
        self.__num_layers = 3
        self.__num_classes = 7
        self.__batch_size = 100  # 256
        self.__num_epochs = epoch
        self.__learning_rate = 0.00005
        self.__weight_decay = 0.0001
        self.__vat_alpha = 0.1

        self.model = RNN(self.__input_size, self.__hidden_size, self.__num_layers,
                         self.__num_classes, sn).to(self.device)
        self.vat_loss = VATLoss(xi=0.1, eps=1.0, ip=1)
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.__learning_rate,
                                          weight_decay=self.__weight_decay)
        self.data_load()

    def data_load(self):
        # load data
        train_list = dataloader.make_datapath_list(phase="train")
        val_list = dataloader.make_datapath_list(phase="val")
        train_dataset = dataloader.LoadDataset(file_list=train_list, phase='train')
        val_dataset = dataloader.LoadDataset(file_list=val_list, phase='val')
        self.train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=self.__batch_size, shuffle=True)
        self.val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=self.__batch_size, shuffle=True)

    def train(self, save_name, vat=False):
        # Train the model
        total_step = len(self.train_loader)
        for epoch in range(self.__num_epochs):
            print(f'epoch = {epoch}')
            for i, (images, labels) in enumerate(self.train_loader):
                images = images.to(self.device)
                labels = labels.to(self.device)

                # Forward pass; with VAT, add the smoothing term to the loss
                if vat:
                    lds = self.vat_loss(self.model, images)
                    outputs = self.model(images, self.device)
                    loss = self.criterion(outputs, labels) + self.__vat_alpha * lds
                else:
                    outputs = self.model(images, self.device)
                    loss = self.criterion(outputs, labels)

                # Backward and optimize
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                if (i + 1) % self.__num_epochs == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                        epoch + 1, self.__num_epochs, i + 1, total_step, loss.item()))

            # periodic snapshots
            if epoch in (10, 20, 40, 60, 80):
                torch.save(self.model.state_dict(), "{}_{}.ckpt".format(save_name, epoch))

        # Save the model checkpoint
        torch.save(self.model.state_dict(), save_name)

    def test(self):
        # Test the model
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in self.val_loader:
                images = images.reshape(-1, self.__sequence_length,
                                        self.__input_size).to(self.device)
                labels = labels.to(self.device)
                outputs = self.model(images, self.device)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print('Test Accuracy of the model on the 7000 test data: {} %'.format(
                100 * correct / total))
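# A minimal usage sketch for the Train class above. The epoch count and save
# name are illustrative assumptions, not values from the repository; the class
# API (constructor, train, test) comes from the code itself.
trainer = Train(epoch=100, sn=True)
trainer.train(save_name='rnn_vat.ckpt', vat=True)  # VAT-regularized run
trainer.test()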
    # retrain flag == false
    if not args.retrain:
        print("Start a new training...")
        # training
        train(args.epochs)
        logger.info("Training process finished")
        # test
        with torch.no_grad():
            evaluate()
        # save model
        torch.save(model.state_dict(), './MNIST/models/{}.ckpt'.format(model_config_name))


###############################################################################
# Retraining code
###############################################################################
def sparsify_model(state_dict):
    state_mask_dict = {}
    for k, v in state_dict.items():
        if 'lstm1' in k:
            if 'weight_x' in k:
                state_dict[k] = set_to_zero_sparsity(v, sparsity=args.w_sp[0])
            else:
                state_dict[k] = set_to_zero_sparsity(v, sparsity=args.w_sp[1])
dataset = AudioDataset('data')
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)

rnn = RNN(input_size, hidden_size, num_layers, num_classes)
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)

for epoch in range(5):
    iter_data = iter(train_loader)
    for i in range(len(dataset)):
        audio, label = next(iter_data)  # iterator.next() is Python 2 only
        # reshape to (seq_len, batch=1, n_mfcc=13)
        audio = audio.reshape((audio.shape[1], 1, 13))
        audio = audio.type(torch.float32)

        output = rnn(audio)
        output = output[0].unsqueeze(0)
        loss = loss_F(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

torch.save(rnn.state_dict(), './rnn.pth')
        # Generate some text using the model
        if batch % GEN_TEXT_INTERVAL == 0:
            rnn.eval()
            gen_str = greedy_search(rnn, dataset, dataset.get_start_symbol().split(),
                                    TRAINING_PROMPT_LENGTH, device=device)
            gen_str = gen_str.replace("\n", " ")
            rnn.train()

        # Save the model
        if avg_loss < best_loss and len(loss_arr) > SAVE_LOSS_MIN:
            best_loss = avg_loss
            save_state_dict(rnn.state_dict(), STATE_DICT_PATH, epoch, batch, avg_loss)
            lr.model_was_saved(epoch)
            # if avg_loss < 2:
            sys.stdout.write("\rSaved model at epoch {}, batch {} with loss {}.\n".format(
                epoch, batch, avg_loss))
            sys.stdout.flush()

        # Display progress
        if batch % PRINT_INTERVAL == 0:
            avg_loss = round(mean(loss_arr), LOSS_PRECISION)
            percentage = 100 * batch // num_batches