def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # Recover the hyperparameters that were saved alongside the weights so
    # the network can be rebuilt with the exact same architecture.
    info = {}
    info_path = os.path.join(model_dir, 'model_info.pth')
    with open(info_path, 'rb') as info_file:
        info = torch.load(info_file)
    print("model_info: {}".format(info))

    # Pick GPU when available, then reconstruct the classifier.
    cuda_available = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_available else "cpu")
    model = LSTMClassifier(
        info['embedding_dim'],
        info['hidden_dim'],
        info['vocab_size'],
    )

    # Restore the trained parameters.
    weights_path = os.path.join(model_dir, 'model.pth')
    with open(weights_path, 'rb') as weights_file:
        model.load_state_dict(torch.load(weights_file))

    # Attach the saved word_dict so downstream inference code can tokenize.
    vocab_path = os.path.join(model_dir, 'word_dict.pkl')
    with open(vocab_path, 'rb') as vocab_file:
        model.word_dict = pickle.load(vocab_file)

    # Inference-only usage: move to the device and disable train-mode layers.
    model.to(device).eval()

    print("Done loading model.")
    return model
def model_fn(model_dir):
    """ Load the PyTorch model from the `model_dir` directory """
    print("Loading model: Beginning...\n")

    # Read back the hyperparameters stored next to the weights.
    model_info = {}
    info_path = os.path.join(model_dir, 'model_info.pth')
    with open(info_path, 'rb') as info_file:
        model_info = torch.load(info_file)
    print("*** Model info: {}".format(model_info))

    # Select the compute device (GPU when present).
    has_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if has_cuda else "cpu")
    print("*** Device: {}".format(device))

    # Rebuild the classifier with the saved architecture parameters.
    model = LSTMClassifier(
        model_info['embedding_dim'],
        model_info['hidden_dim'],
        model_info['vocab_size'],
    )

    # Restore the trained weights.
    weights_path = os.path.join(model_dir, 'model.pth')
    with open(weights_path, 'rb') as weights_file:
        model.load_state_dict(torch.load(weights_file))

    # Attach the saved vocabulary mapping for tokenization at inference time.
    vocab_path = os.path.join(model_dir, 'word_dict.pkl')
    with open(vocab_path, 'rb') as vocab_file:
        model.word_dict = pickle.load(vocab_file)

    # Evaluation-only: place on device and switch off train-mode behavior.
    model.to(device).eval()

    print("*** Model:\n{}".format(model))

    print("\nLoading model: Done...")
    return model
def main():
    """Training and validation loop.

    Builds (or resumes) the model/optimizer, trains for the configured number
    of epochs, validates after each epoch, checkpoints on the best validation
    F1, and appends per-epoch metrics to ``log.txt``.

    Relies on module-level configuration (``lr``, ``batch_size``, ``workers``,
    ``epochs``, ``data_folder``, ``device``, ``checkpoint`` …) and mutates the
    listed globals.
    """
    global epochs_since_improvement, start_epoch, label_map, best_F1, epoch, checkpoint

    # Initialize the model or resume from a checkpoint.
    if checkpoint is None:
        model = LSTMClassifier()
        # Removed dead code: a bias/not-bias parameter split that fed only a
        # commented-out SGD optimizer. Adam is applied to all parameters.
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch']
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_F1 = checkpoint['best_F1']
        print('\nLoaded checkpoint from epoch %d. Best F1 so far is %.3f.\n' % (start_epoch, best_F1))
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to default device.
    model = model.to(device)
    print(model)

    criterion = FocalLoss()

    # Custom dataloaders (each dataset supplies its own collate_fn).
    train_dataset = ICDARDataset(data_folder, split='train')
    val_dataset = ICDARDataset(data_folder, split='test')
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        collate_fn=train_dataset.collate_fn, num_workers=workers,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=True,
        collate_fn=val_dataset.collate_fn, num_workers=workers,
        pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training.
        train_loss = train(train_loader=train_loader,
                           model=model,
                           criterion=criterion,
                           optimizer=optimizer,
                           epoch=epoch)

        # One epoch's validation.
        val_loss, accuracy, F1 = validate(val_loader=val_loader,
                                          model=model,
                                          criterion=criterion)

        # Did validation F1 improve?
        is_best = F1 > best_F1
        best_F1 = max(F1, best_F1)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint (flagged as best when F1 improved this epoch).
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        val_loss, best_F1, is_best)

        # Append this epoch's metrics to a plain-text log.
        # Fix: a space was missing before 'accuracy:', fusing it with the
        # preceding value in log.txt.
        with open('log.txt', 'a+') as f:
            f.write('epoch:' + str(epoch) + ' train loss:' + str(train_loss)
                    + ' val loss:' + str(val_loss)
                    + ' accuracy:' + str(accuracy) + '\n')
# Hyperparameters for the LSTM classifier training run.
batch_size = 32
output_size = 2          # two output classes (binary classification)
hidden_size = 228        # NOTE(review): unusual width — confirm 228 is intended (not 256)
embedding_length = 300   # embedding dimension; matches common 300-d pre-trained vectors
num_epochs = 20

# Build the model; `vocab_size`, `word_embeddings`, `lr`, `train_iter` and
# `val_iter` are assumed to be defined earlier in this file — TODO confirm.
model = LSTMClassifier(vocab_size=vocab_size, output_size=output_size,
                       embedding_dim=embedding_length, hidden_dim=hidden_size,
                       n_layers=2, weights=word_embeddings)

# Place the model on GPU 0 when available, otherwise CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)

optim = torch.optim.Adam(model.parameters(), lr=lr)
loss = torch.nn.CrossEntropyLoss()

# Run training; returns per-run loss/accuracy curves.
train_loss, train_acc, val_loss, val_acc = train_model(model=model,
                                                       train_iter=train_iter,
                                                       val_iter=val_iter,
                                                       optim=optim,
                                                       loss=loss,
                                                       num_epochs=num_epochs,
                                                       batch_size=batch_size)

# Reload the weights from 'state_dict.pth' — presumably written by
# train_model (e.g. its best checkpoint); verify against train_model.
model.load_state_dict(torch.load('state_dict.pth'))
model.eval()

# Accumulator for downstream prediction results.
results_target = list()
def run(proc_id, n_gpus, devices, args):
    """Entry point for a single (possibly distributed) training process.

    Seeds RNGs, initializes the NCCL process group when multiple GPUs are
    used, builds the dataset/dataloaders, and either (a) evaluates a
    pre-trained model when ``args.load_model`` is set, or (b) trains a fresh
    ``LSTMClassifier`` and, on rank 0, reloads the best saved model and
    bookkeeps final results.
    """
    set_seed(args.seed)
    dev_id = devices[proc_id]
    if n_gpus > 1:
        dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
            master_ip='127.0.0.1', master_port=args.tcp_port)
        world_size = n_gpus
        # NOTE(review): rank is taken from the device id, not proc_id; these
        # coincide only when devices == [0..n_gpus-1] — confirm.
        torch.distributed.init_process_group(backend="nccl",
                                             init_method=dist_init_method,
                                             world_size=world_size,
                                             rank=dev_id)
    device = torch.device(dev_id)

    dataset = Dataset(
        proc_id=proc_id,
        data_dir=args.save_dir,
        train_fname=args.train_fname,
        preprocessed=args.preprocessed,
        lower=args.lower,
        vocab_max_size=args.vocab_max_size,
        emb_dim=args.emb_dim,
        save_vocab_fname=args.save_vocab_fname,
        verbose=True,
    )
    train_dl, valid_dl, test_dl = \
        dataset.get_dataloader(proc_id=proc_id, n_gpus=n_gpus, device=device,
                               batch_size=args.batch_size)

    validator = Validator(dataloader=valid_dl,
                          save_dir=args.save_dir,
                          save_log_fname=args.save_log_fname,
                          save_model_fname=args.save_model_fname,
                          valid_or_test='valid',
                          vocab_itos=dataset.INPUT.vocab.itos,
                          label_itos=dataset.TGT.vocab.itos)
    tester = Validator(dataloader=test_dl,
                       save_log_fname=args.save_log_fname,
                       save_dir=args.save_dir,
                       valid_or_test='test',
                       vocab_itos=dataset.INPUT.vocab.itos,
                       label_itos=dataset.TGT.vocab.itos)
    predictor = Predictor(args.save_vocab_fname)

    # Evaluation-only path: load a pre-trained model and score the test set.
    # (Removed a leftover debugging breakpoint: `import pdb; pdb.set_trace()`.)
    if args.load_model:
        predictor.use_pretrained_model(args.load_model, device=device)
        predictor.pred_sent(dataset.INPUT)
        tester.final_evaluate(predictor.model)
        return

    # Training path: build the model from the dataset's embedding vectors.
    model = LSTMClassifier(emb_vectors=dataset.INPUT.vocab.vectors,
                           emb_dropout=args.emb_dropout,
                           lstm_dim=args.lstm_dim,
                           lstm_n_layer=args.lstm_n_layer,
                           lstm_dropout=args.lstm_dropout,
                           lstm_combine=args.lstm_combine,
                           linear_dropout=args.linear_dropout,
                           n_linear=args.n_linear,
                           n_classes=len(dataset.TGT.vocab))
    if args.init_xavier:
        model.apply(init_weights)
    model = model.to(device)
    args = model_setup(proc_id, model, args)

    train(proc_id, n_gpus, model=model, train_dl=train_dl,
          validator=validator, tester=tester,
          epochs=args.epochs, lr=args.lr,
          weight_decay=args.weight_decay)

    # Rank 0 reloads the best saved model and records final results.
    if proc_id == 0:
        predictor.use_pretrained_model(args.save_model_fname, device=device)
        bookkeep(predictor, validator, tester, args, dataset.INPUT)
class Trainer:
    """Train/evaluate an intent classifier, reporting metrics via nsml.

    NOTE(review): `nsml.report(scope=locals(), ...)` passes local variables
    by *name* — do not rename locals in `train()` without checking what the
    nsml reporter reads from that scope.
    """

    def __init__(self, config, n_gpu, vocab, train_loader=None, val_loader=None):
        # config: run configuration (expects .n_label, .lr, .num_epoch,
        # .log_interval, .val_interval); vocab supplies vocab_size().
        self.config = config
        self.vocab = vocab
        self.n_gpu = n_gpu
        self.train_loader = train_loader
        self.val_loader = val_loader

        # Build model (`device` is a module-level global — TODO confirm).
        vocab_size = self.vocab.vocab_size()
        self.model = LSTMClassifier(self.config, vocab_size, self.config.n_label)
        self.model.to(device)

        if self.n_gpu > 1:
            self.model = nn.DataParallel(self.model)

        # Build optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr)

        # Build criterion
        self.criterion = nn.CrossEntropyLoss()

    def train(self):
        """Run the training loop; saves via nsml whenever validation F1 improves."""
        best_f1 = 0.0
        best_acc = 0.0
        global_step = 0
        # NOTE(review): batch_f1/batch_acc are never reset at log intervals
        # (unlike batch_loss), so their reported means are cumulative over
        # the whole run — confirm this is intended.
        batch_f1 = []
        batch_acc = []
        for epoch in range(self.config.num_epoch):
            batch_loss = []
            for step, batch in enumerate(self.train_loader):
                self.model.train()
                # Move tensors to the device, then sort the batch
                # (presumably by sequence length for the LSTM — verify
                # against sort_batch).
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch
                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(
                    outputs['logits'].view(-1, self.config.n_label),
                    labels.view(-1))
                f1, acc = ic_metric(labels.cpu(), outputs['predicted_intents'].cpu())

                # DataParallel returns one loss per GPU; average them.
                if self.n_gpu > 1:
                    loss = loss.mean()

                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                global_step += 1

                batch_loss.append(loss.float().item())
                batch_f1.append(f1)
                batch_acc.append(acc)

                # Periodic training-metric report (also on the very first step).
                if (global_step == 1) or (global_step % self.config.log_interval == 0):
                    mean_loss = np.mean(batch_loss)
                    mean_f1 = np.mean(batch_f1)
                    mean_acc = np.mean(batch_acc)
                    batch_loss = []
                    nsml.report(summary=True, scope=locals(), epoch=epoch,
                                train_loss=mean_loss, step=global_step)

                # Periodic validation; snapshot whenever F1 improves.
                if (global_step > 0) and (global_step % self.config.val_interval == 0):
                    val_loss, val_f1, val_acc = self.evaluation()
                    nsml.report(summary=True, scope=locals(), epoch=epoch,
                                val_loss=val_loss, val_f1=val_f1,
                                val_acc=val_acc, step=global_step)

                    if val_f1 > best_f1:
                        best_f1 = val_f1
                        best_acc = val_acc
                        nsml.save(global_step)

    def evaluation(self):
        """Evaluate on the validation loader.

        Returns (mean_loss, f1, accuracy) computed over the whole set.
        """
        self.model.eval()
        total_loss = []
        preds = []
        targets = []
        with torch.no_grad():
            for step, batch in enumerate(self.val_loader):
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch
                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(
                    outputs['logits'].view(-1, self.config.n_label),
                    labels.view(-1))
                pred = outputs['predicted_intents'].squeeze(
                    -1).cpu().numpy().tolist()
                target = labels.cpu().numpy().tolist()
                preds.extend(pred)
                targets.extend(target)
                total_loss.append(loss.float().item())
        mean_loss = np.mean(total_loss)
        mean_f1, mean_acc = ic_metric(targets, preds)
        return mean_loss, mean_f1, mean_acc
# Determine the device and construct the model. device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = LSTMClassifier(model_info['embedding_dim'], model_info['hidden_dim'], model_info['vocab_size']) # Load the stored model parameters. model_path = os.path.join(model_dir, 'model.pth') with open(model_path, 'rb') as f: model.load_state_dict(torch.load(f)) # Load the saved word_dict. word_dict_path = os.path.join(model_dir, 'word_dict.pkl') with open(word_dict_path, 'rb') as f: model.word_dict = pickle.load(f) model.to(device).eval() print("Done loading model.") return model def _get_train_data_loader(batch_size, training_dir): print("Get train data loader.") train_data = pd.read_csv(os.path.join(training_dir, "train.csv"), header=None, names=None) train_y = torch.from_numpy(train_data[[0]].values).float().squeeze() train_X = torch.from_numpy(train_data.drop([0], axis=1).values).long() train_ds = torch.utils.data.TensorDataset(train_X, train_y) return torch.utils.data.DataLoader(train_ds, batch_size=batch_size)