def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # First, load the parameters used to create the model.
    model_info = {}
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)

    print("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = LSTMClassifier(model_info['embedding_dim'],
                           model_info['hidden_dim'], model_info['vocab_size'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    # Load the saved word_dict.
    word_dict_path = os.path.join(model_dir, 'word_dict.pkl')
    with open(word_dict_path, 'rb') as f:
        model.word_dict = pickle.load(f)

    model.to(device).eval()

    print("Done loading model.")
    return model
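
The `model_fn` above expects three artifacts in `model_dir`: `model_info.pth` with the constructor arguments, `model.pth` with the state dict, and `word_dict.pkl` with the vocabulary. A minimal sketch of the training-side save step, assuming the same imports as above and that `args`, `model` and `word_dict` exist in the training script:

# Hedged sketch of the save step that writes the three artifacts model_fn reads.
# `model_dir`, `args`, `model` and `word_dict` are assumptions from the training script.
model_info_path = os.path.join(model_dir, 'model_info.pth')
with open(model_info_path, 'wb') as f:
    torch.save({'embedding_dim': args.embedding_dim,
                'hidden_dim': args.hidden_dim,
                'vocab_size': args.vocab_size}, f)

with open(os.path.join(model_dir, 'word_dict.pkl'), 'wb') as f:
    pickle.dump(word_dict, f)

with open(os.path.join(model_dir, 'model.pth'), 'wb') as f:
    torch.save(model.cpu().state_dict(), f)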
def model_fn(model_dir):
    """
    Load the PyTorch model from the `model_dir` directory
    """

    # Begin loading model:
    print("Loading model: Beginning...\n")

    # First, load the parameters used to create the model:
    model_info = {}
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)
    print("*** Model info: {}".format(model_info))

    # Determine the device:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("*** Device: {}".format(device))

    # Construct the model:
    model = LSTMClassifier(model_info['embedding_dim'],
                           model_info['hidden_dim'], model_info['vocab_size'])

    # Load the stored model parameters:
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    # Load the saved word_dict:
    word_dict_path = os.path.join(model_dir, 'word_dict.pkl')
    with open(word_dict_path, 'rb') as f:
        model.word_dict = pickle.load(f)

    # Move the model to the device and switch to evaluation mode:
    model.to(device).eval()

    # Print built model:
    print("*** Model:\n{}".format(model))

    # End loading model:
    print("\nLoading model: Done...")

    # Return model:
    return model
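
As a quick local sanity check, either `model_fn` variant can be called directly on a directory that already contains the three artifacts; the path below is illustrative only:

# Hypothetical local smoke test; './model_artifacts' is an illustrative path.
model = model_fn('./model_artifacts')
print(type(model).__name__)      # e.g. LSTMClassifier
print(len(model.word_dict))      # size of the saved vocabulary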
Example 3
def main():
    """
    Training and validation.
    """
    global epochs_since_improvement, start_epoch, label_map, best_F1, epoch, checkpoint

    # Initialize model or load checkpoint
    if checkpoint is None:
        model = LSTMClassifier()
        # Collect bias and non-bias parameters; the commented-out SGD variant below
        # applies twice the learning rate to biases, as in the original Caffe repo.
        biases = list()
        not_biases = list()
        for param_name, param in model.named_parameters():
            if param.requires_grad:
                if param_name.endswith('.bias'):
                    biases.append(param)
                else:
                    not_biases.append(param)
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=lr,
                                     betas=(0.9, 0.99))
        # optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': 2 * lr}, {'params': not_biases}],
        #                             lr=lr, momentum=momentum, weight_decay=weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch']
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_F1 = checkpoint['best_F1']
        print('\nLoaded checkpoint from epoch %d. Best F1 so far is %.3f.\n' %
              (start_epoch, best_F1))
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to default device
    model = model.to(device)
    print(model)

    # criterion = torch.nn.CrossEntropyLoss()
    criterion = FocalLoss()

    # Custom dataloaders
    train_dataset = ICDARDataset(data_folder, split='train')
    val_dataset = ICDARDataset(data_folder, split='test')
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=train_dataset.collate_fn,
        num_workers=workers,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             collate_fn=val_dataset.collate_fn,
                                             num_workers=workers,
                                             pin_memory=True)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training
        train_loss = train(train_loader=train_loader,
                           model=model,
                           criterion=criterion,
                           optimizer=optimizer,
                           epoch=epoch)

        # One epoch's validation
        val_loss, accuracy, F1 = validate(val_loader=val_loader,
                                          model=model,
                                          criterion=criterion)

        # Previous criterion: did the training loss improve?
        # is_best = train_loss < best_loss
        # best_loss = min(train_loss, best_loss)

        # Did the validation F1 improve?
        is_best = F1 > best_F1
        best_F1 = max(F1, best_F1)

        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))

        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                        val_loss, best_F1, is_best)

        with open('log.txt', 'a+') as f:
            f.write('epoch:' + str(epoch) + '  train loss:' + str(train_loss) +
                    '  val loss:' + str(val_loss) + '  accuracy:' +
                    str(accuracy) + '\n')
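
`save_checkpoint` is not defined in this snippet; below is a minimal sketch consistent with the keys the checkpoint branch above reads back, with assumed filenames:

def save_checkpoint(epoch, epochs_since_improvement, model, optimizer,
                    val_loss, best_F1, is_best):
    # Minimal sketch; field names mirror what the checkpoint branch above reads.
    state = {'epoch': epoch,
             'epochs_since_improvement': epochs_since_improvement,
             'best_F1': best_F1,
             'val_loss': val_loss,
             'model': model,
             'optimizer': optimizer}
    torch.save(state, 'checkpoint.pth.tar')  # filename is an assumption
    if is_best:
        # Keep a separate copy of the best-scoring checkpoint.
        torch.save(state, 'BEST_checkpoint.pth.tar')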
Example 4
batch_size = 32
output_size = 2
hidden_size = 228
embedding_length = 300
num_epochs = 20

model = LSTMClassifier(vocab_size=vocab_size,
                       output_size=output_size,
                       embedding_dim=embedding_length,
                       hidden_dim=hidden_size,
                       n_layers=2,
                       weights=word_embeddings)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model.to(device)
optim = torch.optim.Adam(model.parameters(), lr=lr)
loss = torch.nn.CrossEntropyLoss()

train_loss, train_acc, val_loss, val_acc = train_model(model=model,
                                                       train_iter=train_iter,
                                                       val_iter=val_iter,
                                                       optim=optim,
                                                       loss=loss,
                                                       num_epochs=num_epochs,
                                                       batch_size=batch_size)

model.load_state_dict(torch.load('state_dict.pth'))
model.eval()
results_target = list()
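
The snippet above leaves `results_target` empty; one hedged way to fill it is a test-time loop like the sketch below, where `test_iter` and the `(inputs, labels)` batch layout are assumptions about the surrounding project:

# Hedged sketch: `test_iter` and the (inputs, labels) batch layout are assumptions.
with torch.no_grad():
    for inputs, labels in test_iter:
        logits = model(inputs.to(device))
        preds = torch.argmax(logits, dim=1)
        results_target.extend(preds.cpu().tolist())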
def run(proc_id, n_gpus, devices, args):
    set_seed(args.seed)
    dev_id = devices[proc_id]

    if n_gpus > 1:
        dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
            master_ip='127.0.0.1', master_port=args.tcp_port)
        world_size = n_gpus
        torch.distributed.init_process_group(backend="nccl",
                                             init_method=dist_init_method,
                                             world_size=world_size,
                                             rank=dev_id)
    device = torch.device(dev_id)

    dataset = Dataset(
        proc_id=proc_id,
        data_dir=args.save_dir,
        train_fname=args.train_fname,
        preprocessed=args.preprocessed,
        lower=args.lower,
        vocab_max_size=args.vocab_max_size,
        emb_dim=args.emb_dim,
        save_vocab_fname=args.save_vocab_fname,
        verbose=True,
    )
    train_dl, valid_dl, test_dl = \
        dataset.get_dataloader(proc_id=proc_id, n_gpus=n_gpus, device=device,
                               batch_size=args.batch_size)

    validator = Validator(dataloader=valid_dl,
                          save_dir=args.save_dir,
                          save_log_fname=args.save_log_fname,
                          save_model_fname=args.save_model_fname,
                          valid_or_test='valid',
                          vocab_itos=dataset.INPUT.vocab.itos,
                          label_itos=dataset.TGT.vocab.itos)
    tester = Validator(dataloader=test_dl,
                       save_log_fname=args.save_log_fname,
                       save_dir=args.save_dir,
                       valid_or_test='test',
                       vocab_itos=dataset.INPUT.vocab.itos,
                       label_itos=dataset.TGT.vocab.itos)
    predictor = Predictor(args.save_vocab_fname)

    if args.load_model:
        predictor.use_pretrained_model(args.load_model, device=device)
        import pdb
        pdb.set_trace()

        predictor.pred_sent(dataset.INPUT)
        tester.final_evaluate(predictor.model)

        return

    model = LSTMClassifier(emb_vectors=dataset.INPUT.vocab.vectors,
                           emb_dropout=args.emb_dropout,
                           lstm_dim=args.lstm_dim,
                           lstm_n_layer=args.lstm_n_layer,
                           lstm_dropout=args.lstm_dropout,
                           lstm_combine=args.lstm_combine,
                           linear_dropout=args.linear_dropout,
                           n_linear=args.n_linear,
                           n_classes=len(dataset.TGT.vocab))
    if args.init_xavier: model.apply(init_weights)
    model = model.to(device)
    args = model_setup(proc_id, model, args)

    train(proc_id,
          n_gpus,
          model=model,
          train_dl=train_dl,
          validator=validator,
          tester=tester,
          epochs=args.epochs,
          lr=args.lr,
          weight_decay=args.weight_decay)

    if proc_id == 0:
        predictor.use_pretrained_model(args.save_model_fname, device=device)
        bookkeep(predictor, validator, tester, args, dataset.INPUT)
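
One plausible launcher for `run()`, starting one process per GPU; `get_args` and `args.n_gpus` are assumptions about the surrounding script:

if __name__ == '__main__':
    import torch.multiprocessing as mp

    args = get_args()                    # assumed CLI parser for the options used above
    devices = list(range(args.n_gpus))   # e.g. [0, 1] for two GPUs; an assumption
    n_gpus = len(devices)
    if n_gpus == 1:
        run(0, n_gpus, devices, args)
    else:
        # mp.spawn passes the process index as the first argument to run()
        mp.spawn(run, args=(n_gpus, devices, args), nprocs=n_gpus)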
Example 6
class Trainer:
    def __init__(self,
                 config,
                 n_gpu,
                 vocab,
                 train_loader=None,
                 val_loader=None):
        self.config = config
        self.vocab = vocab
        self.n_gpu = n_gpu
        self.train_loader = train_loader
        self.val_loader = val_loader

        # Build model
        vocab_size = self.vocab.vocab_size()

        self.model = LSTMClassifier(self.config, vocab_size,
                                    self.config.n_label)
        self.model.to(device)

        if self.n_gpu > 1:
            self.model = nn.DataParallel(self.model)

        # Build optimizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.config.lr)

        # Build criterion
        self.criterion = nn.CrossEntropyLoss()

    def train(self):
        best_f1 = 0.0
        best_acc = 0.0
        global_step = 0
        batch_f1 = []
        batch_acc = []
        for epoch in range(self.config.num_epoch):
            batch_loss = []
            for step, batch in enumerate(self.train_loader):
                self.model.train()
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(
                    outputs['logits'].view(-1, self.config.n_label),
                    labels.view(-1))

                f1, acc = ic_metric(labels.cpu(),
                                    outputs['predicted_intents'].cpu())

                if self.n_gpu > 1:
                    loss = loss.mean()

                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()

                global_step += 1
                batch_loss.append(loss.float().item())
                batch_f1.append(f1)
                batch_acc.append(acc)

                if (global_step
                        == 1) or (global_step % self.config.log_interval == 0):
                    mean_loss = np.mean(batch_loss)
                    mean_f1 = np.mean(batch_f1)
                    mean_acc = np.mean(batch_acc)
                    batch_loss = []
                    nsml.report(summary=True,
                                scope=locals(),
                                epoch=epoch,
                                train_loss=mean_loss,
                                step=global_step)

                if (global_step > 0) and (global_step %
                                          self.config.val_interval == 0):
                    val_loss, val_f1, val_acc = self.evaluation()
                    nsml.report(summary=True,
                                scope=locals(),
                                epoch=epoch,
                                val_loss=val_loss,
                                val_f1=val_f1,
                                val_acc=val_acc,
                                step=global_step)

                    if val_f1 > best_f1:
                        best_f1 = val_f1
                        best_acc = val_acc
                        nsml.save(global_step)

    def evaluation(self):
        self.model.eval()
        total_loss = []
        preds = []
        targets = []
        with torch.no_grad():
            for step, batch in enumerate(self.val_loader):
                batch = tuple(t.to(device) for t in batch)
                batch = sort_batch(batch)
                input_ids, input_lengths, labels = batch

                outputs = self.model(input_ids, input_lengths)
                loss = self.criterion(
                    outputs['logits'].view(-1, self.config.n_label),
                    labels.view(-1))

                pred = outputs['predicted_intents'].squeeze(
                    -1).cpu().numpy().tolist()
                target = labels.cpu().numpy().tolist()

                preds.extend(pred)
                targets.extend(target)
                total_loss.append(loss.float().item())

        mean_loss = np.mean(total_loss)
        mean_f1, mean_acc = ic_metric(targets, preds)
        return mean_loss, mean_f1, mean_acc
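
Illustrative usage of the `Trainer` class; `config`, `vocab` and the two loaders are assumed to be built by the surrounding script:

# Hypothetical driver code; the constructor arguments come from the surrounding project.
trainer = Trainer(config,
                  n_gpu=torch.cuda.device_count(),
                  vocab=vocab,
                  train_loader=train_loader,
                  val_loader=val_loader)
trainer.train()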
Example 7

def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # First, load the parameters used to create the model.
    model_info = {}
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)

    print("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = LSTMClassifier(model_info['embedding_dim'], model_info['hidden_dim'], model_info['vocab_size'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    # Load the saved word_dict.
    word_dict_path = os.path.join(model_dir, 'word_dict.pkl')
    with open(word_dict_path, 'rb') as f:
        model.word_dict = pickle.load(f)

    model.to(device).eval()

    print("Done loading model.")
    return model

def _get_train_data_loader(batch_size, training_dir):
    print("Get train data loader.")

    train_data = pd.read_csv(os.path.join(training_dir, "train.csv"), header=None, names=None)

    train_y = torch.from_numpy(train_data[[0]].values).float().squeeze()
    train_X = torch.from_numpy(train_data.drop([0], axis=1).values).long()

    train_ds = torch.utils.data.TensorDataset(train_X, train_y)

    return torch.utils.data.DataLoader(train_ds, batch_size=batch_size)
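
A minimal training loop that consumes the loader above; passing `torch.nn.BCELoss()` as `loss_fn` assumes the classifier emits a single sigmoid probability per example, which may differ in the actual model:

def train(model, train_loader, epochs, optimizer, loss_fn, device):
    # Minimal sketch; assumes model(batch_X) returns one probability per example.
    for epoch in range(1, epochs + 1):
        model.train()
        total_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            output = model(batch_X)
            loss = loss_fn(output, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print("Epoch: {}, Loss: {}".format(epoch, total_loss / len(train_loader)))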