Example #1
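
All three snippets below share a common setup. A minimal sketch of the module-level imports they rely on follows; logger, args, run_start_time, and the project helpers (Model, DataProcessor, load_data, train_run, eval_run, test_run, EarlyStopping, save_model) are defined elsewhere and are assumed here:

import logging

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchtext import data  # torchtext.legacy.data on torchtext >= 0.9

logger = logging.getLogger(__name__)
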
def main():
    data_generator = load_data()
    _history = []
    device = None
    model = None
    criterion = None
    fold_index = 0

    for TEXT, LABEL, train_data, val_data in data_generator.get_fold_data(num_folds=args['num_folds']):
        logger.info("***** Running Training *****")
        logger.info(f"Now fold: {fold_index + 1} / {args['num_folds']}")

        TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.300d")
        logger.info(f'Embedding size: {TEXT.vocab.vectors.size()}.')
        LABEL.build_vocab(train_data)  # converts string labels into numeric (float) targets

        model = Model(len(TEXT.vocab), args['embedding_dim'], args['hidden_dim'],
                      args['output_dim'], args['num_layers'], args['dropout'],
                      TEXT.vocab.vectors, args["embedding_trainable"])
        
        optimizer = optim.Adam(model.parameters())
        criterion = nn.BCEWithLogitsLoss()

        if args['gpu'] and args['gpu_number'] is not None:
            torch.cuda.set_device(args['gpu_number'])
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')
        model = model.to(device)
        criterion = criterion.to(device)
        
        train_iterator = data.Iterator(train_data, batch_size=args['batch_size'], sort_key=lambda x: len(x.text), device=device)
        val_iterator = data.Iterator(val_data, batch_size=args['batch_size'], sort_key=lambda x: len(x.text), device=device)

        for epoch in range(args['epochs']):
            train_loss, train_acc = train_run(model, train_iterator, optimizer, criterion)
            logger.info(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% |')
        val_loss, val_acc = eval_run(model, val_iterator, criterion)
        logger.info(f'| Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}% |')

        _history.append([val_loss, val_acc])
        fold_index += 1
    
    _history = np.asarray(_history)
    loss = np.mean(_history[:, 0])
    acc = np.mean(_history[:, 1])
    
    logger.info('***** Cross Validation Result *****')
    logger.info(f'LOSS: {loss}, ACC: {acc}')
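
main() relies on train_run and eval_run helpers that are not shown. A minimal sketch of what they might look like for this binary-classification setup follows; names and signatures are illustrative, matching the calls above (batches are already on the right device because the iterators were built with device=device):

def binary_accuracy(preds, y):
    """Fraction of predictions matching the labels after thresholding at 0.5."""
    rounded = torch.round(torch.sigmoid(preds))
    return (rounded == y).float().mean().item()


def train_run(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss, epoch_acc = 0.0, 0.0
    for batch in iterator:
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += binary_accuracy(predictions, batch.label)
    return epoch_loss / len(iterator), epoch_acc / len(iterator)


def eval_run(model, iterator, criterion):
    model.eval()
    epoch_loss, epoch_acc = 0.0, 0.0
    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch.text).squeeze(1)
            loss = criterion(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += binary_accuracy(predictions, batch.label)
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
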
def test():
    logger.info("***** Setup *****")
    logger.info(f"Configs: {args}")

    # make iterators
    data_processor = DataProcessor()
    test_data = data_processor.get_test_data(args['data_ratio'])
    test_iterator = DataLoader(test_data,
                               batch_size=args["batch_size"],
                               shuffle=False)  # no need to shuffle for evaluation

    # build a model
    model = Model(input_dim=28 * 28,
                  hidden_dim=args['hidden_dim'],
                  drop_rate=args['drop_rate'])

    # load weights (map to CPU first; the model is moved to the target device below)
    weights_dict = torch.load(args["weight_name"], map_location='cpu')
    model.load_state_dict(weights_dict)

    # define the loss (no optimizer is needed at test time)
    criterion = nn.BCEWithLogitsLoss()

    # for gpu environment
    if args['gpu'] and args['gpu_number'] is not None:
        torch.cuda.set_device(args['gpu_number'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)
    criterion = criterion.to(device)

    logger.info(f"Number of testing samples: {len(test_iterator.dataset)}")

    logger.info("***** Testing *****")
    _, test_acc, test_auc, test_ap, test_eer, test_prec, test_rec, test_f1 = test_run(
        model, test_iterator, criterion, device)
    logger.info(
        f'| Test Accuracy: {test_acc:.3f} | Test AUC: {test_auc:.3f} | Test AP: {test_ap:.3f} | Test EER: {test_eer:.3f} | Test Precision: {test_prec:.3f} | Test Recall: {test_rec:.3f} | Test F1: {test_f1:.3f} |'
    )
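
test_run is not shown either. The metric tuple it returns can be computed with scikit-learn (an assumed dependency; the helper below is illustrative, not the project's actual implementation). EER is read off the ROC curve at the point where the false-positive rate equals the false-negative rate:

from sklearn.metrics import (accuracy_score, average_precision_score, f1_score,
                             precision_score, recall_score, roc_auc_score,
                             roc_curve)


def classification_metrics(y_true, y_score, threshold=0.5):
    """Compute the metrics logged above from labels and sigmoid scores."""
    y_pred = (np.asarray(y_score) >= threshold).astype(int)
    acc = accuracy_score(y_true, y_pred)
    auc = roc_auc_score(y_true, y_score)
    ap = average_precision_score(y_true, y_score)
    fpr, tpr, _ = roc_curve(y_true, y_score)
    eer = fpr[np.nanargmin(np.abs(fpr - (1 - tpr)))]  # approximate FPR == FNR point
    prec = precision_score(y_true, y_pred)
    rec = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return acc, auc, ap, eer, prec, rec, f1
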
def train():
    logger.info("***** Setup *****")
    logger.info(f"Configs: {args}")

    # make iterators
    data_processor = DataProcessor()
    train_data, val_data, pos_weight = data_processor.get_data(
        args['split_rate'], args['data_ratio'], args['seed'])
    train_iterator = DataLoader(train_data,
                                batch_size=args["batch_size"],
                                shuffle=True)
    val_iterator = DataLoader(val_data,
                              batch_size=args["batch_size"],
                              shuffle=False)  # no need to shuffle for evaluation

    # build a model
    model = Model(input_dim=28 * 28,
                  hidden_dim=args['hidden_dim'],
                  drop_rate=args['drop_rate'])

    # define an optimizer
    optimizer = optim.Adam(model.parameters())
    if args['loss_correction']:
        pos_weight = torch.tensor(pos_weight)
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    else:
        criterion = nn.BCEWithLogitsLoss()

    # additional settings (e.g., early stopping)
    early_stopping = EarlyStopping(logger,
                                   patience=args['patience'],
                                   verbose=True)

    # for gpu environment
    if args['gpu'] and args['gpu_number'] is not None:
        torch.cuda.set_device(args['gpu_number'])
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model = model.to(device)
    criterion = criterion.to(device)

    logger.info(f"Number of training samples: {len(train_iterator.dataset)}")
    logger.info(f"Number of validation samples: {len(val_iterator.dataset)}")

    logger.info("***** Training *****")
    _history = []
    for epoch in range(args['epochs']):
        train_loss, train_acc = train_run(model, train_iterator, optimizer,
                                          criterion, device)
        logger.info(
            f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.3f}% |'
        )
        val_loss, val_acc = eval_run(model, val_iterator, criterion, device)
        logger.info(
            f'| Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.3f}% |')
        _history.append([train_loss, train_acc, val_loss, val_acc])

        # early stopping
        early_stopping(val_loss)
        if early_stopping.early_stop:
            logger.info(f'\tEarly stopping at {epoch+1:02}')
            if args['save_model']:
                save_model(model)
            break

    else:  # the for loop finished without early stopping
        if args['save_model']:
            save_model(model)

    logger.info("***** Evaluation *****")
    # plot loss
    _history = np.array(_history)
    plt.figure()
    plt.plot(np.arange(len(_history)), _history[:, 0], label="train")
    plt.plot(np.arange(len(_history)), _history[:, 2], label='validation')
    plt.grid(True)
    plt.legend()
    plt.title("Training Monitoring")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.savefig(f'./fig/{run_start_time}/loss.png',
                bbox_inches="tight",
                pad_inches=0.1)

    # draw figures for evaluation
    _, _, val_auc, val_ap, val_eer, val_prec, val_rec, val_f1 = test_run(
        model, val_iterator, criterion, device)
    logger.info(
        f'| Val. AUC: {val_auc:.3f} | Val. AP: {val_ap:.3f} | Val. EER: {val_eer:.3f} | Val. Precision: {val_prec:.3f} | Val. Recall: {val_rec:.3f} | Val. F1: {val_f1:.3f} |'
    )
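
The EarlyStopping class used in train() is also external. Below is a minimal patience-based sketch consistent with how it is called above; the real implementation may additionally checkpoint the best weights:

class EarlyStopping:
    """Stop training when the validation loss has not improved for `patience` epochs."""

    def __init__(self, logger, patience=7, verbose=False):
        self.logger = logger
        self.patience = patience
        self.verbose = verbose
        self.best_loss = float('inf')
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        if val_loss < self.best_loss:
            # improvement: remember the new best loss and reset the counter
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                self.logger.info(
                    f'\tEarlyStopping counter: {self.counter} / {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True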