Example #1

import argparse

import torch
import torch.nn as nn

# Assumed project-local helpers (not shown in this example): load_config,
# CreateRandomDataset, DNN, WeightPruner, train_eval, test, learn_curves,
# iterative_pruning.


def main():
    # parser
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_config", required=False,
                        default="deep_dnn.json",
                        help="Model Architecture .json")
    parser.add_argument('-s', "--setup", default="agressive_setup.json",
                        help="Experimental Setup .json")
    args = parser.parse_args()

    # config paths
    model_config = "configs/model_architecture/" + args.model_config
    setup_path = "configs/experimental_setup/" + args.setup

    print(model_config)

    # Hyper Parameters
    setup = load_config(setup_path)
    train_setup = setup["Train"]
    prune_setup = setup["Prune"]

    batch_size = train_setup["batch_size"]
    epochs = train_setup["training_epochs"]
    lr = train_setup["learning_rate"]
    datatype = train_setup["datatype"]
    feat_size = train_setup["feature_size"]
    n_samples = train_setup["n_samples"]
    n_classes = train_setup["n_classes"]
    val_ratio = train_setup["val_ratio"]
    test_ratio = train_setup["test_ratio"]
    
    labels = 1
    if datatype == "multilabel":
        labels = train_setup["labels_per_sample"]

    # CUDA for PyTorch
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Dataloaders
    train_loader, val_loader, test_loader = \
        CreateRandomDataset(datatype,
                            feat_size,
                            n_samples,
                            n_classes,
                            val_ratio,
                            test_ratio,
                            batch_size,
                            labels).get_dataloaders()

    data_loaders = {"train": train_loader,
                    "val": val_loader, 
                    "test": test_loader}

    # Init model
    model = DNN(config=model_config,
                in_features=feat_size,
                n_classes=n_classes)

    if use_cuda:
        print('CUDA enabled.')
    model.to(device)
    print("--- DNN network initialized ---")
    print(model)

    # Criterion/Optimizer/Pruner
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    weight_pruner = WeightPruner(model)

    # Train the model from scratch
    print("--- Training DNN ---")
    train_losses, val_losses = \
        train_eval(model, criterion, optimizer, epochs, train_loader, val_loader)
    test_acc, test_loss = test(model, test_loader, criterion)

    learn_curves(train_losses, val_losses, "loss_fig.png")

    iterative_pruning(model, weight_pruner, criterion, data_loaders, prune_setup)
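
As a point of reference, here is a minimal sketch of the load_config helper assumed above, under the assumption that configs are plain JSON files (hypothetical, not the project's actual implementation):

import json

def load_config(path):
    # Hypothetical helper: read a JSON config into a dict, matching the
    # setup["Train"] / setup["Prune"] accesses in Example #1.
    with open(path) as f:
        return json.load(f)
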
Example #2

def main():

    parser = argparse.ArgumentParser(
        description='Emotion recognition on CMU-MOSEI')
    parser.add_argument('--seed', default=5566, type=int, help="Random seed")
    parser.add_argument('--save_dir',
                        default='./results',
                        type=str,
                        help="Directory for results and checkpoints")
    parser.add_argument('--num_epochs',
                        default=20,
                        type=int,
                        help="Number of training epochs")
    parser.add_argument('--dropout',
                        default=0.5,
                        type=float,
                        help="Dropout probability")
    parser.add_argument('--min_ir',
                        default=2,
                        type=float,
                        help="Minimum imbalance ratio")
    parser.add_argument('--lr', default=0.5, type=float, help="Learning rate")
    parser.add_argument('--activation',
                        default='relu',
                        type=str,
                        help="Activation function")
    parser.add_argument('--batch_size',
                        default=32,
                        type=int,
                        help="Training batch size")
    parser.add_argument('--layers',
                        default='512.512.256.256.128.128',
                        type=str,
                        help="Dot-separated list of hidden dimensions")
    parser.add_argument('--gamma',
                        default=1,
                        type=float,
                        help="Weight for negative class")
    parser.add_argument('--dataset', default=None, type=str, help="Dataset")
    parser.add_argument('--verbose', action='store_true', help="Verbose")

    args = parser.parse_args()
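
    # `options` is assumed to be a module-level dict pre-populated with
    # defaults (including 'class_weights'); the parsed CLI arguments
    # below override a subset of its entries.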

    options['num_epochs'] = args.num_epochs
    options['dropout'] = args.dropout
    options['activation'] = args.activation
    options['batch_size'] = args.batch_size
    options['layers'] = [int(x) for x in args.layers.split('.')]
    options['gamma'] = args.gamma
    options['lr'] = args.lr
    options['min_ir'] = args.min_ir

    grad_clip_value = 10.0

    import json
    import os
    import pickle

    import numpy as np
    import torch
    from sklearn.metrics import f1_score
    from torch.optim import Adam
    from torch.utils.data import DataLoader

    from losses import WeightedBCELoss
    from models import DNN, mosei_dataset

    # weighted_accuracy, reformat_array and reorder_labels are assumed
    # project-local helpers; see the sketch after this example.

    torch.manual_seed(args.seed)
    CUDA = torch.cuda.is_available()

    n_epochs = options['num_epochs']
    batch_size = options['batch_size']
    verbose = args.verbose

    save_dir = args.save_dir
    ckpt_path = os.path.join(save_dir, "checkpoint.pt")
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    with open(os.path.join(save_dir, 'options.json'), 'w') as fout:
        fout.write(json.dumps(options))

    class_weight = torch.Tensor(options['class_weights'])
    class_weight = class_weight / torch.sum(class_weight)
    gamma = torch.Tensor([options['gamma']])

    if CUDA:
        class_weight = class_weight.cuda()
        gamma = gamma.cuda()

    model = DNN(options)
    if CUDA:
        model.cuda()
    if verbose: print(model)

    model.train()

    dataset = pickle.load(open(args.dataset, "rb"))
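
    # train_split / dev_split / test_split are assumed to be module-level
    # lists of CMU-MOSEI segment ids defining the standard dataset splits.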

    train_dataset = mosei_dataset(dataset,
                                  splits=train_split,
                                  oversample=True,
                                  min_ir=options['min_ir'])
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=4)
    if verbose: print(f"Train set: {len(train_dataset)} samples")

    val_dataset = mosei_dataset(dataset, splits=dev_split)
    val_loader = DataLoader(val_dataset,
                            batch_size=256,
                            shuffle=False,
                            num_workers=4)
    if verbose: print(f"Val set: {len(val_dataset)} samples")

    test_dataset = mosei_dataset(dataset, splits=test_split)
    test_loader = DataLoader(test_dataset,
                             batch_size=256,
                             shuffle=False,
                             num_workers=4)
    if verbose: print(f"Test set: {len(test_dataset)} samples")

    optimizer = Adam(
        [param for param in model.parameters() if param.requires_grad],
        lr=options['lr'])

    criterion = WeightedBCELoss(class_weight=class_weight,
                                PosWeightIsDynamic=True,
                                gamma=gamma)

    best_val = np.inf
    best_metric = 0

    train_labels = []

    for epoch_no in range(n_epochs):
        total_pos = 0
        model.train()
        for batch_no, batch in enumerate(train_loader, start=1):
            embeddings, labels = batch
            if CUDA:
                embeddings, labels = embeddings.cuda(), labels.cuda()

            y_hat = model(embeddings)
            loss = criterion(y_hat, labels)
            loss.backward()
            # torch.nn.utils.clip_grad_norm_([param for param in model.parameters() if param.requires_grad], grad_clip_value)
            optimizer.step()
            optimizer.zero_grad()

            if batch_no % 200 == 0:
                if verbose: print(f"Training loss: {loss.item():.5f}")

                y_true, y_pred, val_loss = [], [], []
                model.eval()
                with torch.no_grad():  # no gradients needed for validation
                    for batch in val_loader:
                        embeddings, labels = batch
                        if CUDA:
                            embeddings, labels = embeddings.cuda(), labels.cuda()
                        y_hat = model(embeddings)
                        loss = criterion(y_hat, labels)
                        val_loss.append(loss.item())

                        y_true.append(labels.detach().cpu().numpy())
                        y_pred.append(y_hat.detach().cpu().numpy())

                assert not np.any(np.isnan(val_loss))

                y_true = np.concatenate(y_true, axis=0).squeeze()
                y_pred = np.concatenate(y_pred, axis=0).squeeze()
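
                # Thresholding raw logits at 0 is equivalent to a sigmoid
                # probability above 0.5 for each independent label.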
                y_true_bin = y_true > 0
                y_pred_bin = y_pred > 0

                val_loss = np.average(val_loss)
                f1score = [
                    f1_score(t, p, average="weighted")
                    for t, p in zip(y_true_bin.T, y_pred_bin.T)
                ]
                wa = np.average(weighted_accuracy(y_true_bin, y_pred_bin))

                val_metric = np.average(f1score) + np.average(wa)
                f1score = [f'{x*100:.2f}' for x in f1score]
                if verbose:
                    print(f"Validation loss: {val_loss:.3f}, F1: {f1score}")

                if best_metric < val_metric:
                    if verbose: print("Validation metric improved")
                    best_metric = val_metric
                    checkpoint = {
                        'options': options,
                        'model': model,
                        'epoch': epoch_no
                    }
                    torch.save(checkpoint, ckpt_path)

                model.train()

    # ====================================================================================================
    # Final Validation
    # ====================================================================================================

    checkpoint = torch.load(ckpt_path)
    model = checkpoint['model']
    if verbose:
        print("Loaded best model from epoch {}".format(checkpoint['epoch']))
    model.eval()

    val_true = []
    val_pred = []

    with torch.no_grad():  # inference only
        for batch in val_loader:
            embeddings, labels = batch
            if CUDA:
                embeddings, labels = embeddings.cuda(), labels.cuda()
            val_hat = model(embeddings)

            val_true.append(labels.detach().cpu().numpy())
            val_pred.append(val_hat.detach().cpu().numpy())

    val_true = np.concatenate(val_true, axis=0).squeeze()
    val_pred = np.concatenate(val_pred, axis=0).squeeze()

    val_true_bin = val_true > 0
    val_pred_bin = val_pred > 0

    wa = [
        weighted_accuracy(t, p) * 100
        for t, p in zip(val_true_bin.T, val_pred_bin.T)
    ]
    f1score = [
        f1_score(t, p, average="weighted") * 100
        for t, p in zip(val_true_bin.T, val_pred_bin.T)
    ]

    if verbose:
        print(f"Val WA, {reformat_array(wa)} Avg: {np.average(wa):.2f}")
    if verbose:
        print(
            f"Val F1, {reformat_array(f1score)} Avg: {np.average(f1score):.2f} "
        )

    # ====================================================================================================
    # Final Test
    # ====================================================================================================
    test_true = []
    test_pred = []

    with torch.no_grad():  # inference only
        for batch in test_loader:
            embeddings, labels = batch
            if CUDA:
                embeddings, labels = embeddings.cuda(), labels.cuda()

            pred = model(embeddings)

            test_true.append(labels.detach().cpu().numpy())
            test_pred.append(pred.detach().cpu().numpy())

    test_true = np.concatenate(test_true, axis=0).squeeze()
    test_pred = np.concatenate(test_pred, axis=0).squeeze()

    test_true_bin = test_true > 0  # binarize targets
    test_pred_bin = test_pred > 0  # binarize logits (0 logit = 0.5 sigmoid)

    test_wa = [
        weighted_accuracy(t, p)
        for t, p in zip(test_true_bin.T, test_pred_bin.T)
    ]
    test_wa = reorder_labels(test_wa)

    test_f1score = [
        f1_score(t, p, average="weighted")
        for t, p in zip(test_true_bin.T, test_pred_bin.T)
    ]
    test_f1score = reorder_labels(test_f1score)

    test_wa_str = [f'{x*100:.2f}' for x in test_wa]
    test_f1score_str = [f'{x*100:.2f}' for x in test_f1score]

    print(f"Test WA: {test_wa_str} Avg: {np.average(test_wa)*100:2.1f}")
    print(
        f"Test F1: {test_f1score_str} Avg: {np.average(test_f1score)*100:2.1f}"
    )

    # Assemble a LaTeX-style table row of per-class "WA & F1" entries
    combined = [f" {x} & {y} " for x, y in zip(test_wa_str, test_f1score_str)]
    combined.append(
        f" {np.average(test_wa)*100:2.1f} & {np.average(test_f1score)*100:2.1f}"
    )
    if verbose: print("&".join(combined))
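
The metric helpers used in Example #2 (weighted_accuracy, reformat_array, reorder_labels) come from the surrounding project. A minimal sketch, assuming weighted_accuracy is the balanced accuracy commonly reported for CMU-MOSEI emotion labels and that the other two only pretty-print and re-order per-class scores; all three are assumptions, not the project's actual code:

import numpy as np

def weighted_accuracy(y_true, y_pred):
    # Assumed definition: balanced accuracy, i.e. the mean of the
    # true-positive rate and the true-negative rate.
    y_true = np.asarray(y_true, dtype=bool)
    y_pred = np.asarray(y_pred, dtype=bool)
    pos = max(y_true.sum(), 1)
    neg = max((~y_true).sum(), 1)
    tp = np.logical_and(y_true, y_pred).sum()
    tn = np.logical_and(~y_true, ~y_pred).sum()
    return 0.5 * (tp / pos + tn / neg)

def reformat_array(xs):
    # Hypothetical pretty-printer for a list of percentage scores.
    return " ".join(f"{x:.2f}" for x in xs)

def reorder_labels(xs):
    # Hypothetical re-ordering of per-class scores into the label order
    # used for reporting; identity placeholder here.
    return list(xs)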