Example #1
    def _test(n_epochs):
        n_iters = 80
        s = 16
        n_classes = 10

        offset = n_iters * s
        y_true = torch.randint(0,
                               n_classes,
                               size=(offset *
                                     idist.get_world_size(), )).to(device)
        y_preds = torch.rand(offset * idist.get_world_size(),
                             n_classes).to(device)

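        # each rank evaluates its own contiguous shard of the synthetic predictions and targets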
        def update(engine, i):
            return (
                y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, :],
                y_true[i * s + rank * offset:(i + 1) * s + rank * offset],
            )

        engine = Engine(update)

        acc = Accuracy()
        acc.attach(engine, "acc")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert "acc" in engine.state.metrics
        res = engine.state.metrics["acc"]
        if isinstance(res, torch.Tensor):
            res = res.cpu().numpy()

        true_res = accuracy_score(y_true.cpu().numpy(),
                                  torch.argmax(y_preds, dim=1).cpu().numpy())

        assert pytest.approx(res) == true_res
Example #2
    def _test(n_epochs, metric_device):
        metric_device = torch.device(metric_device)
        n_iters = 80
        s = 16
        n_classes = 10

        offset = n_iters * s
        y_true = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 8, 10)).to(device)
        y_preds = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 8, 10)).to(device)

        def update(engine, i):
            return (
                y_preds[i * s + rank * offset : (i + 1) * s + rank * offset, ...],
                y_true[i * s + rank * offset : (i + 1) * s + rank * offset, ...],
            )

        engine = Engine(update)

        acc = Accuracy(is_multilabel=True, device=metric_device)
        acc.attach(engine, "acc")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert (
            acc._num_correct.device == metric_device
        ), f"{type(acc._num_correct.device)}:{acc._num_correct.device} vs {type(metric_device)}:{metric_device}"

        assert "acc" in engine.state.metrics
        res = engine.state.metrics["acc"]
        if isinstance(res, torch.Tensor):
            res = res.cpu().numpy()

        true_res = accuracy_score(to_numpy_multilabel(y_true), to_numpy_multilabel(y_preds))

        assert pytest.approx(res) == true_res
Example #3
def adv_prune_train_loop(model, params, ds, dset, min_y, base_data, model_id, prune_type, device, batch_size, tpa, max_epochs=5):
    #assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = tpa
    remove_amount = tpa
    ds_train, ds_valid = ds
    train_set, valid_set = dset
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(model)
    original_model.eval()
    model_id = f'{model_id}_{prune_type}_pruning_{tpa}_l1'
    valid_freq = 200 * 500 // batch_size // 3

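    # collect the prunable convolution layers from the ResNet-style bottleneck blocks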
    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend([bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])
    conv_layers = conv_layers[:22]  # restrict pruning to the first 22 conv layers
    def prune_model(model):
        print(f'pruned model by {total_prune_amount}')
        if prune_type == 'global_unstructured':
            parameters_to_prune = [(layer, 'weight') for layer in conv_layers]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=total_prune_amount,
            )
        else:
            for layer in conv_layers:
                # structured branch: assume the same prune.ln_structured call used in the other prune loops in this file
                prune.ln_structured(layer, name='weight', amount=total_prune_amount, n=1, dim=0)

    prune_model(model)

    def valid_eval(model, dataset, dataloader, device, label):
        right = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for i, data in tqdm(enumerate(dataloader), total=len(dataset) / dataloader.batch_size):
                data, y = data
                data = data.to(device)
                y = y.to(device) - label
                ans = model.forward(data)
                right += torch.sum(torch.eq(torch.argmax(ans, dim=1), y))
                total += y.shape[0]
        return right/total
    valid_acc = valid_eval(model, valid_set, ds_valid, device, min_y_val)
    print('initial accuracy:', valid_acc.item())
    
    with create_summary_writer(model, ds_train, base_data, model_id, device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom, weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn  # raw F.cross_entropy callable wrapped by the Loss metric

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

#         attack = GradientSignAttack(original_model, loss_fn=loss, eps=0.2)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
#             with ctx_noparamgrad_and_eval(model):
#                 x_adv = attack.perturb(x, y)
#             optimizer.zero_grad()
#             x = torch.cat((x, x_adv))
#             y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
#             x_adv = attack.perturb(x, y)
#             x = torch.cat((x, x_adv))
#             y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
#             x_adv = attack.perturb(x, y)
#             x = torch.cat((x, x_adv))
#             y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                  .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll, engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy, engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}"
                      .format(engine.state.epoch, iter, len(ds_train), accuracy, nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output, engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'], engine.state.epoch)

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(to_save, DiskSaver(os.path.join(base_data, model_id),
                                                create_dir=True),
                             score_function=validation_value, score_name="val_acc",
                             global_step_transform=global_step_from_engine(trainer),
                             n_saved=None)

        # kick everything off
        trainer.add_event_handler(Events.ITERATION_COMPLETED(every=valid_freq), handler)
        trainer.run(ds_train, max_epochs=max_epochs)
Example #4
def run(train_batch_size, test_batch_size, epochs, lr, log_interval, log_dir,
        no_cuda, sub_spectrogram_size, sub_spectrogram_mel_hop, n_mel_bins,
        seed, root_dir, train_dir, eval_dir):
    """
	Model runner

	Parameters
	----------
	train_batch_size : int
		Size of the training batch. Default: 16

	test_batch_size : int
		Size of the testing batch. Default: 16

	epochs : int
		Number of training epochs. Default: 200

	lr : float
		Learning rate for the ADAM optimizer. Default: 0.001

	log_interval : int
		Interval for logging data. Default: 10

	log_dir : str
		Directory to save the logs

	no_cuda : bool
		Should you NOT use CUDA? Default: False

	sub_spectrogram_size : int
		Size of the SubSpectrogram. Default: 20

	sub_spectrogram_mel_hop : int
		Mel-bin hop size of the SubSpectrogram. Default: 10

	n_mel_bins : int
		Number of mel-bins of the Spectrogram extracted. Default: 40

	seed : int
		Torch random seed value, for reproducible results. Default: 1

	root_dir : str
		Directory of the folder which contains the dataset (has 'audio' and 'evaluation_setup' folders inside)

	train_dir : str
		Set as default: 'evaluation_setup/train_fold1.txt'

	eval_dir : str
		Set as default: 'evaluation_setup/evaluate_fold1.txt'
	"""

    # check if possible to use CUDA
    use_cuda = not no_cuda and torch.cuda.is_available()

    # set seed
    torch.manual_seed(seed)

    # Map to GPU
    device = torch.device("cuda" if use_cuda else "cpu")

    # Load the data loaders
    train_loader, val_loader = get_data_loaders(train_batch_size,
                                                test_batch_size,
                                                sub_spectrogram_size,
                                                sub_spectrogram_mel_hop,
                                                n_mel_bins, use_cuda, root_dir,
                                                train_dir, eval_dir)

    # Get the model
    model = SubSpectralNet(sub_spectrogram_size, sub_spectrogram_mel_hop,
                           n_mel_bins, use_cuda).to(device)

    # Init the TensorBoard summary writer
    writer = create_summary_writer(model, train_loader, log_dir)

    # Init the optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Use GPU if possible
    if device:
        model.to(device)

    def update_model(engine, batch):
        """Prepare batch for training: pass to a device with options.

		"""
        model.train()
        optimizer.zero_grad()

        inputs, label = prepare_batch(batch, device=device)
        output = model(inputs)
        losses = []
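        # SubSpectralNet emits one prediction per sub-spectrogram band plus a global classifier head;
        # sum the per-head NLL losses and back-propagate the total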
        for ite in range(output.shape[1]):
            losses.append(F.nll_loss(output[:, ite, :], label))
        loss = sum(losses)
        loss.backward()
        optimizer.step()
        return losses, output

    # get the trainer module
    trainer = Engine(update_model)

    def evaluate(engine, batch):
        """Prepare batch for training: pass to a device with options.
		"""
        model.eval()
        with torch.no_grad():
            inputs, label = prepare_batch(batch, device=device)
            output = model(inputs)
            losses = []
            correct = []
            for ite in range(output.shape[1]):
                losses.append(
                    F.nll_loss(output[:, ite, :], label,
                               reduction='sum').item())
        return losses, output, label

    # get the evaluator module
    evaluator = Engine(evaluate)

    # define output transforms for multiple outputs.
    def output_transform1(output):
        # `output` variable is returned by above `process_function`
        losses, correct, label = output
        return correct[:, 0, :], label

    metric = Accuracy(output_transform=output_transform1)
    metric.attach(evaluator, "acc_highband")
    metric = Loss(F.nll_loss, output_transform=output_transform1)
    metric.attach(evaluator, "loss_highband")

    def output_transform2(output):
        # `output` variable is returned by above `process_function`
        losses, correct, label = output
        return correct[:, 1, :], label

    metric = Accuracy(output_transform=output_transform2)
    metric.attach(evaluator, "acc_midband")
    metric = Loss(F.nll_loss, output_transform=output_transform2)
    metric.attach(evaluator, "loss_midband")

    def output_transform3(output):
        # `output` variable is returned by above `process_function`
        losses, correct, label = output
        return correct[:, 2, :], label

    metric = Accuracy(output_transform=output_transform3)
    metric.attach(evaluator, "acc_lowband")
    metric = Loss(F.nll_loss, output_transform=output_transform3)
    metric.attach(evaluator, "loss_lowband")

    def output_transform(output):
        # `output` variable is returned by above `process_function`
        losses, correct, label = output
        return correct[:, 3, :], label

    metric = Accuracy(output_transform=output_transform)
    metric.attach(evaluator, "acc_globalclassifier")
    metric = Loss(F.nll_loss, output_transform=output_transform)
    metric.attach(evaluator, "loss_globalclassifier")

    # Log the events in Ignite: EVERY ITERATION
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter % log_interval == 0:
            losses, output = engine.state.output
            epoch = engine.state.epoch
            print(
                'Train Epoch: {} [{}/{}]\tLosses: {:.6f} (Top Band), {:.6f} (Mid Band), {:.6f} (Low Band), {:.6f} (Global Classifier)'
                .format(epoch, iter, len(train_loader), losses[0].item(),
                        losses[1].item(), losses[2].item(), losses[3].item()))
            # TensorBoard Logs
            writer.add_scalar("training/loss_topband_itr", losses[0].item(),
                              engine.state.iteration)
            writer.add_scalar("training/loss_midband_itr", losses[1].item(),
                              engine.state.iteration)
            writer.add_scalar("training/loss_lowband_itr", losses[2].item(),
                              engine.state.iteration)
            writer.add_scalar("training/loss_global_itr", losses[3].item(),
                              engine.state.iteration)

    # Log the events in Ignite: Test the training data on EVERY EPOCH
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)
        print(
            "Training Results - Epoch: {}  Global accuracy: {:.2f} Avg loss: {:.2f}"
            .format(engine.state.epoch,
                    evaluator.state.metrics['acc_globalclassifier'],
                    evaluator.state.metrics['loss_globalclassifier']))
        # TensorBoard Logs
        writer.add_scalar("training/global_loss",
                          evaluator.state.metrics['loss_globalclassifier'],
                          engine.state.epoch)
        writer.add_scalar("training/lowband_loss",
                          evaluator.state.metrics['loss_lowband'],
                          engine.state.epoch)
        writer.add_scalar("training/midband_loss",
                          evaluator.state.metrics['loss_midband'],
                          engine.state.epoch)
        writer.add_scalar("training/highband_loss",
                          evaluator.state.metrics['loss_highband'],
                          engine.state.epoch)
        writer.add_scalar("training/global_acc",
                          evaluator.state.metrics['acc_globalclassifier'],
                          engine.state.epoch)
        writer.add_scalar("training/lowband_acc",
                          evaluator.state.metrics['acc_lowband'],
                          engine.state.epoch)
        writer.add_scalar("training/midband_acc",
                          evaluator.state.metrics['acc_midband'],
                          engine.state.epoch)
        writer.add_scalar("training/highband_acc",
                          evaluator.state.metrics['acc_highband'],
                          engine.state.epoch)

    # Log the events in Ignite: Test the validation data on EVERY EPOCH
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        print(
            "Validation Results - Epoch: {}  Global accuracy: {:.2f} Avg loss: {:.2f}"
            .format(engine.state.epoch,
                    evaluator.state.metrics['acc_globalclassifier'],
                    evaluator.state.metrics['loss_globalclassifier']))
        # TensorBoard Logs
        writer.add_scalar("validation/global_loss",
                          evaluator.state.metrics['loss_globalclassifier'],
                          engine.state.epoch)
        writer.add_scalar("validation/lowband_loss",
                          evaluator.state.metrics['loss_lowband'],
                          engine.state.epoch)
        writer.add_scalar("validation/midband_loss",
                          evaluator.state.metrics['loss_midband'],
                          engine.state.epoch)
        writer.add_scalar("validation/highband_loss",
                          evaluator.state.metrics['loss_highband'],
                          engine.state.epoch)
        writer.add_scalar("validation/global_acc",
                          evaluator.state.metrics['acc_globalclassifier'],
                          engine.state.epoch)
        writer.add_scalar("validation/lowband_acc",
                          evaluator.state.metrics['acc_lowband'],
                          engine.state.epoch)
        writer.add_scalar("validation/midband_acc",
                          evaluator.state.metrics['acc_midband'],
                          engine.state.epoch)
        writer.add_scalar("validation/highband_acc",
                          evaluator.state.metrics['acc_highband'],
                          engine.state.epoch)

    # kick everything off
    trainer.run(train_loader, max_epochs=epochs)

    # close the writer
    writer.close()

    # return the model
    return model
Example #5
def multiclass_train_lstm(
    model: LstmClassifier,
    dataloader_train: DataLoader,
    dataloader_val: DataLoader,
    filename_prefix: str,
):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-4,
                                 weight_decay=1e-3)
    criterion = CrossEntropyLossOneHot()

    def process_function(_engine, batch):
        model.train()
        optimizer.zero_grad()
        x, y = batch
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        return y_pred, y, loss.item(),

    def eval_function(_engine, batch):
        model.eval()
        with torch.no_grad():
            x, y = batch
            y = y.to(device)
            x = x.to(device)
            y_pred = model(x)
            return y_pred, y

    def score_function(engine):
        return engine.state.metrics['top3-accuracy']

    model.to(device)

    trainer = Engine(process_function)
    train_evaluator = Engine(eval_function)
    validation_evaluator = Engine(eval_function)

    accuracy_top1 = Accuracy(output_transform=lambda x: (x[0], x[1]),
                             device=device,
                             is_multilabel=True)
    accuracy_top3 = TopKCategoricalAccuracy(output_transform=lambda x:
                                            (x[0], x[1]),
                                            k=3,
                                            device=device)

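    # running averages attached to the trainer feed the progress bar below (metric_names='all')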
    RunningAverage(accuracy_top1).attach(trainer, 'accuracy')
    RunningAverage(accuracy_top3).attach(trainer, 'top3-accuracy')
    RunningAverage(output_transform=lambda x: x[2]).attach(trainer, 'loss')

    accuracy_top1.attach(train_evaluator, 'accuracy')
    accuracy_top3.attach(train_evaluator, 'top3-accuracy')
    Loss(criterion).attach(train_evaluator, 'loss')

    accuracy_top1.attach(validation_evaluator, 'accuracy')
    accuracy_top3.attach(validation_evaluator, 'top3-accuracy')
    Loss(criterion).attach(validation_evaluator, 'loss')

    pbar = ProgressBar(persist=True, bar_format="")
    pbar.attach(engine=trainer, metric_names='all')

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        train_evaluator.run(dataloader_train)
        message = f'Training results - Epoch: {engine.state.epoch}.'
        for metric_name, score in train_evaluator.state.metrics.items():
            message += f' {metric_name}: {score:.2f}.'
        pbar.log_message(message)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        validation_evaluator.run(dataloader_val)
        message = f'Validation results - Epoch: {engine.state.epoch}.'
        for metric_name, score in validation_evaluator.state.metrics.items():
            message += f' {metric_name}: {score:.2f}.'
        pbar.log_message(message)
        pbar.n = pbar.last_print_n = 0

    validation_evaluator.add_event_handler(
        Events.COMPLETED,
        EarlyStopping(patience=5,
                      score_function=score_function,
                      trainer=trainer))

    checkpointer = ModelCheckpoint(dirname=DIR_MODELS,
                                   filename_prefix=filename_prefix,
                                   score_function=score_function,
                                   score_name='top3-accuracy',
                                   n_saved=2,
                                   create_dir=True,
                                   save_as_state_dict=True,
                                   require_empty=False)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer,
                              {'v2': model})

    trainer.run(dataloader_train, max_epochs=20)
Example #6
def prune_train_loop(model,
                     params,
                     ds,
                     dset,
                     min_y,
                     base_data,
                     model_id,
                     prune_type,
                     device,
                     batch_size,
                     tpa,
                     max_epochs=2):
    assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = tpa
    ds_train, ds_valid = ds
    train_set, valid_set = dset
    min_y_train, min_y_val = min_y
    model_id = f'{model_id}_{prune_type}_pruning_{tpa}'
    valid_freq = 200 * 500 // batch_size // 3

    conv_layers = [model.conv1]

    def prune_model(model):
        #         remove_amount = total_prune_amount // (max_epochs)
        remove_amount = total_prune_amount
        print(f'pruned model by {remove_amount}')
        worst = select_filters(model, ds_valid, valid_set, remove_amount,
                               device)
        worst = [
            k
            for k in Counter(torch.stack(worst).view(-1).cpu().numpy()).keys()
        ]
        worst.sort(reverse=True)
        print(worst)
        for layer in conv_layers:
            for d in worst:
                TuckerStructured(layer, name='weight', amount=0, dim=0, filt=d)
        return worst

    bad = prune_model(model)
    zeros = []
    wrong = []
    for i in range(len(model.conv1.weight_mask)):
        if torch.sum(model.conv1.weight_mask[i]) == 0.0:
            zeros.append(i)
    zeros.sort(reverse=True)
    if zeros == bad:
        print("correctly zero'd filters")
    else:
        if len(zeros) == len(bad):
            for i in range(len(zeros)):
                if zeros[i] != bad[i]:
                    wrong.append((bad[i], zeros[i]))
            print(wrong)
        else:
            print("diff number filters zero'd", zeros)
    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            optimizer.zero_grad()
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask  # make sure pruned weights stay 0
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        #         @trainer.on(Events.ITERATION_COMPLETED)
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

#             prune_model(model)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 100) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(Events.ITERATION_COMPLETED(every=valid_freq),
                                  handler)
        trainer.run(ds_train, max_epochs=max_epochs)
Example #7
def adv_train_loop(model,
                   params,
                   ds,
                   min_y,
                   base_data,
                   model_id,
                   attack_type,
                   device,
                   batch_size,
                   max_epochs=5):
    print('training adversarial:', attack_type)
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(
        model)  # used to generate adv images for the trained model
    original_model.eval()
    model = copy.deepcopy(
        model)  # making a copy so that original model is not changed
    model = model.to(device)
    model_id = f'{model_id}_{attack_type}'

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        classifier = PyTorchClassifier(
            model=original_model,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optimizer,
            input_shape=(3, 64, 64),
            nb_classes=200,
        )

        attack = None

        #         if attack_type == "fgsm":
        #             attack = FastGradientMethod(estimator=classifier, eps=0.2)
        #         elif attack_type == "bim":
        #             attack = BasicIterativeMethod(estimator=classifier, eps=0.2)
        #         elif attack_type == "carlini":
        #             attack = CarliniLInfMethod(classifier=classifier)
        #         elif attack_type == "deepfool":
        #             attack = DeepFool(classifier=classifier)
        if attack_type == "fgsm":
            attack = GradientSignAttack(model, loss_fn=loss, eps=0.2)
        elif attack_type == "ffa":
            attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3)
        elif attack_type == "carlini":
            attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000)
        elif attack_type == "lbfgs":
            attack = DeepFool(classifier=classifier)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with ctx_noparamgrad_and_eval(model):
                x_adv = attack.perturb(x, y)
            optimizer.zero_grad()
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            #             return ans, y
            return l.item()

        trainer = Engine(train_step)

        #         acc_metric.attach(trainer, "accuracy")
        #         loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

#         @trainer.on(Events.EPOCH_COMPLETED)
#         def log_training_results(engine):
#             train_evaluator.run(ds_train)
#             metrics = train_evaluator.state.metrics
#             # metrics = engine.state.metrics
#             avg_accuracy = metrics['accuracy']
#             avg_nll = metrics['loss']
#             print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
#                   .format(engine.state.epoch, avg_accuracy, avg_nll))
#             writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
#             writer.add_scalar("training/avg_accuracy",
#                               avg_accuracy, engine.state.epoch)
#             writer.add_scalar("training/avg_error", 1. -
#                               avg_accuracy, engine.state.epoch)

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
Example #8
def create_evaluator(model, cfg):
    def _validation_step(_, batch):
        model.eval()
        with torch.no_grad():
            x_char, x_type, y_word, y_syllable = batch_to_tensor(batch, cfg)
            x_char, x_type, y_word, y_syllable = (t.to(
                cfg.device) for t in [x_char, x_type, y_word, y_syllable])

            logits_word, logits_syllable = model(x_char, x_type)
            loss, word_loss, syllable_loss, align_loss = model.joint_loss(
                logits_word, y_word, logits_syllable, y_syllable)

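            # threshold the logits at 0.5 so the word/syllable Accuracy, Precision and Recall metrics receive binary predictions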
            return ((logits_word > 0.5).long(), y_word,
                    (logits_syllable > 0.5).long(), y_syllable, loss,
                    word_loss, syllable_loss, align_loss)

    evaluator = Engine(_validation_step)

    w_loss = Accuracy(lambda x: x[0:2])
    w_loss.attach(evaluator, 'w_acc')

    s_acc = Accuracy(lambda x: x[2:4])
    s_acc.attach(evaluator, 's_acc')

    Average(lambda x: x[4]).attach(evaluator, 'loss')
    Average(lambda x: x[5]).attach(evaluator, 'w_loss')
    Average(lambda x: x[6]).attach(evaluator, 's_loss')
    Average(lambda x: x[7]).attach(evaluator, 'a_loss')

    accuracy = Accuracy(lambda x: x[0:2])
    accuracy.attach(evaluator, "acc")

    w_precision = Precision(lambda x: x[0:2])
    w_precision.attach(evaluator, 'WP')
    MetricsLambda(lambda t: torch.mean(t).item(),
                  w_precision).attach(evaluator, "WMP")

    s_precision = Precision(lambda x: x[2:4])
    s_precision.attach(evaluator, 'SP')
    MetricsLambda(lambda t: torch.mean(t).item(),
                  s_precision).attach(evaluator, "SMP")

    w_recall = Recall(lambda x: x[0:2])
    w_recall.attach(evaluator, 'WR')
    MetricsLambda(lambda t: torch.mean(t).item(),
                  w_recall).attach(evaluator, "WMR")

    s_recall = Recall(lambda x: x[2:4])
    s_recall.attach(evaluator, 'SR')
    MetricsLambda(lambda t: torch.mean(t).item(),
                  s_recall).attach(evaluator, "SMR")

    w_f1 = 2. * w_precision * w_recall / (w_precision + w_recall + 1e-20)
    w_f1 = MetricsLambda(lambda t: torch.mean(t).item(), w_f1)
    w_f1.attach(evaluator, "WF1")

    s_f1 = 2. * s_precision * s_recall / (s_precision + s_recall + 1e-20)
    s_f1 = MetricsLambda(lambda t: torch.mean(t).item(), s_f1)
    s_f1.attach(evaluator, "SF1")

    return evaluator
Example #9
def prune_train_loop(model,
                     params,
                     ds,
                     min_y,
                     base_data,
                     model_id,
                     prune_type,
                     device,
                     batch_size,
                     max_epochs=5):
    assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = 0.3 if prune_type == 'global_unstructured' else 0.1
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    model_id = f'{model_id}_{prune_type}_pruning'

    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend(
                [bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])

    def prune_model(model):
        remove_amount = total_prune_amount / (max_epochs * 10)
        print(f'pruned model by {remove_amount}')
        if prune_type == 'global_unstructured':
            parameters_to_prune = [(layer, 'weight') for layer in conv_layers]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=remove_amount,
            )
        else:
            for layer in conv_layers:
                prune.ln_structured(layer,
                                    name='weight',
                                    amount=remove_amount,
                                    n=1,
                                    dim=0)

    prune_model(model)

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            optimizer.zero_grad()
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask  # make sure pruned weights stay 0
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

            prune_model(model)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['accuracy']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
Example #10
def validation_step(engine, batch):
    # return torch.rand(16, 101), torch.zeros(16).long()  # debug
    model.eval()
    with torch.no_grad():
        video, class_num = batch["video"].cuda(), batch["class"].cuda()
        pred = model(video)
        pred = F.softmax(pred, dim=1)
        # torch.cuda.empty_cache()

    return pred, class_num


evaluator = Engine(validation_step)

accuracy_metric = Accuracy()
accuracy_metric.attach(evaluator, "accuracy")
ce_loss_metric = Loss(ce_loss_fn)
ce_loss_metric.attach(evaluator, "loss")


@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    e = engine.state.epoch
    i = engine.state.iteration
    loss = engine.state.output
    print(f"Epoch: {e} / {cfg.epochs} : {i} - Loss: {loss:.5f}")
    # if wandb_online:
    #   wandb.log({"loss": loss})


@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    # run the evaluator over the validation data once per epoch
    # (assumes a `val_loader` DataLoader is defined elsewhere in the script)
    evaluator.run(val_loader)
    metrics = evaluator.state.metrics
    print(f"Epoch: {engine.state.epoch} - Accuracy: {metrics['accuracy']:.5f}, Loss: {metrics['loss']:.5f}")
Example #11
def eval_step(engine, batch):
    model.eval()

    x, y = batch

    x.requires_grad_(True)

    z, y_pred = model(x)

    return y_pred, y, x, z


trainer = Engine(step)
evaluator = Engine(eval_step)

metric = Accuracy(output_transform=output_transform_acc)
metric.attach(evaluator, "accuracy")

metric = Loss(F.binary_cross_entropy, output_transform=output_transform_bce)
metric.attach(evaluator, "bce")

metric = Loss(calc_gradient_penalty, output_transform=output_transform_gp)
metric.attach(evaluator, "gp")

ds_train = torch.utils.data.TensorDataset(
    torch.from_numpy(X_train).float(),
    F.one_hot(torch.from_numpy(y_train)).float())
dl_train = torch.utils.data.DataLoader(ds_train,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       drop_last=True)
Example #12
def train(name, load, lrate, weight_decay, workers, smooth, device, validation,
          ground_truth):

    if not name:
        name = '{}_{}'.format(lrate, weight_decay)
    click.echo('model output name: {}'.format(name))

    torch.set_num_threads(1)

    train_set = BaselineSet(glob.glob('{}/**/*.seeds.png'.format(ground_truth),
                                      recursive=True),
                            smooth=smooth)
    train_data_loader = DataLoader(dataset=train_set,
                                   num_workers=workers,
                                   batch_size=1,
                                   shuffle=True,
                                   pin_memory=True)
    val_set = BaselineSet(glob.glob('{}/**/*.seeds.png'.format(validation),
                                    recursive=True),
                          smooth=smooth)
    val_data_loader = DataLoader(dataset=val_set,
                                 num_workers=workers,
                                 batch_size=1,
                                 pin_memory=True)

    click.echo('loading network')
    model = ResUNet(refine_encoder=False).to(device)

    if load:
        click.echo('loading weights')
        model = torch.load(load, map_location=device)

    criterion = nn.BCEWithLogitsLoss()
    opti = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                      lr=lrate,
                      weight_decay=weight_decay)

    def score_function(engine):
        val_loss = engine.state.metrics['loss']
        return -val_loss

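    # binarize the raw network output (sigmoid + hysteresis threshold) before the detection metrics see it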
    def output_preprocess(output):
        o, target = output
        o = torch.sigmoid(o)
        o = denoising_hysteresis_thresh(o.detach().squeeze().cpu().numpy(),
                                        0.8, 0.9, 2.5)
        return torch.from_numpy(o.astype('f')).unsqueeze(0).unsqueeze(0).to(
            device), target.double().to(device)

    trainer = create_supervised_trainer(model,
                                        opti,
                                        criterion,
                                        device=device,
                                        non_blocking=True)
    accuracy = Accuracy(output_transform=output_preprocess)
    precision = Precision(output_transform=output_preprocess, average=False)
    recall = Recall(output_transform=output_preprocess, average=False)
    loss = Loss(criterion)
    f1 = (precision * recall * 2 / (precision + recall)).mean()

    evaluator = create_supervised_evaluator(model,
                                            device=device,
                                            non_blocking=True)

    accuracy.attach(evaluator, 'accuracy')
    precision.attach(evaluator, 'precision')
    recall.attach(evaluator, 'recall')
    loss.attach(evaluator, 'loss')
    f1.attach(evaluator, 'f1')

    ckpt_handler = ModelCheckpoint('.',
                                   name,
                                   save_interval=1,
                                   n_saved=10,
                                   require_empty=False)
    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

    progress_bar = ProgressBar(persist=True)
    progress_bar.attach(trainer, ['loss'])

    trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED,
                              handler=ckpt_handler,
                              to_save={'net': model})
    trainer.add_event_handler(event_name=Events.ITERATION_COMPLETED,
                              handler=TerminateOnNan())

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_data_loader)
        metrics = evaluator.state.metrics
        progress_bar.log_message(
            'eval results - epoch {} loss: {:.4f} f1: {:.4f}, accuracy: {:.4f} recall: {:.4f} precision {:.4f}'
            .format(engine.state.epoch, metrics['loss'], metrics['f1'],
                    metrics['accuracy'], metrics['recall'],
                    metrics['precision']))

    trainer.run(train_data_loader, max_epochs=1000)
Example #13
    def create_zero_shot_eval_engine(self, model, zero_shot_label,
                                     model_mapping, label_mapping,
                                     is_test_multilabel, cpu):

        # Iterate through all labels in both the train and test sets to see which labels correspond to the zero shot label (the unifying label)
        model_target_int = [
            int for label, int in model_mapping.items()
            if zero_shot_label in label.lower()
        ]
        label_target_int = [
            int for label, int in label_mapping.items()
            if zero_shot_label in label.lower()
        ]

        # There should only be one unifying label in each dataset (Possible TODO: Allow multiple labels to map to one unifying label)
        assert len(
            model_target_int
        ) == 1, f"Ambiguous or empty model label list when trying to map {zero_shot_label} to {model_target_int}"
        assert len(
            label_target_int
        ) == 1, f"Ambiguous or empty gold label list when trying to map {zero_shot_label} to {label_target_int}"

        model_target_int = model_target_int[0]
        label_target_int = label_target_int[0]

        def process_function(engine, batch):
            X, y = batch

            if cpu:
                pred = model(X.cpu())
                gold = y.cpu()
            else:
                pred = model(X.cuda())
                gold = y.cuda()

            # Get the softmax of the raw model output (logits)
            pred = torch.softmax(pred, dim=1)

            # Get the probability that the prediction is the target class
            pred_in_class_prob = pred[:, [model_target_int]]

            # Get all the probabilities of all the other classes outside the target class by finding the complement of the in class probability
            pred_out_class_prob = 1 - pred_in_class_prob

            # Create a combined tensor which acts as a set of probabilities for in vs out of the zero-shot target class.
            # In this, 0 is out of class, whilst 1 is in class, so the combined tensor has the out of class probabilities in the 0th column and the in-class probs in the 1st column.
            pred = torch.cat((pred_out_class_prob, pred_in_class_prob), dim=1)

            if is_test_multilabel:
                # If test task is multilabel, get the values from the appropriate column of the truth labels
                gold = gold[:, label_target_int]
            else:
                # To correspond to the tensor constructed above, we set the golds to 1 (i.e. True) if the gold label is the zero-shot label, and 0 (False) if not.
                gold = (gold == label_target_int).long()

            return pred, gold

        eval_engine = Engine(process_function)

        really_small_number = 1e-10

        accuracy = Accuracy()
        accuracy.attach(eval_engine, "accuracy")
        recall = Recall()
        recall.attach(eval_engine, "recall")
        precision = Precision()
        precision.attach(eval_engine, "precision")
        f1 = (precision * recall * 2 /
              (precision + recall + really_small_number))
        f1.attach(eval_engine, "f1")
        f2 = (precision * recall * 5 /
              ((4 * precision) + recall + really_small_number))
        f2.attach(eval_engine, "f2")

        avg_recall = Recall(average=True)
        avg_recall.attach(eval_engine, "average recall")
        avg_precision = Precision(average=True)
        avg_precision.attach(eval_engine, "average precision")
        avg_f1 = (avg_precision * avg_recall * 2 /
                  (avg_precision + avg_recall + really_small_number))
        avg_f1.attach(eval_engine, "average f1")
        avg_f2 = (avg_precision * avg_recall * 5 /
                  ((4 * avg_precision) + avg_recall + really_small_number))
        avg_f2.attach(eval_engine, "average f2")

        return eval_engine
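
For reference, the metric arithmetic above implements the usual F-beta score, F_beta = (1 + beta^2) * P * R / (beta^2 * P + R), with beta = 1 for f1 and beta = 2 for f2 (plus a small epsilon to guard against division by zero). A usage sketch follows; evaluator_factory, mappings and test_loader are illustrative placeholders, not names from the original source:

# Illustrative only: these names stand in for whatever object owns
# create_zero_shot_eval_engine and for the data it evaluates on.
eval_engine = evaluator_factory.create_zero_shot_eval_engine(
    model=model,
    zero_shot_label="positive",
    model_mapping=mappings["model"],
    label_mapping=mappings["gold"],
    is_test_multilabel=False,
    cpu=True,
)
state = eval_engine.run(test_loader)
print(state.metrics["f1"], state.metrics["f2"])
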
Example #14
0
def train_loop(model,
               params,
               ds,
               min_y,
               base_data,
               model_id,
               device,
               batch_size,
               max_epochs=2):
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            #             return ans, y
            return l.item()

        trainer = Engine(train_step)

        #         acc_metric.attach(trainer, "accuracy")
        #         loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_train
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_val
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # ReduceLROnPlateau defaults to mode='min', so step it with the
            # validation loss rather than the accuracy.
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['loss']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            iter = (engine.state.iteration - 1) % len(ds_train) + 1
            if (iter % 100) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, iter, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

#         @trainer.on(Events.EPOCH_COMPLETED)
#         def log_training_results(engine):
#             train_evaluator.run(ds_train)
#             metrics = train_evaluator.state.metrics
#             # metrics = engine.state.metrics
#             avg_accuracy = metrics['accuracy']
#             avg_nll = metrics['loss']
#             print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
#                   .format(engine.state.epoch, avg_accuracy, avg_nll))
#             writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
#             writer.add_scalar("training/avg_accuracy",
#                               avg_accuracy, engine.state.epoch)
#             writer.add_scalar("training/avg_error", 1. -
#                               avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def validation_value(engine):
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            return valid_avg_accuracy

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 5),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
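
One detail worth noting in train_loop: ReduceLROnPlateau defaults to mode='min', which is why the scheduler above is stepped with the validation loss. If the intention is to plateau on validation accuracy instead, the scheduler needs mode='max'. A minimal sketch reusing the names from train_loop (optimizer, trainer, valid_evaluator), assuming valid_evaluator has already been run for the epoch:

import torch
from ignite.engine import Events

sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=5)

@trainer.on(Events.EPOCH_COMPLETED)
def step_scheduler(engine):
    # With mode='max' the learning rate is reduced when validation accuracy
    # stops improving.
    sched.step(valid_evaluator.state.metrics['accuracy'])
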
def train_model(l_gradient_penalty, length_scale, final_model, epochs,
                input_dep_ls, use_grad_norm):

    input_size = 28
    num_classes = 10
    embedding_size = 256
    learnable_length_scale = False  #Learnable length scale
    gamma = 0.999

    if input_dep_ls and learnable_length_scale:  #only one can be True
        learnable_length_scale = False

    ## Main (FashionMNIST) and OOD (MNIST) datasets
    dataset = FastFashionMNIST("data/", train=True, download=True)
    test_dataset = FastFashionMNIST("data/", train=False, download=True)
    idx = list(range(60000))
    random.shuffle(idx)

    if final_model:
        train_dataset = dataset
        val_dataset = test_dataset
    else:
        train_dataset = torch.utils.data.Subset(dataset, indices=idx[:55000])
        val_dataset = torch.utils.data.Subset(dataset, indices=idx[55000:])

    dl_train = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=128,
                                           shuffle=True,
                                           num_workers=0,
                                           drop_last=True)

    dl_val = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=2000,
                                         shuffle=False,
                                         num_workers=0)

    dl_test = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=2000,
                                          shuffle=False,
                                          num_workers=0)

    # Model
    global model
    model = CNN_DUQ(input_size, num_classes, embedding_size,
                    learnable_length_scale, length_scale, gamma, input_dep_ls)

    model = model.cuda()
    #model.load_state_dict(torch.load("DUQ_FM_30_FULL.pt"))

    # Optimiser
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.05,
                                momentum=0.9,
                                weight_decay=1e-4)

    def output_transform_bce(output):
        y_pred, y, _, _ = output
        return y_pred, y

    def output_transform_acc(output):
        y_pred, y, _, _ = output
        return y_pred, torch.argmax(y, dim=1)

    def output_transform_gp(output):
        y_pred, y, x, y_pred_sum = output
        return x, y_pred_sum

    def calc_gradient_penalty(x, y_pred_sum):
        gradients = torch.autograd.grad(
            outputs=y_pred_sum,
            inputs=x,
            grad_outputs=torch.ones_like(y_pred_sum),
            create_graph=True,
            retain_graph=True,
        )[0]

        gradients = gradients.flatten(start_dim=1)

        # L2 norm
        grad_norm = gradients.norm(2, dim=1)

        # Two sided penalty
        gradient_penalty = ((grad_norm - 1)**2).mean()

        return gradient_penalty

    def step(engine, batch):
        model.train()
        optimizer.zero_grad()

        x, y = batch
        y = F.one_hot(y, num_classes=10).float()

        x, y = x.cuda(), y.cuda()

        x.requires_grad_(True)

        z, y_pred = model(x)

        loss = F.binary_cross_entropy(y_pred, y)
        loss += l_gradient_penalty * calc_gradient_penalty(x, y_pred.sum(1))

        if use_grad_norm:
            #gradient normalization
            loss /= (1 + l_gradient_penalty)

        x.requires_grad_(False)

        loss.backward()
        optimizer.step()

        with torch.no_grad():
            model.eval()
            model.update_embeddings(x, y)

        return loss.item()

    def eval_step(engine, batch):
        model.eval()

        x, y = batch
        y = F.one_hot(y, num_classes=10).float()

        x, y = x.cuda(), y.cuda()

        x.requires_grad_(True)

        z, y_pred = model(x)

        return y_pred, y, x, y_pred.sum(1)

    trainer = Engine(step)
    evaluator = Engine(eval_step)

    metric = Accuracy(output_transform=output_transform_acc)
    metric.attach(evaluator, "accuracy")

    metric = Loss(F.binary_cross_entropy,
                  output_transform=output_transform_bce)
    metric.attach(evaluator, "bce")

    metric = Loss(calc_gradient_penalty, output_transform=output_transform_gp)
    metric.attach(evaluator, "gradient_penalty")

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=[10, 20],
                                                     gamma=0.2)

    pbar = ProgressBar()
    pbar.attach(trainer)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_results(trainer):
        scheduler.step()

        # log results every 5 epochs
        if trainer.state.epoch % 5 == 0:
            evaluator.run(dl_val)

            # AUROC on FashionMNIST + Mnist / NotMnist
            accuracy, roc_auc_mnist = get_fashionmnist_mnist_ood(model)
            accuracy, roc_auc_notmnist = get_fashionmnist_notmnist_ood(model)
            metrics = evaluator.state.metrics

            print(f"Validation Results - Epoch: {trainer.state.epoch} "
                  f"Val_Acc: {metrics['accuracy']:.4f} "
                  f"BCE: {metrics['bce']:.2f} "
                  f"GP: {metrics['gradient_penalty']:.6f} "
                  f"AUROC MNIST: {roc_auc_mnist:.4f} "
                  f"AUROC NotMNIST: {roc_auc_notmnist:.2f} ")
            #print(f"Sigma: {model.sigma}")

    # Train
    trainer.run(dl_train, max_epochs=epochs)

    # Validation
    evaluator.run(dl_val)
    val_accuracy = evaluator.state.metrics["accuracy"]

    # Test
    evaluator.run(dl_test)
    test_accuracy = evaluator.state.metrics["accuracy"]

    return model, val_accuracy, test_accuracy
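
The two-sided gradient penalty in train_model pushes the L2 norm of d(sum of predictions)/d(input) towards 1. It can be checked in isolation; a small self-contained sketch using a linear map whose input gradient is known exactly:

import torch

def two_sided_gradient_penalty(x, y_pred_sum):
    # Same penalty as calc_gradient_penalty above: penalise deviations of the
    # per-sample input-gradient norm from 1.
    gradients = torch.autograd.grad(
        outputs=y_pred_sum,
        inputs=x,
        grad_outputs=torch.ones_like(y_pred_sum),
        create_graph=True,
        retain_graph=True,
    )[0]
    grad_norm = gradients.flatten(start_dim=1).norm(2, dim=1)
    return ((grad_norm - 1) ** 2).mean()

# Toy check: for y = sum(x * w) the gradient w.r.t. x is w for every sample,
# so the penalty equals (||w||_2 - 1)^2 = (sqrt(3) - 1)^2 here.
x = torch.randn(4, 3, requires_grad=True)
w = torch.ones(3)
y_sum = (x * w).sum(dim=1)
print(two_sided_gradient_penalty(x, y_sum).item())
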
Example #16
0
def train(model,
          train_loader,
          eval_loaders,
          optimizer,
          loss_fn,
          n_it_max,
          patience,
          split_names,
          select_metric='Val accuracy_0',
          select_mode='max',
          viz=None,
          device='cpu',
          lr_scheduler=None,
          name=None,
          log_steps=None,
          log_epoch=False,
          _run=None,
          prepare_batch=_prepare_batch,
          single_pass=False,
          n_ep_max=None):

    # print(model)

    if not log_steps and not log_epoch:
        logger.warning('/!\\ No logging during training /!\\')

    if log_steps is None:
        log_steps = []

    epoch_steps = len(train_loader)
    if log_epoch:
        log_steps.append(epoch_steps)

    if single_pass:
        max_epoch = 1
    elif n_ep_max is None:
        assert n_it_max is not None
        max_epoch = int(n_it_max / epoch_steps) + 1
    else:
        assert n_it_max is None
        max_epoch = n_ep_max

    all_metrics = defaultdict(dict)
    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        loss_fn,
                                        device=device,
                                        prepare_batch=prepare_batch)

    if hasattr(model, 'new_epoch_hook'):
        trainer.add_event_handler(Events.EPOCH_STARTED, model.new_epoch_hook)
    if hasattr(model, 'new_iter_hook'):
        trainer.add_event_handler(Events.ITERATION_STARTED,
                                  model.new_iter_hook)

    trainer._logger.setLevel(logging.WARNING)

    # trainer output is in the format (x, y, y_pred, loss, optionals)
    train_loss = RunningAverage(output_transform=lambda out: out[3].item(),
                                epoch_bound=True)
    train_loss.attach(trainer, 'Trainer loss')
    if hasattr(model, 's'):
        met = Average(output_transform=lambda _: float('nan')
                      if model.s is None else model.s)
        met.attach(trainer, 'cur_s')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, met.completed,
                                  'cur_s')

    if hasattr(model, 'arch_sampler') and model.arch_sampler.distrib_dim > 0:
        met = Average(output_transform=lambda _: float('nan')
                      if model.cur_split is None else model.cur_split)
        met.attach(trainer, 'Trainer split')
        trainer.add_event_handler(Events.ITERATION_COMPLETED, met.completed,
                                  'Trainer split')
        # trainer.add_event_handler(Events.EPOCH_STARTED, met.started)
        all_ent = Average(
            output_transform=lambda out: out[-1]['arch_entropy_avg'].item())
        all_ent.attach(trainer, 'Trainer all entropy')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  all_ent.completed, 'Trainer all entropy')
        train_ent = Average(
            output_transform=lambda out: out[-1]['arch_entropy_sample'].item())
        train_ent.attach(trainer, 'Trainer sampling entropy')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  train_ent.completed,
                                  'Trainer sampling entropy')
        trainer.add_event_handler(
            Events.EPOCH_COMPLETED, lambda engine: model.check_arch_freezing(
                ent=train_ent.compute(),
                epoch=engine.state.iteration / (epoch_steps * max_epoch)))

        def log_always(engine, name):
            val = engine.state.output[-1][name]
            all_metrics[name][engine.state.iteration /
                              epoch_steps] = val.mean().item()

        def log_always_dict(engine, name):
            for node, val in engine.state.output[-1][name].items():
                all_metrics['node {} {}'.format(
                    node, name)][engine.state.iteration /
                                 epoch_steps] = val.mean().item()

        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  log_always_dict,
                                  name='arch_grads')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  log_always_dict,
                                  name='arch_probas')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  log_always_dict,
                                  name='node_grads')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  log_always,
                                  name='task all_loss')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  log_always,
                                  name='arch all_loss')
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  log_always,
                                  name='entropy all_loss')

    if n_it_max is not None:
        StopAfterIterations([n_it_max]).attach(trainer)
    # epoch_pbar = ProgressBar(bar_format='{l_bar}{bar}{r_bar}', desc=name,
    #                          persist=True, disable=not (_run or viz))
    # epoch_pbar.attach(trainer, metric_names=['Train loss'])
    #
    # training_pbar = ProgressBar(bar_format='{l_bar}{bar}{r_bar}', desc=name,
    #                             persist=True, disable=not (_run or viz))
    # training_pbar.attach(trainer, event_name=Events.EPOCH_COMPLETED,
    #                      closing_event_name=Events.COMPLETED)
    total_time = Timer(average=False)
    eval_time = Timer(average=False)
    eval_time.pause()
    data_time = Timer(average=False)
    forward_time = Timer(average=False)
    forward_time.attach(trainer,
                        start=Events.EPOCH_STARTED,
                        pause=Events.ITERATION_COMPLETED,
                        resume=Events.ITERATION_STARTED,
                        step=Events.ITERATION_COMPLETED)
    epoch_time = Timer(average=False)
    epoch_time.attach(trainer,
                      start=Events.EPOCH_STARTED,
                      pause=Events.EPOCH_COMPLETED,
                      resume=Events.EPOCH_STARTED,
                      step=Events.EPOCH_COMPLETED)

    def get_loss(y_pred, y):
        l = loss_fn(y_pred, y)
        if not torch.is_tensor(l):
            l, *l_details = l
        return l.mean()

    def get_member(x, n=0):
        if isinstance(x, (list, tuple)):
            return x[n]
        return x

    eval_metrics = {'loss': Loss(get_loss)}

    for i in range(model.n_out):
        out_trans = get_attr_transform(i)

        def extract_ys(out):
            x, y, y_pred, loss, _ = out
            return out_trans((y_pred, y))

        train_acc = Accuracy(extract_ys)
        train_acc.attach(trainer, 'Trainer accuracy_{}'.format(i))
        trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                  train_acc.completed,
                                  'Trainer accuracy_{}'.format(i))
        eval_metrics['accuracy_{}'.format(i)] = \
            Accuracy(output_transform=out_trans)
        # if isinstance(model, SSNWrapper):
        #     model.arch_sampler.entropy().mean()

    evaluator = create_supervised_evaluator(model,
                                            metrics=eval_metrics,
                                            device=device,
                                            prepare_batch=prepare_batch)
    last_iteration = 0
    patience_counter = 0

    best = {
        'value': float('inf') if select_mode == 'min' else -float('inf'),
        'iter': -1,
        'state_dict': None
    }

    def is_better(new, old):
        if select_mode == 'min':
            return new < old
        else:
            return new > old

    def log_results(evaluator, data_loader, iteration, split_name):
        evaluator.run(data_loader)
        metrics = evaluator.state.metrics

        log_metrics = {}

        for metric_name, metric_val in metrics.items():
            log_name = '{} {}'.format(split_name, metric_name)
            if viz:
                first = iteration == 0 and split_name == split_names[0]
                viz.line(
                    [metric_val],
                    X=[iteration],
                    win=metric_name,
                    name=log_name,
                    update=None if first else 'append',
                    opts={
                        'title': metric_name,
                        'showlegend': True,
                        'width': 500,
                        'xlabel': 'iterations'
                    })
                viz.line(
                    [metric_val],
                    X=[iteration / epoch_steps],
                    win='{}epoch'.format(metric_name),
                    name=log_name,
                    update=None if first else 'append',
                    opts={
                        'title': metric_name,
                        'showlegend': True,
                        'width': 500,
                        'xlabel': 'epoch'
                    })
            if _run:
                _run.log_scalar(log_name, metric_val, iteration)
            log_metrics[log_name] = metric_val
            all_metrics[log_name][iteration] = metric_val

        return log_metrics

    if lr_scheduler is not None:

        @trainer.on(Events.EPOCH_COMPLETED)
        def step(_):
            lr_scheduler.step()
            # logger.warning('current lr {:.5e}'.format(
            #     optimizer.param_groups[0]['lr']))

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_event(trainer):
        iteration = trainer.state.iteration if trainer.state else 0
        nonlocal last_iteration, patience_counter, best

        if not log_steps or not \
                (iteration in log_steps or iteration % log_steps[-1] == 0):
            return
        epoch_time.pause()
        eval_time.resume()
        all_metrics['training_epoch'][iteration] = iteration / epoch_steps
        all_metrics['training_iteration'][iteration] = iteration
        if hasattr(model, 'arch_sampler'):
            all_metrics['training_archs'][iteration] = \
                model.arch_sampler().squeeze().detach()
        # if hasattr(model, 'distrib_gen'):
        #     entropy = model.distrib_gen.entropy()
        #     all_metrics['entropy'][iteration] = entropy.mean().item()
        # if trainer.state and len(trainer.state.metrics) > 1:
        #     raise ValueError(trainer.state.metrics)
        all_metrics['data time'][iteration] = data_time.value()
        all_metrics['data time_ps'][iteration] = data_time.value() / max(
            data_time.step_count, 1.)
        all_metrics['forward time'][iteration] = forward_time.value()
        all_metrics['forward time_ps'][iteration] = forward_time.value() / max(
            forward_time.step_count, 1.)
        all_metrics['epoch time'][iteration] = epoch_time.value()
        all_metrics['epoch time_ps'][iteration] = epoch_time.value() / max(
            epoch_time.step_count, 1.)

        if trainer.state:
            # logger.warning(trainer.state.metrics)
            for metric, value in trainer.state.metrics.items():
                all_metrics[metric][iteration] = value
                if viz:
                    viz.line(
                        [value],
                        X=[iteration],
                        win=metric.split()[-1],
                        name=metric,
                        update=None if iteration == 0 else 'append',
                        opts={
                            'title': metric,
                            'showlegend': True,
                            'width': 500,
                            'xlabel': 'iterations'
                        })

        iter_this_step = iteration - last_iteration
        for d_loader, name in zip(eval_loaders, split_names):
            if name == 'Train':
                if iteration == 0:
                    all_metrics['Trainer loss'][iteration] = float('nan')
                    all_metrics['Trainer accuracy_0'][iteration] = float('nan')
                    if hasattr(model, 'arch_sampler'):
                        all_metrics['Trainer all entropy'][iteration] = float(
                            'nan')
                        all_metrics['Trainer sampling entropy'][
                            iteration] = float('nan')
                        # if hasattr(model, 'cur_split'):
                        all_metrics['Trainer split'][iteration] = float('nan')
                continue
            split_metrics = log_results(evaluator, d_loader, iteration, name)
            if select_metric not in split_metrics:
                continue
            if is_better(split_metrics[select_metric], best['value']):
                best['value'] = split_metrics[select_metric]
                best['iter'] = iteration
                best['state_dict'] = copy.deepcopy(model.state_dict())
                if patience > 0:
                    patience_counter = 0
            elif patience > 0:
                patience_counter += iter_this_step
                if patience_counter >= patience:
                    logger.info('#####')
                    logger.info('# Early stopping Run')
                    logger.info('#####')
                    trainer.terminate()
        last_iteration = iteration
        eval_time.pause()
        eval_time.step()
        all_metrics['eval time'][iteration] = eval_time.value()
        all_metrics['eval time_ps'][iteration] = eval_time.value(
        ) / eval_time.step_count
        all_metrics['total time'][iteration] = total_time.value()
        epoch_time.resume()

    log_event(trainer)

    #
    # @trainer.on(Events.EPOCH_COMPLETED)
    # def log_epoch(trainer):
    #     iteration = trainer.state.iteration if trainer.state else 0
    #     epoch = iteration/epoch_steps
    #     fw_t = forward_time.value()
    #     fw_t_ps = fw_t / forward_time.step_count
    #     d_t = data_time.value()
    #     d_t_ps = d_t / data_time.step_count
    #     e_t = epoch_time.value()
    #     e_t_ps = e_t / epoch_time.step_count
    #     ev_t = eval_time.value()
    #     ev_t_ps = ev_t / eval_time.step_count
    #     logger.warning('<{}> Epoch {}/{} finished (Forward: {:.3f}s({:.3f}), '
    #                    'data: {:.3f}s({:.3f}), epoch: {:.3f}s({:.3f}),'
    #                    ' Eval: {:.3f}s({:.3f}), Total: '
    #                    '{:.3f}s)'.format(type(model).__name__, epoch,
    #                                      max_epoch, fw_t, fw_t_ps, d_t, d_t_ps,
    #                                      e_t, e_t_ps, ev_t, ev_t_ps,
    #                                      total_time.value()))

    data_time.attach(trainer,
                     start=Events.STARTED,
                     pause=Events.ITERATION_STARTED,
                     resume=Events.ITERATION_COMPLETED,
                     step=Events.ITERATION_STARTED)

    if hasattr(model, 'iter_per_epoch'):
        model.iter_per_epoch = len(train_loader)
    trainer.run(train_loader, max_epochs=max_epoch)
    return trainer.state.iteration, all_metrics, best
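
The manual patience bookkeeping in train (patience_counter, is_better, trainer.terminate()) mirrors what ignite's built-in EarlyStopping handler does, with the caveat that EarlyStopping counts evaluation runs rather than training iterations. A rough alternative sketch, assuming an evaluator that exposes the metric named by select_metric (hard-coded to 'accuracy_0' here for illustration):

from ignite.engine import Events
from ignite.handlers import EarlyStopping

def score_function(engine):
    # EarlyStopping treats higher scores as better, matching select_mode='max'.
    return engine.state.metrics['accuracy_0']

handler = EarlyStopping(patience=10, score_function=score_function,
                        trainer=trainer)
# Check the score every time the evaluator completes a run.
evaluator.add_event_handler(Events.COMPLETED, handler)
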
Example #17
0
def run(model, criterion, optimizer, epochs=100, log_interval=10):
    vis = visdom.Visdom(env='ft_lift_ignite')

    train_loader = dataloaders['train']
    val_loader = dataloaders['test']

    # if not vis.check_connection():
    #     raise RuntimeError("Visdom server not running. Please run python -m visdom.server")

    # trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
    # evaluator = create_supervised_evaluator(model,
    #                                         metrics={'accuracy': Accuracy(criterion['label']),
    #                                                  'nll': Loss(criterion['label']),
    #                                                  'precision': Precision(average=True )},
    #                                         device=device)

    def update_model(trainer, batch):
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        #inputs, labels = _prepare_batch(batch, device=device)
        optimizer.zero_grad()
        class_output, structured_output = model(inputs)
        loss = criterion['label'](class_output, labels)+criterion['structured'](structured_output, labels)
        loss.backward()
        optimizer.step()
        return {'loss': loss.item(), 
                 'class_output': class_output, 
                 'structured_output': structured_output, 
                 #'inputs': inputs, 
                 'labels': labels}
    trainer = Engine(update_model)

    # def _prepare_batch(batch, device=None, non_blocking=False):
    #     """Prepare batch for training: pass to a device with options

    #     """
    #     x, y = batch
    #     return (convert_tensor(x, device=device, non_blocking=non_blocking),
    #             convert_tensor(y, device=device, non_blocking=non_blocking))

    def _inference(evaluator, batch):
        model.eval()
        with torch.no_grad():
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            class_output, structured_output = model(inputs)
            loss = criterion['label'](class_output, labels)+criterion['structured'](structured_output, labels)
            return {'loss': loss.item(), 
                 'class_output': class_output, 
                 'structured_output': structured_output, 
                 #'inputs': inputs, 
                 'labels': labels}
    evaluator = Engine(_inference)

    output_transform1 = lambda data: (data['class_output'], data['labels'])
    output_transform2 = lambda data: (data['structured_output'], data['labels'])

    metric_accuracy = Accuracy(output_transform=output_transform1)
    metric_accuracy.attach(evaluator, 'accuracy')

    metric_nll = Loss(criterion['label'], output_transform=output_transform1)
    metric_nll.attach(evaluator, 'nll')

    metric_precision = Precision(average=True, output_transform=output_transform1)
    metric_precision.attach(evaluator, 'precision')

    # evaluator = create_supervised_evaluator(model,
    #                                     metrics={'accuracy': Accuracy(output_transform=output_transform1),
    #                                                 'nll': Loss(criterion['label'], output_transform=output_transform1),
    #                                                 'precision': Precision(average=True, output_transform=output_transform1)},
    #                                     device=device)

    handler = ModelCheckpoint('/1116/tmp/lift_models', 'myprefix', save_interval=1, n_saved=150, require_empty=False, create_dir=True)

    train_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Loss')
    train_avg_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Average Loss')
    train_avg_accuracy_window = create_plot_window(vis, '#Iterations', 'Accuracy', 'Training Average Accuracy')
    train_avg_precision_window = create_plot_window(vis, '#Iterations', 'Precision', 'Training Average Precision')
    val_avg_loss_window = create_plot_window(vis, '#Epochs', 'Loss', 'Validation Average Loss')
    val_avg_accuracy_window = create_plot_window(vis, '#Epochs', 'Accuracy', 'Validation Average Accuracy')
    val_avg_precision_window = create_plot_window(vis, '#Epochs', 'Precision', 'Validation Average Precision')
    
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1
        if iter % log_interval == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                  "".format(engine.state.epoch, iter, len(train_loader), engine.state.output['loss']))
            vis.line(X=np.array([engine.state.iteration]),
                     Y=np.array([engine.state.output['loss']]),
                     update='append', win=train_loss_window)
    
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        avg_precision = metrics['precision']
        print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f} Avg Precision: {:.2f}"
              .format(engine.state.epoch, avg_accuracy, avg_nll, avg_precision))
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_accuracy]),
                 win=train_avg_accuracy_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_nll]),
                 win=train_avg_loss_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_precision]),
                 win=train_avg_precision_window, update='append')

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        avg_precision = metrics['precision']
        print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f} Avg Precision: {:.2f}"
              .format(engine.state.epoch, avg_accuracy, avg_nll, avg_precision))
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_accuracy]),
                 win=val_avg_accuracy_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_nll]),
                 win=val_avg_loss_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_precision]),
                 win=val_avg_precision_window, update='append')        
                 
    
    # kick everything off
    trainer.add_event_handler(Events.EPOCH_COMPLETED, handler, {'mymodel': model})
    trainer.run(train_loader, max_epochs=epochs)
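
create_plot_window is used throughout Example #17 but not defined in this excerpt. A plausible minimal implementation (an assumption, not the original helper) seeds an empty visdom line plot and returns its window handle so later calls can append to it:

import numpy as np
import visdom

def create_plot_window(vis, xlabel, ylabel, title):
    # Start the plot with a single NaN point; subsequent vis.line calls with
    # update='append' and this window handle add the real data.
    return vis.line(X=np.array([1]), Y=np.array([np.nan]),
                    opts=dict(xlabel=xlabel, ylabel=ylabel, title=title))
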