Example No. 1
def _attach_losses(engine: Engine,
                   prefix: str = "",
                   running_average: bool = False):
    # each step output is a dict; the metric reads its own key and averages it,
    # passing the tensor as both y_pred and y since the loss_fn ignores y
    loss_peaks = Loss(output_transform=lambda x:
                      (x["loss-peaks"], x["loss-peaks"]),
                      loss_fn=lambda *x: x[0].mean())
    loss_peaks.attach(engine, prefix + 'loss-peaks')
    loss_enrichment = Loss(output_transform=lambda x:
                           (x["loss-enrichment"], x["loss-enrichment"]),
                           loss_fn=lambda *x: x[0].mean())
    loss_enrichment.attach(engine, prefix + 'loss-enrichment')

    if running_average:
        RunningAverage(loss_peaks).attach(engine, prefix + 'ra-loss-peaks')
        RunningAverage(loss_enrichment).attach(engine,
                                               prefix + 'ra-loss-enrichment')
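
A minimal, runnable sketch of the pattern above (the dummy step function and batch shapes are assumptions): the engine's step returns a dict, and each Loss metric pulls out its own key, passing the tensor as both prediction and target because the loss_fn only averages its first argument.

import torch
from ignite.engine import Engine
from ignite.metrics import Loss

def dummy_eval_step(engine, batch):
    # stand-in: each step yields per-sample loss tensors under named keys
    return {"loss-peaks": torch.rand(4), "loss-enrichment": torch.rand(4)}

evaluator = Engine(dummy_eval_step)
loss_peaks = Loss(output_transform=lambda x: (x["loss-peaks"], x["loss-peaks"]),
                  loss_fn=lambda *t: t[0].mean())
loss_peaks.attach(evaluator, "loss-peaks")

state = evaluator.run([None] * 3)  # three dummy batches
print(state.metrics["loss-peaks"])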
Example No. 2
def create_basic_evaluator(model,
                           device,
                           beta=1,
                           kl_loss=kl_loss,
                           recon_loss=recon_loss,
                           **kwargs):
    def evaluate_function(engine, batch):
        model.eval()
        with torch.no_grad():
            x = batch
            x = x.to(device)

            x_recon, logstd_noise, mu_z, logstd_z = model(x)
            kw = {
                'logstd_noise': logstd_noise,
                'mu_z': mu_z,
                'logstd_z': logstd_z
            }
            return x, x_recon, kw

    evaluator = Engine(evaluate_function)

    # Registering metrics
    m1 = Loss(kl_loss,
              output_transform=lambda x: (x[2]['mu_z'], x[2]['logstd_z']))
    m2 = Loss(recon_loss,
              output_transform=lambda x: (x[0], x[1], {
                  'logstd_noise': x[2]['logstd_noise']
              }))
    m1.attach(evaluator, 'kl_loss')
    m2.attach(evaluator, 'recon_loss')

    # metric arithmetic builds a MetricsLambda: the ELBO as recon + beta * KL
    m3 = m2 + beta * m1
    m3.attach(evaluator, 'elbo_loss')

    return evaluator
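
The `m3 = m2 + beta * m1` line relies on Ignite's metric arithmetic: operators on Metric objects build a MetricsLambda that is evaluated from its parents. A self-contained sketch with assumed toy losses:

import torch
from ignite.engine import Engine
from ignite.metrics import Loss

mse = Loss(torch.nn.MSELoss())
l1 = Loss(torch.nn.L1Loss())
combo = mse + 0.5 * l1  # a MetricsLambda computed from both parents

ev = Engine(lambda e, batch: batch)  # batches are already (y_pred, y) pairs
mse.attach(ev, "mse")
l1.attach(ev, "l1")
combo.attach(ev, "combo")

batches = [(torch.rand(8, 3), torch.rand(8, 3))]
print(ev.run(batches).metrics["combo"])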
Example No. 3
def train_loop(model,
               params,
               ds,
               min_y,
               base_data,
               model_id,
               device,
               batch_size,
               max_epochs=2):
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn  # unwrap the raw loss fn (private attribute)

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            #             return ans, y
            return l.item()

        trainer = Engine(train_step)

        #         acc_metric.attach(trainer, "accuracy")
        #         loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_train
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            with torch.no_grad():
                x, y = batch
                x = x.to(device)
                y = y.to(device) - min_y_val
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['loss']  # ReduceLROnPlateau (mode='min') tracks loss
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            it = (engine.state.iteration - 1) % len(ds_train) + 1
            if (it % 100) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, it, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

#         @trainer.on(Events.EPOCH_COMPLETED)
#         def log_training_results(engine):
#             train_evaluator.run(ds_train)
#             metrics = train_evaluator.state.metrics
#             # metrics = engine.state.metrics
#             avg_accuracy = metrics['accuracy']
#             avg_nll = metrics['loss']
#             print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
#                   .format(engine.state.epoch, avg_accuracy, avg_nll))
#             writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
#             writer.add_scalar("training/avg_accuracy",
#                               avg_accuracy, engine.state.epoch)
#             writer.add_scalar("training/avg_error", 1. -
#                               avg_accuracy, engine.state.epoch)

        def validation_value(engine):
            # checkpoint score function: higher validation accuracy is better
            metrics = valid_evaluator.state.metrics
            return metrics['accuracy']

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 5),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
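
The checkpointing wiring above follows Ignite's Checkpoint/DiskSaver API; a stripped-down sketch of the same best-by-score pattern (toy model and engines, assumed './ckpts' directory):

import torch
from ignite.engine import Engine, Events
from ignite.handlers import Checkpoint, DiskSaver, global_step_from_engine

model = torch.nn.Linear(4, 2)
trainer = Engine(lambda e, b: None)
evaluator = Engine(lambda e, b: None)

handler = Checkpoint(
    {'model': model},
    DiskSaver('./ckpts', create_dir=True),
    score_function=lambda eng: eng.state.metrics.get('accuracy', 0.0),
    score_name='val_acc',
    global_step_transform=global_step_from_engine(trainer),
    n_saved=2)
evaluator.add_event_handler(Events.COMPLETED, handler)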
Example No. 4
def adv_prune_train_loop(model, params, ds, dset, min_y, base_data, model_id,
                         prune_type, device, batch_size, tpa, max_epochs=5):
    # assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = tpa
    remove_amount = tpa
    ds_train, ds_valid = ds
    train_set, valid_set = dset
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(model)
    original_model.eval()
    model_id = f'{model_id}_{prune_type}_pruning_{tpa}_l1'
    valid_freq = 200 * 500 // batch_size // 3

    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend([bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])
    conv_layers = conv_layers[:22]

    def prune_model(model):
        print(f'pruned model by {total_prune_amount}')
        if prune_type == 'global_unstructured':
            parameters_to_prune = [(layer, 'weight') for layer in conv_layers]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=total_prune_amount,
            )
        else:
            for layer in conv_layers:
                prune.ln_structured(layer,
                                    name='weight',
                                    amount=total_prune_amount,
                                    n=1,
                                    dim=0)

    prune_model(model)

    def valid_eval(model, dataset, dataloader, device, label):
        right = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for i, data in tqdm(enumerate(dataloader), total=len(dataloader)):
                data, y = data
                data = data.to(device)
                y = y.to(device) - label
                ans = model.forward(data)
                right += torch.sum(torch.eq(torch.argmax(ans, dim=1), y))
                total += y.shape[0]
        return right/total
    valid_acc = valid_eval(model, valid_set, ds_valid, device, min_y_val)
    print('initial accuracy:', valid_acc.item())
    
    with create_summary_writer(model, ds_train, base_data, model_id, device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=mom, weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

#         attack = GradientSignAttack(original_model, loss_fn=loss, eps=0.2)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
#             with ctx_noparamgrad_and_eval(model):
#                 x_adv = attack.perturb(x, y)
#             optimizer.zero_grad()
#             x = torch.cat((x, x_adv))
#             y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
#             x_adv = attack.perturb(x, y)
#             x = torch.cat((x, x_adv))
#             y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
#             x_adv = attack.perturb(x, y)
#             x = torch.cat((x, x_adv))
#             y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                  .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll, engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy, engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['loss']  # ReduceLROnPlateau (mode='min') tracks loss
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            it = (engine.state.iteration - 1) % len(ds_train) + 1
            if (it % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}"
                      .format(engine.state.epoch, it, len(ds_train), accuracy, nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy, engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output, engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'], engine.state.epoch)

        def validation_value(engine):
            # checkpoint score function: higher validation accuracy is better
            metrics = valid_evaluator.state.metrics
            return metrics['accuracy']

        to_save = {'model': model}
        handler = Checkpoint(to_save, DiskSaver(os.path.join(base_data, model_id),
                                                create_dir=True),
                             score_function=validation_value, score_name="val_acc",
                             global_step_transform=global_step_from_engine(trainer),
                             n_saved=None)

        # kick everything off
        trainer.add_event_handler(Events.ITERATION_COMPLETED(every=valid_freq), handler)
        trainer.run(ds_train, max_epochs=max_epochs)
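
A minimal sketch of the pruning mechanics this loop relies on: prune.global_unstructured registers weight_orig/weight_mask buffers on each module, and the training step re-applies the mask after every optimizer update so pruned weights stay at zero.

import torch
import torch.nn as nn
import torch.nn.utils.prune as prune

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Conv2d(8, 8, 3))
layers = [m for m in net.modules() if isinstance(m, nn.Conv2d)]
prune.global_unstructured([(l, 'weight') for l in layers],
                          pruning_method=prune.L1Unstructured,
                          amount=0.3)

# after optimizer.step(), force pruned entries back to zero, as in train_step above
with torch.no_grad():
    for l in layers:
        l.weight *= l.weight_mask
print(sum(int((l.weight == 0).sum()) for l in layers), 'weights pruned')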
Example No. 5
def train(
    trn_path: Path,
    save_dir: Path,
    dev_path: Optional[Path] = None,
    vocab_path: Optional[Path] = None,
    encoding: str = 'utf8',
    lr: float = 1e-3,
    max_epochs: int = 50,
    batch_size: int = 16,
    patience: int = 5,
    numeric: bool = False,
    device: Optional[str] = None,
) -> None:
    logging.info('Creating save directory if not exist in %s', save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    ### Read/create/load samples and vocab

    trn_samples = read_or_load_samples(trn_path, encoding=encoding)
    vocab = create_or_load_vocab(trn_samples, path=vocab_path)
    dev_samples = None
    if dev_path is not None:
        dev_samples = read_or_load_samples(dev_path,
                                           encoding=encoding,
                                           name='dev')

    ### Numericalize samples

    if not numeric:
        logging.info('Numericalizing train samples')
        trn_samples = list(vocab.apply_to(trn_samples))
        if dev_samples is not None:
            logging.info('Numericalizing dev samples')
            dev_samples = list(vocab.apply_to(dev_samples))

    ### Save vocab and samples

    fnames = ['vocab.pkl', 'train-samples.pkl', 'dev-samples.pkl']
    objs = [vocab, trn_samples]
    if dev_samples is not None:
        objs.append(dev_samples)
    for fname, obj in zip(fnames, objs):
        save_path = save_dir / fname
        logging.info('Saving to %s', save_path)
        with open(save_path, 'wb') as f:
            pickle.dump(obj, f)

    ### Create model, optimizer, and loss fn

    logging.info('Creating language model')
    padding_idx = vocab['words']['<pad>']
    max_width = get_max_filter_width([trn_samples, dev_samples])
    model = create_lm(
        len(vocab['words']),
        len(vocab['chars']),
        padding_idx=padding_idx,
        filter_widths=list(range(1, max_width)),
    )
    logging.info('Model created with %d parameters',
                 sum(p.numel() for p in model.parameters()))
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = LMLoss(padding_idx=padding_idx)

    ### Save model metadata

    metadata_path = save_dir / 'metadata.yml'
    logging.info('Saving model metadata to %s', metadata_path)
    metadata_path.write_text(dump(model), encoding='utf8')

    ### Prepare engines

    def batch2tensors(
        batch: Batch,
        device: Optional[str] = None,
        non_blocking: Optional[bool] = None,
    ) -> Tuple[dict, torch.LongTensor]:
        arr = batch.to_array(pad_with=padding_idx)
        tsr = {
            k: torch.from_numpy(v).to(device=device)
            for k, v in arr.items()
        }
        words = tsr['words'][:, :-1].contiguous()
        chars = tsr['chars'][:, :-1, :].contiguous()
        targets = tsr['words'][:, 1:].contiguous()
        return {'words': words, 'chars': chars}, targets

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        loss_fn,
                                        device=device,
                                        prepare_batch=batch2tensors)
    trn_evaluator = create_supervised_evaluator(model,
                                                device=device,
                                                prepare_batch=batch2tensors)
    dev_evaluator = create_supervised_evaluator(model,
                                                device=device,
                                                prepare_batch=batch2tensors)

    ### Attach metrics

    loss = Loss(loss_fn,
                batch_size=lambda tgt:
                (tgt != padding_idx).long().sum().item())
    ppl = MetricsLambda(math.exp, loss)
    loss.attach(trn_evaluator, 'loss')
    loss.attach(dev_evaluator, 'loss')
    ppl.attach(trn_evaluator, 'ppl')
    ppl.attach(dev_evaluator, 'ppl')

    ### Attach timers

    epoch_timer = Timer()
    epoch_timer.attach(trainer,
                       start=Events.EPOCH_STARTED,
                       pause=Events.EPOCH_COMPLETED)

    ### Attach progress bars

    trn_pbar = ProgressBar(bar_format=None, unit='batch', desc='Training')
    trn_pbar.attach(trainer,
                    output_transform=lambda loss: {
                        'loss': loss,
                        'ppl': math.exp(loss)
                    })
    eval_pbar = ProgressBar(bar_format=None, unit='sent', desc='Evaluating')
    eval_pbar.attach(trn_evaluator)
    eval_pbar.attach(dev_evaluator)

    ### Attach checkpointers

    if dev_samples is None:
        ckptr_kwargs: dict = {'save_interval': 1, 'n_saved': 5}
        ckptr_engine = trainer
    else:
        ckptr_kwargs = {
            'score_function': lambda eng: -eng.state.metrics['ppl'],
            'score_name': 'dev_ppl'
        }
        ckptr_engine = dev_evaluator
    ckptr = ModelCheckpoint(str(save_dir / 'checkpoints'),
                            'ckpt',
                            save_as_state_dict=True,
                            **ckptr_kwargs)
    ckptr_engine.add_event_handler(Events.EPOCH_COMPLETED, ckptr, {
        'model': model,
        'optimizer': optimizer
    })

    ### Attach early stopper

    if dev_samples is not None:
        early_stopper = EarlyStopping(patience,
                                      lambda eng: -eng.state.metrics['ppl'],
                                      trainer)
        dev_evaluator.add_event_handler(Events.EPOCH_COMPLETED, early_stopper)

    ### Attach custom handlers

    @trainer.on(Events.EPOCH_STARTED)
    def start_epoch(engine: Engine) -> None:
        logging.info('[Epoch %d/%d] Starting', engine.state.epoch,
                     engine.state.max_epochs)

    @trainer.on(Events.EPOCH_COMPLETED)
    def complete_epoch(engine: Engine) -> None:
        epoch = engine.state.epoch
        max_epochs = engine.state.max_epochs
        logging.info('[Epoch %d/%d] Done in %s', epoch, max_epochs,
                     timedelta(seconds=epoch_timer.value()))
        logging.info('[Epoch %d/%d] Evaluating on train corpus', epoch,
                     max_epochs)
        trn_evaluator.run(BatchIterator(trn_samples))
        if dev_samples is not None:
            logging.info('[Epoch %d/%d] Evaluating on dev corpus', epoch,
                         max_epochs)
            dev_evaluator.run(BatchIterator(dev_samples))

    @trn_evaluator.on(Events.COMPLETED)
    @dev_evaluator.on(Events.COMPLETED)
    def print_metrics(engine: Engine) -> None:
        loss = engine.state.metrics['loss']
        ppl = engine.state.metrics['ppl']
        logging.info('||| loss %.4f | ppl %.4f', loss, ppl)

    ### Start training

    iterator = ShuffleIterator(trn_samples, key=lambda s: len(s['words']))
    iterator = BatchIterator(iterator, batch_size=batch_size)
    try:
        trainer.run(iterator, max_epochs=max_epochs)
    except KeyboardInterrupt:
        logging.info('Interrupt detected, aborting training')
        trainer.terminate()
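
The perplexity metric above is just MetricsLambda applying math.exp to the attached loss; a self-contained sketch with assumed (y_pred, y) batches:

import math
import torch
from ignite.engine import Engine
from ignite.metrics import Loss, MetricsLambda

loss = Loss(torch.nn.CrossEntropyLoss())
ppl = MetricsLambda(math.exp, loss)  # ppl = exp(mean loss)

ev = Engine(lambda e, batch: batch)  # batches are already (y_pred, y) pairs
loss.attach(ev, 'loss')
ppl.attach(ev, 'ppl')

batch = (torch.randn(4, 10), torch.randint(0, 10, (4,)))
print(ev.run([batch]).metrics)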
Example No. 6
def run():
    writer = SummaryWriter()

    CUDA = Config.device
    model = Retriever()
    print(f'Initializing model on {CUDA}')
    model.to(CUDA)
    optimizer = torch.optim.Adam(model.parameters(), lr=Config.LR)
    loss_fn = torch.nn.L1Loss().to(CUDA)
    print('Creating sentence transformer')
    encoder = SentenceTransformer(Config.sentence_transformer).to(CUDA)
    for parameter in encoder.parameters():
        parameter.requires_grad = False
    print('Loading data')
    if os.path.exists('_full_dump'):
        with open('_full_dump', 'rb') as pin:
            train_loader, train_utts, val_loader, val_utts = pickle.load(pin)
    else:
        data = load_data(Config.data_source)
        train_loader, train_utts, val_loader, val_utts = get_loaders(data, encoder, Config.batch_size)
    
        with open('_full_dump', 'wb') as pout:
            pickle.dump((train_loader, train_utts, val_loader, val_utts), pout, protocol=-1)


    def train_step(engine, batch):
        model.train()
        optimizer.zero_grad()
        x, not_ys, y = batch
        yhat = model(x[0])
        loss = loss_fn(yhat, y)
        gains = loss_fn(not_ys[0], yhat) * Config.negative_weight
        loss -= gains  # subtracting pushes predictions away from the negative example

        loss.backward()
        optimizer.step()
        return loss.item()
    
    def eval_step(engine, batch):
        model.eval()
        with torch.no_grad():
            x, _, y = batch
            yhat = model(x[0])
            return yhat, y
    
    trainer = Engine(train_step)
    trainer.logger = setup_logger('trainer')

    evaluator = Engine(eval_step)
    evaluator.logger = setup_logger('evaluator')
    
    latent_space = BallTree(numpy.array(list(train_utts.keys())))

    l1 = Loss(loss_fn)

    recall = RecallAt(latent_space)

    recall.attach(evaluator, 'recall')
    l1.attach(evaluator, 'l1')
    
    @trainer.on(Events.ITERATION_COMPLETED(every=1000))
    def log_training(engine):
        batch_loss = engine.state.output
        lr = optimizer.param_groups[0]['lr']
        e = engine.state.epoch
        n = engine.state.max_epochs
        i = engine.state.iteration
        print("Epoch {}/{} : {} - batch loss: {}, lr: {}".format(e, n, i, batch_loss, lr))
        writer.add_scalar('Training/loss', batch_loss, i)
    
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        # NB: this runs on the validation loader but logs under "Training";
        # the dedicated log_validation_results below is disabled.
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        print(f"Training Results - Epoch: {engine.state.epoch} " 
              f" L1: {metrics['l1']:.2f} "
              f" R@1: {metrics['r1']:.2f} "
              f" R@3: {metrics['r3']:.2f} "
              f" R@10: {metrics['r10']:.2f} ")

        for metric, value in metrics.items():
            writer.add_scalar(f'Training/{metric}', value, engine.state.epoch)
        
    #@trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        print(f"Validation Results - Epoch: {engine.state.epoch} "
              f"L1: {metrics['l1']:.2f} " 
              f" R@10: {metrics['r10']:.2f} ")
        for metric, value in metrics.items():
            writer.add_scalar(f'Validation/{metric}', value, engine.state.epoch)
 
    trainer.run(train_loader, max_epochs=Config.max_epochs)

    torch.save(model.state_dict(), Config.checkpoint)
    print(f'Saved checkpoint at {Config.checkpoint}')
    interact(model, encoder, latent_space, train_utts)
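
The latent_space above is a scikit-learn BallTree over the stored utterance embeddings; presumably RecallAt queries it for nearest neighbours. A sketch of that lookup (shapes are assumptions):

import numpy as np
from sklearn.neighbors import BallTree

keys = np.random.rand(100, 16)       # stored utterance embeddings (assumed shape)
tree = BallTree(keys)
query = np.random.rand(1, 16)        # a predicted embedding
dist, idx = tree.query(query, k=10)  # the 10 nearest stored utterances
print(idx[0][:3])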
Example No. 7
def training(encoder, decoder, batch_size):
    optimizer_en = optim.Adam(encoder.parameters(), lr=lr)
    scheduler_en = optim.lr_scheduler.ReduceLROnPlateau(optimizer_en,
                                                        'min',
                                                        patience=patience,
                                                        min_lr=min_lr,
                                                        factor=0.1)
    optimizer_de = optim.Adam(decoder.parameters(), lr=lr)
    scheduler_de = optim.lr_scheduler.ReduceLROnPlateau(optimizer_de,
                                                        'min',
                                                        patience=patience,
                                                        min_lr=min_lr,
                                                        factor=0.1)

    def process_function(engine, batch):
        encoder.train()
        decoder.train()
        optimizer_en.zero_grad()
        optimizer_de.zero_grad()
        encoded = encoder(batch)
        decoded = decoder(encoded)
        loss = criterion(decoded, batch)
        loss.backward()

        optimizer_en.step()
        optimizer_de.step()
        return loss.item()

    def eval_function(engine, batch):
        encoder.eval()
        decoder.eval()

        with torch.no_grad():
            encoded = encoder(batch)
            decoded = decoder(encoded)
            return decoded, batch

    trainer = Engine(process_function)
    train_evaluator = Engine(eval_function)
    validation_evaluator = Engine(eval_function)

    metric = Loss(criterion)
    metric.attach(train_evaluator, 'loss')
    metric.attach(validation_evaluator, 'loss')

    # expose the per-batch loss so the progress bar has a 'loss' metric to display
    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')
    pbar = ProgressBar(persist=True, bar_format="")
    pbar.attach(trainer, ['loss'])

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        train_evaluator.run(train_iterator)
        metrics = train_evaluator.state.metrics
        avg_loss = metrics['loss']

        pbar.log_message(
            "Training Results - Epoch: {}  Avg loss: {:.4f}".format(
                engine.state.epoch, avg_loss))

    def log_validation_results(engine):
        validation_evaluator.run(valid_iterator)
        metrics = validation_evaluator.state.metrics
        avg_loss = metrics['loss']
        pbar.log_message(
            "Validation Results - Epoch: {}  Avg loss: {:.4f}".format(
                engine.state.epoch, avg_loss))
        # scheduler stepping happens once per evaluation, in average_loss below
        pbar.n = pbar.last_print_n = 0

    trainer.add_event_handler(Events.EPOCH_COMPLETED, log_validation_results)

    # Reduce on Plateau
    def average_loss(engine):
        print("Current lr: {}".format(optimizer_de.param_groups[0]['lr']))
        average_loss = engine.state.metrics['loss']
        scheduler_en.step(average_loss)
        scheduler_de.step(average_loss)

    validation_evaluator.add_event_handler(Events.COMPLETED, average_loss)

    # Early Stopping
    def score_function(engine):
        val_loss = engine.state.metrics['loss']
        return -val_loss

    handler = EarlyStopping(patience=100,
                            score_function=score_function,
                            trainer=trainer)
    validation_evaluator.add_event_handler(Events.COMPLETED, handler)

    # Model Checkpoint
    checkpointer = ModelCheckpoint(str(DRIVE_PATH.joinpath('models')),
                                   'review',
                                   save_interval=10,
                                   n_saved=1,
                                   create_dir=False,
                                   save_as_state_dict=True,
                                   require_empty=False)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {
        'encoder': encoder,
        'decoder': decoder
    })

    train_iterator = Data.DataLoader(train_data,
                                     batch_size=batch_size,
                                     shuffle=True,
                                     drop_last=False)
    valid_iterator = Data.DataLoader(val_data,
                                     batch_size=batch_size,
                                     shuffle=True,
                                     drop_last=False)

    trainer.run(train_iterator, max_epochs=500)
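
The early-stopping wiring above is the standard Ignite pattern: the handler watches a score computed on the validation evaluator and terminates the trainer once the score stops improving for `patience` evaluations. A minimal sketch:

from ignite.engine import Engine, Events
from ignite.handlers import EarlyStopping

trainer = Engine(lambda e, b: None)
evaluator = Engine(lambda e, b: None)

# higher score is better, so negate the validation loss
stopper = EarlyStopping(patience=100,
                        score_function=lambda eng: -eng.state.metrics.get('loss', 0.0),
                        trainer=trainer)
evaluator.add_event_handler(Events.COMPLETED, stopper)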
Example No. 8
def run(model, criterion, optimizer, epochs=100, log_interval=10):
    vis = visdom.Visdom(env='ft_lift_ignite')

    train_loader = dataloaders['train']
    val_loader = dataloaders['test']

    # if not vis.check_connection():
    #     raise RuntimeError("Visdom server not running. Please run python -m visdom.server")

    # trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
    # evaluator = create_supervised_evaluator(model,
    #                                         metrics={'accuracy': Accuracy(criterion['label']),
    #                                                  'nll': Loss(criterion['label']),
    #                                                  'precision': Precision(average=True )},
    #                                         device=device)

    def update_model(trainer, batch):
        model.train()
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        # inputs, labels = _prepare_batch(batch, device=device)
        optimizer.zero_grad()
        class_output, structured_output = model(inputs)
        loss = criterion['label'](class_output, labels) + \
            criterion['structured'](structured_output, labels)
        loss.backward()
        optimizer.step()
        return {'loss': loss.item(),
                'class_output': class_output,
                'structured_output': structured_output,
                # 'inputs': inputs,
                'labels': labels}
    trainer = Engine(update_model)

    # def _prepare_batch(batch, device=None, non_blocking=False):
    #     """Prepare batch for training: pass to a device with options

    #     """
    #     x, y = batch
    #     return (convert_tensor(x, device=device, non_blocking=non_blocking),
    #             convert_tensor(y, device=device, non_blocking=non_blocking))

    def _inference(evaluator, batch):
        model.eval()
        with torch.no_grad():
            inputs, labels = batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            class_output, structured_output = model(inputs)
            loss = criterion['label'](class_output, labels) + \
                criterion['structured'](structured_output, labels)
            return {'loss': loss.item(),
                    'class_output': class_output,
                    'structured_output': structured_output,
                    # 'inputs': inputs,
                    'labels': labels}
    evaluator = Engine(_inference)

    output_transform1 = lambda data: (data['class_output'], data['labels'])
    output_transform2 = lambda data: (data['structured_output'], data['labels'])

    metric_accuracy = Accuracy(output_transform=output_transform1)
    metric_accuracy.attach(evaluator, 'accuracy')

    metric_nll = Loss(criterion['label'], output_transform=output_transform1)
    metric_nll.attach(evaluator, 'nll')

    metric_precision = Precision(average=True, output_transform=output_transform1)
    metric_precision.attach(evaluator, 'precision')

    # evaluator = create_supervised_evaluator(model,
    #                                     metrics={'accuracy': Accuracy(output_transform=output_transform1),
    #                                                 'nll': Loss(criterion['label'], output_transform=output_transform1),
    #                                                 'precision': Precision(average=True, output_transform=output_transform1)},
    #                                     device=device)

    handler = ModelCheckpoint('/1116/tmp/lift_models', 'myprefix', save_interval=1, n_saved=150, require_empty=False, create_dir=True)

    train_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Loss')
    train_avg_loss_window = create_plot_window(vis, '#Iterations', 'Loss', 'Training Average Loss')
    train_avg_accuracy_window = create_plot_window(vis, '#Iterations', 'Accuracy', 'Training Average Accuracy')
    train_avg_precision_window = create_plot_window(vis, '#Iterations', 'Precision', 'Training Average Precision')
    val_avg_loss_window = create_plot_window(vis, '#Epochs', 'Loss', 'Validation Average Loss')
    val_avg_accuracy_window = create_plot_window(vis, '#Epochs', 'Accuracy', 'Validation Average Accuracy')
    val_avg_precision_window = create_plot_window(vis, '#Epochs', 'Precision', 'Validation Average Precision')
    
    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        it = (engine.state.iteration - 1) % len(train_loader) + 1
        if it % log_interval == 0:
            print("Epoch[{}] Iteration[{}/{}] Loss: {:.2f}"
                  "".format(engine.state.epoch, it, len(train_loader), engine.state.output['loss']))
            vis.line(X=np.array([engine.state.iteration]),
                     Y=np.array([engine.state.output['loss']]),
                     update='append', win=train_loss_window)
    
    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        avg_precision = metrics['precision']
        print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f} Avg Precision: {:.2f}"
              .format(engine.state.epoch, avg_accuracy, avg_nll, avg_precision))
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_accuracy]),
                 win=train_avg_accuracy_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_nll]),
                 win=train_avg_loss_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_precision]),
                 win=train_avg_precision_window, update='append')

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_loader)
        metrics = evaluator.state.metrics
        avg_accuracy = metrics['accuracy']
        avg_nll = metrics['nll']
        avg_precision = metrics['precision']
        print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f} Avg Precision: {:.2f}"
              .format(engine.state.epoch, avg_accuracy, avg_nll, avg_precision))
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_accuracy]),
                 win=val_avg_accuracy_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_nll]),
                 win=val_avg_loss_window, update='append')
        vis.line(X=np.array([engine.state.epoch]), Y=np.array([avg_precision]),
                 win=val_avg_precision_window, update='append')

    # kick everything off
    trainer.add_event_handler(Events.EPOCH_COMPLETED, handler, {'mymodel': model})
    trainer.run(train_loader, max_epochs=epochs)
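
`create_plot_window` is not shown here; a plausible sketch (an assumption, not the author's code) matching how the windows are appended to above: seed a visdom line plot with a single NaN point and return the window handle.

import numpy as np
import visdom

def create_plot_window(vis, xlabel, ylabel, title):
    # the returned window id is later passed as win= to vis.line(update='append')
    return vis.line(X=np.array([1]), Y=np.array([np.nan]),
                    opts=dict(xlabel=xlabel, ylabel=ylabel, title=title))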
Example No. 9
def adv_train_loop(model,
                   params,
                   ds,
                   min_y,
                   base_data,
                   model_id,
                   attack_type,
                   device,
                   batch_size,
                   max_epochs=5):
    print('training adversarial:', attack_type)
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    original_model = copy.deepcopy(
        model)  # used to generate adv images for the trained model
    original_model.eval()
    model = copy.deepcopy(
        model)  # making a copy so that original model is not changed
    model = model.to(device)
    model_id = f'{model_id}_{attack_type}'

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        classifier = PyTorchClassifier(
            model=original_model,
            clip_values=(0, 1),
            loss=nn.CrossEntropyLoss(),
            optimizer=optimizer,
            input_shape=(3, 64, 64),
            nb_classes=200,
        )

        attack = None

        #         if attack_type == "fgsm":
        #             attack = FastGradientMethod(estimator=classifier, eps=0.2)
        #         elif attack_type == "bim":
        #             attack = BasicIterativeMethod(estimator=classifier, eps=0.2)
        #         elif attack_type == "carlini":
        #             attack = CarliniLInfMethod(classifier=classifier)
        #         elif attack_type == "deepfool":
        #             attack = DeepFool(classifier=classifier)
        if attack_type == "fgsm":
            attack = GradientSignAttack(model, loss_fn=loss, eps=0.2)
        elif attack_type == "ffa":
            attack = FastFeatureAttack(model, loss_fn=loss, eps=0.3)
        elif attack_type == "carlini":
            attack = CarliniWagnerL2Attack(model, 200, max_iterations=1000)
        elif attack_type == "lbfgs":
            attack = DeepFool(classifier=classifier)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with ctx_noparamgrad_and_eval(model):
                x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            ans = model.forward(x)
            l = loss(ans, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            #             return ans, y
            return l.item()

        trainer = Engine(train_step)

        #         acc_metric.attach(trainer, "accuracy")
        #         loss_metric.attach(trainer, 'loss')

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            x_adv = attack.perturb(x, y)
            x = torch.cat((x, x_adv))
            y = torch.cat((y, y))
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['loss']  # ReduceLROnPlateau (mode='min') tracks loss
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            # metrics = engine.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            it = (engine.state.iteration - 1) % len(ds_train) + 1
            if (it % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, it, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

#         @trainer.on(Events.EPOCH_COMPLETED)
#         def log_training_results(engine):
#             train_evaluator.run(ds_train)
#             metrics = train_evaluator.state.metrics
#             # metrics = engine.state.metrics
#             avg_accuracy = metrics['accuracy']
#             avg_nll = metrics['loss']
#             print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
#                   .format(engine.state.epoch, avg_accuracy, avg_nll))
#             writer.add_scalar("training/avg_loss", avg_nll, engine.state.epoch)
#             writer.add_scalar("training/avg_accuracy",
#                               avg_accuracy, engine.state.epoch)
#             writer.add_scalar("training/avg_error", 1. -
#                               avg_accuracy, engine.state.epoch)

        def validation_value(engine):
            # checkpoint score function: higher validation accuracy is better
            metrics = valid_evaluator.state.metrics
            return metrics['accuracy']

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
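
A self-contained sketch of the advertorch FGSM call used in train_step, on an assumed toy classifier: ctx_noparamgrad_and_eval disables parameter gradients while perturb crafts adversarial inputs inside an eps ball.

import torch
import torch.nn as nn
from advertorch.attacks import GradientSignAttack
from advertorch.context import ctx_noparamgrad_and_eval

model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 10))
attack = GradientSignAttack(model, loss_fn=nn.CrossEntropyLoss(), eps=0.2)

x = torch.rand(4, 3, 8, 8)           # inputs assumed in [0, 1]
y = torch.randint(0, 10, (4,))
with ctx_noparamgrad_and_eval(model):
    x_adv = attack.perturb(x, y)
print(float((x_adv - x).abs().max()))  # bounded by eps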
Example No. 10
def validation_step(engine, batch):
    model.eval()
    with torch.no_grad():
        video, class_num = batch["video"].cuda(), batch["class"].cuda()
        pred = model(video)
        pred = F.softmax(pred, dim=1)
        # torch.cuda.empty_cache()

    return pred, class_num


evaluator = Engine(validation_step)

accuracy_metric = Accuracy()
accuracy_metric.attach(evaluator, "accuracy")
ce_loss_metric = Loss(ce_loss_fn)
ce_loss_metric.attach(evaluator, "loss")


@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    e = engine.state.epoch
    i = engine.state.iteration
    loss = engine.state.output
    print(f"Epoch: {e} / {cfg.epochs} : {i} - Loss: {loss:.5f}")
    # if wandb_online:
    #   wandb.log({"loss": loss})


@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(engine):
    state = evaluator.run(train_loader)
    metrics = state.metrics
    print(f"Training Results - Epoch: {engine.state.epoch} "
          f"Accuracy: {metrics['accuracy']:.5f} Loss: {metrics['loss']:.5f}")
Example No. 11
def prune_train_loop(model,
                     params,
                     ds,
                     min_y,
                     base_data,
                     model_id,
                     prune_type,
                     device,
                     batch_size,
                     max_epochs=5):
    assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = 0.3 if prune_type == 'global_unstructured' else 0.1
    ds_train, ds_valid = ds
    min_y_train, min_y_val = min_y
    model_id = f'{model_id}_{prune_type}_pruning'

    conv_layers = [model.conv1]
    for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
        for bottleneck in sequential:
            conv_layers.extend(
                [bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])

    def prune_model(model):
        remove_amount = total_prune_amount / (max_epochs * 10)
        print(f'pruned model by {remove_amount}')
        if prune_type == 'global_unstructured':
            parameters_to_prune = [(layer, 'weight') for layer in conv_layers]
            prune.global_unstructured(
                parameters_to_prune,
                pruning_method=prune.L1Unstructured,
                amount=remove_amount,
            )
        else:
            for layer in conv_layers:
                prune.ln_structured(layer,
                                    name='weight',
                                    amount=remove_amount,
                                    n=1,
                                    dim=0)

    prune_model(model)

    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            optimizer.zero_grad()
            ans = model.forward(x)
            l = loss(ans, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask  # make sure pruned weights stay 0
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10))
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

            prune_model(model)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            metrics = valid_evaluator.state.metrics
            avg_nll = metrics['loss']  # ReduceLROnPlateau (mode='min') tracks loss
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=50))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            it = (engine.state.iteration - 1) % len(ds_train) + 1
            if (it % 50) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, it, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll, engine.state.epoch)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        def validation_value(engine):
            # checkpoint score function: higher validation accuracy is better
            metrics = valid_evaluator.state.metrics
            return metrics['accuracy']

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(
            Events.ITERATION_COMPLETED(every=200 * 5000 // batch_size // 10),
            handler)
        trainer.run(ds_train, max_epochs=max_epochs)
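
A one-layer sketch of the structured branch above: ln_structured zeroes whole output channels chosen by L1 norm along dim=0, and the resulting mask is visible on weight_mask.

import torch.nn as nn
import torch.nn.utils.prune as prune

conv = nn.Conv2d(16, 32, 3)
prune.ln_structured(conv, name='weight', amount=0.1, n=1, dim=0)
# rows summing to zero are the pruned output channels
print(conv.weight_mask.sum(dim=(1, 2, 3)))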
Example No. 12
def train(name, load, lrate, weight_decay, workers, smooth, device, validation,
          ground_truth):

    if not name:
        name = '{}_{}'.format(lrate, weight_decay)
    click.echo('model output name: {}'.format(name))

    torch.set_num_threads(1)

    train_set = BaselineSet(glob.glob('{}/**/*.seeds.png'.format(ground_truth),
                                      recursive=True),
                            smooth=smooth)
    train_data_loader = DataLoader(dataset=train_set,
                                   num_workers=workers,
                                   batch_size=1,
                                   shuffle=True,
                                   pin_memory=True)
    val_set = BaselineSet(glob.glob('{}/**/*.seeds.png'.format(validation),
                                    recursive=True),
                          smooth=smooth)
    val_data_loader = DataLoader(dataset=val_set,
                                 num_workers=workers,
                                 batch_size=1,
                                 pin_memory=True)

    click.echo('loading network')
    model = ResUNet(refine_encoder=False).to(device)

    if load:
        click.echo('loading weights')
        model = torch.load(load, map_location=device)

    criterion = nn.BCEWithLogitsLoss()
    opti = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                      lr=lrate,
                      weight_decay=weight_decay)

    def score_function(engine):
        val_loss = engine.state.metrics['loss']
        return -val_loss

    def output_preprocess(output):
        o, target = output
        o = torch.sigmoid(o)
        o = denoising_hysteresis_thresh(o.detach().squeeze().cpu().numpy(),
                                        0.8, 0.9, 2.5)
        return torch.from_numpy(o.astype('f')).unsqueeze(0).unsqueeze(0).to(
            device), target.double().to(device)

    trainer = create_supervised_trainer(model,
                                        opti,
                                        criterion,
                                        device=device,
                                        non_blocking=True)
    accuracy = Accuracy(output_transform=output_preprocess)
    # per-class precision/recall (average=False) feed the F1 computation below
    precision = Precision(average=False, output_transform=output_preprocess)
    recall = Recall(average=False, output_transform=output_preprocess)
    loss = Loss(criterion)
    f1 = (precision * recall * 2 / (precision + recall)).mean()

    evaluator = create_supervised_evaluator(model,
                                            device=device,
                                            non_blocking=True)

    accuracy.attach(evaluator, 'accuracy')
    precision.attach(evaluator, 'precision')
    recall.attach(evaluator, 'recall')
    loss.attach(evaluator, 'loss')
    f1.attach(evaluator, 'f1')

    ckpt_handler = ModelCheckpoint('.',
                                   name,
                                   save_interval=1,
                                   n_saved=10,
                                   require_empty=False)
    RunningAverage(output_transform=lambda x: x).attach(trainer, 'loss')

    progress_bar = ProgressBar(persist=True)
    progress_bar.attach(trainer, ['loss'])

    trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED,
                              handler=ckpt_handler,
                              to_save={'net': model})
    trainer.add_event_handler(event_name=Events.ITERATION_COMPLETED,
                              handler=TerminateOnNan())

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_data_loader)
        metrics = evaluator.state.metrics
        progress_bar.log_message(
            'eval results - epoch {} loss: {:.4f} f1: {:.4f} accuracy: {:.4f} '
            'recall: {:.4f} precision: {:.4f}'.format(
                engine.state.epoch, metrics['loss'], metrics['f1'],
                metrics['accuracy'], metrics['recall'], metrics['precision']))

    trainer.run(train_data_loader, max_epochs=1000)
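# For reference, a minimal self-contained sketch of the metric arithmetic
# used above to build F1; the toy engine and data are illustrative only:
import torch
from ignite.engine import Engine
from ignite.metrics import Precision, Recall

toy_evaluator = Engine(lambda engine, batch: batch)  # output = (y_pred, y)

precision = Precision(average=False)
recall = Recall(average=False)
# per-class F1, macro-averaged; attaching f1 also attaches its dependencies
f1 = (precision * recall * 2 / (precision + recall)).mean()
f1.attach(toy_evaluator, 'f1')

y_pred = torch.tensor([[0.9, 0.1], [0.2, 0.8], [0.7, 0.3]])
y = torch.tensor([0, 1, 1])
state = toy_evaluator.run([(y_pred, y)])
print(state.metrics['f1'])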
Example No. 13
    def _setup(self, config):
        training_params = config['tp']
        hyper_params = config['hyper-params']
        self.name = training_params['name']
        self.loss_fn = training_params['loss_fn']
        self.device = training_params['device']
        self.n_it_max = training_params['n_it_max']
        self.split_names = training_params['split_names']

        self.model = torch.load(training_params['model_path'])
        self.datasets = self._load_datasets(training_params['data_path'],
                                            training_params['loss_fn'],
                                            training_params['past_tasks'])
        self.batch_sizes = training_params['batch_sizes']
        data_loaders = self._get_dataloaders(self.datasets, self.batch_sizes)
        self.train_loader, self.eval_loaders = data_loaders

        self.named_eval_loaders = OrderedDict(
            zip(self.split_names, self.eval_loaders))

        optim_keywords = training_params['optim_func'].func.keywords
        optim_keywords['lr'] = hyper_params['lr']
        optim_keywords['weight_decay'] = hyper_params['wd']
        if 'dropout' in hyper_params:
            set_dropout(self.model, hyper_params['dropout'])

        # optim_func.keywords['momentum'] = config['momentum']
        self.optim = training_params['optim_func'](self.model.parameters())

        self.log_interval = training_params.get('log_interval', 30)
        log_steps = training_params['log_steps']
        # guard against None before copying, otherwise .copy() would raise
        self.log_steps = list(log_steps) if log_steps is not None else []
        if training_params['log_epoch']:
            self.log_steps.append(len(self.train_loader))
        self.n_iterations = 0
        self.n_epochs = 0
        self.n_steps = 0

        # For early stopping
        self.patience = training_params['patience']
        self.counter = 0
        self.best_score = None

        self.best_loss = float('inf')

        self.trainer = create_supervised_trainer(
            self.model,
            self.optim,
            training_params['loss_fn'],
            device=self.device,
            output_transform=lambda x, y, y_pred, loss: (y_pred, y))
        self.trainer._logger.setLevel(logging.WARNING)

        train_loss = Loss(lambda y_pred, y: self.loss_fn(y_pred, y).mean())
        train_loss.attach(self.trainer, 'train_loss')
        # also publish the running value after every iteration
        self.trainer.add_event_handler(Events.ITERATION_COMPLETED,
                                       train_loss.completed, 'train_loss')
        StopAfterIterations(self.log_steps).attach(self.trainer)

        self.eval_metrics = {'nll': Loss(lambda y_pred, y:
                                         self.loss_fn(y_pred, y).mean())}
        for i in range(self.model.n_out):
            self.eval_metrics['accuracy_{}'.format(i)] = \
                Accuracy(output_transform=get_attr_transform(i))

        self.evaluator = \
            create_supervised_evaluator(self.model,
                                        metrics=self.eval_metrics,
                                        device=self.device)

        self.all_accuracies = defaultdict(dict)
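# For reference: Loss.attach() publishes a value only at EPOCH_COMPLETED;
# additionally registering metric.completed on ITERATION_COMPLETED, as the
# trainer above does, exposes the running value after every batch. A
# minimal sketch with toy data (illustrative only):
import torch
import torch.nn.functional as F
from ignite.engine import Engine, Events
from ignite.metrics import Loss

toy_trainer = Engine(lambda engine, batch: batch)  # output = (y_pred, y)
train_loss = Loss(F.cross_entropy)
train_loss.attach(toy_trainer, 'train_loss')
toy_trainer.add_event_handler(Events.ITERATION_COMPLETED,
                              train_loss.completed, 'train_loss')

@toy_trainer.on(Events.ITERATION_COMPLETED)
def show(engine):
    print(engine.state.iteration, engine.state.metrics['train_loss'])

batch = (torch.randn(4, 3), torch.randint(0, 3, (4,)))
toy_trainer.run([batch, batch])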
Example No. 14
    def _add_metrics(self):
        train_loss = RunningAverage(Loss(self.get_loss))
        train_loss.attach(self.trainer, 'avg_train_loss')

        val_loss = Loss(self.get_loss)
        val_loss.attach(self.evaluator, 'val_loss')
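# For reference: RunningAverage can smooth either another metric (src=...,
# as in _add_metrics above) or the raw engine output (output_transform=...),
# but not both at once. A minimal sketch of the output variant (the toy
# engine is illustrative only):
from ignite.engine import Engine
from ignite.metrics import RunningAverage

toy_trainer = Engine(lambda engine, batch: batch)  # output = loss value

# exponential moving average of the per-iteration output
RunningAverage(output_transform=lambda out: out,
               alpha=0.98).attach(toy_trainer, 'ra_loss')

state = toy_trainer.run([0.9, 0.5, 0.1], max_epochs=1)
print(state.metrics['ra_loss'])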
Example No. 15
    def __init__(self,
                 optimizer: OptimizerType,
                 train_loader: DataLoaderType,
                 model: torch.nn.Module,
                 train_engine: Optional[ignite.engine.Engine] = None,
                 test_engine: Optional[ignite.engine.Engine] = None,
                 test_loader: Optional[DataLoaderType] = None,
                 loss_fn: Optional[LossFnType] = None,
                 eval_metric: Optional[ignite.metrics.Metric] = None,
                 descending: bool = True,
                 device: str = 'cuda') -> None:

        super().__init__()
        self.descending = descending
        self.optimizer: OptimizerType = optimizer
        self.model: Optional[torch.nn.Module] = model
        self.train_engine: ignite.engine.Engine
        self.train_loader: DataLoaderType = train_loader
        self.test_loader: Optional[DataLoaderType] = test_loader
        self.test_engine: Optional[ignite.engine.Engine]

        # create the train engine if necessary
        # if so, build it from  the model and loss_fn
        if train_engine is None and model is None:
            raise TypeError('either train_engine or model has to be provided')
        if train_engine is not None:
            self.train_engine = train_engine  # directly use it
        elif model is not None:
            if loss_fn is None:
                raise TypeError(
                    'loss_fn has to be provided if passing a plain pytorch model'
                )
            self.train_engine = ignite.engine.create_supervised_trainer(
                model,
                optimizer,
                loss_fn=loss_fn,
                device=device,
                non_blocking=True)

        # use the given eval_metric if provided, falling back to the loss
        # averaged over the entire epoch
        new_metric = None
        if eval_metric is not None:
            new_metric = eval_metric
        elif loss_fn is not None:
            new_metric = Loss(loss_fn)

        # a test engine is needed only if a test loader is present
        if test_loader is not None:
            if test_engine is None:
                if eval_metric is None:
                    if loss_fn is None:
                        # error if no metric or loss_fn
                        raise TypeError(
                            'loss_fn has to be provided if using the default evaluator and not '
                            'providing a metric')
                if model is None:
                    raise TypeError(
                        'model must be provided if using the default evaluator'
                    )

                # create a default test engine
                self.test_engine = ignite.engine.create_supervised_evaluator(
                    model,
                    metrics={'loss': new_metric},
                    device=device,
                    non_blocking=True)
            else:
                self.test_engine = test_engine  # use the specified engine
                # attach a new metric if present
                if new_metric is not None:
                    new_metric.attach(self.test_engine, 'loss')
        else:
            self.test_engine = None  # no need for a test engine if no test loader specified
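# For reference: the two branches above are equivalent ways of wiring a
# metric -- pass metrics= when the default evaluator is created, or call
# attach() on an engine that already exists. A minimal sketch (the toy
# model and data are illustrative only):
import torch
import torch.nn as nn
import torch.nn.functional as F
import ignite.engine
from ignite.metrics import Loss

toy_model = nn.Linear(4, 2)
metric = Loss(F.cross_entropy)

# option 1: register the metric while building the default evaluator
evaluator = ignite.engine.create_supervised_evaluator(
    toy_model, metrics={'loss': metric})
# option 2 (for a pre-built engine): metric.attach(engine, 'loss')

x, y = torch.randn(8, 4), torch.randint(0, 2, (8,))
state = evaluator.run([(x, y)])
print(state.metrics['loss'])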
Example No. 16
def prune_train_loop(model,
                     params,
                     ds,
                     dset,
                     min_y,
                     base_data,
                     model_id,
                     prune_type,
                     device,
                     batch_size,
                     tpa,
                     max_epochs=2):
    assert prune_type in ['global_unstructured', 'structured']
    total_prune_amount = tpa
    ds_train, ds_valid = ds
    train_set, valid_set = dset
    min_y_train, min_y_val = min_y
    model_id = f'{model_id}_{prune_type}_pruning_{tpa}'
    valid_freq = 200 * 500 // batch_size // 3

    conv_layers = [model.conv1]

    def prune_model(model):
        #         remove_amount = total_prune_amount // (max_epochs)
        remove_amount = total_prune_amount
        print(f'pruning model by {remove_amount} filters')
        worst = select_filters(model, ds_valid, valid_set, remove_amount,
                               device)
        worst = list(
            Counter(torch.stack(worst).view(-1).cpu().numpy()).keys())
        worst.sort(reverse=True)
        print(worst)
        for layer in conv_layers:
            for d in worst:
                TuckerStructured(layer, name='weight', amount=0, dim=0, filt=d)
        return worst

    bad = prune_model(model)
    zeros = []
    wrong = []
    for i in range(len(model.conv1.weight_mask)):
        if torch.sum(model.conv1.weight_mask[i]) == 0.0:
            zeros.append(i)
    zeros.sort(reverse=True)
    if zeros == bad:
        print("correctly zero'd filters")
    else:
        if len(zeros) == len(bad):
            for i in range(len(zeros)):
                if zeros[i] != bad[i]:
                    wrong.append((bad[i], zeros[i]))
            print(wrong)
        else:
            print("diff number filters zero'd", zeros)
    with create_summary_writer(model,
                               ds_train,
                               base_data,
                               model_id,
                               device=device) as writer:
        lr = params['lr']
        mom = params['momentum']
        wd = params['l2_wd']
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=mom,
                                    weight_decay=wd)
        sched = ReduceLROnPlateau(optimizer, factor=0.5, patience=5)
        funcs = {'accuracy': Accuracy(), 'loss': Loss(F.cross_entropy)}
        loss = funcs['loss']._loss_fn

        acc_metric = Accuracy(device=device)
        loss_metric = Loss(F.cross_entropy, device=device)

        acc_val_metric = Accuracy(device=device)
        loss_val_metric = Loss(F.cross_entropy, device=device)

        def train_step(engine, batch):
            model.train()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            optimizer.zero_grad()
            ans = model.forward(x)
            l = loss(ans, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                for layer in conv_layers:
                    layer.weight *= layer.weight_mask  # make sure pruned weights stay 0
            return l.item()

        trainer = Engine(train_step)

        def train_eval_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_train
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        train_evaluator = Engine(train_eval_step)
        acc_metric.attach(train_evaluator, "accuracy")
        loss_metric.attach(train_evaluator, 'loss')

        def validation_step(engine, batch):
            model.eval()
            x, y = batch
            x = x.to(device)
            y = y.to(device) - min_y_val
            with torch.no_grad():
                ans = model.forward(x)
            return ans, y

        valid_evaluator = Engine(validation_step)
        acc_val_metric.attach(valid_evaluator, "accuracy")
        loss_val_metric.attach(valid_evaluator, 'loss')

        @trainer.on(Events.ITERATION_COMPLETED(every=valid_freq))
        #         @trainer.on(Events.ITERATION_COMPLETED)
        def log_validation_results(engine):
            valid_evaluator.run(ds_valid)
            metrics = valid_evaluator.state.metrics
            valid_avg_accuracy = metrics['accuracy']
            avg_nll = metrics['loss']
            print(
                "Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
                .format(engine.state.epoch, valid_avg_accuracy, avg_nll))
            writer.add_scalar("validation/avg_loss", avg_nll,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_accuracy", valid_avg_accuracy,
                              engine.state.epoch)
            writer.add_scalar("validation/avg_error", 1. - valid_avg_accuracy,
                              engine.state.epoch)

#             prune_model(model)

        @trainer.on(Events.EPOCH_COMPLETED)
        def lr_scheduler(engine):
            # ReduceLROnPlateau defaults to mode='min', so step on the
            # validation loss rather than on accuracy
            avg_nll = valid_evaluator.state.metrics['loss']
            sched.step(avg_nll)

        @trainer.on(Events.ITERATION_COMPLETED(every=100))
        def log_training_loss(engine):
            batch = engine.state.batch
            ds = DataLoader(TensorDataset(*batch), batch_size=batch_size)
            train_evaluator.run(ds)
            metrics = train_evaluator.state.metrics
            accuracy = metrics['accuracy']
            nll = metrics['loss']
            it = (engine.state.iteration - 1) % len(ds_train) + 1
            if (it % 100) == 0:
                print("Epoch[{}] Iter[{}/{}] Accuracy: {:.2f} Loss: {:.2f}".
                      format(engine.state.epoch, it, len(ds_train), accuracy,
                             nll))
            writer.add_scalar("batchtraining/detloss", nll,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/accuracy", accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/error", 1. - accuracy,
                              engine.state.iteration)
            writer.add_scalar("batchtraining/loss", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_lr(engine):
            writer.add_scalar("lr", optimizer.param_groups[0]['lr'],
                              engine.state.epoch)

        # used only as the Checkpoint score_function below; an event
        # handler's return value would be ignored, so it is not registered
        def validation_value(engine):
            return valid_evaluator.state.metrics['accuracy']

        to_save = {'model': model}
        handler = Checkpoint(
            to_save,
            DiskSaver(os.path.join(base_data, model_id), create_dir=True),
            score_function=validation_value,
            score_name="val_acc",
            global_step_transform=global_step_from_engine(trainer),
            n_saved=None)

        # kick everything off
        trainer.add_event_handler(Events.ITERATION_COMPLETED(every=valid_freq),
                                  handler)
        trainer.run(ds_train, max_epochs=max_epochs)
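# For reference: the weight_mask bookkeeping verified above follows the
# torch.nn.utils.prune convention. A minimal sketch with the built-in
# ln_structured pruner standing in for the custom TuckerStructured method
# (the toy layer and pruning amount are illustrative only):
import torch.nn as nn
import torch.nn.utils.prune as prune

conv = nn.Conv2d(3, 8, kernel_size=3)
# zero half of the output filters (dim=0), ranked by L2 norm (n=2)
prune.ln_structured(conv, name='weight', amount=0.5, n=2, dim=0)

# pruning reparametrizes weight = weight_orig * weight_mask through a
# forward pre-hook, so masked filters stay zero in the forward pass
zeroed = [i for i, m in enumerate(conv.weight_mask) if m.sum() == 0]
print('zeroed filters:', zeroed)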
Example No. 17
    def __call__(self) -> float:
        self.logger = logging.getLogger()
        if self.options.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)
        fh = logging.FileHandler(
            os.path.join(self.options.checkpoints_dir, "trainer.log"))
        formatter = logging.Formatter(
            "%(asctime)s - %(threadName)s - %(levelname)s: %(message)s")
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

        self.logger.info("Creating trainer with the following options:")
        for key, value in vars(self.options).items():
            if key == "device":
                value = value.type
            elif key == "gpu_ids":
                value = "cuda : " + str(value) if torch.cuda.is_available(
                ) else "cpu"
            self.logger.info(
                f"    {key:>25}: {'None' if value is None else value:<30}")

        # Create Reconstructor Model
        self.reconstructor = models.reconstruction.ReconstructorNetwork(
            number_of_cascade_blocks=self.options.number_of_cascade_blocks,
            n_downsampling=self.options.n_downsampling,
            number_of_filters=self.options.number_of_reconstructor_filters,
            number_of_layers_residual_bottleneck=(
                self.options.number_of_layers_residual_bottleneck),
            mask_embed_dim=self.options.mask_embed_dim,
            dropout_probability=self.options.dropout_probability,
            img_width=self.options.image_width,
            use_deconv=self.options.use_deconv,
        )

        if self.options.device.type == "cuda":
            self.reconstructor = torch.nn.DataParallel(self.reconstructor).to(
                self.options.device)
        self.optimizers = {
            "G":
            optim.Adam(
                self.reconstructor.parameters(),
                lr=self.options.lr,
                betas=(self.options.beta1, 0.999),
            )
        }

        # Create Evaluator Model
        if self.options.use_evaluator:
            self.evaluator = models.evaluator.EvaluatorNetwork(
                number_of_filters=self.options.number_of_evaluator_filters,
                number_of_conv_layers=(
                    self.options.number_of_evaluator_convolution_layers),
                use_sigmoid=False,
                width=self.options.image_width,
                height=640 if self.options.dataroot == "KNEE_RAW" else None,
                mask_embed_dim=self.options.mask_embed_dim,
            )
            self.evaluator = torch.nn.DataParallel(self.evaluator).to(
                self.options.device)

            self.optimizers["D"] = optim.Adam(
                self.evaluator.parameters(),
                lr=self.options.lr,
                betas=(self.options.beta1, 0.999),
            )

        train_loader, val_loader = self.get_loaders()

        self.load_from_checkpoint_if_present()
        self.load_weights_from_given_checkpoint()

        writer = SummaryWriter(self.options.checkpoints_dir)

        # Training engine and handlers
        train_engine = Engine(lambda engine, batch: self.update(batch))
        val_engine = Engine(lambda engine, batch: self.inference(batch))

        validation_mse = Loss(
            loss_fn=F.mse_loss,
            output_transform=lambda x: (
                x["reconstructed_image_magnitude"],
                x["ground_truth_magnitude"],
            ),
        )
        validation_mse.attach(val_engine, name="mse")

        validation_ssim = Loss(
            loss_fn=util.common.compute_ssims,
            output_transform=lambda x: (
                x["reconstructed_image_magnitude"],
                x["ground_truth_magnitude"],
            ),
        )
        validation_ssim.attach(val_engine, name="ssim")

        if self.options.use_evaluator:
            validation_loss_d = Loss(
                loss_fn=self.discriminator_loss,
                output_transform=lambda x: (
                    x["reconstructor_eval"],
                    x["ground_truth_eval"],
                    {
                        "reconstructed_image": x["reconstructed_image"],
                        "target": x["ground_truth"],
                        "mask": x["mask"],
                    },
                ),
            )
            validation_loss_d.attach(val_engine, name="loss_D")

        progress_bar = ProgressBar()
        progress_bar.attach(train_engine)

        train_engine.add_event_handler(
            Events.EPOCH_COMPLETED,
            run_validation_and_update_best_checkpoint,
            val_engine=val_engine,
            progress_bar=progress_bar,
            val_loader=val_loader,
            trainer=self,
        )

        # Tensorboard Plots
        @train_engine.on(Events.ITERATION_COMPLETED)
        def plot_training_loss(engine):
            writer.add_scalar(
                "training/generator_loss",
                engine.state.output["loss_G"],
                self.updates_performed,
            )
            if "loss_D" in engine.state.output:
                writer.add_scalar(
                    "training/discriminator_loss",
                    engine.state.output["loss_D"],
                    self.updates_performed,
                )

        @train_engine.on(Events.EPOCH_COMPLETED)
        def plot_validation_loss(_):
            writer.add_scalar("validation/MSE",
                              val_engine.state.metrics["mse"],
                              self.completed_epochs)
            writer.add_scalar(
                "validation/SSIM",
                val_engine.state.metrics["ssim"],
                self.completed_epochs,
            )
            if "loss_D" in val_engine.state.metrics:
                writer.add_scalar(
                    "validation/loss_D",
                    val_engine.state.metrics["loss_D"],
                    self.completed_epochs,
                )

        @train_engine.on(Events.EPOCH_COMPLETED)
        def plot_validation_images(_):
            ground_truth = val_engine.state.output["ground_truth_magnitude"]
            zero_filled_image = val_engine.state.output[
                "zero_filled_image_magnitude"]
            reconstructed_image = val_engine.state.output[
                "reconstructed_image_magnitude"]
            uncertainty_map = val_engine.state.output["uncertainty_map"]
            difference = torch.abs(ground_truth - reconstructed_image)

            # Create plots
            ground_truth = util.common.create_grid_from_tensor(ground_truth)
            writer.add_image("validation_images/ground_truth", ground_truth,
                             self.completed_epochs)

            zero_filled_image = util.common.create_grid_from_tensor(
                zero_filled_image)
            writer.add_image(
                "validation_images/zero_filled_image",
                zero_filled_image,
                self.completed_epochs,
            )

            reconstructed_image = util.common.create_grid_from_tensor(
                reconstructed_image)
            writer.add_image(
                "validation_images/reconstructed_image",
                reconstructed_image,
                self.completed_epochs,
            )

            uncertainty_map = util.common.gray2heatmap(
                util.common.create_grid_from_tensor(uncertainty_map.exp()),
                cmap="jet",
            )
            writer.add_image(
                "validation_images/uncertainty_map",
                uncertainty_map,
                self.completed_epochs,
            )

            difference = util.common.create_grid_from_tensor(difference)
            difference = util.common.gray2heatmap(difference, cmap="gray")
            writer.add_image("validation_images/difference", difference,
                             self.completed_epochs)

            mask = util.common.create_grid_from_tensor(
                val_engine.state.output["mask"].repeat(
                    1, 1, val_engine.state.output["mask"].shape[3], 1))
            writer.add_image("validation_images/mask_image", mask,
                             self.completed_epochs)

        train_engine.add_event_handler(
            Events.EPOCH_COMPLETED,
            save_regular_checkpoint,
            trainer=self,
            progress_bar=progress_bar,
        )

        train_engine.run(train_loader,
                         self.options.max_epochs - self.completed_epochs)

        writer.close()

        return self.best_validation_score
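# For reference: Loss is used above as a generic batch-averaged metric
# (MSE, SSIM, the discriminator loss), with output_transform unpacking the
# dict returned by the engine. A minimal sketch (the dict keys and toy
# tensors are illustrative only):
import torch
import torch.nn.functional as F
from ignite.engine import Engine
from ignite.metrics import Loss

def inference_step(engine, batch):
    recon, target = batch
    return {'recon': recon, 'target': target}

val_engine = Engine(inference_step)
mse = Loss(F.mse_loss,
           output_transform=lambda out: (out['recon'], out['target']))
mse.attach(val_engine, 'mse')

recon, target = torch.rand(2, 16), torch.rand(2, 16)
state = val_engine.run([(recon, target)])
print(state.metrics['mse'])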