Example #1
import torch
import torch.nn.functional as F
from ignite import metrics
from ignite.engine import Engine
from ignite.utils import convert_tensor


def create_baseline_trainer(model, optimizer=None, name='train', device=None):

    if device is not None:
        model.to(device)

    is_train = optimizer is not None

    def _update(engine, batch):
        model.train(is_train)

        with torch.set_grad_enabled(is_train):
            images, labels = convert_tensor(batch, device=device)
            preds = model(images)
            loss = F.cross_entropy(preds, labels)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        return {'loss': loss.item(), 'y_pred': preds, 'y': labels}

    engine = Engine(_update)
    engine.name = name
    metrics.Average(lambda o: o['loss']).attach(engine, 'single_loss')
    metrics.Accuracy(lambda o: (o['y_pred'], o['y'])).attach(
        engine, 'single_acc')
    return engine
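
A minimal usage sketch: the same factory builds a trainer (optimizer given) and an evaluator (optimizer omitted). The model, optimizer, device, and data loaders below are hypothetical stand-ins, not part of the original snippet.

trainer = create_baseline_trainer(model, optimizer=optimizer, device=device)
evaluator = create_baseline_trainer(model, name='val', device=device)

trainer.run(train_loader, max_epochs=10)
evaluator.run(val_loader)
print(evaluator.state.metrics['single_loss'],
      evaluator.state.metrics['single_acc'])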
Example #2
def more_metrics(self, metrics_: OrderedDict):
    metrics_['loss'] = metrics.Loss(nn.CrossEntropyLoss())
    metrics_['accuracy'] = metrics.Accuracy()
    metrics_['recall'] = metrics.Recall()
    metrics_['precision'] = metrics.Precision()
    metrics_['confusion_matrix'] = metrics.ConfusionMatrix(8, average='recall')
Example #3
import torch.nn as nn
from ignite import metrics as M


def get_metric(metric):
    if metric == 'mse':
        return M.MeanSquaredError()
    elif metric == 'xent':
        return M.Loss(nn.CrossEntropyLoss())
    elif metric == 'acc':
        return M.Accuracy()
    raise ValueError('Unrecognized metric {}.'.format(metric))
Example #4
        def get_metrics_fn() -> Dict[str, _metrics.Metric]:
            def rounded_transform(output):
                y_pred, y = output
                return torch.round(y_pred), y

            transform = rounded_transform
            accuracy = _metrics.Accuracy(transform, device=self.device)
            precision = _metrics.Precision(transform, device=self.device)
            recall = _metrics.Recall(transform, device=self.device)
            f1 = precision * recall * 2 / (precision + recall + 1e-20)
            return {
                'loss': _metrics.Loss(loss_fn),
                'accuracy': accuracy,
                'precision': precision,
                'recall': recall,
                'f1': f1
            }
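
The f1 arithmetic works because ignite Metric objects overload +, *, and / to build a lazily evaluated MetricsLambda, so f1 is recomputed from the accumulated precision and recall whenever metrics are computed. Attaching the whole set could then look like this sketch (the evaluator engine is hypothetical):

for name, metric in get_metrics_fn().items():
    metric.attach(evaluator, name)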
Example #5
import torch.nn as nn
from ignite import metrics as M

# GaussianVAELoss, GaussianKLDivergence, and ReconstructionNLL are
# project-specific losses; their imports are not shown in the source.


def get_metric(metric):
    name = metric['name']
    params = metric['params']
    if name == 'mse':
        return M.MeanSquaredError(**params)
    elif name == 'vae':
        return M.Loss(GaussianVAELoss(**params))
    elif name == 'kl-div':
        return M.Loss(GaussianKLDivergence(**params))
    elif name == 'recons_nll':
        return M.Loss(ReconstructionNLL(**params))
    elif name == 'bxent':
        return M.Loss(nn.BCEWithLogitsLoss(**params))
    elif name == 'xent':
        return M.Loss(nn.CrossEntropyLoss(**params))
    elif name == 'acc':
        return M.Accuracy(**params)
    raise ValueError('Unrecognized metric {}.'.format(name))
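
A config-driven usage sketch (the evaluator engine and the empty params dicts are hypothetical):

for spec in [{'name': 'acc', 'params': {}},
             {'name': 'xent', 'params': {}}]:
    get_metric(spec).attach(evaluator, spec['name'])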
Example #6
def run_ort(args, params, create_new_dir=True):
    reset_global_worth_manager()
    params = params.copy()
    params["model_dir"] = os.path.join(args["model_dir"], params["exp_name"])

    if create_new_dir:
        timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        params["model_dir"] = os.path.join(params["model_dir"], timestamp)

    os.makedirs(params["model_dir"], exist_ok=True)
    init_logger(params["model_dir"])

    logger = logging.getLogger(__name__)

    # Save model parameter settings to model directory.
    with open(os.path.join(params["model_dir"], "model_params.json"),
              "w") as out_fh:
        json.dump(params, out_fh, indent=2, skipkeys=True)

    logger.info("Model parameters:")
    logger.info(json.dumps(params, indent=2, sort_keys=True))

    # Make parameter objects that aren't json serializable.
    params["device"] = torch.device(params["device"])
    # dtype_dict = {
    #     "float32": torch.float32,
    #     "float64": torch.float64
    # }
    # params["dtype"] = dtype_dict[params["dtype"]]

    dataset = FashionMNIST(params)

    # Calculate the number of steps between summaries, so that summaries per epoch stays the same.
    summaries_per_epoch = 3
    save_summary_steps = math.ceil(dataset.steps_per_epoch /
                                   summaries_per_epoch)
    params["total_steps"] = dataset.steps_per_epoch * params["train_epochs"]

    get_worth_manager().load_hparams(params)

    model = FFNN(params).to(params["device"])

    estimator_config = EstimatorConfig(model_dir=params["model_dir"],
                                       device=params["device"],
                                       save_summary_steps=save_summary_steps
                                       # evaluate_steps=1000
                                       )
    estimator = Estimator(model=model,
                          params=params,
                          config=estimator_config,
                          eval_data_iter=dataset.get_eval_iterator())
    estimator.add_metric("accuracy", metrics.Accuracy())

    logger.info(model)

    train_loop(params, estimator, dataset)
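
The save_summary_steps arithmetic keeps the number of summaries per epoch roughly constant regardless of dataset size or batch size. A quick worked example, assuming a batch size of 64 (not shown in the snippet; FashionMNIST has 60,000 training images):

import math

steps_per_epoch = math.ceil(60_000 / 64)             # 938
save_summary_steps = math.ceil(steps_per_epoch / 3)  # 313 -> ~3 summaries per epoch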
Example #7
class Metrics(enum.Enum):
    train_class_metrics: t.Dict[str, im.Metric] = {
        'acc_1': im.RunningAverage(
            im.Accuracy(output_transform=lambda x: x[1:3])),
        'acc_5': im.RunningAverage(
            im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: x[1:3])),
        'ce_loss': im.RunningAverage(output_transform=lambda x: x[0]),
        'total_loss': im.RunningAverage(output_transform=lambda x: x[0]),
    }
    train_ae_metrics: t.Dict[str, im.Metric] = {
        'acc_1': im.RunningAverage(
            im.Accuracy(output_transform=lambda x: (x[1], x[5]))),
        'acc_5': im.RunningAverage(
            im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: (x[1], x[5]))),
        'ce_loss': train_ae_ce_loss,
        'l1_loss': train_ae_l1_loss,
        'total_loss': train_ae_total_loss,
    }
    train_gsnn_metrics: t.Dict[str, im.Metric] = {
        'acc_1': im.RunningAverage(
            im.Accuracy(output_transform=lambda x: (x[0].squeeze(dim=1), x[6]))),
        'acc_5': im.RunningAverage(
            im.TopKCategoricalAccuracy(
                k=5, output_transform=lambda x: (x[0].squeeze(dim=1), x[6]))),
        'ce_loss': train_gsnn_ce_loss,
        'kld_loss': train_gsnn_kld_loss,
        'total_loss': train_gsnn_total_loss,
        'kld_factor': train_gsnn_kld_factor,
    }
    train_vae_metrics: t.Dict[str, im.Metric] = {
        'acc_1': im.RunningAverage(
            im.Accuracy(output_transform=lambda x: (x[1].squeeze(dim=1), x[7]))),
        'acc_5': im.RunningAverage(
            im.TopKCategoricalAccuracy(
                k=5, output_transform=lambda x: (x[1].squeeze(dim=1), x[7]))),
        'ce_loss': train_vae_ce_loss,
        'l1_loss': train_vae_l1_loss,
        'kld_loss': train_vae_kld_loss,
        'total_loss': train_vae_total_loss,
        'kld_factor': train_vae_kld_factor,
    }
    eval_class_metrics = {
        'acc_1': im.Accuracy(output_transform=lambda x: x[0:2]),
        'acc_5': im.TopKCategoricalAccuracy(k=5, output_transform=lambda x: x[0:2]),
        'ce_loss': im.Loss(nn.CrossEntropyLoss(), output_transform=lambda x: x[0:2]),
        'total_loss': im.Loss(nn.CrossEntropyLoss(), output_transform=lambda x: x[0:2]),
    }
    eval_ae_metrics = {
        'acc_1': im.Accuracy(output_transform=lambda x: (x[1], x[5])),
        'acc_5': im.TopKCategoricalAccuracy(
            k=5, output_transform=lambda x: (x[1], x[5])),
        'ce_loss': eval_ae_loss_metric[0],
        'l1_loss': eval_ae_loss_metric[1],
        'total_loss': eval_ae_total_loss,
    }
    eval_gsnn_metrics = {
        'acc_1': im.Accuracy(output_transform=lambda x: (x[-1], x[-2])),
        'acc_5': im.TopKCategoricalAccuracy(
            k=5, output_transform=lambda x: (x[-1], x[-2])),
        'ce_loss': eval_gsnn_loss_metric[0],
        'kld_loss': eval_gsnn_loss_metric[1],
        'total_loss': eval_gsnn_total_loss,
    }
    eval_vae_metrics = {
        'acc_1': im.Accuracy(output_transform=lambda x: (x[-1], x[-2])),
        'acc_5': im.TopKCategoricalAccuracy(
            k=5, output_transform=lambda x: (x[-1], x[-2])),
        'ce_loss': eval_vae_loss_metric[0],
        'l1_loss': eval_vae_loss_metric[1],
        'kld_loss': eval_vae_loss_metric[2],
        'total_loss': eval_vae_total_loss,
    }
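
Because Metrics is an enum.Enum, each dictionary is wrapped in an enum member and must be unwrapped via .value before use, e.g. (the evaluator engine is hypothetical):

for name, metric in Metrics.eval_class_metrics.value.items():
    metric.attach(evaluator, name)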
Example #8
def more_metrics(self, metrics_: OrderedDict):
    metrics_['loss'] = metrics.Loss(nn.CrossEntropyLoss())
    metrics_['accuracy'] = metrics.Accuracy()
    metrics_['recall'] = metrics.Recall()
    metrics_['precision'] = metrics.Precision()
Example #9
        lr = Pipeline([("scaler", StandardScaler()),
                       ("lr", LogisticRegression(max_iter=10000))])
        lr.fit(z_tr, y_tr)
        acc = lr.score(z_ts, y_ts)
        return {
            "y": y,
            "loss": l,
            "y_pred": y_probs,
            "y_probs": y_probs,
            "lr_acc": acc
        }

    eval_engine = Engine(batch_eval)

    metrics.Accuracy().attach(eval_engine, "accuracy")
    metrics.Average().attach(train_engine, "average_loss")
    metrics.Average(output_transform=lambda x: x["lr_acc"]).attach(
        eval_engine, "lr_acc")
    metrics.Average(output_transform=lambda x: x["loss"]).attach(
        eval_engine, "average_loss")

    @eval_engine.on(Events.EPOCH_COMPLETED)
    def log_tboard(engine):
        tb.add_scalar(
            "train/loss",
            train_engine.state.metrics["average_loss"],
            train_engine.state.epoch,
        )
        tb.add_scalar(
            "eval/loss",
            engine.state.metrics["average_loss"],
            train_engine.state.epoch,
        )
Example #10
import torch
import torch.nn.functional as F
from ignite import metrics
from ignite.engine import Engine
from ignite.utils import convert_tensor


def create_sla_trainer(model,
                       transform,
                       optimizer=None,
                       with_large_loss=False,
                       name='train',
                       device=None):

    if device is not None:
        model.to(device)

    is_train = optimizer is not None

    def _update(engine, batch):
        model.train(is_train)

        with torch.set_grad_enabled(is_train):
            images, labels = convert_tensor(batch, device=device)
            batch_size = images.shape[0]
            images = transform(model, images, labels)
            n = images.shape[0] // batch_size

            preds = model(images)
            labels = torch.stack([labels * n + i for i in range(n)],
                                 1).view(-1)
            loss = F.cross_entropy(preds, labels)
            if with_large_loss:
                loss = loss * n

            single_preds = preds[::n, ::n]
            single_labels = labels[::n] // n

            agg_preds = 0
            for i in range(n):
                agg_preds = agg_preds + preds[i::n, i::n] / n

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        return {
            'loss': loss.item(),
            'preds': preds,
            'labels': labels,
            'single_preds': single_preds,
            'single_labels': single_labels,
            'agg_preds': agg_preds,
        }

    engine = Engine(_update)
    engine.name = name

    metrics.Average(lambda o: o['loss']).attach(engine, 'total_loss')
    metrics.Accuracy(lambda o: (o['preds'], o['labels'])).attach(
        engine, 'total_acc')

    metrics.Average(
        lambda o: F.cross_entropy(o['single_preds'], o['single_labels'])
    ).attach(engine, 'single_loss')
    metrics.Accuracy(
        lambda o: (o['single_preds'], o['single_labels'])
    ).attach(engine, 'single_acc')

    metrics.Average(
        lambda o: F.cross_entropy(o['agg_preds'], o['single_labels'])
    ).attach(engine, 'agg_loss')
    metrics.Accuracy(
        lambda o: (o['agg_preds'], o['single_labels'])
    ).attach(engine, 'agg_acc')

    return engine
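
The joint-label construction interleaves the n augmented copies of each sample so that class c under augmentation i receives label c * n + i. A toy check with plain torch:

import torch

labels = torch.tensor([0, 1])  # two samples with classes 0 and 1
n = 4                          # e.g. four rotations
joint = torch.stack([labels * n + i for i in range(n)], 1).view(-1)
# joint == tensor([0, 1, 2, 3, 4, 5, 6, 7])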
Example #11
import torch
import torch.nn.functional as F
from ignite import metrics
from ignite.engine import Engine
from ignite.utils import convert_tensor


def create_sla_sd_trainer(model,
                          transform,
                          optimizer=None,
                          T=1.0,
                          with_large_loss=False,
                          name='train',
                          device=None):

    if device is not None:
        model.to(device)

    is_train = optimizer is not None

    def _update(engine, batch):
        model.train(is_train)

        with torch.set_grad_enabled(is_train):
            images, single_labels = convert_tensor(batch, device=device)
            batch_size = images.shape[0]
            images = transform(model, images, single_labels)
            n = images.shape[0] // batch_size

            joint_preds, single_preds = model(images, None)
            single_preds = single_preds[::n]
            joint_labels = torch.stack(
                [single_labels * n + i for i in range(n)], 1).view(-1)

            joint_loss = F.cross_entropy(joint_preds, joint_labels)
            single_loss = F.cross_entropy(single_preds, single_labels)
            if with_large_loss:
                joint_loss = joint_loss * n

            agg_preds = 0
            for i in range(n):
                agg_preds = agg_preds + joint_preds[i::n, i::n] / n

            distillation_loss = F.kl_div(F.log_softmax(single_preds / T, 1),
                                         F.softmax(agg_preds.detach() / T, 1),
                                         reduction='batchmean')

            loss = joint_loss + single_loss + distillation_loss.mul(T**2)

        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        return {
            'loss': loss.item(),
            'preds': joint_preds,
            'labels': joint_labels,
            'single_preds': single_preds,
            'single_labels': single_labels,
            'agg_preds': agg_preds,
        }

    engine = Engine(_update)
    engine.name = name

    metrics.Average(lambda o: o['loss']).attach(engine, 'total_loss')
    metrics.Accuracy(lambda o: (o['preds'], o['labels'])).attach(
        engine, 'total_acc')

    metrics.Average(
        lambda o: F.cross_entropy(o['single_preds'], o['single_labels'])
    ).attach(engine, 'single_loss')
    metrics.Accuracy(
        lambda o: (o['single_preds'], o['single_labels'])
    ).attach(engine, 'single_acc')

    metrics.Average(
        lambda o: F.cross_entropy(o['agg_preds'], o['single_labels'])
    ).attach(engine, 'agg_loss')
    metrics.Accuracy(
        lambda o: (o['agg_preds'], o['single_labels'])
    ).attach(engine, 'agg_acc')

    return engine
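
The distillation term is the standard temperature-scaled KL divergence, and the final .mul(T**2) compensates for the 1/T**2 gradient scaling that the softened logits introduce (as in Hinton-style knowledge distillation). Isolated on toy logits:

import torch
import torch.nn.functional as F

T = 2.0
student_logits = torch.randn(8, 10)  # toy values
teacher_logits = torch.randn(8, 10)
kd_loss = F.kl_div(F.log_softmax(student_logits / T, dim=1),
                   F.softmax(teacher_logits.detach() / T, dim=1),
                   reduction='batchmean') * T ** 2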
Example #12
# transforms = tt.Compose([
#     tt.Normalize(0.0, 1.0)
# ])
transforms = None
DSet = NpyClfDatasets(CCSN, MSS, CHIRP, DSIR, transform=transforms)
train_l, val_l = DSet.train_test_split(random_state=24, test_size=0.25)
t_DataLoader = tud.DataLoader(DSet, sampler=train_l, batch_size=10, pin_memory=True)
v_DataLoader = tud.DataLoader(DSet, sampler=val_l, batch_size=10, pin_memory=True)
#########################
DESC = "Epoch {} - loss {:.2f}"
PBAR = tqdm(initial=0, leave=False, total=len(t_DataLoader), desc=DESC.format(0, 0))
CLF = CNN_ONE(idx=50)
LFN = tn.CrossEntropyLoss()
OPM = to.Adam(CLF.parameters(), lr=1e-3)
VAL_METRICS = {
    'loss': im.Loss(LFN),
    'acc': im.Accuracy(),
    'recall': im.Recall(),
    'precision': im.Precision(),
    'cfm': im.ConfusionMatrix(3),
}
L_TRAIN = []
L_EVAL = []
L_ACC = []
L_PRE = []
L_REC = []
L_CFM = []
#########################
def train_step(engine, batch):
    CLF.train()
    OPM.zero_grad()
    x, y = batch['payload'], batch['target']
    # The source snippet is cut off here; the lines below are the standard
    # forward/backward step this setup implies (an assumption, not source).
    out = CLF(x)
    loss = LFN(out, y)
    loss.backward()
    OPM.step()
    return loss.item()
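
The snippet stops before the engines are built. One plausible continuation, assuming im is ignite.metrics (this wiring is a sketch, not part of the source):

import torch
from ignite.engine import Engine

trainer = Engine(train_step)

def eval_step(engine, batch):
    CLF.eval()
    with torch.no_grad():
        x, y = batch['payload'], batch['target']
        return CLF(x), y

evaluator = Engine(eval_step)
for name, metric in VAL_METRICS.items():
    metric.attach(evaluator, name)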
Example #13
    _optimizer = init_optimizer(config)
    optimizer = _optimizer(model.parameters(), lr=config["learning_rate"])

    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )

    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            "Loss": metrics.Loss(nn.CrossEntropyLoss()),
            "acc@0.3": metrics.Accuracy(thresholded_transform(0.3)),
            "acc@0.5": metrics.Accuracy(thresholded_transform(0.5)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            # "FPS": metrics.Frequency(output_transform=lambda x: x[0]),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    writer = tensorboard.SummaryWriter(log_dir=f'summary/{config["model_tag"]}')
    attach_metric_logger(evaluator, eval_loader, 'val', writer=writer)
    attach_training_logger(trainer, writer=writer, log_interval=1)
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module}, args.name)
def train():
    # parse command-line arguments, read the configuration file and set up logging
    args = parse_args()
    config = read_config()
    try:
        if args.overwrite:
            shutil.rmtree(f"./logs/{args.name}", ignore_errors=True)
        os.mkdir(f"./logs/{args.name}")
    except FileExistsError:
        print(f"log folder {args.name} already exists.")

    init_logging(log_path=f"./logs/{args.name}")

    # determine which device to train the model on, cuda or cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"running training on {device}")
    if device == 'cuda':
        device += f':{args.main_cuda}'

    # prepare training and validation datasets
    logger.info('creating dataset and data loaders')
    dataset = args.dataset

    train_dataset = AerialDataset("train", dataset,
                                  config[dataset]["train"]["image_path"],
                                  config[dataset]["train"]["mask_path"])
    val_dataset = AerialDataset("val", dataset,
                                config[dataset]["val"]["image_path"],
                                config[dataset]["val"]["mask_path"])
    train_loader, train_metrics_loader, val_metrics_loader = create_data_loaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batchsize"],
    )

    # create model
    logger.info(
        f'creating BiseNetv2 and optimizer with initial lr of {config["learning_rate"]}'
    )

    model = BiSeNetV2(config["n_classes"])
    model = nn.DataParallel(model,
                            device_ids=list(range(args.main_cuda, 4))).to(device)

    # initialize the loss function and optimizer
    optimizer_fn = init_optimizer(config)
    optimizer = optimizer_fn(model.parameters(), lr=config["learning_rate"])

    logger.info('creating trainer and evaluator engines')

    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)

    # create trainer and evaluator with ignite.engine
    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )

    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            "loss": metrics.Loss(nn.CrossEntropyLoss()),
            "acc@0.3": metrics.Accuracy(thresholded_transform(0.3)),
            "acc@0.5": metrics.Accuracy(thresholded_transform(0.5)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    # attach event listeners for post-processing after each iteration and epoch

    logger.info(f'creating summary writer with tag {config["model_tag"]}')
    writer = tensorboard.SummaryWriter(log_dir=f'logs/{config["model_tag"]}')

    # logger.info('attaching lr scheduler')
    # lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # attach_lr_scheduler(trainer, lr_scheduler, writer)

    logger.info('attaching event driven calls')
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module},
                            args.name)
    attach_training_logger(trainer, writer=writer)

    attach_metric_logger(trainer, evaluator, 'train', train_metrics_loader,
                         writer)
    attach_metric_logger(trainer, evaluator, 'val', val_metrics_loader, writer)

    # start training (evaluation is included too)
    logger.info('training...')
    trainer.run(train_loader, max_epochs=config["epochs"])
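
thresholded_transform is used above but never defined in the snippet. A minimal sketch of what it plausibly looks like (a hypothetical reconstruction: an output transform that binarizes probabilities before they reach metrics.Accuracy):

def thresholded_transform(threshold=0.5):
    def transform(output):
        # binarize predicted probabilities at the given threshold
        y_pred, y = output
        return (y_pred > threshold).long(), y
    return transform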