Esempio n. 1
0
def test_metric_learning_pipeline():
    """
    Smoke test: the metric learning pipeline trains and reports a CMC score.

    The purpose of the test is to check that LoaderMetricCallback works with
    CMCMetric (ICallbackLoaderMetric). The only assertion is that the key
    "cmc01" is present in ``runner.loader_metrics`` once training finishes.
    """
    with TemporaryDirectory() as workdir:
        # train / valid loaders; the dataset is downloaded into the temp dir
        train_set = MnistMLDataset(root=workdir, download=True)
        batch_sampler = BatchBalanceClassSampler(
            labels=train_set.get_labels(),
            num_classes=3,
            num_samples=10,
            num_batches=10,
        )
        loader_train = DataLoader(
            dataset=train_set, batch_sampler=batch_sampler, num_workers=0
        )
        valid_set = MnistQGDataset(root=workdir, gallery_fraq=0.2)
        loader_valid = DataLoader(dataset=valid_set, batch_size=1024)

        # model and optimizer
        net = DummyModel(num_features=28 * 28, num_classes=NUM_CLASSES)
        adam = Adam(net.parameters(), lr=0.001)

        # triplet loss fed by an in-batch triplets sampler
        triplets_sampler = HardTripletsSampler(norm_required=False)
        triplet_loss = TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=triplets_sampler
        )

        # named callbacks, registered in this order: "cmc", then "control"
        cmc_callback = dl.ControlFlowCallbackWrapper(
            dl.CMCScoreCallback(
                embeddings_key="embeddings",
                labels_key="targets",
                is_query_key="is_query",
                topk=[1],
            ),
            loaders="valid",
        )
        periodic_callback = dl.PeriodicLoaderCallback(
            valid_loader_key="valid",
            valid_metric_key="cmc",
            minimize=False,
            valid=2,
        )
        named_callbacks = OrderedDict(
            [("cmc", cmc_callback), ("control", periodic_callback)]
        )
        named_loaders = OrderedDict(
            [("train", loader_train), ("valid", loader_valid)]
        )

        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            model=net,
            criterion=triplet_loss,
            optimizer=adam,
            callbacks=named_callbacks,
            loaders=named_loaders,
            verbose=False,
            valid_loader="valid",
            num_epochs=4,
        )

        reported_metrics = runner.loader_metrics
        assert "cmc01" in reported_metrics
def run_ml_pipeline(sampler_inbatch: data.IInbatchTripletSampler) -> float:
    """
    Train and validate a metric learning model on MNIST.

    README.md reuses this function as its minimal example in the section
    'CV - MNIST with Metric Learning'.

    Args:
        sampler_inbatch: in-batch sampler handed to the triplet loss for
            forming triplets

    Returns:
        the ``cmc01`` entry of ``runner.best_valid_metrics``
    """
    # 1. datasets and loaders (data is downloaded to / read from ./data)
    root = "./data"
    preprocessing = t.Compose(
        [t.ToTensor(), t.Normalize((0.1307,), (0.3081,))]
    )

    train_set = datasets.MnistMLDataset(
        root=root, train=True, download=True, transform=preprocessing
    )
    balance_sampler = data.BalanceBatchSampler(
        labels=train_set.get_labels(), p=5, k=10
    )
    # the batch size is dictated by the sampler itself
    loader_train = DataLoader(
        dataset=train_set,
        sampler=balance_sampler,
        batch_size=balance_sampler.batch_size,
    )

    valid_set = datasets.MnistQGDataset(
        root=root, transform=preprocessing, gallery_fraq=0.2
    )
    loader_valid = DataLoader(dataset=valid_set, batch_size=1024)

    # 2. network and its optimizer
    net = models.SimpleConv(features_dim=16)
    adam = Adam(net.parameters(), lr=0.0005)

    # 3. triplet loss using the caller-supplied in-batch sampler
    triplet_loss = nn.TripletMarginLossWithSampler(
        margin=0.5, sampler_inbatch=sampler_inbatch
    )

    # 4. callbacks: loss on "train", CMC on "valid", periodic loader control
    loss_on_train = dl.ControlFlowCallback(
        dl.CriterionCallback(), loaders="train"
    )
    cmc_on_valid = dl.ControlFlowCallback(
        dl.CMCScoreCallback(topk_args=[1]), loaders="valid"
    )
    # valid=100 equals num_epochs below
    periodic_valid = dl.PeriodicLoaderCallback(valid=100)
    pipeline_callbacks = [loss_on_train, cmc_on_valid, periodic_valid]

    # 5. training with catalyst Runner
    runner = dl.SupervisedRunner(device=utils.get_device())
    runner.train(
        model=net,
        criterion=triplet_loss,
        optimizer=adam,
        callbacks=pipeline_callbacks,
        loaders={"train": loader_train, "valid": loader_valid},
        minimize_metric=False,
        verbose=True,
        valid_loader="valid",
        num_epochs=100,
        main_metric="cmc01",
    )

    best_metrics = runner.best_valid_metrics
    return best_metrics["cmc01"]
Esempio n. 3
0
def train_experiment(engine=None):
    """
    Run a short (2-epoch) metric learning training on MNIST.

    Logs are written to a temporary directory that is removed on exit.

    Args:
        engine: forwarded unchanged to ``runner.train``; ``None`` by default
    """
    with TemporaryDirectory() as tmp_logdir:

        # 1. train and valid loaders
        train_set = MnistMLDataset(root=DATA_ROOT)
        batch_sampler = BatchBalanceClassSampler(
            labels=train_set.get_labels(),
            num_classes=5,
            num_samples=10,
            num_batches=10,
        )
        loader_train = DataLoader(dataset=train_set, batch_sampler=batch_sampler)

        valid_set = MnistQGDataset(root=DATA_ROOT, gallery_fraq=0.2)
        loader_valid = DataLoader(dataset=valid_set, batch_size=1024)

        # 2. model and optimizer
        net = MnistSimpleNet(out_features=16)
        adam = Adam(net.parameters(), lr=0.001)

        # 3. triplet loss with in-batch triplets sampling
        triplets_sampler = HardTripletsSampler(norm_required=False)
        triplet_loss = TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=triplets_sampler
        )

        # 4. callbacks: loss on "train", CMC on "valid", periodic validation
        loss_on_train = dl.ControlFlowCallbackWrapper(
            dl.CriterionCallback(
                input_key="embeddings", target_key="targets", metric_key="loss"
            ),
            loaders="train",
        )
        cmc_on_valid = dl.ControlFlowCallbackWrapper(
            dl.CMCScoreCallback(
                embeddings_key="embeddings",
                labels_key="targets",
                is_query_key="is_query",
                topk=[1],
            ),
            loaders="valid",
        )
        periodic_valid = dl.PeriodicLoaderCallback(
            valid_loader_key="valid",
            valid_metric_key="cmc01",
            minimize=False,
            valid=2,
        )
        experiment_callbacks = [loss_on_train, cmc_on_valid, periodic_valid]

        # 5. training with catalyst Runner
        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            engine=engine,
            model=net,
            criterion=triplet_loss,
            optimizer=adam,
            callbacks=experiment_callbacks,
            loaders={"train": loader_train, "valid": loader_valid},
            verbose=False,
            logdir=tmp_logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=2,
        )
# Build the runner: "img" is the input key, "seg" the target key, and the
# model output is stored under "logits".
runner = MonaiSupervisedRunner(
    input_key="img", input_target_key="seg",
    output_key="logits")  # you can also specify `device` here
# Train for 6 epochs, logging to ./logs. `train_loader`, `val_loader`, `model`,
# `loss_function`, `optimizer` and `dice_metric` must be defined earlier.
# "dice_metric" is the main metric and is not minimized (minimize_metric=False).
runner.train(
    loaders={
        "train": train_loader,
        "valid": val_loader
    },
    model=model,
    criterion=loss_function,
    optimizer=optimizer,
    num_epochs=6,
    logdir="./logs",
    main_metric="dice_metric",
    minimize_metric=False,
    verbose=False,
    timeit=True,  # let's use minimal logs, but with time checkers
    callbacks={
        # loss computed from the "seg" targets and the "logits" outputs
        "loss":
        dl.CriterionCallback(input_key="seg", output_key="logits"),
        # NOTE(review): presumably runs the "valid" loader only every 2nd
        # epoch — confirm against the Catalyst PeriodicLoaderCallback docs
        "periodic_valid":
        dl.PeriodicLoaderCallback(valid=2),
        # metric reported under the "dice_metric" prefix, which is the name
        # referenced by main_metric above
        "dice_metric":
        dl.MetricCallback(prefix="dice_metric",
                          metric_fn=dice_metric,
                          input_key="seg",
                          output_key="logits")
    },
    load_best_on_end=True,  # user-friendly API :)
)
Esempio n. 5
0
def test_reid_pipeline():
    """
    Check that the reid pipeline runs and computes metrics with
    ReidCMCScoreCallback.

    After 10 epochs the test requires "cmc01" to be present in
    ``runner.loader_metrics`` with a value above 0.65.
    """
    with TemporaryDirectory() as tmp_logdir:

        # 1. train and valid loaders
        train_set = MnistMLDataset(root=DATA_ROOT)
        batch_sampler = BatchBalanceClassSampler(
            labels=train_set.get_labels(),
            num_classes=3,
            num_samples=10,
            num_batches=20,
        )
        loader_train = DataLoader(
            dataset=train_set, batch_sampler=batch_sampler, num_workers=0
        )

        valid_set = MnistReIDQGDataset(root=DATA_ROOT, gallery_fraq=0.2)
        loader_valid = DataLoader(dataset=valid_set, batch_size=1024)

        # 2. model and optimizer
        net = MnistSimpleNet(out_features=16)
        adam = Adam(net.parameters(), lr=0.001)

        # 3. triplet loss with in-batch triplets sampling
        triplets_sampler = AllTripletsSampler(max_output_triplets=1000)
        triplet_loss = TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=triplets_sampler
        )

        # 4. callbacks: loss on "train", reid CMC on "valid", periodic validation
        loss_on_train = dl.ControlFlowCallbackWrapper(
            dl.CriterionCallback(
                input_key="embeddings", target_key="targets", metric_key="loss"
            ),
            loaders="train",
        )
        reid_cmc_on_valid = dl.ControlFlowCallbackWrapper(
            dl.ReidCMCScoreCallback(
                embeddings_key="embeddings",
                pids_key="targets",
                cids_key="cids",
                is_query_key="is_query",
                topk=[1],
            ),
            loaders="valid",
        )
        periodic_valid = dl.PeriodicLoaderCallback(
            valid_loader_key="valid",
            valid_metric_key="cmc01",
            minimize=False,
            valid=2,
        )
        reid_callbacks = [loss_on_train, reid_cmc_on_valid, periodic_valid]
        named_loaders = OrderedDict(
            [("train", loader_train), ("valid", loader_valid)]
        )

        # 5. training with catalyst Runner
        runner = ReIDCustomRunner()
        runner.train(
            model=net,
            criterion=triplet_loss,
            optimizer=adam,
            callbacks=reid_callbacks,
            loaders=named_loaders,
            verbose=False,
            logdir=tmp_logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=10,
        )

        # the metric must be reported and clear the quality threshold
        assert "cmc01" in runner.loader_metrics
        assert runner.loader_metrics["cmc01"] > 0.65
Esempio n. 6
0
def train_experiment(device, engine=None):
    """
    Run a short (2-epoch) metric learning training on MNIST.

    The dataset is downloaded into the current working directory; logs go
    to a temporary directory that is removed on exit.

    Args:
        device: used to build ``dl.DeviceEngine(device)`` when ``engine``
            is falsy
        engine: engine passed to ``runner.train``; when falsy (the default
            ``None``), a ``dl.DeviceEngine`` for ``device`` is used instead
    """
    with TemporaryDirectory() as tmp_logdir:

        # 1. train and valid loaders
        preprocessing = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

        train_set = datasets.MnistMLDataset(
            root=os.getcwd(), download=True, transform=preprocessing
        )
        batch_sampler = data.BatchBalanceClassSampler(
            labels=train_set.get_labels(),
            num_classes=5,
            num_samples=10,
            num_batches=10,
        )
        loader_train = DataLoader(dataset=train_set, batch_sampler=batch_sampler)

        valid_set = datasets.MnistQGDataset(
            root=os.getcwd(), transform=preprocessing, gallery_fraq=0.2
        )
        loader_valid = DataLoader(dataset=valid_set, batch_size=1024)

        # 2. model and optimizer
        net = models.MnistSimpleNet(out_features=16)
        adam = Adam(net.parameters(), lr=0.001)

        # 3. triplet loss with in-batch triplets sampling
        triplets_sampler = data.HardTripletsSampler(norm_required=False)
        triplet_loss = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=triplets_sampler
        )

        # 4. callbacks: loss on "train", CMC on "valid", periodic validation
        loss_on_train = dl.ControlFlowCallback(
            dl.CriterionCallback(
                input_key="embeddings", target_key="targets", metric_key="loss"
            ),
            loaders="train",
        )
        cmc_on_valid = dl.ControlFlowCallback(
            dl.CMCScoreCallback(
                embeddings_key="embeddings",
                labels_key="targets",
                is_query_key="is_query",
                topk_args=[1],
            ),
            loaders="valid",
        )
        periodic_valid = dl.PeriodicLoaderCallback(
            valid_loader_key="valid",
            valid_metric_key="cmc01",
            minimize=False,
            valid=2,
        )
        experiment_callbacks = [loss_on_train, cmc_on_valid, periodic_valid]

        # 5. training with catalyst Runner
        runner = CustomRunner(input_key="features", output_key="embeddings")
        # fall back to a device engine only when no engine was supplied
        active_engine = engine if engine else dl.DeviceEngine(device)
        runner.train(
            engine=active_engine,
            model=net,
            criterion=triplet_loss,
            optimizer=adam,
            callbacks=experiment_callbacks,
            loaders={"train": loader_train, "valid": loader_valid},
            verbose=False,
            logdir=tmp_logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=2,
        )
Esempio n. 7
0
def test_reid_pipeline():
    """
    Check that the reid pipeline runs and computes metrics with
    ReidCMCScoreCallback.

    The dataset is downloaded into the current working directory. After
    6 epochs the test requires "cmc01" to be present in
    ``runner.loader_metrics`` with a value above 0.7.
    """
    with TemporaryDirectory() as tmp_logdir:

        # 1. train and valid loaders
        preprocessing = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

        train_set = MnistMLDataset(
            root=os.getcwd(), download=True, transform=preprocessing
        )
        balance_sampler = data.BalanceBatchSampler(
            labels=train_set.get_labels(), p=5, k=10
        )
        # the batch size is dictated by the sampler itself
        loader_train = DataLoader(
            dataset=train_set,
            sampler=balance_sampler,
            batch_size=balance_sampler.batch_size,
        )

        valid_set = MnistReIDQGDataset(
            root=os.getcwd(), transform=preprocessing, gallery_fraq=0.2
        )
        loader_valid = DataLoader(dataset=valid_set, batch_size=1024)

        # 2. model and optimizer
        net = models.MnistSimpleNet(out_features=16)
        adam = Adam(net.parameters(), lr=0.001)

        # 3. triplet loss with in-batch triplets sampling
        triplets_sampler = data.AllTripletsSampler(max_output_triplets=1000)
        triplet_loss = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=triplets_sampler
        )

        # 4. callbacks: loss on "train", reid CMC on "valid", periodic validation
        loss_on_train = dl.ControlFlowCallback(
            dl.CriterionCallback(
                input_key="embeddings", target_key="targets", metric_key="loss"
            ),
            loaders="train",
        )
        reid_cmc_on_valid = dl.ControlFlowCallback(
            dl.ReidCMCScoreCallback(
                embeddings_key="embeddings",
                pids_key="targets",
                cids_key="cids",
                is_query_key="is_query",
                topk_args=[1],
            ),
            loaders="valid",
        )
        periodic_valid = dl.PeriodicLoaderCallback(
            valid_loader_key="valid",
            valid_metric_key="cmc01",
            minimize=False,
            valid=2,
        )
        reid_callbacks = [loss_on_train, reid_cmc_on_valid, periodic_valid]
        named_loaders = OrderedDict(
            [("train", loader_train), ("valid", loader_valid)]
        )

        # 5. training with catalyst Runner
        runner = ReIDCustomRunner()
        runner.train(
            model=net,
            criterion=triplet_loss,
            optimizer=adam,
            callbacks=reid_callbacks,
            loaders=named_loaders,
            verbose=False,
            logdir=tmp_logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=6,
        )

        # the metric must be reported and clear the quality threshold
        assert "cmc01" in runner.loader_metrics
        assert runner.loader_metrics["cmc01"] > 0.7