def test_metric_learning_pipeline():
    """
    Smoke-test the metric learning pipeline end to end.

    The test trains ``DummyModel`` with a triplet margin loss and checks that
    ``LoaderMetricCallback`` works together with ``CMCMetric``
    (ICallbackLoaderMetric). It passes when ``"cmc01"`` shows up in
    ``runner.loader_metrics`` after training.
    """
    with TemporaryDirectory() as root:
        # --- data: train loader driven by a balanced sampler, plain valid loader
        train_set = datasets.MnistMLDataset(root=root, download=True)
        balance_sampler = data.BalanceBatchSampler(
            labels=train_set.get_labels(), p=5, k=10
        )
        train_loader = DataLoader(
            dataset=train_set,
            sampler=balance_sampler,
            batch_size=balance_sampler.batch_size,
        )
        valid_set = datasets.MnistQGDataset(root=root, transform=None, gallery_fraq=0.2)
        valid_loader = DataLoader(dataset=valid_set, batch_size=1024)

        # --- model, optimizer and triplet criterion
        net = DummyModel(num_features=28 * 28, num_classes=NUM_CLASSES)
        adam = Adam(net.parameters(), lr=0.001)
        triplet_sampler = data.HardTripletsSampler(norm_required=False)
        triplet_loss = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=triplet_sampler
        )

        # --- callbacks: CMC metric restricted to the "valid" loader
        cmc_metric = CMCMetric(
            topk_args=[1],
            embeddings_key="embeddings",
            labels_key="targets",
            is_query_key="is_query",
        )
        cmc_callback = dl.ControlFlowCallback(
            LoaderMetricCallback(
                cmc_metric,
                input_key=["embeddings", "is_query"],
                target_key=["targets"],
            ),
            loaders="valid",
        )
        # NOTE(review): the metric asserted at the end is named "cmc01" while
        # valid_metric_key here is "cmc" -- confirm that this is intended.
        periodic_callback = dl.PeriodicLoaderCallback(
            valid_loader_key="valid", valid_metric_key="cmc", valid=2
        )
        callbacks = OrderedDict(
            [("cmc", cmc_callback), ("control", periodic_callback)]
        )

        # --- run training
        loaders = OrderedDict(train=train_loader, valid=valid_loader)
        runner = CustomRunner(input_key="features", output_key="embeddings")
        runner.train(
            model=net,
            criterion=triplet_loss,
            optimizer=adam,
            callbacks=callbacks,
            loaders=loaders,
            verbose=False,
            valid_loader="valid",
            num_epochs=4,
        )

        assert "cmc01" in runner.loader_metrics
def run_ml_pipeline(sampler_inbatch: data.IInbatchTripletSampler) -> float:
    """
    Run the complete metric learning pipeline: training plus validation.

    The same code is used as the minimal example in README.md, section name:
    'CV - MNIST with Metric Learning'.

    Args:
        sampler_inbatch: in-batch sampler used to form triplets for the loss

    Returns:
        best value of the "cmc01" metric, read from ``runner.best_valid_metrics``
    """
    # The same value is used for the number of epochs and for the period of
    # the valid loader -- presumably so that validation runs only once, at
    # the end of training (TODO confirm).
    num_epochs = 100

    # Step 1: datasets and loaders
    root = "./data"
    mean, std = (0.1307, ), (0.3081, )
    preprocessing = t.Compose([t.ToTensor(), t.Normalize(mean, std)])

    train_set = datasets.MnistMLDataset(
        root=root,
        train=True,
        download=True,
        transform=preprocessing,
    )
    balance_sampler = data.BalanceBatchSampler(
        labels=train_set.get_labels(), p=5, k=10
    )
    train_loader = DataLoader(
        dataset=train_set,
        sampler=balance_sampler,
        batch_size=balance_sampler.batch_size,
    )

    valid_set = datasets.MnistQGDataset(
        root=root, transform=preprocessing, gallery_fraq=0.2
    )
    valid_loader = DataLoader(dataset=valid_set, batch_size=1024)

    # Step 2: network and its optimizer
    net = models.SimpleConv(features_dim=16)
    adam = Adam(net.parameters(), lr=0.0005)

    # Step 3: triplet loss that samples triplets inside each batch
    triplet_loss = nn.TripletMarginLossWithSampler(
        margin=0.5, sampler_inbatch=sampler_inbatch
    )

    # Step 4: callbacks -- loss on "train", CMC score on "valid"
    loss_callback = dl.ControlFlowCallback(dl.CriterionCallback(), loaders="train")
    cmc_callback = dl.ControlFlowCallback(
        dl.CMCScoreCallback(topk_args=[1]), loaders="valid"
    )
    periodic_callback = dl.PeriodicLoaderCallback(valid=num_epochs)

    # Step 5: training with the catalyst runner
    device = utils.get_device()
    runner = dl.SupervisedRunner(device=device)
    runner.train(
        model=net,
        criterion=triplet_loss,
        optimizer=adam,
        callbacks=[loss_callback, cmc_callback, periodic_callback],
        loaders={"train": train_loader, "valid": valid_loader},
        minimize_metric=False,
        verbose=True,
        valid_loader="valid",
        num_epochs=num_epochs,
        main_metric="cmc01",
    )

    best_metrics = runner.best_valid_metrics
    return best_metrics["cmc01"]
def train_experiment(device, engine=None):
    """
    Train a small metric learning experiment for two epochs.

    Logs go to a temporary directory; the datasets are rooted at the current
    working directory.

    Args:
        device: passed to ``dl.DeviceEngine`` when ``engine`` is not given
        engine: engine handed to ``runner.train``; when falsy, a
            ``dl.DeviceEngine(device)`` is created instead
    """
    with TemporaryDirectory() as logdir:
        # Data: balanced batches for training, fixed-size batches for validation
        data_root = os.getcwd()
        preprocessing = Compose([ToTensor(), Normalize((0.1307, ), (0.3081, ))])

        train_set = datasets.MnistMLDataset(
            root=data_root, download=True, transform=preprocessing
        )
        batch_sampler = data.BatchBalanceClassSampler(
            labels=train_set.get_labels(),
            num_classes=5,
            num_samples=10,
            num_batches=10,
        )
        train_loader = DataLoader(dataset=train_set, batch_sampler=batch_sampler)

        valid_set = datasets.MnistQGDataset(
            root=data_root, transform=preprocessing, gallery_fraq=0.2
        )
        valid_loader = DataLoader(dataset=valid_set, batch_size=1024)

        # Network and optimizer
        net = models.MnistSimpleNet(out_features=16)
        adam = Adam(net.parameters(), lr=0.001)

        # Triplet loss fed by a hard-triplets in-batch sampler
        triplet_sampler = data.HardTripletsSampler(norm_required=False)
        triplet_loss = nn.TripletMarginLossWithSampler(
            margin=0.5, sampler_inbatch=triplet_sampler
        )

        # Callbacks: loss on "train", CMC score on "valid", periodic validation
        loss_callback = dl.ControlFlowCallback(
            dl.CriterionCallback(
                input_key="embeddings", target_key="targets", metric_key="loss"
            ),
            loaders="train",
        )
        cmc_callback = dl.ControlFlowCallback(
            dl.CMCScoreCallback(
                embeddings_key="embeddings",
                labels_key="targets",
                is_query_key="is_query",
                topk_args=[1],
            ),
            loaders="valid",
        )
        periodic_callback = dl.PeriodicLoaderCallback(
            valid_loader_key="valid",
            valid_metric_key="cmc01",
            minimize=False,
            valid=2,
        )

        runner = CustomRunner(input_key="features", output_key="embeddings")

        # Fall back to a device engine only when the caller supplied none
        if not engine:
            engine = dl.DeviceEngine(device)

        runner.train(
            engine=engine,
            model=net,
            criterion=triplet_loss,
            optimizer=adam,
            callbacks=[loss_callback, cmc_callback, periodic_callback],
            loaders={"train": train_loader, "valid": valid_loader},
            verbose=False,
            logdir=logdir,
            valid_loader="valid",
            valid_metric="cmc01",
            minimize_valid_metric=False,
            num_epochs=2,
        )