train_valid_dataset, [50_000, 10_000],
    generator=torch.Generator().manual_seed(42))
# Pick the first CUDA GPU when one is present, otherwise run on the CPU.
cuda_device = 0
if torch.cuda.is_available():
    device = torch.device("cuda:%d" % cuda_device)
    print(f"Running on {torch.cuda.get_device_name(device)}")
else:
    device = torch.device("cpu")
    print("Running on CPU")

# Network: flatten the 28x28 images, one hidden layer of 100 ReLU units,
# 10-way output (one logit per digit class).
network = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)

# Wrap the network in a Poutyne Model and train it.
model = Model(network,
              'sgd',
              'cross_entropy',
              batch_metrics=['accuracy'],
              epoch_metrics=['f1'],
              device=device)
# Change the number of epochs to find the optimum value for your work
model.fit_dataset(train_dataset,
                  valid_dataset,
                  epochs=1,
                  batch_size=32,
                  num_workers=2)

model.evaluate_dataset(test_dataset)
Example #2
0
    # TRAIN
    # Cache the trained model on disk so repeated runs skip training.
    model_path = results_path / "model.pickle"

    if model_path.exists():
        # NOTE(review): torch.load unpickles arbitrary objects — only safe
        # here because the file is written by this script itself below.
        model = torch.load(model_path)
    else:
        # Fresh CNN; the (1, 28, 28) argument matches MNIST's single-channel
        # 28x28 images — presumably the expected input shape of CNN; confirm
        # against its definition.
        model = CNN((1, 28, 28))
        poutyne_model = Model(model,
                              optimizer='adam',
                              loss_function='cross_entropy',
                              batch_metrics=['accuracy'],
                              device=device)
        # NOTE(review): the test set is used as the validation set here,
        # so reported validation scores are not held-out estimates.
        poutyne_model.fit_dataset(train_dataset,
                                  test_dataset,
                                  epochs=5,
                                  batch_size=128,
                                  num_workers=2,
                                  dataloader_kwargs={"pin_memory": True})
        # Persist the raw nn.Module (not the Poutyne wrapper) for reuse.
        torch.save(model, model_path)

    # Measure model's invariance  to rotations

    # Iterate over images from MNIST without labels
    # Using same name as before to avoid double download
    class MNIST(datasets.MNIST):
        """MNIST variant that yields only the image, discarding the label."""

        def __getitem__(self, index):
            image, _ = super().__getitem__(index)
            return image

    dataset_nolabels = MNIST(
        path,
Example #3
0
# Instantiate the MNIST datasets (downloaded on first run).
train_valid_dataset = MNIST('./datasets',
                            train=True,
                            download=True,
                            transform=ToTensor())
test_dataset = MNIST('./datasets',
                     train=False,
                     download=True,
                     transform=ToTensor())

# Deterministic 50k/10k train/validation split.
split_generator = torch.Generator().manual_seed(42)
train_dataset, valid_dataset = random_split(train_valid_dataset,
                                            [50_000, 10_000],
                                            generator=split_generator)

# Select CUDA device if available, falling back to the CPU.
cuda_device = 0
if torch.cuda.is_available():
    device = torch.device('cuda:%d' % cuda_device)
else:
    device = torch.device('cpu')

# Network: flattened 28x28 input, one 100-unit hidden layer, 10 outputs.
network = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 100), nn.ReLU(),
                        nn.Linear(100, 10))
epochs = 5

# Define the Poutyne Model and train it.
model = Model(network, 'sgd', 'cross_entropy', device=device)
model.fit_dataset(train_dataset, valid_dataset, epochs=epochs)
Example #4
0
class ModelDatasetMethodsTest(ModelFittingTestCase):
    """Integration tests for the dataset-based ``Model`` methods
    (``fit_dataset``, ``evaluate_dataset``, ``predict_dataset``) on MNIST."""

    @classmethod
    def setUpClass(cls):
        # Download MNIST once for the whole class into a temporary directory
        # that tearDownClass removes.
        cls.temp_dir_obj = TemporaryDirectory()
        cls.train_dataset = MNIST(cls.temp_dir_obj.name,
                                  train=True,
                                  download=True,
                                  transform=ToTensor())
        cls.test_dataset = MNIST(cls.temp_dir_obj.name,
                                 train=False,
                                 download=True,
                                 transform=ToTensor())
        # Deterministic 50k/10k train/validation split.
        cls.train_sub_dataset, cls.valid_sub_dataset = random_split(
            cls.train_dataset, [50_000, 10_000],
            generator=torch.Generator().manual_seed(42))

    @classmethod
    def tearDownClass(cls):
        cls.temp_dir_obj.cleanup()

    def setUp(self):
        super().setUp()
        torch.manual_seed(42)  # reproducible weight initialization
        self.pytorch_network = nn.Sequential(nn.Flatten(),
                                             nn.Linear(28 * 28, 10))
        self.batch_metrics = ['accuracy']
        self.batch_metrics_names = ['acc']
        self.batch_metrics_values = [ANY]
        self.epoch_metrics = ['f1']
        self.epoch_metrics_names = ['fscore_micro']
        self.epoch_metrics_values = [ANY]
        self.model = Model(self.pytorch_network,
                           'sgd',
                           'cross_entropy',
                           batch_metrics=self.batch_metrics,
                           epoch_metrics=self.epoch_metrics)

    def assertStdoutContains(self, values):
        """Assert that every string in ``values`` appears in captured stdout."""
        for value in values:
            self.assertIn(value, self.test_out.getvalue().strip())

    def test_fitting_mnist(self):
        """fit_dataset with a validation dataset drives the training
        callbacks with the expected parameters."""
        logs = self.model.fit_dataset(
            self.train_sub_dataset,
            self.valid_sub_dataset,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            validation_steps=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': ModelTest.steps_per_epoch,
            'valid_steps': ModelTest.steps_per_epoch
        }
        self._test_callbacks_train(params,
                                   logs,
                                   valid_steps=ModelTest.steps_per_epoch)

    def test_fitting_mnist_without_valid(self):
        """fit_dataset without a validation dataset still trains and
        invokes the training callbacks."""
        logs = self.model.fit_dataset(
            self.train_dataset,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            validation_steps=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': ModelTest.steps_per_epoch,
            'valid_steps': ModelTest.steps_per_epoch
        }
        self._test_callbacks_train(params, logs, has_valid=False)

    def test_evaluate_dataset(self):
        """evaluate_dataset returns loss, metrics and predictions with the
        expected types and shapes."""
        num_steps = 10
        loss, metrics, pred_y = self.model.evaluate_dataset(
            self.test_dataset,
            batch_size=ModelTest.batch_size,
            steps=num_steps,
            return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         self.batch_metrics_values + self.epoch_metrics_values)
        self.assertEqual(type(pred_y), np.ndarray)
        self.assertEqual(pred_y.shape, (num_steps * ModelTest.batch_size, 10))

    def test_evaluate_dataset_with_progress_bar_coloring(self):
        """The evaluation progress bar emits ANSI color codes and the
        full-block character."""
        num_steps = 10
        self._capture_output()

        self.model.evaluate_dataset(self.test_dataset,
                                    batch_size=ModelTest.batch_size,
                                    steps=num_steps)
        self.assertStdoutContains(
            ["%", "[32m", "[35m", "[36m", "[94m", "\u2588"])

    def test_evaluate_dataset_with_callback(self):
        """Callbacks passed to evaluate_dataset see the evaluation step count."""
        num_steps = 10
        self.model.evaluate_dataset(self.test_dataset,
                                    batch_size=ModelTest.batch_size,
                                    steps=num_steps,
                                    callbacks=[self.mock_callback])

        # Bug fix: the callback params must reflect the number of evaluation
        # steps actually requested. The original used ModelTest.epochs here,
        # which only passed because it happened to equal num_steps.
        params = {'steps': num_steps}
        self._test_callbacks_test(params)

    def test_evaluate_dataset_with_return_dict(self):
        """return_dict_format=True yields a logs dictionary."""
        num_steps = 10
        logs = self.model.evaluate_dataset(self.test_dataset,
                                           batch_size=ModelTest.batch_size,
                                           steps=num_steps,
                                           return_dict_format=True)
        self._test_return_dict_logs(logs)

    def test_evaluate_dataset_with_ground_truth(self):
        """return_ground_truth=True additionally returns the true labels."""
        num_steps = 10
        loss, metrics, pred_y, true_y = self.model.evaluate_dataset(
            self.test_dataset,
            batch_size=ModelTest.batch_size,
            steps=num_steps,
            return_pred=True,
            return_ground_truth=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         self.batch_metrics_values + self.epoch_metrics_values)
        self.assertEqual(type(pred_y), np.ndarray)
        self.assertEqual(type(true_y), np.ndarray)
        self.assertEqual(pred_y.shape, (num_steps * ModelTest.batch_size, 10))
        self.assertEqual(true_y.shape, (num_steps * ModelTest.batch_size, ))

    def test_predict_dataset(self):
        """predict_dataset works on a dataset that yields inputs only."""
        class PredictDataset(Dataset):
            # Wraps an (x, y) dataset and exposes only the inputs x.
            def __init__(self, dataset):
                super().__init__()
                self.dataset = dataset

            def __getitem__(self, index):
                return self.dataset[index][0]

            def __len__(self):
                return len(self.dataset)

        num_steps = 10
        pred_y = self.model.predict_dataset(PredictDataset(self.test_dataset),
                                            batch_size=ModelTest.batch_size,
                                            steps=num_steps)
        self.assertEqual(type(pred_y), np.ndarray)
        self.assertEqual(pred_y.shape, (num_steps * ModelTest.batch_size, 10))