Example #1
def test_comparer_incompat_trigger():
    model_cpu = Model("cpu", 1.0)
    ppe.to(model_cpu, 'cpu')
    optimizer_cpu = torch.optim.SGD(model_cpu.parameters(), lr=1.0)
    trainer_cpu = ppe.engine.create_trainer(
        model_cpu,
        optimizer_cpu,
        1,
        device="cpu",
    )

    model_gpu = Model("cuda:0", 1.0)
    ppe.to(model_gpu, 'cuda:0')
    optimizer_gpu = torch.optim.SGD(model_gpu.parameters(), lr=1.0)
    trainer_gpu = ppe.engine.create_trainer(
        model_gpu,
        optimizer_gpu,
        1,
        device="cuda:0",
        stop_trigger=(1, "iteration"),
    )

    comp = ppe.utils.comparer.OutputsComparer(
        {
            "cpu": trainer_cpu,
            "gpu": trainer_gpu
        },
        "a",
    )
    train_1 = list(torch.ones(10) for _ in range(10))
    train_2 = list(torch.ones(10) for _ in range(10))
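    # The two engines disagree on how long to run: trainer_gpu stops after a
    # single iteration while trainer_cpu runs a full epoch, so the comparer
    # rejects the pair as having incompatible triggers.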
    with pytest.raises(ValueError):
        comp.compare({"cpu": (train_1, ), "gpu": (train_2, )})
def test_trainer_with_code_block(device, progress_bar, path):
    model = MyModel()
    model_with_loss = MyModelWithLossDictOutput(model)
    ppe.to(model_with_loss, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([{
        'x': torch.rand(20, ),
        't': torch.rand(10, )
    } for i in range(10)])
    extensions = _make_extensions()

    evaluator = engine.create_evaluator(model_with_loss,
                                        device=device,
                                        progress_bar=progress_bar,
                                        logic=ppe.handler.CodeBlockLogic())

    trainer = engine.create_trainer(model_with_loss,
                                    optimizer,
                                    20,
                                    device=device,
                                    evaluator=evaluator,
                                    extensions=extensions,
                                    out_dir=path,
                                    logic=ppe.handler.CodeBlockLogic())
    trainer.run(data, data)
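The trainer and evaluator examples rely on helper modules such as MyModelWithLossDictOutput that are defined elsewhere in the test suite. As a rough, hypothetical stand-in (ToyModelWithLoss is not part of the library), a dict-returning module that the default training logic can consume might look like the sketch below: its forward takes the keys the DataLoader yields ('x' and 't') and returns a dict whose scalar 'loss' entry gets backpropagated.

import torch


class ToyModelWithLoss(torch.nn.Module):
    # Hypothetical stand-in for MyModelWithLossDictOutput: one linear layer
    # plus an MSE loss, returned under the 'loss' key.
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(20, 10)

    def forward(self, x, t):
        y = self.linear(x)
        loss = torch.nn.functional.mse_loss(y, t)
        return {'loss': loss}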
def test_trainer_defer_wrong_order(path):
    class WrongOrderHandler(ppe.handler.Handler):
        def _complete_train_step(self, trainer, outs, block, sn, sm, rt):
            p_iter = self.pending_iters[sn][0]
            if p_iter.idx < 10:
                super()._complete_train_step(trainer, p_iter.deferred, block,
                                             sn, sm, rt)
            else:
                p_iter.cback(90, None, is_deferred=block)

    device = 'cpu'
    model = MyModel()
    model_with_loss = MyModelWithLossAsync(model)
    ppe.to(model_with_loss, device)
    # The custom handler class is passed to create_trainer via handler_class below
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([(torch.rand(20, ), torch.rand(10, ))
                                        for i in range(100)])

    trainer = engine.create_trainer(model_with_loss,
                                    optimizer,
                                    2,
                                    device=device,
                                    handler_class=WrongOrderHandler,
                                    out_dir=path)
    with pytest.raises(RuntimeError, match="Completed a not expected"):
        trainer.run(data)
Example #4
def test_evaluator_async(accuracy):
    device = 'async-cpu'
    model = AsyncModel(accuracy)
    data = torch.utils.data.DataLoader([{
        'x': torch.rand(20),
        't': torch.rand(1)
    } for i in range(1000)],
                                       batch_size=10)

    options = {'eval_report_keys': ['accuracy'], 'async': True}
    # Register the custom runtime class for the 'async-cpu' device
    ppe.runtime.runtime_registry.register(device, DeferRuntime)

    ppe.to(model, device)
    evaluator = engine.create_evaluator(
        model,
        device=device,
        options=options,
        metrics=[ppe.training.metrics.AccuracyMetric('t', 'y')])

    reporter = ppe.reporting.Reporter()
    observation = {}
    with reporter.scope(observation):
        evaluator.run(data)
    assert observation['val/accuracy'] == pytest.approx(accuracy)
    assert model._pending_called
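DeferRuntime above is a test helper registered for the custom 'async-cpu' device string. The registration pattern itself only needs a ppe.runtime.BaseRuntime subclass; a minimal sketch follows (NoopRuntime and the 'noop-cpu' name are made up for illustration and do not implement the deferred behaviour this test depends on).

import pytorch_pfn_extras as ppe


class NoopRuntime(ppe.runtime.BaseRuntime):
    # Hypothetical runtime: leaves the module where it is and performs no
    # extra initialization.
    def move_module(self, module):
        return module

    def initialize_module(self, module, loader_or_batch):
        pass


# Make ppe.to(model, 'noop-cpu') resolve to NoopRuntime.
ppe.runtime.runtime_registry.register('noop-cpu', NoopRuntime)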
Example #5
def test_model_comparer_invalid():
    model_cpu = ModelForComparer()
    model_gpu = ModelForComparer()
    ppe.to(model_cpu, 'cpu')
    ppe.to(model_gpu, device='cuda:0')

    optimizer_cpu = torch.optim.SGD(model_cpu.parameters(), lr=0.01)
    trainer_cpu = ppe.engine.create_trainer(model_cpu,
                                            optimizer_cpu,
                                            1,
                                            device='cpu')
    optimizer_gpu = torch.optim.SGD(model_gpu.parameters(), lr=0.01)
    trainer_gpu = ppe.engine.create_trainer(model_gpu,
                                            optimizer_gpu,
                                            1,
                                            device='cuda:0')
    compare_fn = ppe.utils.comparer.get_default_comparer(rtol=1e-2, atol=1e-2)
    comp = ppe.utils.comparer.ModelComparer(
        {
            "cpu": trainer_cpu,
            "gpu": trainer_gpu
        }, compare_fn=compare_fn)

    train_1 = list(torch.ones(2, 10, 10, 10) for _ in range(10))
    train_2 = list(torch.ones(2, 10, 10, 10) for _ in range(10))
    with pytest.raises(AssertionError):
        comp.compare({"cpu": train_1, "gpu": train_2})
def test_evaluator_trigger(evaluator_trigger, path):
    device = 'cpu'
    progress_bar = False
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([(torch.rand(20, ), torch.rand(10, ))
                                        for i in range(10)])
    extensions = _make_extensions()

    evaluator = engine.create_evaluator(model_with_loss,
                                        device=device,
                                        progress_bar=progress_bar)

    trainer = engine.create_trainer(model_with_loss,
                                    optimizer,
                                    20,
                                    device=device,
                                    evaluator=(evaluator,
                                               evaluator_trigger[1]),
                                    extensions=extensions,
                                    out_dir=path)
    mpath = 'pytorch_pfn_extras.training._evaluator.Evaluator.run'
    with mock.patch(mpath) as patched:
        trainer.run(data, data)
        assert patched.call_count == evaluator_trigger[0]
def test_module_change_forward():
    class Module1(torch.nn.Module):
        def forward(self, input):
            raise RuntimeError('The module forward should never be executed')

    class Module2:
        def __init__(self):
            self.value = 5

        def forward(self, input):
            return torch.tensor(self.value)

    class ForwardIntercepterRuntime(ppe.runtime.BaseRuntime):
        def initialize_module(self, module, loader_or_batch):
            self.new_module = Module2()
            module.forward = self.new_module.forward
            # TODO(ecastill): also reroute state_dict ?

        def move_module(self, module):
            self.initialize_module(module, None)
            return module

    module = Module1()
    with pytest.raises(RuntimeError):
        module(None)

    ppe.to(module, device='dummy', runtime_class=ForwardIntercepterRuntime)
    assert int(module(None)) == 5
    def test_autocast(self, autocast):
        trainer = MockTrainer()
        logic = ppe.handler.Logic(options={'autocast': autocast})
        handler = ppe.handler.Handler(
            logic, ppe.runtime.PyTorchRuntime('cuda'), {}
        )

        completed = False

        class _MModule(torch.nn.Module):
            def forward(self, x, y):
                return torch.mm(x, y)

        trainer.models['main'] = _MModule()
        trainer.optimizers['main'] = torch.optim.SGD(
            [torch.nn.Parameter(torch.zeros(10))], 0.01
        )
        ppe.to(trainer.models['main'], 'cuda')
        completed = False

        def callback(batch_idx, outs):
            nonlocal completed
            if autocast:
                assert outs.dtype == torch.float16
            else:
                assert outs.dtype == torch.float32
            completed = True

        inputs = {
            'x': torch.rand((2, 2)).cuda(),
            'y': torch.rand((2, 2)).cuda(),
        }
        handler.train_step(trainer, 0, inputs, callback)
        assert completed
def test_trainer_defer(path):
    class Extension:
        def __init__(self, is_async):
            self.name = 'Dummy'
            self.trigger = (1, 'iteration')
            self.called = 0
            self.is_async = is_async

        def __call__(self, manager):
            self.called += 1

    device = 'cpu'
    model = MyModel()
    model_with_loss = MyModelWithLossAsync(model)
    ppe.to(model_with_loss, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([(torch.rand(20, ), torch.rand(10, ))
                                        for i in range(100)])

    extensions = [Extension(True), Extension(False)]

    trainer = engine.create_trainer(model_with_loss,
                                    optimizer,
                                    2,
                                    device=device,
                                    extensions=extensions,
                                    out_dir=path)
    trainer.run(data)
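    # 2 epochs over 100 batches -> 200 iterations; each extension fires on a
    # (1, 'iteration') trigger, so both are expected to run 200 times.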
    assert trainer.manager.iteration == 200
    assert trainer.manager.execution == 200
    assert extensions[0].called == 200
    assert extensions[1].called == 200
Example #10
def test_trainer_with_code_block_with_multiple_optimizers(
        device, progress_bar, path):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip()
    model = MyModel()
    model_with_loss = MyModelWithLossDictOutput(model)
    ppe.to(model_with_loss, device)
    optimizer0 = torch.optim.SGD(model.parameters(), lr=0.1)
    optimizer1 = torch.optim.Adam(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([{
        'x': torch.rand(20, ),
        't': torch.rand(10, )
    } for i in range(10)])
    extensions = _make_extensions()

    evaluator = engine.create_evaluator(model_with_loss,
                                        device=device,
                                        progress_bar=progress_bar,
                                        logic=ppe.handler.CodeBlockLogic())

    trainer = engine.create_trainer(model_with_loss, {
        "0": optimizer0,
        "1": optimizer1
    },
                                    20,
                                    device=device,
                                    evaluator=evaluator,
                                    extensions=extensions,
                                    out_dir=path,
                                    logic=ppe.handler.CodeBlockLogic())
    trainer.run(data, data)
Example #11
def test_trainer_profile():
    device = 'cpu'
    model = MyModel()
    model_with_loss = MyModelWithLossDictOutput(model)
    ppe.to(model_with_loss, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([{
        'x': torch.rand(20, ),
        't': torch.rand(10, )
    } for i in range(10)])
    extensions = _make_extensions()

    evaluator = engine.create_evaluator(model_with_loss, device=device)

    trace_handler = mock.Mock()
    warmup = 1
    active = len(data) - warmup
    profile = torch.profiler.profile(
        activities=[torch.profiler.ProfilerActivity.CPU],
        on_trace_ready=trace_handler,
        schedule=torch.profiler.schedule(wait=0, warmup=warmup, active=active),
    )
    trainer = engine.create_trainer(
        model_with_loss,
        optimizer,
        20,
        device=device,
        evaluator=evaluator,
        extensions=extensions,
        profile=profile,
    )
    trainer.run(data, data)
    assert trace_handler.call_count == 20  # n_epochs
Example #12
def test_trainer_dict_input(device, progress_bar, path):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip()
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossDictOutput(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([{
        'x': torch.rand(20, ),
        't': torch.rand(10, )
    } for i in range(10)])
    extensions = _make_extensions()

    evaluator = engine.create_evaluator(model_with_loss,
                                        device=device,
                                        progress_bar=progress_bar)

    trainer = engine.create_trainer(model_with_loss,
                                    optimizer,
                                    20,
                                    device=device,
                                    evaluator=evaluator,
                                    extensions=extensions,
                                    out_dir=path)
    trainer.run(data, data)
Example #13
def test_train_with_evaluator(device, progress_bar, path):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip()
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([(torch.rand(20, ), torch.rand(10, ))
                                        for i in range(10)])
    extensions = _make_extensions()

    evaluator = engine.create_evaluator(model_with_loss,
                                        device=device,
                                        progress_bar=progress_bar)

    trainer = engine.create_trainer(model_with_loss,
                                    optimizer,
                                    20,
                                    device=device,
                                    evaluator=evaluator,
                                    extensions=extensions,
                                    out_dir=path)
    mpath = 'pytorch_pfn_extras.training._evaluator.Evaluator.run'
    with mock.patch(mpath) as patched:
        trainer.run(data, data)
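        # No explicit trigger was given, so the evaluator runs once per
        # epoch: 20 calls over 20 epochs.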
        assert patched.call_count == 20
    def test_setup_multi_device_split_invalid(self):
        options = {'eval_report_keys': ['output'], 'async': True}
        trainer = MockTrainer()
        handler = self._get_handler(options)
        ppe.to(trainer.models['main'].sm1, 'test_rt')
        ppe.to(trainer.models['main'].sm2, 'cpu')
        with pytest.raises(RuntimeError, match='models splitted'):
            handler._setup(trainer.models, [], None)
Example #15
def _get_trainer_with_evaluator(device, ret_val, model_class=Model):
    model = model_class(device, ret_val)
    ppe.to(model, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
    evaluator = ppe.engine.create_evaluator(model, device=device)
    trainer = ppe.engine.create_trainer(model,
                                        optimizer,
                                        1,
                                        device=device,
                                        evaluator=evaluator)
    return trainer
    def _get_trainer(self, epochs, out_dir):
        model = MyModel()
        ppe.to(model, 'cpu')
        model_with_loss = MyModelWithLossFn(model)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        extensions = _make_extensions()
        trainer = engine.create_trainer(
            model_with_loss, optimizer, epochs,
            device='cpu', extensions=extensions,
            out_dir=out_dir
        )
        return trainer
def _get_evaluator(model_class,
                   device,
                   args,
                   loader,
                   *,
                   seed=0,
                   max_epochs=None):
    torch.manual_seed(seed)
    model = model_class(device, *args)
    ppe.to(model, device)
    evaluator = ppe.engine.create_evaluator(model, device=device)
    return evaluator, (loader, )
Example #18
def test_module_split_ppe_to():
    class TestRuntime(ppe.runtime.BaseRuntime):
        def move_module(self, module):
            # Don't do the actual move
            return module

        def initialize_module(self, module, loader_or_batch):
            pass

    module = MyModule()
    ppe.to(module.layer2, 'dummy', runtime_class=TestRuntime)
    assert str(next(iter(module.layer1.parameters())).device) == "cpu"
    assert ppe.runtime._runtime._module_runtime_tag(module.layer1) is None
    assert ppe.runtime._runtime._module_runtime_tag(module.layer2) is not None
    def test_setup_multi_device_split_invalid(self):
        options = {'eval_report_keys': ['output']}
        trainer = MockTrainer()
        handler = self._get_handler(options)
        amodel = AsyncModel()
        amodel.sm1 = trainer.models['main'].sm1
        amodel.sm2 = trainer.models['main'].sm2
        trainer.models['main'] = amodel
        ppe.to(trainer.models['main'].sm1, 'test_rt')
        ppe.to(trainer.models['main'].sm2, 'cpu')
        handler._setup(trainer.models, [], None)

        def callback(batch_idx, outs, is_deferred):
            pass

        with pytest.raises(RuntimeError, match='models splitted'):
            handler.train_step(trainer, 0, None, callback)

        evaluator = MockEvaluator()
        handler = self._get_handler(options)
        evaluator.models['main'] = amodel
        ppe.to(evaluator.models['main'].sm1, 'test_rt')
        ppe.to(evaluator.models['main'].sm2, 'cpu')
        handler._setup(evaluator.models, [], None)

        with pytest.raises(RuntimeError, match='models splitted'):
            handler.eval_step(trainer, 0, None, callback)
Example #20
def test_runtime_nested():
    class TestRuntime(ppe.runtime.BaseRuntime):
        def move_module(self, module):
            # Don't do the actual move
            return module

        def initialize_module(self, module, loader_or_batch):
            pass

    module = MyModule()
    ppe.to(module, 'dummy', runtime_class=TestRuntime)
    ppe.to(module.layer2, 'dummy', runtime_class=TestRuntime)
    with pytest.raises(ValueError, match="nested"):
        for _ in ppe.runtime._runtime.named_runtime_modules(module):
            pass
def test_trainer_invalid_options(path):
    device = 'cpu'
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    extensions = _make_extensions()
    options = {'UNKNOWN_OPTIONS': True}
    with pytest.raises(ValueError, match="UNKNOWN_OPTIONS"):
        engine.create_trainer(
            model_with_loss, optimizer, 20,
            device=device, extensions=extensions,
            out_dir=path,
            options=options,
        )
    def get_result_from_training_loop():
        model = MyModel()
        ppe.to(model, device)
        model_with_loss = MyModelWithLossFn(model)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

        model_with_loss.train()
        for _ in range(20):
            for x, t in train_data:
                optimizer.zero_grad()
                loss = model_with_loss(x.to(device), t.to(device))
                loss.backward()
                optimizer.step()

        model.eval()
        with torch.no_grad():
            return [model(x.to(device)) for x, in data]
    def get_result_from_trainer():
        model = MyModel()
        ppe.to(model, device)
        model_with_loss = MyModelWithLossFn(model)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
        extensions = _make_extensions()

        trainer = engine.create_trainer(
            model_with_loss, optimizer, 20,
            device=device, extensions=extensions,
            out_dir=path
        )
        trainer.run(train_data)

        model.eval()
        with torch.no_grad():
            return [model(x.to(device)) for x, in data]
def test_trainer(device, path):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip()
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [(torch.rand(20,), torch.rand(10,)) for i in range(10)])
    extensions = _make_extensions()

    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20,
        device=device, extensions=extensions,
        out_dir=path,
    )
    trainer.run(data)
def _get_trainer(model_class,
                 device,
                 args,
                 loader,
                 *,
                 seed=0,
                 max_epochs=10,
                 stop_trigger=None):
    torch.manual_seed(seed)
    model = model_class(device, *args)
    ppe.to(model, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
    trainer = ppe.engine.create_trainer(model,
                                        optimizer,
                                        max_epochs,
                                        device=device,
                                        stop_trigger=stop_trigger)
    return trainer, (loader, )
Example #26
def test_trainer(device):
    iters_per_epoch = 10
    epochs = 20
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader([(torch.rand(20, ), torch.rand(10, ))
                                        for i in range(iters_per_epoch)])
    backward_fn = mock.Mock(return_value=None)

    trainer = ppe.engine.create_trainer(
        model_with_loss,
        optimizer,
        epochs,
        device=device,
        options={'backward_function': backward_fn})
    trainer.run(data)
    assert backward_fn.call_count == epochs * iters_per_epoch
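The 'backward_function' option replaces the default backward call; the mock above only counts how often it is invoked. A hypothetical replacement, assuming it receives the tensor that would otherwise be backpropagated (the test above does not show the exact arguments), might look like:

def scaled_backward(loss):
    # Hypothetical custom backward: scale the value before backpropagating.
    # Assumes the trainer passes the output tensor selected for backward.
    (0.5 * loss).backward()


# Reusing model_with_loss, optimizer, epochs and device from the example above.
trainer = ppe.engine.create_trainer(
    model_with_loss, optimizer, epochs, device=device,
    options={'backward_function': scaled_backward})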
def test_evaluator_with_metric(device, accuracy):
    model = MyModel(accuracy)
    data = torch.utils.data.DataLoader([{
        'x': torch.rand(20),
        't': torch.rand(1)
    } for i in range(10)],
                                       batch_size=10)

    ppe.to(model, device)
    evaluator = engine.create_evaluator(
        model,
        device=device,
        metrics=[ppe.training.metrics.AccuracyMetric('t', 'y')],
        options={'eval_report_keys': ['accuracy']})
    evaluator.handler.eval_setup(evaluator, data)
    reporter = ppe.reporting.Reporter()
    observation = {}
    with reporter.scope(observation):
        evaluator.run(data)
    assert pytest.approx(observation['val/accuracy']) == accuracy
def test_trainer_namedtuple_input(device, progress_bar, path):
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = ModelNamedTupleIO(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [Input(torch.rand(20, ), torch.rand(10, ), str(i)) for i in range(10)])
    extensions = _make_extensions()

    evaluator = engine.create_evaluator(model_with_loss,
                                        device=device,
                                        progress_bar=progress_bar)

    trainer = engine.create_trainer(model_with_loss,
                                    optimizer,
                                    20,
                                    device=device,
                                    evaluator=evaluator,
                                    extensions=extensions,
                                    out_dir=path)
    trainer.run(data, data)
Example #29
def test_model_comparer():
    model_cpu = ModelForComparer()
    model_gpu = ModelForComparer()
    ppe.to(model_cpu, 'cpu')
    ppe.to(model_gpu, 'cuda:0')
    # Make the models start from the same initial weights
    model_gpu.load_state_dict(model_cpu.state_dict())
    ppe.to(model_gpu, device='cuda:0')

    optimizer_cpu = torch.optim.SGD(model_cpu.parameters(), lr=0.01)
    trainer_cpu = ppe.engine.create_trainer(model_cpu,
                                            optimizer_cpu,
                                            1,
                                            device='cpu')
    optimizer_gpu = torch.optim.SGD(model_gpu.parameters(), lr=0.01)
    trainer_gpu = ppe.engine.create_trainer(model_gpu,
                                            optimizer_gpu,
                                            1,
                                            device='cuda:0')
    compare_fn = ppe.utils.comparer.get_default_comparer(rtol=1e-2, atol=1e-2)
    comp = ppe.utils.comparer.ModelComparer(
        {
            "cpu": trainer_cpu,
            "gpu": trainer_gpu
        }, compare_fn=compare_fn)

    train_1 = list(torch.ones(2, 10, 10, 10) for _ in range(10))
    train_2 = list(torch.ones(2, 10, 10, 10) for _ in range(10))
    comp.compare({"cpu": train_1, "gpu": train_2})
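get_default_comparer builds a tolerance-based comparison, but a user-defined compare_fn can be supplied instead. A sketch follows, assuming compare functions are called with the two engine names, the name of the compared entry, and the two values (this argument order is an assumption, not taken from the snippet above):

def loose_compare(engine_a, engine_b, name, val_a, val_b):
    # Hypothetical compare_fn with relaxed tolerances; moves the second
    # value onto the first one's device before comparing.
    torch.testing.assert_close(val_b.to(val_a.device), val_a, rtol=1e-2, atol=1e-2)


# Reusing trainer_cpu / trainer_gpu from the example above.
comp = ppe.utils.comparer.ModelComparer(
    {"cpu": trainer_cpu, "gpu": trainer_gpu}, compare_fn=loose_compare)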
    def test_eval_step_async(self):
        options = {'eval_report_keys': ['output'], 'async': True}
        handler = self._get_handler(options)
        evaluator = MockEvaluator()
        ppe.to(evaluator.models['main'], 'test_rt')
        prev_batch_idx = 0

        def callback(batch_idx, outs, is_deferred):
            nonlocal prev_batch_idx
            # Check that iterations complete in order
            assert prev_batch_idx == batch_idx
            prev_batch_idx += 1
            assert outs == 1

        for i in range(40):
            handler.eval_step(evaluator, i, None, callback)

        assert prev_batch_idx == 4
        assert len(handler.pending_iters['main']) == 36
        handler.eval_loop_end(evaluator)
        assert prev_batch_idx == 40
        assert len(handler.pending_iters['main']) == 0