def test_comparer_incompat_trigger():
    model_cpu = Model("cpu", 1.0)
    ppe.to(model_cpu, 'cpu')
    optimizer_cpu = torch.optim.SGD(model_cpu.parameters(), lr=1.0)
    trainer_cpu = ppe.engine.create_trainer(
        model_cpu, optimizer_cpu, 1, device="cpu",
    )

    model_gpu = Model("cuda:0", 1.0)
    ppe.to(model_gpu, 'cuda:0')
    optimizer_gpu = torch.optim.SGD(model_gpu.parameters(), lr=1.0)
    trainer_gpu = ppe.engine.create_trainer(
        model_gpu, optimizer_gpu, 1, device="cuda:0",
        stop_trigger=(1, "iteration"),
    )

    comp = ppe.utils.comparer.OutputsComparer(
        {"cpu": trainer_cpu, "gpu": trainer_gpu}, "a",
    )
    train_1 = list(torch.ones(10) for _ in range(10))
    train_2 = list(torch.ones(10) for _ in range(10))
    # The engines have different stop triggers, so the comparer rejects them.
    with pytest.raises(ValueError):
        comp.compare({"cpu": (train_1,), "gpu": (train_2,)})
def test_trainer_with_code_block(device, progress_bar, path):
    model = MyModel()
    model_with_loss = MyModelWithLossDictOutput(model)
    ppe.to(model_with_loss, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [{'x': torch.rand(20, ), 't': torch.rand(10, )} for i in range(10)])
    extensions = _make_extensions()
    evaluator = engine.create_evaluator(
        model_with_loss, device=device, progress_bar=progress_bar,
        logic=ppe.handler.CodeBlockLogic())
    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20, device=device, evaluator=evaluator,
        extensions=extensions, out_dir=path,
        logic=ppe.handler.CodeBlockLogic())
    trainer.run(data, data)
def test_trainer_defer_wrong_order(path):
    class WrongOrderHandler(ppe.handler.Handler):
        def _complete_train_step(self, trainer, outs, block, sn, sm, rt):
            p_iter = self.pending_iters[sn][0]
            if p_iter.idx < 10:
                super()._complete_train_step(
                    trainer, p_iter.deferred, block, sn, sm, rt)
            else:
                # Complete an iteration that is not the next expected one.
                p_iter.cback(90, None, is_deferred=block)

    device = 'cpu'
    model = MyModel()
    model_with_loss = MyModelWithLossAsync(model)
    ppe.to(model_with_loss, device)
    # Register the handler
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [(torch.rand(20, ), torch.rand(10, )) for i in range(100)])
    trainer = engine.create_trainer(
        model_with_loss, optimizer, 2, device=device,
        handler_class=WrongOrderHandler, out_dir=path)
    with pytest.raises(RuntimeError, match="Completed a not expected"):
        trainer.run(data)
def test_evaluator_async(accuracy):
    device = 'async-cpu'
    model = AsyncModel(accuracy)
    data = torch.utils.data.DataLoader(
        [{'x': torch.rand(20), 't': torch.rand(1)} for i in range(1000)],
        batch_size=10)
    options = {'eval_report_keys': ['accuracy'], 'async': True}

    # Register the custom runtime for the 'async-cpu' device
    ppe.runtime.runtime_registry.register(device, DeferRuntime)
    ppe.to(model, device)

    evaluator = engine.create_evaluator(
        model, device=device, options=options,
        metrics=[ppe.training.metrics.AccuracyMetric('t', 'y')])
    reporter = ppe.reporting.Reporter()
    observation = {}
    with reporter.scope(observation):
        evaluator.run(data)
    assert observation['val/accuracy'] == pytest.approx(accuracy)
    assert model._pending_called
def test_model_comparer_invalid():
    # The two models keep their distinct random initial weights,
    # so the weight comparison is expected to fail.
    model_cpu = ModelForComparer()
    model_gpu = ModelForComparer()
    ppe.to(model_cpu, 'cpu')
    ppe.to(model_gpu, device='cuda:0')
    optimizer_cpu = torch.optim.SGD(model_cpu.parameters(), lr=0.01)
    trainer_cpu = ppe.engine.create_trainer(
        model_cpu, optimizer_cpu, 1, device='cpu')
    optimizer_gpu = torch.optim.SGD(model_gpu.parameters(), lr=0.01)
    trainer_gpu = ppe.engine.create_trainer(
        model_gpu, optimizer_gpu, 1, device='cuda:0')

    compare_fn = ppe.utils.comparer.get_default_comparer(rtol=1e-2, atol=1e-2)
    comp = ppe.utils.comparer.ModelComparer(
        {"cpu": trainer_cpu, "gpu": trainer_gpu}, compare_fn=compare_fn)

    train_1 = list(torch.ones(2, 10, 10, 10) for _ in range(10))
    train_2 = list(torch.ones(2, 10, 10, 10) for _ in range(10))
    with pytest.raises(AssertionError):
        comp.compare({"cpu": train_1, "gpu": train_2})
def test_evaluator_trigger(evaluator_trigger, path):
    device = 'cpu'
    progress_bar = False
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [(torch.rand(20, ), torch.rand(10, )) for i in range(10)])
    extensions = _make_extensions()
    evaluator = engine.create_evaluator(
        model_with_loss, device=device, progress_bar=progress_bar)
    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20, device=device,
        evaluator=(evaluator, evaluator_trigger[1]),
        extensions=extensions, out_dir=path)
    mpath = 'pytorch_pfn_extras.training._evaluator.Evaluator.run'
    with mock.patch(mpath) as patched:
        trainer.run(data, data)
    assert patched.call_count == evaluator_trigger[0]
def test_module_change_forward():
    class Module1(torch.nn.Module):
        def forward(self, input):
            raise RuntimeError('The module forward should never be executed')

    class Module2:
        def __init__(self):
            self.value = 5

        def forward(self, input):
            return torch.tensor(self.value)

    class ForwardIntercepterRuntime(ppe.runtime.BaseRuntime):
        def initialize_module(self, module, loader_or_batch):
            self.new_module = Module2()
            module.forward = self.new_module.forward
            # TODO(ecastill): also reroute state_dict ?

        def move_module(self, module):
            self.initialize_module(module, None)
            return module

    module = Module1()
    with pytest.raises(RuntimeError):
        module(None)

    ppe.to(module, device='dummy', runtime_class=ForwardIntercepterRuntime)
    assert int(module(None)) == 5
def test_autocast(self, autocast):
    trainer = MockTrainer()
    logic = ppe.handler.Logic(options={'autocast': autocast})
    handler = ppe.handler.Handler(
        logic, ppe.runtime.PyTorchRuntime('cuda'), {}
    )

    class _MModule(torch.nn.Module):
        def forward(self, x, y):
            return torch.mm(x, y)

    trainer.models['main'] = _MModule()
    trainer.optimizers['main'] = torch.optim.SGD(
        [torch.nn.Parameter(torch.zeros(10))], 0.01
    )
    ppe.to(trainer.models['main'], 'cuda')
    completed = False

    def callback(batch_idx, outs):
        nonlocal completed
        # Under autocast the matmul result is computed in half precision.
        if autocast:
            assert outs.dtype == torch.float16
        else:
            assert outs.dtype == torch.float32
        completed = True

    inputs = {
        'x': torch.rand((2, 2)).cuda(),
        'y': torch.rand((2, 2)).cuda(),
    }
    handler.train_step(trainer, 0, inputs, callback)
    assert completed
def test_trainer_defer(path):
    class Extension:
        def __init__(self, is_async):
            self.name = 'Dummy'
            self.trigger = (1, 'iteration')
            self.called = 0
            self.is_async = is_async

        def __call__(self, manager):
            self.called += 1

    device = 'cpu'
    model = MyModel()
    model_with_loss = MyModelWithLossAsync(model)
    ppe.to(model_with_loss, device)

    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [(torch.rand(20, ), torch.rand(10, )) for i in range(100)])
    extensions = [Extension(True), Extension(False)]

    trainer = engine.create_trainer(
        model_with_loss, optimizer, 2, device=device,
        extensions=extensions, out_dir=path)
    trainer.run(data)
    # 2 epochs x 100 iterations: every iteration and extension must complete.
    assert trainer.manager.iteration == 200
    assert trainer.manager.execution == 200
    assert extensions[0].called == 200
    assert extensions[1].called == 200
def test_trainer_with_code_block_with_multiple_optimizers(
        device, progress_bar, path):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip()
    model = MyModel()
    model_with_loss = MyModelWithLossDictOutput(model)
    ppe.to(model_with_loss, device)
    optimizer0 = torch.optim.SGD(model.parameters(), lr=0.1)
    optimizer1 = torch.optim.Adam(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [{'x': torch.rand(20, ), 't': torch.rand(10, )} for i in range(10)])
    extensions = _make_extensions()
    evaluator = engine.create_evaluator(
        model_with_loss, device=device, progress_bar=progress_bar,
        logic=ppe.handler.CodeBlockLogic())
    trainer = engine.create_trainer(
        model_with_loss, {"0": optimizer0, "1": optimizer1}, 20,
        device=device, evaluator=evaluator, extensions=extensions,
        out_dir=path, logic=ppe.handler.CodeBlockLogic())
    trainer.run(data, data)
def test_trainer_profile():
    device = 'cpu'
    model = MyModel()
    model_with_loss = MyModelWithLossDictOutput(model)
    ppe.to(model_with_loss, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [{'x': torch.rand(20, ), 't': torch.rand(10, )} for i in range(10)])
    extensions = _make_extensions()
    evaluator = engine.create_evaluator(model_with_loss, device=device)

    trace_handler = mock.Mock()
    warmup = 1
    active = len(data) - warmup
    profile = torch.profiler.profile(
        activities=[torch.profiler.ProfilerActivity.CPU],
        on_trace_ready=trace_handler,
        schedule=torch.profiler.schedule(
            wait=0, warmup=warmup, active=active),
    )

    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20, device=device, evaluator=evaluator,
        extensions=extensions, profile=profile,
    )
    trainer.run(data, data)
    assert trace_handler.call_count == 20  # n_epochs
def test_trainer_dict_input(device, progress_bar, path):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip()
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossDictOutput(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [{'x': torch.rand(20, ), 't': torch.rand(10, )} for i in range(10)])
    extensions = _make_extensions()
    evaluator = engine.create_evaluator(
        model_with_loss, device=device, progress_bar=progress_bar)
    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20, device=device, evaluator=evaluator,
        extensions=extensions, out_dir=path)
    trainer.run(data, data)
def test_train_with_evaluator(device, progress_bar, path):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip()
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [(torch.rand(20, ), torch.rand(10, )) for i in range(10)])
    extensions = _make_extensions()
    evaluator = engine.create_evaluator(
        model_with_loss, device=device, progress_bar=progress_bar)
    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20, device=device, evaluator=evaluator,
        extensions=extensions, out_dir=path)

    mpath = 'pytorch_pfn_extras.training._evaluator.Evaluator.run'
    with mock.patch(mpath) as patched:
        trainer.run(data, data)
    # By default the evaluator runs once per epoch.
    assert patched.call_count == 20
def test_setup_multi_device_split_invalid(self):
    options = {'eval_report_keys': ['output'], 'async': True}
    trainer = MockTrainer()
    handler = self._get_handler(options)
    ppe.to(trainer.models['main'].sm1, 'test_rt')
    ppe.to(trainer.models['main'].sm2, 'cpu')
    with pytest.raises(RuntimeError, match='models splitted'):
        handler._setup(trainer.models, [], None)
def _get_trainer_with_evaluator(device, ret_val, model_class=Model):
    model = model_class(device, ret_val)
    ppe.to(model, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
    evaluator = ppe.engine.create_evaluator(model, device=device)
    trainer = ppe.engine.create_trainer(
        model, optimizer, 1, device=device, evaluator=evaluator)
    return trainer
def _get_trainer(self, epochs, out_dir):
    model = MyModel()
    ppe.to(model, 'cpu')
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    extensions = _make_extensions()
    trainer = engine.create_trainer(
        model_with_loss, optimizer, epochs, device='cpu',
        extensions=extensions, out_dir=out_dir
    )
    return trainer
def _get_evaluator(model_class, device, args, loader, *,
                   seed=0, max_epochs=None):
    torch.manual_seed(seed)
    model = model_class(device, *args)
    ppe.to(model, device)
    evaluator = ppe.engine.create_evaluator(model, device=device)
    return evaluator, (loader, )
def test_module_split_ppe_to():
    class TestRuntime(ppe.runtime.BaseRuntime):
        def move_module(self, module):
            # Don't do the actual move
            return module

        def initialize_module(self, module, loader_or_batch):
            pass

    module = MyModule()
    ppe.to(module.layer2, 'dummy', runtime_class=TestRuntime)
    # Only layer2 was moved; layer1 stays on CPU and carries no runtime tag.
    assert str(next(iter(module.layer1.parameters())).device) == "cpu"
    assert ppe.runtime._runtime._module_runtime_tag(module.layer1) is None
    assert ppe.runtime._runtime._module_runtime_tag(module.layer2) is not None
def test_setup_multi_device_split_invalid(self):
    options = {'eval_report_keys': ['output']}
    trainer = MockTrainer()
    handler = self._get_handler(options)
    amodel = AsyncModel()
    amodel.sm1 = trainer.models['main'].sm1
    amodel.sm2 = trainer.models['main'].sm2
    trainer.models['main'] = amodel
    ppe.to(trainer.models['main'].sm1, 'test_rt')
    ppe.to(trainer.models['main'].sm2, 'cpu')
    handler._setup(trainer.models, [], None)

    def callback(batch_idx, outs, is_deferred):
        pass

    with pytest.raises(RuntimeError, match='models splitted'):
        handler.train_step(trainer, 0, None, callback)

    evaluator = MockEvaluator()
    handler = self._get_handler(options)
    evaluator.models['main'] = amodel
    ppe.to(evaluator.models['main'].sm1, 'test_rt')
    ppe.to(evaluator.models['main'].sm2, 'cpu')
    handler._setup(evaluator.models, [], None)
    with pytest.raises(RuntimeError, match='models splitted'):
        handler.eval_step(trainer, 0, None, callback)
def test_runtime_nested():
    class TestRuntime(ppe.runtime.BaseRuntime):
        def move_module(self, module):
            # Don't do the actual move
            return module

        def initialize_module(self, module, loader_or_batch):
            pass

    module = MyModule()
    ppe.to(module, 'dummy', runtime_class=TestRuntime)
    ppe.to(module.layer2, 'dummy', runtime_class=TestRuntime)
    # Tagging both a module and one of its submodules is a nested
    # runtime assignment, which is not allowed.
    with pytest.raises(ValueError, match="nested"):
        for _ in ppe.runtime._runtime.named_runtime_modules(module):
            pass
def test_trainer_invalid_options(path):
    device = 'cpu'
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    extensions = _make_extensions()
    options = {'UNKNOWN_OPTIONS': True}
    with pytest.raises(ValueError, match="UNKNOWN_OPTIONS"):
        engine.create_trainer(
            model_with_loss, optimizer, 20, device=device,
            extensions=extensions, out_dir=path, options=options,
        )
def get_result_from_training_loop():
    # `device`, `train_data` and `data` come from the enclosing scope.
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    model_with_loss.train()
    for _ in range(20):
        for x, t in train_data:
            optimizer.zero_grad()
            loss = model_with_loss(x.to(device), t.to(device))
            loss.backward()
            optimizer.step()

    model.eval()
    with torch.no_grad():
        return [model(x.to(device)) for x, in data]
def get_result_from_trainer():
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    extensions = _make_extensions()

    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20, device=device,
        extensions=extensions, out_dir=path
    )
    trainer.run(train_data)

    model.eval()
    with torch.no_grad():
        return [model(x.to(device)) for x, in data]
def test_trainer(device, path):
    if not torch.cuda.is_available() and device == 'cuda':
        pytest.skip()
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [(torch.rand(20,), torch.rand(10,)) for i in range(10)])
    extensions = _make_extensions()
    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20,
        device=device, extensions=extensions, out_dir=path,
    )
    trainer.run(data)
def _get_trainer(model_class, device, args, loader, *,
                 seed=0, max_epochs=10, stop_trigger=None):
    torch.manual_seed(seed)
    model = model_class(device, *args)
    ppe.to(model, device)
    optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
    trainer = ppe.engine.create_trainer(
        model, optimizer, max_epochs, device=device,
        stop_trigger=stop_trigger)
    return trainer, (loader, )
def test_trainer(device):
    iters_per_epoch = 10
    epochs = 20
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = MyModelWithLossFn(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [(torch.rand(20, ), torch.rand(10, ))
         for i in range(iters_per_epoch)])
    backward_fn = mock.Mock(return_value=None)
    trainer = ppe.engine.create_trainer(
        model_with_loss, optimizer, epochs, device=device,
        options={'backward_function': backward_fn})
    trainer.run(data)
    # The custom backward function must be invoked once per iteration.
    assert backward_fn.call_count == epochs * iters_per_epoch
def test_evaluator_with_metric(device, accuracy):
    model = MyModel(accuracy)
    data = torch.utils.data.DataLoader(
        [{'x': torch.rand(20), 't': torch.rand(1)} for i in range(10)],
        batch_size=10)
    ppe.to(model, device)

    evaluator = engine.create_evaluator(
        model, device=device,
        metrics=[ppe.training.metrics.AccuracyMetric('t', 'y')],
        options={'eval_report_keys': ['accuracy']})
    evaluator.handler.eval_setup(evaluator, data)
    reporter = ppe.reporting.Reporter()
    observation = {}
    with reporter.scope(observation):
        evaluator.run(data)
    assert pytest.approx(observation['val/accuracy']) == accuracy
def test_trainer_namedtuple_input(device, progress_bar, path):
    model = MyModel()
    ppe.to(model, device)
    model_with_loss = ModelNamedTupleIO(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    data = torch.utils.data.DataLoader(
        [Input(torch.rand(20, ), torch.rand(10, ), str(i))
         for i in range(10)])
    extensions = _make_extensions()
    evaluator = engine.create_evaluator(
        model_with_loss, device=device, progress_bar=progress_bar)
    trainer = engine.create_trainer(
        model_with_loss, optimizer, 20, device=device, evaluator=evaluator,
        extensions=extensions, out_dir=path)
    trainer.run(data, data)
def test_model_comparer():
    model_cpu = ModelForComparer()
    model_gpu = ModelForComparer()
    ppe.to(model_cpu, 'cpu')
    ppe.to(model_gpu, 'cuda:0')
    # Make the models have the same initial weights
    model_gpu.load_state_dict(model_cpu.state_dict())
    ppe.to(model_gpu, device='cuda:0')

    optimizer_cpu = torch.optim.SGD(model_cpu.parameters(), lr=0.01)
    trainer_cpu = ppe.engine.create_trainer(
        model_cpu, optimizer_cpu, 1, device='cpu')
    optimizer_gpu = torch.optim.SGD(model_gpu.parameters(), lr=0.01)
    trainer_gpu = ppe.engine.create_trainer(
        model_gpu, optimizer_gpu, 1, device='cuda:0')

    compare_fn = ppe.utils.comparer.get_default_comparer(rtol=1e-2, atol=1e-2)
    comp = ppe.utils.comparer.ModelComparer(
        {"cpu": trainer_cpu, "gpu": trainer_gpu}, compare_fn=compare_fn)

    train_1 = list(torch.ones(2, 10, 10, 10) for _ in range(10))
    train_2 = list(torch.ones(2, 10, 10, 10) for _ in range(10))
    comp.compare({"cpu": train_1, "gpu": train_2})
def test_eval_step_async(self):
    options = {'eval_report_keys': ['output'], 'async': True}
    handler = self._get_handler(options)
    evaluator = MockEvaluator()
    ppe.to(evaluator.models['main'], 'test_rt')
    prev_batch_idx = 0

    def callback(batch_idx, outs, is_deferred):
        nonlocal prev_batch_idx
        # Check that iterations complete in order
        assert prev_batch_idx == batch_idx
        prev_batch_idx += 1
        assert outs == 1

    for i in range(40):
        handler.eval_step(evaluator, i, None, callback)
    # 40 steps were issued but only 4 completed; the rest stay pending.
    assert prev_batch_idx == 4
    assert len(handler.pending_iters['main']) == 36
    handler.eval_loop_end(evaluator)
    # Finishing the loop flushes all remaining pending iterations.
    assert prev_batch_idx == 40
    assert len(handler.pending_iters['main']) == 0