def __init__(self, optimizer):
    """Wrap ``optimizer`` and expose its target as a Chainer link.

    Args:
        optimizer: Object whose ``target`` attribute is either a
            ``chainer.Link`` or a PyTorch module. A PyTorch module is
            wrapped in ``cpm.TorchModule`` and kept in
            ``self.torch_model``; otherwise ``self.torch_model`` stays
            ``None``.
    """
    self.optimizer = optimizer
    # Torch doesn't track the optimizer params until calculations are
    # performed, so we need to delay the conversion.
    self.torch_model = None
    if isinstance(optimizer.target, chainer.Link):
        self.target = optimizer.target
        return

    self.torch_model = optimizer.target
    self.target = cpm.TorchModule(optimizer.target)
    # There is no API in torch to know whether a model is on CUDA, so we
    # inspect its first parameter. A module without any parameters yields
    # ``None`` instead of raising ``StopIteration`` and is left untouched.
    param_tensor = next(optimizer.target.parameters(), None)
    if param_tensor is not None and param_tensor.is_cuda:
        self.target.to_gpu(param_tensor.device.index)
def test_to_torch_module():
    """Check that ``cpm.TorchModule`` exposes a torch ``Linear`` as Chainer
    variables and that writes through the wrapper reach the torch weights.
    """
    linear = torch.nn.Linear(3, 1)
    linear.weight.data = torch.ones(1, 3)

    # Conversion
    wrapped = cpm.TorchModule(linear)

    assert isinstance(wrapped.weight, chainer.Variable)
    assert isinstance(wrapped.bias, chainer.Variable)

    weight_shape = wrapped.weight.shape
    bias_shape = wrapped.bias.shape
    assert weight_shape == (1, 3)
    assert bias_shape == (1, )

    all_ones = numpy.ones((1, 3))
    assert (wrapped.weight.array == all_ones).all()

    # Memory sharing: an in-place write on the Chainer side must be
    # visible from the original torch parameter.
    new_values = numpy.arange(3).reshape((1, 3))
    wrapped.weight.array[...] = new_values

    expected = torch.arange(3).reshape((1, 3))
    assert (linear.weight.data == expected).all()
def set_model(self):
    """Create the PyTorch model on the GPU, wrap it for Chainer, and build
    the classifier used for training.

    Sets ``self.model`` (the ``cpm.TorchModule`` wrapper) and
    ``self.classifier`` (an ``L.Classifier`` around the PyTorch model).
    """
    net = MLPWrapper().cuda()

    # Push a single converted training sample through the network before
    # wrapping it.
    # NOTE(review): presumably this forward pass is needed so that the
    # parameters exist before the conversion -- confirm against MLPWrapper.
    first_sample = self.train_dataset[0]
    converted = self.converter([first_sample], self.device.id)
    net(converted[0])

    self.model = cpm.TorchModule(net)
    self.model.to_gpu(self.device)

    # The classifier is built over the PyTorch model itself, since that is
    # the one that will be called.
    self.classifier = L.Classifier(
        net,
        lossfun=torchF.nll_loss,
        accfun=torch_accuracy,
    )