def _test_basic_func(rank, world_size, tempfile_name, test_case):
    _dist_init(rank, world_size, tempfile_name, backend="nccl")  # Covers nccl

    model = Linear(2, 2, bias=False)
    model.to("cuda")
    model = DDP(model, device_ids=[rank])
    optim = AdaScale(SGD(model.parameters(), lr=0.1))
    if "input" in test_case:
        # single iter
        in_data = Tensor(test_case["input"][rank])
        in_data = in_data.cuda()
        out = model(in_data)
        out.sum().backward()
        assert np.allclose(optim.gain(), test_case["expected_gain"]), optim.gain()
        optim.step()
        optim.zero_grad()
    else:
        # multiple iters
        for in_data in test_case["inputs"]:
            in_data = Tensor(in_data[rank]).cuda()
            out = model(in_data)
            out.sum().backward()
            optim.step()
            optim.zero_grad()
        assert np.allclose(optim.gain(), test_case["expected_gain"]), optim.gain()
    dist.destroy_process_group()
def test_one_iteration(self):
    """Test FSDP with uneven divide of parameter shards."""
    model = Linear(3, 3, bias=False)
    input = torch.rand(8, 3)
    my_lr = 0.1

    ref_forward_output_my_rank, ref_weight_out = self._get_ref_results(model, input, my_lr)

    model.to(self.rank)
    model = FSDP(model)
    optim = SGD(model.parameters(), lr=my_lr)
    self.assertTrue(len(input) >= self.world_size)
    in_data = torch.Tensor(input[self.rank]).to(self.rank)
    out = model(in_data)
    out.float().sum().backward()
    optim.step()
    optim.zero_grad()

    with model._summon_full_params():
        # TODO: This is here because it was originally part of
        # get_full_params(); debug why it is needed here.
        torch.cuda.synchronize()
        weight_out = model.module.weight.T.clone()
        self.assertEqual(ref_forward_output_my_rank, out)
        self.assertEqual(ref_weight_out, weight_out)
def _test_create_supervised_trainer(
    model_device: Optional[str] = None,
    trainer_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
    scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
):
    model = Linear(1, 1)

    if model_device:
        model.to(model_device)

    model.weight.data.zero_()
    model.bias.data.zero_()
    optimizer = SGD(model.parameters(), 0.1)

    if trace:
        example_input = torch.randn(1, 1)
        model = torch.jit.trace(model, example_input)

    if amp_mode == "apex" and model_device == trainer_device == "cuda":
        from apex import amp

        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    trainer = create_supervised_trainer(
        model,
        optimizer,
        mse_loss,
        device=trainer_device,
        output_transform=lambda x, y, y_pred, loss: (y_pred, loss.item()),
        amp_mode=amp_mode,
        scaler=scaler,
    )

    x = torch.tensor([[0.1], [0.2]])
    y = torch.tensor([[0.3], [0.5]])
    data = [(x, y)]

    assert model.weight.data[0, 0].item() == approx(0.0)
    assert model.bias.item() == approx(0.0)

    if model_device == trainer_device or ((model_device == "cpu") ^ (trainer_device == "cpu")):
        state = trainer.run(data)

        assert state.output[-1] == approx(0.17), state.output[-1]
        assert round(model.weight.data[0, 0].item(), 3) == approx(0.013), model.weight.item()
        assert round(model.bias.item(), 3) == approx(0.08), model.bias.item()

        if amp_mode == "amp":
            assert state.output[0].dtype is torch.half
            if scaler and isinstance(scaler, bool):
                assert hasattr(state, "scaler")
            else:
                assert not hasattr(state, "scaler")
    else:
        if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
            # This is broken in 1.6.0 but will probably be fixed in 1.7.0
            with pytest.raises(RuntimeError, match=r"is on CPU, but expected them to be on GPU"):
                trainer.run(data)
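# Hand check of the expected values above (worked by hand, not library output):
# with w = b = 0 the predictions are 0, so MSE = ((0 - 0.3)**2 + (0 - 0.5)**2) / 2 = 0.17.
# Gradients: dL/dw = mean(2 * (pred - y) * x) = (2*(-0.3)*0.1 + 2*(-0.5)*0.2) / 2 = -0.13
#            dL/db = mean(2 * (pred - y))     = (2*(-0.3)     + 2*(-0.5))     / 2 = -0.8
# so one SGD step with lr=0.1 gives w = 0.013 and b = 0.08, matching the asserts.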
def _test_create_evaluation_step(
    mock_torch_cuda_amp_module,
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    output_transform_mock = MagicMock()
    model = Linear(1, 1)

    if model_device:
        model.to(model_device)

    model.weight.data.zero_()
    model.bias.data.zero_()

    if trace:
        example_input = torch.randn(1, 1)
        model = torch.jit.trace(model, example_input)

    device_type = evaluator_device.type if isinstance(evaluator_device, torch.device) else evaluator_device
    on_tpu = "xla" in device_type if device_type is not None else False
    mode, _ = _check_arg(on_tpu, amp_mode, None)

    evaluate_step = supervised_evaluation_step(model, evaluator_device, output_transform=output_transform_mock)

    x = torch.tensor([[1.0], [2.0]])
    y = torch.tensor([[3.0], [5.0]])
    data = [(x, y)]

    evaluator = Engine(evaluate_step)
    evaluator.run(data)
    assert not mock_torch_cuda_amp_module.called
    assert output_transform_mock.called
def _test_apex_average(device, amp_mode, opt_level):
    assert amp_mode == "apex"
    assert device == "cuda"

    model = Linear(1, 1)

    if device:
        model.to(device)

    model.weight.data.zero_()
    model.bias.data.zero_()
    optimizer = SGD(model.parameters(), 0.1)

    from apex import amp

    model, optimizer = amp.initialize(model, optimizer, opt_level=opt_level)

    mean_var = VariableAccumulation(lambda a, x: a + x)
    y_true = torch.rand(100).float().to(device)

    for y in y_true:
        mean_var.update(y)

    a, n = mean_var.compute()
    assert a.item() == pytest.approx(y_true.sum().item())
    assert n == len(y_true)
def _default_create_supervised_evaluator(
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    model = Linear(1, 1)

    if model_device:
        model.to(model_device)

    model.weight.data.zero_()
    model.bias.data.zero_()

    if trace:
        example_input = torch.randn(1, 1)
        model = torch.jit.trace(model, example_input)

    evaluator = create_supervised_evaluator(model, device=evaluator_device, amp_mode=amp_mode)

    assert model.weight.data[0, 0].item() == approx(0.0)
    assert model.bias.item() == approx(0.0)

    return model, evaluator
def _test_grad_accum_func(rank, world_size, tempfile_name):
    _dist_init(rank, world_size, tempfile_name, backend="gloo")  # Covers gloo

    model = Linear(4, 2, bias=False)
    model.to("cuda")
    model = DDP(model, device_ids=[rank])
    optim = AdaScale(SGD(model.parameters(), lr=0.1), num_gradients_to_accumulate=2)
    with model.no_sync():
        # iter 1, input vectors point along dim0 and dim1
        in_data = Tensor([0.0] * 4)
        in_data[rank] = 1.0
        in_data = in_data.cuda()
        out = model(in_data)
        out.sum().backward()
    # iter 2, input vectors point along dim2 and dim3
    in_data = Tensor([0.0] * 4)
    in_data[rank + 2] = 1.0
    in_data = in_data.cuda()
    out = model(in_data)
    out.sum().backward()
    # since all inputs are orthogonal, the gain should be exactly 4.0.
    assert np.allclose(optim.gain(), 4.0), optim.gain()
    optim.step()
    optim.zero_grad()
    dist.destroy_process_group()
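# Why the gain is exactly 4.0 above, as a standalone numeric sketch. This
# assumes (not verified against the fairscale internals) that with 2 ranks x
# 2 accumulation steps AdaScale's gain reduces to the mean per-gradient
# squared norm divided by the squared norm of the averaged gradient:
import numpy as np

grads = np.eye(4)                 # 4 mutually orthogonal unit gradients
avg = grads.mean(axis=0)          # the all-reduced / accumulated average
gain = np.mean((grads ** 2).sum(axis=1)) / (avg ** 2).sum()
print(gain)                       # -> 4.0, matching the assertion above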
def _test_create_mocked_supervised_trainer(
    model_device: Optional[str] = None,
    trainer_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
    scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
):
    with mock.patch("ignite.engine.supervised_training_step_amp") as training_step_amp_mock:
        with mock.patch("ignite.engine.supervised_training_step_apex") as training_step_apex_mock:
            with mock.patch("ignite.engine.supervised_training_step_tpu") as training_step_tpu_mock:
                with mock.patch("ignite.engine.supervised_training_step") as training_step_mock:
                    model = Linear(1, 1)

                    if model_device:
                        model.to(model_device)

                    model.weight.data.zero_()
                    model.bias.data.zero_()
                    optimizer = SGD(model.parameters(), 0.1)

                    if trace:
                        example_input = torch.randn(1, 1)
                        model = torch.jit.trace(model, example_input)

                    if amp_mode == "apex" and model_device == trainer_device == "cuda":
                        from apex import amp

                        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

                    trainer = create_supervised_trainer(
                        model,
                        optimizer,
                        mse_loss,
                        device=trainer_device,
                        output_transform=lambda x, y, y_pred, loss: (y_pred, loss.item()),
                        amp_mode=amp_mode,
                        scaler=scaler,
                    )

                    x = torch.tensor([[0.1], [0.2]])
                    y = torch.tensor([[0.3], [0.5]])
                    data = [(x, y)]

                    assert model.weight.data[0, 0].item() == approx(0.0)
                    assert model.bias.item() == approx(0.0)

                    on_tpu = "xla" in trainer_device if trainer_device is not None else False
                    mode, _ = _check_arg(on_tpu, amp_mode, scaler)

                    if model_device == trainer_device or ((model_device == "cpu") ^ (trainer_device == "cpu")):
                        trainer.run(data)

                        if mode == "amp":
                            assert training_step_amp_mock.called
                        elif mode == "apex":
                            assert training_step_apex_mock.called
                        elif mode == "tpu":
                            assert training_step_tpu_mock.called
                        else:
                            assert training_step_mock.called
def _test_mocked_supervised_evaluator(
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    with mock.patch("ignite.engine.supervised_evaluation_step") as evaluation_step:
        with mock.patch("ignite.engine.supervised_evaluation_step_amp") as evaluation_step_amp:
            model = Linear(1, 1)

            if model_device:
                model.to(model_device)

            model.weight.data.zero_()
            model.bias.data.zero_()

            if trace:
                example_input = torch.randn(1, 1)
                model = torch.jit.trace(model, example_input)

            evaluator = create_supervised_evaluator(model, device=evaluator_device, amp_mode=amp_mode)

            x = torch.tensor([[1.0], [2.0]])
            y = torch.tensor([[3.0], [5.0]])
            data = [(x, y)]

            if model_device == evaluator_device or ((model_device == "cpu") ^ (evaluator_device == "cpu")):
                state = evaluator.run(data)

                if amp_mode == "amp":
                    assert evaluation_step_amp.called
                    assert not evaluation_step.called
                else:
                    assert evaluation_step.called
                    assert not evaluation_step_amp.called
def _test_basic_func(rank, world_size, tempfile_name, test_case, oss, model=None):
    _dist_init(rank, world_size, tempfile_name, backend="nccl")

    if model is None:
        model = Linear(2, 2)
        model.bias.data.fill_(0.0)

    model.to("cuda")
    model = DDP(model, device_ids=[rank])

    assert oss in ["none", "ada-oss", "wrapper-oss", "oss-wrapper"]
    if oss == "ada-oss":
        optim = AdaScale(OSS(model.parameters(), SGD, lr=0.1))
    elif oss == "wrapper-oss":
        optim = AdaScaleWrapper(model.parameters(), optim_cls=OSS, optim=SGD, lr=0.1)
    elif oss == "oss-wrapper":
        optim = OSS(model.parameters(), AdaScaleWrapper, optim_cls=SGD, lr=0.1)
    else:
        assert oss == "none"
        optim = AdaScale(SGD(model.parameters(), lr=0.1))

    if "input" in test_case:
        inputs = [test_case["input"]]
    else:
        inputs = test_case["inputs"]

    for in_data in inputs:
        in_data = Tensor(in_data[rank]).cuda()
        out = model(in_data)
        out.sum().backward()
        optim.step()
        optim.zero_grad()

    if "expected_gain" in test_case:
        assert np.allclose(optim.gain(), test_case["expected_gain"]), "{} vs {}".format(
            optim.gain(), test_case["expected_gain"]
        )
    if "expected_mean_weight" in test_case:
        mean_weight = mean([model.module[i].weight.data.mean().item() for i in range(4)])
        assert np.allclose(mean_weight, test_case["expected_mean_weight"]), mean_weight

    dist.destroy_process_group()
def _test_mocked_supervised_evaluator(
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    with mock.patch("ignite.engine.supervised_evaluation_step") as evaluation_step:
        with mock.patch("ignite.engine.supervised_evaluation_step_amp") as evaluation_step_amp:
            model = Linear(1, 1)

            if model_device:
                model.to(model_device)

            model.weight.data.zero_()
            model.bias.data.zero_()

            if trace:
                example_input = torch.randn(1, 1)
                model = torch.jit.trace(model, example_input)

            evaluator = create_supervised_evaluator(model, device=evaluator_device, amp_mode=amp_mode)

            x = torch.tensor([[1.0], [2.0]])
            y = torch.tensor([[3.0], [5.0]])
            data = [(x, y)]

            if model_device == evaluator_device or ((model_device == "cpu") ^ (evaluator_device == "cpu")):
                state = evaluator.run(data)

                if amp_mode == "amp":
                    assert evaluation_step_amp.called
                    assert not evaluation_step.called
                else:
                    assert evaluation_step.called
                    assert not evaluation_step_amp.called
            else:
                if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
                    # This is broken in 1.6.0 but will probably be fixed in 1.7.0
                    with pytest.raises(RuntimeError, match=r"is on CPU, but expected them to be on GPU"):
                        evaluator.run(data)
class FFNN(nn.Module):
    def __init__(self, input_dim=768, score=True, num_layers=4):
        assert num_layers > 0, "FFNN must have a positive number of layers"
        super(FFNN, self).__init__()
        # NOTE: a plain Python list does not register these layers as
        # submodules, which is why the custom to() below moves them manually.
        self.layers = [Linear(input_dim, input_dim) for _ in range(num_layers - 1)]
        self.fc = Linear(input_dim, 1)
        self.gelu = nn.GELU()
        self.score = score

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        if self.score:
            output = self.gelu(self.fc(x))
            return output.squeeze(1)
        else:
            return self.fc(x)

    def to(self, device=torch.device("cpu"), *args, **kwargs):
        super(FFNN, self).to(device, *args, **kwargs)
        for i, layer in enumerate(self.layers):
            self.layers[i] = layer.to(device)
        self.fc = self.fc.to(device)
        self.gelu = self.gelu.to(device)
        return self
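# An alternative sketch (hypothetical, not from the source): registering the
# hidden layers in an nn.ModuleList makes .to(), .parameters(), and
# state_dict() handle them automatically, so the custom FFNN.to() override
# above would no longer be needed.
from torch import nn
from torch.nn import Linear

class FFNNModuleList(nn.Module):
    def __init__(self, input_dim=768, num_layers=4):
        super().__init__()
        self.layers = nn.ModuleList(
            Linear(input_dim, input_dim) for _ in range(num_layers - 1)
        )
        self.fc = Linear(input_dim, 1)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return self.fc(x)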
def _test_create_supervised_evaluator(
    model_device: Optional[str] = None,
    evaluator_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
):
    model = Linear(1, 1)

    if model_device:
        model.to(model_device)

    model.weight.data.zero_()
    model.bias.data.zero_()

    if trace:
        example_input = torch.randn(1, 1)
        model = torch.jit.trace(model, example_input)

    evaluator = create_supervised_evaluator(model, device=evaluator_device, amp_mode=amp_mode)

    x = torch.tensor([[1.0], [2.0]])
    y = torch.tensor([[3.0], [5.0]])
    data = [(x, y)]

    if model_device == evaluator_device or ((model_device == "cpu") ^ (evaluator_device == "cpu")):
        state = evaluator.run(data)

        y_pred, y = state.output

        assert y_pred[0, 0].item() == approx(0.0)
        assert y_pred[1, 0].item() == approx(0.0)
        assert y[0, 0].item() == approx(3.0)
        assert y[1, 0].item() == approx(5.0)

        assert model.weight.data[0, 0].item() == approx(0.0)
        assert model.bias.item() == approx(0.0)
    else:
        if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
            # This is broken in 1.6.0 but will probably be fixed in 1.7.0
            with pytest.raises(RuntimeError, match=r"is on CPU, but expected them to be on GPU"):
                evaluator.run(data)
def __linear2tensor(linear: nn.Linear) -> Tensor:
    linear = linear.to("cpu")
    tensor = Tensor(linear.in_features, linear.out_features)
    for i in range(linear.in_features):
        # probe the layer with the i-th one-hot basis vector
        idx = torch.sparse_coo_tensor([[i]], [1], [linear.in_features], dtype=torch.float)
        val = linear(idx)
        tensor[i] = val
    return tensor
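# What the probing above recovers, sketched by hand (an observation, not from
# the source): nn.Linear computes y = x @ W.T + b, so feeding the i-th one-hot
# basis vector e_i yields row i of W.T (plus b when the layer has a bias). For
# a bias-free layer the returned tensor therefore equals the transposed weight
# matrix, i.e. the same data as linear.weight.T.detach() without the loop.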
def _test_basic_func(rank, world_size, tempfile_name):
    _dist_init(rank, world_size, tempfile_name, backend="nccl")  # Covers nccl

    model = Linear(2, 2, bias=False)
    model.to("cuda")
    model = DDP(model, device_ids=[rank])
    optim = AdaScale(SGD(model.parameters(), lr=0.1))
    # iter 1
    in_data = Tensor([0.0, 0.0])
    in_data[rank] = 1.0
    in_data = in_data.cuda()
    out = model(in_data)
    out.sum().backward()
    assert np.allclose(optim.gain(), 2.0), optim.gain()
    optim.step()
    optim.zero_grad()
    dist.destroy_process_group()
def _test_basic_func(rank, ddp_cls, world_size, tempfile_name, test_case):
    _dist_init(rank, world_size, tempfile_name, backend="nccl")  # Covers nccl

    model = Linear(2, 2)
    model.to("cuda")
    if ddp_cls is DDP:
        model = ddp_cls(model, device_ids=[rank])
        optim = AdaScale(SGD(model.parameters(), lr=0.1))
    elif ddp_cls is SDP:
        optim = AdaScale(OSS(model.parameters(), SGD, lr=0.1))
        model = ddp_cls(model, sharded_optimizer=optim)
    else:
        assert ddp_cls is FSDP, ddp_cls
        # Two cases:
        #   flatten=True : the AdaScale wrapper must come after FSDP, since it
        #                  receives a single flattened grad tensor. It won't
        #                  receive grads at all if wrapped before.
        #   flatten=False: AdaScale can come either before or after FSDP.
        # So it is better to apply AdaScale after FSDP.
        model = ddp_cls(model, flatten_parameters=False)
        optim = AdaScale(SGD(model.parameters(), lr=0.1))
    if "input" in test_case:
        # single iter
        in_data = Tensor(test_case["input"][rank])
        in_data = in_data.cuda()
        out = model(in_data)
        out.sum().backward()
        if ddp_cls is DDP:
            assert np.allclose(optim.gain(), test_case["expected_gain"]), optim.gain()
        optim.step()
        optim.zero_grad()
    else:
        # multiple iters
        for in_data in test_case["inputs"]:
            in_data = Tensor(in_data[rank]).cuda()
            out = model(in_data)
            out.sum().backward()
            optim.step()
            optim.zero_grad()
        if ddp_cls is DDP:
            assert np.allclose(optim.gain(), test_case["expected_gain"]), optim.gain()
    dist.destroy_process_group()
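def _flatten_true_sketch(rank, world_size, tempfile_name):
    # Hypothetical companion to the flatten=False branch above (an assumption
    # sketched from the comment, not from the source): with
    # flatten_parameters=True, FSDP exposes a single flattened parameter, so
    # AdaScale must be constructed *after* FSDP wrapping or it never sees a
    # gradient.
    _dist_init(rank, world_size, tempfile_name, backend="nccl")
    model = FSDP(Linear(2, 2).cuda(), flatten_parameters=True)
    optim = AdaScale(SGD(model.parameters(), lr=0.1))
    out = model(Tensor([1.0, 0.0]).cuda())
    out.sum().backward()
    optim.step()
    optim.zero_grad()
    dist.destroy_process_group()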
def _test_create_supervised_trainer(
    model_device: Optional[str] = None, trainer_device: Optional[str] = None, trace: bool = False
):
    model = Linear(1, 1)

    if model_device:
        model.to(model_device)

    model.weight.data.zero_()
    model.bias.data.zero_()
    optimizer = SGD(model.parameters(), 0.1)

    if trace:
        example_input = torch.randn(1, 1)
        model = torch.jit.trace(model, example_input)

    trainer = create_supervised_trainer(model, optimizer, mse_loss, device=trainer_device)

    x = torch.tensor([[1.0], [2.0]])
    y = torch.tensor([[3.0], [5.0]])
    data = [(x, y)]

    assert model.weight.data[0, 0].item() == approx(0.0)
    assert model.bias.item() == approx(0.0)

    if model_device == trainer_device or ((model_device == "cpu") ^ (trainer_device == "cpu")):
        state = trainer.run(data)

        assert state.output == approx(17.0)
        assert model.weight.data[0, 0].item() == approx(1.3)
        assert model.bias.item() == approx(0.8)
    else:
        if LooseVersion(torch.__version__) >= LooseVersion("1.7.0"):
            # This is broken in 1.6.0 but will probably be fixed in 1.7.0
            with pytest.raises(RuntimeError, match=r"is on CPU, but expected them to be on GPU"):
                trainer.run(data)
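# Hand check of the expected values above (worked by hand, not library output):
# with w = b = 0 the predictions are 0, so MSE = ((0 - 3)**2 + (0 - 5)**2) / 2 = 17.0.
# Gradients: dL/dw = mean(2 * (pred - y) * x) = (2*(-3)*1 + 2*(-5)*2) / 2 = -13
#            dL/db = mean(2 * (pred - y))     = (2*(-3)   + 2*(-5))   / 2 = -8
# so one SGD step with lr=0.1 gives w = 1.3 and b = 0.8, matching the asserts.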
def _test_basic_func(rank, world_size, tempfile_name, test_case, oss, model=None):
    _dist_init(rank, world_size, tempfile_name, backend="nccl")

    if model is None:
        model = Linear(2, 2, bias=False)

    model.to("cuda")
    model = DDP(model, device_ids=[rank])

    if oss:
        # For now, we can only wrap AdaScale over OSS. If we do it the other way
        # around, AdaScale needs to take different parameter types, i.e. the
        # parameter list, etc.
        optim = AdaScale(OSS(model.parameters(), SGD, lr=0.1))
    else:
        optim = AdaScale(SGD(model.parameters(), lr=0.1))

    if "input" in test_case:
        inputs = [test_case["input"]]
    else:
        inputs = test_case["inputs"]

    for in_data in inputs:
        in_data = Tensor(in_data[rank]).cuda()
        out = model(in_data)
        out.sum().backward()
        optim.step()
        optim.zero_grad()

    assert np.allclose(optim.gain(), test_case["expected_gain"]), optim.gain()

    if "expected_mean_weight" in test_case:
        mean_weight = mean([model.module[i].weight.data.mean().item() for i in range(4)])
        assert np.allclose(mean_weight, test_case["expected_mean_weight"]), mean_weight

    dist.destroy_process_group()
def _default_create_supervised_trainer(
    gradient_accumulation_steps: int = 1,
    model_device: Optional[str] = None,
    trainer_device: Optional[str] = None,
    trace: bool = False,
    amp_mode: str = None,
    scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False,
):
    model = Linear(1, 1, bias=False)

    if model_device:
        model.to(model_device)

    model.weight.data.zero_()
    optimizer = SGD(model.parameters(), 0.1)

    if trace:
        example_input = torch.randn(1)
        model = torch.jit.trace(model, example_input)

    if amp_mode == "apex" and model_device == trainer_device == "cuda":
        from apex import amp

        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")

    trainer = create_supervised_trainer(
        model,
        optimizer,
        mse_loss,
        device=trainer_device,
        output_transform=lambda x, y, y_pred, loss: (y_pred, loss.item()),
        amp_mode=amp_mode,
        scaler=scaler,
        gradient_accumulation_steps=gradient_accumulation_steps,
    )
    assert model.weight.data[0, 0].item() == approx(0.0)
    return trainer, model
def _test_create_supervised_trainer(
    model_device: Optional[str] = None, trainer_device: Optional[str] = None, trace: bool = False
):
    model = Linear(1, 1)

    if model_device:
        model.to(model_device)

    model.weight.data.zero_()
    model.bias.data.zero_()
    optimizer = SGD(model.parameters(), 0.1)

    if trace:
        example_input = torch.randn(1, 1)
        model = torch.jit.trace(model, example_input)

    trainer = create_supervised_trainer(model, optimizer, mse_loss, device=trainer_device)

    x = torch.tensor([[1.0], [2.0]])
    y = torch.tensor([[3.0], [5.0]])
    data = [(x, y)]

    assert model.weight.data[0, 0].item() == approx(0.0)
    assert model.bias.item() == approx(0.0)

    if model_device == trainer_device or ((model_device == "cpu") ^ (trainer_device == "cpu")):
        state = trainer.run(data)

        assert state.output == approx(17.0)
        assert model.weight.data[0, 0].item() == approx(1.3)
        assert model.bias.item() == approx(0.8)
    else:
        with pytest.raises(RuntimeError, match=r"device type"):
            trainer.run(data)
def _test_create_supervised_evaluator(
    model_device: Optional[str] = None, evaluator_device: Optional[str] = None, trace: bool = False
):
    model = Linear(1, 1)

    if model_device:
        model.to(model_device)

    model.weight.data.zero_()
    model.bias.data.zero_()

    if trace:
        example_input = torch.randn(1, 1)
        model = torch.jit.trace(model, example_input)

    evaluator = create_supervised_evaluator(model, device=evaluator_device)

    x = torch.tensor([[1.0], [2.0]])
    y = torch.tensor([[3.0], [5.0]])
    data = [(x, y)]

    if model_device == evaluator_device or ((model_device == "cpu") ^ (evaluator_device == "cpu")):
        state = evaluator.run(data)

        y_pred, y = state.output

        assert y_pred[0, 0].item() == approx(0.0)
        assert y_pred[1, 0].item() == approx(0.0)
        assert y[0, 0].item() == approx(3.0)
        assert y[1, 0].item() == approx(5.0)

        assert model.weight.data[0, 0].item() == approx(0.0)
        assert model.bias.item() == approx(0.0)
    else:
        with pytest.raises(RuntimeError, match=r"device type"):
            evaluator.run(data)
def test_create_supervised_evaluator_on_cuda():
    device = "cuda"
    model = Linear(1, 1)
    model.to(device)
    model.weight.data.zero_()
    model.bias.data.zero_()

    evaluator = create_supervised_evaluator(model, device=device)

    x = torch.tensor([[1.0], [2.0]])
    y = torch.tensor([[3.0], [5.0]])
    data = [(x, y)]

    state = evaluator.run(data)
    y_pred, y = state.output

    assert y_pred[0, 0].item() == approx(0.0)
    assert y_pred[1, 0].item() == approx(0.0)
    assert y[0, 0].item() == approx(3.0)
    assert y[1, 0].item() == approx(5.0)

    assert model.weight.data[0, 0].item() == approx(0.0)
    assert model.bias.item() == approx(0.0)
def test_one_iteration(self):
    """Test FSDP with uneven divide of parameter shards."""
    model = Linear(3, 3, bias=False)
    input = torch.rand(8, 3)
    my_lr = 0.1

    ref_forward_output_my_rank, ref_weight_out = self._get_ref_results(model, input, my_lr)

    model.to(self.rank)
    model = FSDP(model)
    optim = SGD(model.parameters(), lr=my_lr)
    self.assertTrue(len(input) >= self.world_size)
    in_data = torch.Tensor(input[self.rank]).to(self.rank)
    out = model(in_data)
    out.float().sum().backward()
    optim.step()
    optim.zero_grad()

    get_full_params(model)
    weight_out = model.module.weight.T.clone()
    self.assertEqual(ref_forward_output_my_rank, out)
    self.assertEqual(ref_weight_out, weight_out)
def test_create_supervised_trainer_on_cuda():
    device = "cuda"
    model = Linear(1, 1)
    model.to(device)
    model.weight.data.zero_()
    model.bias.data.zero_()
    optimizer = SGD(model.parameters(), 0.1)

    trainer = create_supervised_trainer(model, optimizer, mse_loss, device=device)

    x = torch.tensor([[1.0], [2.0]])
    y = torch.tensor([[3.0], [5.0]])
    data = [(x, y)]

    assert model.weight.data[0, 0].item() == approx(0.0)
    assert model.bias.item() == approx(0.0)

    state = trainer.run(data)

    assert state.output == approx(17.0)
    assert model.weight.data[0, 0].item() == approx(1.3)
    assert model.bias.item() == approx(0.8)
class Train:
    def __init__(self, lr=0.01, pretrain=False):
        # self.model = model()
        self.model = Linear(1200, 400)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self.model.to(self.device)
        if pretrain:
            load_model(self.model, m=1)
        self.optimizer = SGD(self.model.parameters(), lr=lr)
        lambda1 = lambda epoch: 0.98 ** epoch
        self.scheduler = lr_scheduler.LambdaLR(self.optimizer, lr_lambda=lambda1)
        # content and style images, scaled to [0, 1] and laid out as NCHW
        self.img_c = torch.Tensor(cv.imread('./c.jpg') / 255.).permute(2, 0, 1).unsqueeze(0).to(self.device)
        self.img_s = torch.Tensor(cv.imread('./s.jpg') / 255.).permute(2, 0, 1).unsqueeze(0).to(self.device)

    def train(self, epoch_num, step_a_epoch, save_frq):
        x = torch.ones(1000)
        global_step = 0
        epoch = 0
        min_loss = 100000000000
        self.model.train()
        while epoch < epoch_num:
            for i in range(step_a_epoch):
                loss1 = 0.0
                loss2 = 0.0
                self.optimizer.zero_grad()
                # img = self.model(x)
                # the optimized image is stored in the layer's weight matrix
                img = self.model.weight.view(1, 3, 400, 400)
                # content loss over feature maps of layers 2 and 3
                for j in range(2, 4):
                    logit_cs = feature_map(img, j)
                    logit_c = feature_map(self.img_c, j)
                    loss1 += torch.sum((logit_cs - logit_c) ** 2) * 0.5
                # style loss over Gram matrices (a is a module-level list of layers)
                for j in range(len(a)):
                    gram_ss, m, n = Gram(img, j)
                    gram_s, m, n = Gram(self.img_s, j)
                    loss2 += torch.sum((gram_ss - gram_s) ** 2) / (4 * m ** 2 * n ** 2)
                loss = loss1 + 0.01 * loss2
                loss.backward()
                self.optimizer.step()
                if global_step % save_frq == 0 and loss < min_loss:
                    min_loss = loss
                    save_model(self.model, self.optimizer, self.scheduler, global_step, m=1)
                    print('learning rate: {:6f}'.format(self.scheduler.get_lr()[0]))
                    print('step: {:} loss: {:}'.format(global_step, loss))
                global_step += 1
            epoch += 1
            self.scheduler.step(epoch)
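# Reference for the loss terms above (a reading of the code, not from the
# source file): this follows the Gatys et al. neural style transfer objective.
# For a layer with m feature maps of n elements each, the style term is
#     E_l = sum((G_generated - G_style) ** 2) / (4 * m**2 * n**2)
# where G is the Gram matrix of that layer's features, and the total objective
# is content_loss + weight * style_loss, with weight = 0.01 here.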
print('Preparing training/validation loaders...')
train_loader = BunoDataloader(BunoDataset(train_bunos), train_batch_size)
val_loader = BunoDataloader(BunoDataset(val_bunos), val_batch_size)
print('Done!\n')

mission_feat_dim = train_loader.mission_feat_dim
maint_feat_dim = train_loader.maint_feat_dim

mission_model = Linear(in_features=mission_feat_dim, out_features=1)
mission_model.weight.data *= 0.0
mission_model.bias.data *= 0.0

maint_model = Linear(in_features=maint_feat_dim, out_features=1)
maint_model.weight.data *= 0.0
maint_model.bias.data *= 0.0

mission_model.to(device)
maint_model.to(device)

if continue_fname:
    load_path = os.path.join('saved-sessions', continue_fname)
    print(f'Loading saved session from {load_path}...')
    ckpt = torch.load(load_path)
    mission_model.load_state_dict(ckpt['mission_state_dict'])
    maint_model.load_state_dict(ckpt['maint_state_dict'])
    train_losses = ckpt['train_losses']
    val_accs = ckpt['val_accs']
    time_elapsed = ckpt['time_elapsed']
    print('Done!\n')