def create_sgd_optimizer(model): """Create the analog-aware optimizer. Args: model (nn.Module): model to be trained. """ optimizer = AnalogSGD(model.parameters(), lr=0.05) optimizer.regroup_param_groups(model) return optimizer
def create_sgd_optimizer(model, learning_rate):
    """Create the analog-aware optimizer.

    Args:
        model (nn.Module): model to be trained.
        learning_rate (float): global parameter to define the learning rate.
    """
    optimizer = AnalogSGD(model.parameters(), lr=learning_rate)
    optimizer.regroup_param_groups(model)

    return optimizer
def train_model(model, loss_func, x_b, y_b):
    """Train the model."""
    opt = AnalogSGD(model.parameters(), lr=0.1)
    opt.regroup_param_groups(model)

    epochs = 10
    for _ in range(epochs):
        pred = model(x_b)
        loss = loss_func(pred, y_b)
        loss.backward()
        opt.step()
        opt.zero_grad()
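# A minimal usage sketch (not part of the original snippets) for the two
# helpers above. It assumes AnalogLinear, AnalogSGD, Tensor and mse_loss are
# imported as in the later examples; the data is synthetic and illustrative.
# train_model() builds its own AnalogSGD internally, while
# create_sgd_optimizer() is the equivalent helper for custom training loops.
model = AnalogLinear(4, 2, bias=True)
x_b = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
y_b = Tensor([[1.0, 0.5], [0.7, 0.3]])
train_model(model, mse_loss, x_b, y_b)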
def test_learning_rate_update(self):
    """Check that the learning rate update is applied to the tile."""
    loss_func = mse_loss

    x_b = Tensor([[0.1, 0.2], [0.2, 0.4]])
    y_b = Tensor([[0.3], [0.6]])

    layer1 = self.get_layer(2, 3)
    layer2 = self.get_layer(3, 1)
    model = Sequential(layer1, layer2)
    if self.use_cuda:
        x_b = x_b.cuda()
        y_b = y_b.cuda()
        model = model.cuda()

    opt = AnalogSGD(model.parameters(), lr=0.5)
    opt.regroup_param_groups(model)

    new_lr = 0.07
    for param_group in opt.param_groups:
        param_group['lr'] = new_lr

    pred = model(x_b)
    loss = loss_func(pred, y_b)
    loss.backward()
    opt.step()

    self.assertAlmostEqual(layer1.analog_tile.get_learning_rate(), new_lr)
def test_learning_rate_update_fn(self):
    """Check that set_learning_rate() propagates the new rate to the tiles."""
    layer1 = self.get_layer(2, 3)
    layer2 = self.get_layer(3, 1)
    model = Sequential(layer1, layer2)
    if self.use_cuda:
        model = model.cuda()

    opt = AnalogSGD(model.parameters(), lr=0.5)
    opt.regroup_param_groups(model)

    new_lr = 0.07
    opt.set_learning_rate(new_lr)

    self.assertAlmostEqual(layer1.analog_tile.get_learning_rate(), new_lr)
    self.assertAlmostEqual(layer2.analog_tile.get_learning_rate(), new_lr)
def get_model_and_x(self):
    """Train a simple model and return it together with its input."""
    # Prepare the datasets (input and expected output).
    x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
    y = Tensor([[1.0, 0.5], [0.7, 0.3]])

    # Define a single-layer network, using the resistive device configuration
    # under test, with analog forward noise enabled.
    rpu_config = self.get_rpu_config()
    rpu_config.forward.out_res = -1.  # Turn off (output) ADC discretization.
    rpu_config.forward.w_noise_type = OutputWeightNoiseType.ADDITIVE_CONSTANT
    rpu_config.forward.w_noise = 0.02
    rpu_config.noise_model = PCMLikeNoiseModel(g_max=25.0)

    model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config)

    # Move the model and tensors to cuda if it is available.
    if self.use_cuda:
        x = x.cuda()
        y = y.cuda()
        model.cuda()

    # Define an analog-aware optimizer, preparing it for using the layers.
    opt = AnalogSGD(model.parameters(), lr=0.1)
    opt.regroup_param_groups(model)

    for _ in range(100):
        # Feed the training input to the model (forward pass).
        pred = model(x)
        # Compute the loss against the expected output.
        loss = mse_loss(pred, y)
        # Run training (backward propagation).
        loss.backward()
        opt.step()

    return model, x
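# A hedged follow-up sketch (not in the original test code): since the forward
# pass above is configured with additive output weight noise (w_noise), two
# inference runs on the same input are expected to differ slightly. It only
# reuses the model and input returned by get_model_and_x().
model, x = self.get_model_and_x()
pred_1 = model(x)
pred_2 = model(x)
# pred_1 and pred_2 should not be exactly equal due to the analog noise.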
# Imports from PyTorch.
from torch import Tensor
from torch.nn import Sequential
from torch.nn.functional import mse_loss

# Imports from aihwkit.
from aihwkit.nn import AnalogLinear
from aihwkit.optim.analog_sgd import AnalogSGD
from aihwkit.simulator.devices import ConstantStepResistiveDevice

# Prepare the datasets (input and expected output).
x_b = Tensor([[0.1, 0.2, 0.0, 0.0], [0.2, 0.4, 0.0, 0.0]])
y_b = Tensor([[0.3], [0.6]])

# Define a multiple-layer network, using a constant step device type.
model = Sequential(
    AnalogLinear(4, 2, resistive_device=ConstantStepResistiveDevice()),
    AnalogLinear(2, 2, resistive_device=ConstantStepResistiveDevice()),
    AnalogLinear(2, 1, resistive_device=ConstantStepResistiveDevice())
)

# Define an analog-aware optimizer, preparing it for using the layers.
opt = AnalogSGD(model.parameters(), lr=0.5)
opt.regroup_param_groups(model)

for epoch in range(100):
    # Feed the training input to the model (forward pass).
    pred = model(x_b)
    # Compute the loss against the expected output.
    loss = mse_loss(pred, y_b)
    # Run training (backward propagation).
    loss.backward()
    opt.step()

    print('Loss error: {:.16f}'.format(loss))