Example #1
def create_sgd_optimizer(model):
    """Create the analog-aware optimizer.

    Args:
        model (nn.Module): model to be trained.
    """
    optimizer = AnalogSGD(model.parameters(), lr=0.05)
    optimizer.regroup_param_groups(model)

    return optimizer
Example #2
def create_sgd_optimizer(model, learning_rate):
    """Create the analog-aware optimizer.

    Args:
        model (nn.Module): model to be trained
        learning_rate (float): global parameter to define learning rate
    """
    optimizer = AnalogSGD(model.parameters(), lr=learning_rate)
    optimizer.regroup_param_groups(model)

    return optimizer
Example #3
    def get_optimizer(self, learning_rate: float, model: Module) -> Optimizer:
        """Return the `Optimizer` for the experiment.

        Args:
            learning_rate: the learning rate used by the optimizer.
            model: the neural network to be trained.

        Returns:
            the optimizer to be used in the experiment.
        """
        optimizer = AnalogSGD(model.parameters(), lr=learning_rate)
        optimizer.regroup_param_groups(model)

        return optimizer
Example #4
    def test_learning_rate_update(self):
        """Check the learning rate update is applied to tile."""
        loss_func = mse_loss

        x_b = Tensor([[0.1, 0.2], [0.2, 0.4]])
        y_b = Tensor([[0.3], [0.6]])

        layer1 = self.get_layer(2, 3)
        layer2 = self.get_layer(3, 1)

        model = Sequential(layer1, layer2)
        if self.use_cuda:
            x_b = x_b.cuda()
            y_b = y_b.cuda()
            model = model.cuda()
        opt = AnalogSGD(model.parameters(), lr=0.5)
        opt.regroup_param_groups(model)
        opt.zero_grad()

        new_lr = 0.07
        for param_group in opt.param_groups:
            param_group['lr'] = new_lr

        pred = model(x_b)
        loss = loss_func(pred, y_b)
        loss.backward()
        opt.step()

        if not layer1.analog_tile.get_analog_ctx().use_torch_update:
            self.assertAlmostEqual(layer1.analog_tile.get_learning_rate(),
                                   new_lr)
Example #5
    def train_model(model, loss_func, x_b, y_b):
        """Train the model."""
        opt = AnalogSGD(model.parameters(), lr=0.1)
        opt.regroup_param_groups(model)

        epochs = 10
        for _ in range(epochs):
            opt.zero_grad()
            pred = model(x_b)
            loss = loss_func(pred, y_b)
            loss.backward()
            opt.step()
Example #6
    def train_once(model, y_in, y_out, analog_if, use_cuda=False):
        """Train once."""
        criterion = MSELoss()
        optimizer = AnalogSGD(model.parameters(),
                              lr=0.5,
                              momentum=0.0,
                              nesterov=0.0)
        optimizer.regroup_param_groups(model)
        if analog_if:
            # Why is this format so different?
            # TODO: better to use the same state format as native PyTorch's LSTM?
            if use_cuda:
                states = [
                    LSTMState(
                        zeros(y_in.size()[1], model.hidden_size).cuda(),
                        zeros(y_in.size()[1], model.hidden_size).cuda())
                    for _ in range(model.num_layers)
                ]
            else:
                states = [
                    LSTMState(zeros(y_in.size()[1], model.hidden_size),
                              zeros(y_in.size()[1], model.hidden_size))
                    for _ in range(model.num_layers)
                ]

        else:
            if use_cuda:
                states = (zeros(model.num_layers,
                                y_in.size()[1], model.hidden_size).cuda(),
                          zeros(model.num_layers,
                                y_in.size()[1], model.hidden_size).cuda())
            else:
                states = (zeros(model.num_layers,
                                y_in.size()[1], model.hidden_size),
                          zeros(model.num_layers,
                                y_in.size()[1], model.hidden_size))

        for _ in range(2):
            optimizer.zero_grad()
            pred, _ = model(y_in, states)
            loss = criterion(pred.mean(axis=2, keepdim=True), y_out)
            loss.backward()
            optimizer.step()

        return pred.detach().cpu().numpy()
Example #7
    def test_learning_rate_update_fn(self):
        """Check the learning rate update is applied to tile."""
        layer1 = self.get_layer(2, 3)
        layer2 = self.get_layer(3, 1)

        model = Sequential(layer1, layer2)
        if self.use_cuda:
            model = model.cuda()
        opt = AnalogSGD(model.parameters(), lr=0.5)
        opt.regroup_param_groups(model)
        opt.zero_grad()

        new_lr = 0.07

        opt.set_learning_rate(new_lr)

        self.assertAlmostEqual(layer1.analog_tile.get_learning_rate(), new_lr)
        self.assertAlmostEqual(layer2.analog_tile.get_learning_rate(), new_lr)
Example #8
    def get_model_and_x(self):
        """Trains a simple model."""
        # Prepare the datasets (input and expected output).
        x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
        y = Tensor([[1.0, 0.5], [0.7, 0.3]])

        # Define a single-layer network, using a constant step device type.
        rpu_config = self.get_rpu_config()
        rpu_config.forward.out_res = -1.  # Turn off (output) ADC discretization.
        rpu_config.forward.w_noise_type = WeightNoiseType.ADDITIVE_CONSTANT
        rpu_config.forward.w_noise = 0.02
        rpu_config.noise_model = PCMLikeNoiseModel(g_max=25.0)

        model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config)

        # Move the model and tensors to cuda if it is available.
        if self.use_cuda:
            x = x.cuda()
            y = y.cuda()
            model.cuda()

        # Define an analog-aware optimizer, preparing it for using the layers.
        opt = AnalogSGD(model.parameters(), lr=0.1)
        opt.regroup_param_groups(model)

        for _ in range(100):
            opt.zero_grad()

            # Add the training Tensor to the model (input).
            pred = model(x)
            # Add the expected output Tensor.
            loss = mse_loss(pred, y)
            # Run training (backward propagation).
            loss.backward()

            opt.step()

        return model, x
Example #9
def main():
    """Train a PyTorch GAN analog model to generate fake characters alla MNIST dataset."""
    # Make sure the directory where to save the results exist.
    # Results include examples of the fake images generated.
    os.makedirs(RESULTS, exist_ok=True)
    torch.manual_seed(SEED)

    # Load MNIST dataset as tensors.
    dataloader = DataLoader(
        MNIST(PATH_DATASET, download=True, transform=transforms.ToTensor()),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    print(f'\n{datetime.now().time().replace(microsecond=0)} --- '
          f'Started GAN Example')

    gen = Generator(Z_DIM).to(DEVICE)
    gen_opt = AnalogSGD(gen.parameters(), lr=LR)
    gen_opt.regroup_param_groups(gen)

    disc = Discriminator().to(DEVICE)
    disc_opt = AnalogSGD(disc.parameters(), lr=LR)
    disc_opt.regroup_param_groups(disc)

    print(RPU_CONFIG)
    print(gen)
    print(disc)

    criterion = nn.BCEWithLogitsLoss()

    training_loop(gen, disc, gen_opt, disc_opt, criterion, dataloader,
                  N_EPOCHS, DISPLAY_STEP)
    show_animation_fake_images()

    print(f'{datetime.now().time().replace(microsecond=0)} --- '
          f'Completed GAN Example')
Example #10
    def train_model(self, model, in_vectors, out_vectors):
        """Trains a model """

        opt = AnalogSGD(model.parameters(), lr=0.1)

        for _ in range(10):
            opt.zero_grad()

            # Add the training Tensor to the model (input).
            pred_value = model(in_vectors)
            # Add the expected output Tensor.
            loss_value = mse_loss(pred_value, out_vectors)
            # Run training (backward propagation).
            loss_value.backward()
            opt.step()

        return loss_value
Example #11
    def train_once_bidir(model, y_in, y_out, analog_if):
        """Train once."""
        criterion = MSELoss()
        optimizer = AnalogSGD(model.parameters(),
                              lr=0.5,
                              momentum=0.0,
                              nesterov=0.0)
        batch_size = y_in.size()[1]

        if analog_if:
            states = model.get_zero_state(batch_size)
        else:
            states = None

        for _ in range(2):
            optimizer.zero_grad()
            pred, _ = model(y_in, states)
            loss = criterion(pred.mean(axis=2, keepdim=True), y_out)
            loss.backward()
            optimizer.step()

        return pred.detach().cpu().numpy()
Example #12
from torch import Tensor
from torch.nn.functional import mse_loss

from aihwkit.nn import AnalogLinear
from aihwkit.optim import AnalogSGD
from aihwkit.simulator.configs import SingleRPUConfig, ConstantStepDevice
from aihwkit.simulator.rpu_base import cuda

# Prepare the datasets (input and expected output).
x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
y = Tensor([[1.0, 0.5], [0.7, 0.3]])

# Define a single-layer network, using a constant step device type.
rpu_config = SingleRPUConfig(device=ConstantStepDevice())
model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config)

# Move the model and tensors to cuda if it is available.
if cuda.is_compiled():
    x = x.cuda()
    y = y.cuda()
    model.cuda()

# Define an analog-aware optimizer, preparing it for using the layers.
opt = AnalogSGD(model.parameters(), lr=0.1)
opt.regroup_param_groups(model)

for epoch in range(100):
    # Delete the gradients from the previous iteration.
    opt.zero_grad()

    # Add the training Tensor to the model (input).
    pred = model(x)
    # Add the expected output Tensor.
    loss = mse_loss(pred, y)
    # Run training (backward propagation).
    loss.backward()

    opt.step()
    print('Loss error: {:.16f}'.format(loss))
Example #13
    def test_against_fp(self):
        """Test whether FP is same as is_perfect inference tile."""
        # pylint: disable-msg=too-many-locals
        # Prepare the datasets (input and expected output).
        x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
        y = Tensor([[1.0, 0.5], [0.7, 0.3]])

        # Define a single-layer network, using a constant step device type.
        rpu_config = self.get_rpu_config()
        rpu_config.forward.is_perfect = True
        model_torch = Linear(4, 2, bias=True)
        model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config)
        model.set_weights(model_torch.weight, model_torch.bias)
        model_fp = AnalogLinear(4,
                                2,
                                bias=True,
                                rpu_config=FloatingPointRPUConfig())
        model_fp.set_weights(model_torch.weight, model_torch.bias)

        self.assertTensorAlmostEqual(model.get_weights()[0],
                                     model_torch.weight)
        self.assertTensorAlmostEqual(model.get_weights()[0],
                                     model_fp.get_weights()[0])

        # Move the model and tensors to cuda if it is available.
        if self.use_cuda:
            x = x.cuda()
            y = y.cuda()
            model.cuda()
            model_fp.cuda()
            model_torch.cuda()

        # Define an analog-aware optimizer, preparing it for using the layers.
        opt = AnalogSGD(model.parameters(), lr=0.1)
        opt_fp = AnalogSGD(model_fp.parameters(), lr=0.1)
        opt_torch = SGD(model_torch.parameters(), lr=0.1)

        for _ in range(100):

            # inference
            opt.zero_grad()
            pred = model(x)
            loss = mse_loss(pred, y)
            loss.backward()
            opt.step()

            # same for fp
            opt_fp.zero_grad()
            pred_fp = model_fp(x)
            loss_fp = mse_loss(pred_fp, y)
            loss_fp.backward()
            opt_fp.step()

            # same for torch
            opt_torch.zero_grad()
            pred_torch = model_torch(x)
            loss_torch = mse_loss(pred_torch, y)
            loss_torch.backward()
            opt_torch.step()

            self.assertTensorAlmostEqual(pred_torch, pred)
            self.assertTensorAlmostEqual(loss_torch, loss)
            self.assertTensorAlmostEqual(model.get_weights()[0],
                                         model_torch.weight)

            self.assertTensorAlmostEqual(pred_fp, pred)
            self.assertTensorAlmostEqual(loss_fp, loss)
            self.assertTensorAlmostEqual(model.get_weights()[0],
                                         model_fp.get_weights()[0])
Example #14
y_in = torch.stack(y_in_2d, dim=0).transpose(0, 1).unsqueeze(2)
y_out = torch.stack(y_out_2d, dim=0).transpose(0, 1).unsqueeze(2)

if WITH_EMBEDDING:
    if WITH_BIDIR:
        model = AnalogBidirRNNNetwork()
    else:
        model = AnalogRNNNetwork()
else:
    if WITH_BIDIR:
        model = AnalogBidirRNNNetwork_noEmbedding()
    else:
        model = AnalogRNNNetwork_noEmbedding()

model = model.to(DEVICE)
optimizer = AnalogSGD(model.parameters(), lr=LEARNING_RATE)
optimizer.regroup_param_groups(model)
criterion = nn.MSELoss()

# train
losses = []
for i in range(EPOCHS):
    optimizer.zero_grad()

    pred, states = model(y_in, None)

    loss = criterion(pred, y_out)
    print('Epoch = %d: Train Perplexity = %f' %
          (i, np.exp(loss.detach().cpu().numpy())))

    losses.append(loss.detach().cpu().numpy())
    loss.backward()
    # Apply the analog-aware parameter update.
    optimizer.step()
Example #15
    def test_decay(self):
        """Test hidden parameter set."""
        # pylint: disable=invalid-name, too-many-locals

        lifetime = 100.  # initial setting (needs to be larger than 1)
        gamma = 0.1
        reset_bias = 0.1  # decay shift
        rpu_config = self.get_transfer_compound(gamma=gamma,
                                                lifetime=lifetime,
                                                lifetime_dtod=0.0,
                                                reset=reset_bias)

        model = self.get_layer(in_features=2,
                               out_features=1,
                               rpu_config=rpu_config)

        weight, bias = model.get_weights()
        model.set_weights(weight * 0.0,
                          bias * 0.0 if bias is not None else None)

        params = model.analog_tile.get_hidden_parameters()
        shape = params['hidden_weights_0_0'].shape

        # just dummy settings
        a, b, c, d = 0.47, 0.21, 0.64, 0.12
        params['hidden_weights_0_0'] = a * ones(*shape)  # A
        params['hidden_weights_1_0'] = b * ones(*shape)  # A ref
        params['hidden_weights_0_1'] = c * ones(*shape)  # C
        params['hidden_weights_1_1'] = d * ones(*shape)  # C_ref

        # explicitly set the decay scales (which is 1-1/lifetime)
        a_dcy, b_dcy, c_dcy, d_dcy = 0.95, 0.78, 0.93, 0.92
        params['decay_scales_0_0'] = a_dcy * ones(*shape)  # A
        params['decay_scales_1_0'] = b_dcy * ones(*shape)  # A ref
        params['decay_scales_0_1'] = c_dcy * ones(*shape)  # C
        params['decay_scales_1_1'] = d_dcy * ones(*shape)  # C_ref

        model.analog_tile.set_hidden_parameters(params)

        # LR set to zero. Only lifetime will be applied
        opt = AnalogSGD(model.parameters(), lr=0.0)

        x_b = Tensor([[0.1, 0.2], [0.2, 0.4]])
        y_b = Tensor([[0.3], [0.6]])

        if self.use_cuda:
            model = model.cuda()
            x_b = x_b.cuda()
            y_b = y_b.cuda()

        epochs = 2
        for _ in range(epochs):
            opt.zero_grad()
            pred = model(x_b)
            loss = mse_loss(pred, y_b)

            loss.backward()
            opt.step()

        weight, bias = model.get_weights()

        # reference values
        a = (a - reset_bias) * pow(a_dcy, epochs) + reset_bias
        b = (b - reset_bias) * pow(b_dcy, epochs) + reset_bias
        c = (c - reset_bias) * pow(c_dcy, epochs) + reset_bias
        d = (d - reset_bias) * pow(d_dcy, epochs) + reset_bias

        if self.digital_bias:
            self.assertAlmostEqual(bias[0].item(), 0.0)
        if self.bias and not self.digital_bias:
            self.assertAlmostEqual(bias[0].item(), gamma * (a - b) + c - d, 5)

        self.assertAlmostEqual(weight[0][0].item(), gamma * (a - b) + c - d, 5)