Code Example #1
    def test_learning_rate_update(self):
        """Check the learning rate update is applied to tile."""
        loss_func = mse_loss

        x_b = Tensor([[0.1, 0.2], [0.2, 0.4]])
        y_b = Tensor([[0.3], [0.6]])

        layer1 = self.get_layer(2, 3)
        layer2 = self.get_layer(3, 1)

        model = Sequential(layer1, layer2)
        if self.use_cuda:
            x_b = x_b.cuda()
            y_b = y_b.cuda()
            model = model.cuda()
        opt = AnalogSGD(model.parameters(), lr=0.5)
        opt.regroup_param_groups(model)

        new_lr = 0.07
        for param_group in opt.param_groups:
            param_group['lr'] = new_lr

        pred = model(x_b)
        loss = loss_func(pred, y_b)
        loss.backward()
        opt.step()

        if not layer1.analog_tile.get_analog_ctx().use_torch_update:
            self.assertAlmostEqual(layer1.analog_tile.get_learning_rate(),
                                   new_lr)
Code Example #2
File: 03_mnist_training.py Project: sycomix/aihwkit
def create_sgd_optimizer(model):
    """Create the analog-aware optimizer.

    Args:
        model (nn.Module): model to be trained.
    """
    optimizer = AnalogSGD(model.parameters(), lr=0.05)
    optimizer.regroup_param_groups(model)

    return optimizer
Code Example #3
def create_sgd_optimizer(model, learning_rate):
    """Create the analog-aware optimizer.

    Args:
        model (nn.Module): model to be trained
        learning_rate (float): global parameter to define learning rate
    """
    optimizer = AnalogSGD(model.parameters(), lr=learning_rate)
    optimizer.regroup_param_groups(model)

    return optimizer
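
A minimal usage sketch for the helper above, assuming aihwkit's AnalogLinear layer; the layer sizes and the 0.05 learning rate are illustrative choices, not taken from the original example:

from aihwkit.nn import AnalogLinear

# Build a small analog layer (sizes chosen only for illustration).
model = AnalogLinear(4, 2, bias=True)

# The helper wraps the AnalogSGD construction and the regroup_param_groups() call.
optimizer = create_sgd_optimizer(model, learning_rate=0.05)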
Code Example #4
    def train_model(model, loss_func, x_b, y_b):
        """Train the model."""
        opt = AnalogSGD(model.parameters(), lr=0.1)
        opt.regroup_param_groups(model)

        epochs = 10
        for _ in range(epochs):
            opt.zero_grad()
            pred = model(x_b)
            loss = loss_func(pred, y_b)
            loss.backward()
            opt.step()
Code Example #5
File: training.py Project: diego-plan9/aihwkit
    def get_optimizer(self, learning_rate: float, model: Module) -> Optimizer:
        """Return the `Optimizer` for the experiment.

        Args:
            learning_rate: the learning rate used by the optimizer.
            model: the neural network to be trained.

        Returns:
            the optimizer to be used in the experiment.
        """
        optimizer = AnalogSGD(model.parameters(), lr=learning_rate)
        optimizer.regroup_param_groups(model)

        return optimizer
Code Example #6
    def test_learning_rate_update_fn(self):
        """Check the learning rate update is applied to tile."""
        layer1 = self.get_layer(2, 3)
        layer2 = self.get_layer(3, 1)

        model = Sequential(layer1, layer2)
        if self.use_cuda:
            model = model.cuda()
        opt = AnalogSGD(model.parameters(), lr=0.5)
        opt.regroup_param_groups(model)

        new_lr = 0.07

        opt.set_learning_rate(new_lr)

        self.assertAlmostEqual(layer1.analog_tile.get_learning_rate(), new_lr)
        self.assertAlmostEqual(layer2.analog_tile.get_learning_rate(), new_lr)
Code Example #7
    def train_once(model, y_in, y_out, analog_if, use_cuda=False):
        """Train once."""
        criterion = MSELoss()
        optimizer = AnalogSGD(model.parameters(),
                              lr=0.5,
                              momentum=0.0,
                              nesterov=False)
        optimizer.regroup_param_groups(model)
        if analog_if:
            # Why is this state format so different?
            # TODO: better to use the same state format as native PyTorch's LSTM?
            if use_cuda:
                states = [
                    LSTMState(
                        zeros(y_in.size()[1], model.hidden_size).cuda(),
                        zeros(y_in.size()[1], model.hidden_size).cuda())
                    for _ in range(model.num_layers)
                ]
            else:
                states = [
                    LSTMState(zeros(y_in.size()[1], model.hidden_size),
                              zeros(y_in.size()[1], model.hidden_size))
                    for _ in range(model.num_layers)
                ]

        else:
            if use_cuda:
                states = (zeros(model.num_layers,
                                y_in.size()[1], model.hidden_size).cuda(),
                          zeros(model.num_layers,
                                y_in.size()[1], model.hidden_size).cuda())
            else:
                states = (zeros(model.num_layers,
                                y_in.size()[1], model.hidden_size),
                          zeros(model.num_layers,
                                y_in.size()[1], model.hidden_size))

        for _ in range(2):
            optimizer.zero_grad()
            pred, _ = model(y_in, states)
            loss = criterion(pred.mean(axis=2, keepdim=True), y_out)
            loss.backward()
            optimizer.step()

        return pred.detach().cpu().numpy()
Code Example #8
    def get_model_and_x(self):
        """Trains a simple model."""
        # Prepare the datasets (input and expected output).
        x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
        y = Tensor([[1.0, 0.5], [0.7, 0.3]])

        # Define a single-layer network, using a constant step device type.
        rpu_config = self.get_rpu_config()
        rpu_config.forward.out_res = -1.  # Turn off (output) ADC discretization.
        rpu_config.forward.w_noise_type = WeightNoiseType.ADDITIVE_CONSTANT
        rpu_config.forward.w_noise = 0.02
        rpu_config.noise_model = PCMLikeNoiseModel(g_max=25.0)

        model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config)

        # Move the model and tensors to cuda if it is available.
        if self.use_cuda:
            x = x.cuda()
            y = y.cuda()
            model.cuda()

        # Define an analog-aware optimizer, preparing it for using the layers.
        opt = AnalogSGD(model.parameters(), lr=0.1)
        opt.regroup_param_groups(model)

        for _ in range(100):
            opt.zero_grad()

            # Add the training Tensor to the model (input).
            pred = model(x)
            # Add the expected output Tensor.
            loss = mse_loss(pred, y)
            # Run training (backward propagation).
            loss.backward()

            opt.step()

        return model, x
Code Example #9
def main():
    """Train a PyTorch GAN analog model to generate fake characters alla MNIST dataset."""
    # Make sure the directory where to save the results exist.
    # Results include examples of the fake images generated.
    os.makedirs(RESULTS, exist_ok=True)
    torch.manual_seed(SEED)

    # Load MNIST dataset as tensors.
    dataloader = DataLoader(
        MNIST(PATH_DATASET, download=True, transform=transforms.ToTensor()),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    print(f'\n{datetime.now().time().replace(microsecond=0)} --- '
          f'Started GAN Example')

    gen = Generator(Z_DIM).to(DEVICE)
    gen_opt = AnalogSGD(gen.parameters(), lr=LR)
    gen_opt.regroup_param_groups(gen)

    disc = Discriminator().to(DEVICE)
    disc_opt = AnalogSGD(disc.parameters(), lr=LR)
    disc_opt.regroup_param_groups(disc)

    print(RPU_CONFIG)
    print(gen)
    print(disc)

    criterion = nn.BCEWithLogitsLoss()

    training_loop(gen, disc, gen_opt, disc_opt, criterion, dataloader,
                  N_EPOCHS, DISPLAY_STEP)
    show_animation_fake_images()

    print(f'{datetime.now().time().replace(microsecond=0)} --- '
          f'Completed GAN Example')
Code Example #10
from torch import Tensor
from torch.nn.functional import mse_loss

from aihwkit.nn import AnalogLinear
from aihwkit.optim import AnalogSGD
from aihwkit.simulator.configs import SingleRPUConfig
from aihwkit.simulator.configs.devices import ConstantStepDevice
from aihwkit.simulator.rpu_base import cuda

# Prepare the datasets (input and expected output).
x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
y = Tensor([[1.0, 0.5], [0.7, 0.3]])

# Define a single-layer network, using a constant step device type.
rpu_config = SingleRPUConfig(device=ConstantStepDevice())
model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config)

# Move the model and tensors to cuda if it is available.
if cuda.is_compiled():
    x = x.cuda()
    y = y.cuda()
    model.cuda()

# Define an analog-aware optimizer, preparing it for using the layers.
opt = AnalogSGD(model.parameters(), lr=0.1)
opt.regroup_param_groups(model)

for epoch in range(100):
    # Reset the gradients accumulated in the previous iteration.
    opt.zero_grad()

    # Add the training Tensor to the model (input).
    pred = model(x)
    # Add the expected output Tensor.
    loss = mse_loss(pred, y)
    # Run training (backward propagation).
    loss.backward()

    opt.step()
    print('Loss error: {:.16f}'.format(loss))
Code Example #11
File: 15_simple_rnn.py Project: IBM/aihwkit
y_out = torch.stack(y_out_2d, dim=0).transpose(0, 1).unsqueeze(2)

if WITH_EMBEDDING:
    if WITH_BIDIR:
        model = AnalogBidirRNNNetwork()
    else:
        model = AnalogRNNNetwork()
else:
    if WITH_BIDIR:
        model = AnalogBidirRNNNetwork_noEmbedding()
    else:
        model = AnalogRNNNetwork_noEmbedding()

model = model.to(DEVICE)
optimizer = AnalogSGD(model.parameters(), lr=LEARNING_RATE)
optimizer.regroup_param_groups(model)
criterion = nn.MSELoss()

# Train the network.
losses = []
for i in range(EPOCHS):
    optimizer.zero_grad()

    pred, states = model(y_in, None)

    loss = criterion(pred, y_out)
    print('Epoch = %d: Train Perplexity = %f' %
          (i, np.exp(loss.detach().cpu().numpy())))

    loss.backward()
    optimizer.step()