Example #1
def transformer(train=True, dropout_ratio=0.1):
    x = nn.Variable((batch_size, max_len))
    t = nn.Variable((batch_size, 1))
    mask = get_mask(x)
    with nn.parameter_scope('embedding_layer'):
        # h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
        h = token_embedding(x, vocab_size, embedding_size)
    h = position_encoding(h)

    if train:
        h = F.dropout(h, p=dropout_ratio)

    for i in range(hopping_num):
        with nn.parameter_scope(f'encoder_hopping_{i}'):
            h = residual_normalization_wrapper(multihead_self_attention)(
                h,
                head_num,
                mask=mask,
                train=train,
                dropout_ratio=dropout_ratio)
            h = residual_normalization_wrapper(positionwise_feed_forward)(
                h, train=train, dropout_ratio=dropout_ratio)

    with nn.parameter_scope('output_layer'):
        y = F.sigmoid(PF.affine(h[:, 0, :], 1))

    accuracy = F.mean(F.equal(F.round(y), t))
    loss = F.mean(F.binary_cross_entropy(y, t))

    return x, y, t, accuracy, loss
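Examples #1, #2, and #6 call helpers such as get_mask and position_encoding that are not shown here. A minimal sketch of what they might look like, assuming padding tokens use index 0 and standard sinusoidal encodings (these definitions are not taken from the source):

import numpy as np
import nnabla as nn
import nnabla.functions as F


def get_mask(x):
    # 1.0 for real tokens, 0.0 for padding, shaped (batch_size, max_len, 1)
    # so it can broadcast against (batch_size, max_len, embedding_size).
    batch_size, max_len = x.shape
    return F.reshape(F.greater_scalar(x, 0), (batch_size, max_len, 1))


def position_encoding(h):
    # Add fixed sinusoidal position encodings to h of shape
    # (batch_size, max_len, embedding_size).
    batch_size, max_len, dim = h.shape
    pos = np.arange(max_len)[:, None]
    i = np.arange(dim)[None, :]
    angle = pos / np.power(10000.0, (2 * (i // 2)) / dim)
    enc = np.zeros((max_len, dim), dtype=np.float32)
    enc[:, 0::2] = np.sin(angle[:, 0::2])
    enc[:, 1::2] = np.cos(angle[:, 1::2])
    pe = nn.Variable.from_numpy_array(np.broadcast_to(enc, h.shape).copy())
    return h + pe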
Example #2
def build_self_attention_model(train=True):
    x = nn.Variable((batch_size, max_len))
    t = nn.Variable((batch_size, 1))
    mask = get_mask(x)
    attention_mask = (F.constant(1, shape=mask.shape) - mask) * F.constant(
        np.finfo(np.float32).min, shape=mask.shape)
    with nn.parameter_scope('embedding'):
        h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
    with nn.parameter_scope('forward'):
        h_f = lstm(h,
                   hidden_size,
                   mask=mask,
                   return_sequences=True,
                   return_state=False)
    with nn.parameter_scope('backward'):
        h_b = lstm(h[:, ::-1, ],
                   hidden_size,
                   mask=mask,
                   return_sequences=True,
                   return_state=False)[:, ::-1, ]
    h = F.concatenate(h_f, h_b, axis=2)
    if train:
        h = F.dropout(h, p=dropout_ratio)
    with nn.parameter_scope('da'):
        a = F.tanh(time_distributed(PF.affine)(h, da))
        if train:
            a = F.dropout(a, p=dropout_ratio)
    with nn.parameter_scope('r'):
        a = time_distributed(PF.affine)(a, r)
        if train:
            a = F.dropout(a, p=dropout_ratio)
        a = F.softmax(a + attention_mask, axis=1)
    m = F.batch_matmul(a, h, transpose_a=True)
    with nn.parameter_scope('output_mlp'):
        output = F.relu(PF.affine(m, output_mlp_size))
        if train:
            output = F.dropout(output, p=dropout_ratio)
    with nn.parameter_scope('output'):
        y = F.sigmoid(PF.affine(output, 1))

    accuracy = F.mean(F.equal(F.round(y), t))
    loss = F.mean(F.binary_cross_entropy(
        y, t)) + attention_penalty_coef * frobenius(
            F.batch_matmul(a, a, transpose_a=True) - batch_eye(batch_size, r))
    return x, t, accuracy, loss
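Example #2's attention penalty also relies on batch_eye and frobenius, which are not defined above. One plausible sketch (the exact reduction over the batch is an assumption):

import numpy as np
import nnabla as nn
import nnabla.functions as F


def batch_eye(batch_size, n):
    # A stack of identity matrices, shaped (batch_size, n, n).
    return nn.Variable.from_numpy_array(
        np.tile(np.eye(n, dtype=np.float32), (batch_size, 1, 1)))


def frobenius(x):
    # Frobenius norm of each (r, r) matrix in the batch, averaged over the batch.
    return F.mean(F.sum(x ** 2, axis=[1, 2]) ** 0.5)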
def gan_loss(x, target):
    return F.mean(F.binary_cross_entropy(x, target))


# Word2vec-style snippet (its beginning is cut off above). The input embedding
# presumably lives under the 'W_in' scope, matching the scope that is read
# back when writing vectors.txt at the end of the snippet.
with nn.parameter_scope('W_in'):
    h = PF.embed(x, vocab_size, embedding_size)
h = F.mean(h, axis=1)
h = expand_dims(h, axis=-1)  # (batch_size, embedding_size, 1)
t = nn.Variable([batch_size, 1])
t_neg = nn.Variable([batch_size, k])
with nn.parameter_scope('W_out'):
    _t = PF.embed(t, vocab_size,
                  embedding_size)  # (batch_size, 1, embedding_size)
    _t_neg = PF.embed(t_neg, vocab_size,
                      embedding_size)  # (batch_size, k, embedding_size)

t_score = F.sigmoid(F.reshape(F.batch_matmul(_t, h), shape=(batch_size, 1)))
t_neg_score = F.sigmoid(
    F.reshape(F.batch_matmul(_t_neg, h), shape=(batch_size, k)))

t_loss = F.binary_cross_entropy(t_score, F.constant(1, shape=(batch_size, 1)))
t_neg_loss = F.binary_cross_entropy(t_neg_score,
                                    F.constant(0, shape=(batch_size, k)))

loss = F.mean(F.sum(t_loss, axis=1) + F.sum(t_neg_loss, axis=1))

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t, t_neg], loss=loss, solver=solver)
trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)

with open('vectors.txt', 'w') as f:
    f.write('{} {}\n'.format(vocab_size - 1, embedding_size))
    with nn.parameter_scope('W_in'):
Example #5
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')
    plt.show()


#show_image()

#%%
for i in range(10000):
    ## Fake image
    z = nn.Variable.from_numpy_array(np.random.randn(batch_size, z_dim))
    fake_images = G(z)
    predict = D(fake_images)
    fake_loss = F.mean(F.binary_cross_entropy(predict, zeros))

    D_solver.zero_grad()
    fake_loss.forward()
    fake_loss.backward(clear_buffer=True)
    D_solver.update()

    ## Real image
    true_images = nn.Variable.from_numpy_array(mnist_next_batch())
    predict = D(true_images)
    real_loss = F.mean(F.binary_cross_entropy(predict, ones))

    D_solver.zero_grad()
    real_loss.forward()
    real_loss.backward(clear_buffer=True)
    D_solver.update()
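
    ## Generator step (sketch): the generator update is not shown in this
    ## fragment; it would typically mirror the discriminator steps above,
    ## using an assumed G_solver and the same `ones` labels.
    z = nn.Variable.from_numpy_array(np.random.randn(batch_size, z_dim))
    predict = D(G(z))
    g_loss = F.mean(F.binary_cross_entropy(predict, ones))

    G_solver.zero_grad()
    g_loss.forward()
    g_loss.backward(clear_buffer=True)
    G_solver.update()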
Example #6
def global_average_pooling_1d(x, mask):
    count = F.sum(mask, axis=1)
    global_average_pooled = F.sum(x, axis=1) / count
    return global_average_pooled


x = nn.Variable((batch_size, max_len))
t = nn.Variable((batch_size, 1))
mask = get_mask(x)
with nn.parameter_scope('embedding'):
    h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
h = global_average_pooling_1d(h, mask)
with nn.parameter_scope('output'):
    y = F.sigmoid(PF.affine(h, 1))

accuracy = F.mean(F.equal(F.round(y), t))
loss = F.mean(F.binary_cross_entropy(y, t))

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t],
                  loss=loss,
                  metrics={
                      'cross entropy': loss,
                      'accuracy': accuracy
                  },
                  solver=solver)
trainer.run(train_data_iter, dev_data_iter, epochs=5, verbose=1)
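Examples #2 and #6 (and the commented-out line in Example #1) wrap PF.embed and PF.affine with time_distributed, which is also not shown. A common way to write such a wrapper (a sketch, not necessarily the original definition) is to apply the function to each timestep and re-stack the results along the time axis:

import nnabla.functions as F


def time_distributed(func):
    def wrapped(x, *args, **kwargs):
        # Apply func independently to every timestep of x, shaped
        # (batch_size, time, ...); parameters are shared across timesteps.
        batch_size = x.shape[0]
        outputs = []
        for x_t in F.split(x, axis=1):
            h_t = func(x_t, *args, **kwargs)
            outputs.append(F.reshape(h_t, (batch_size, 1) + h_t.shape[1:]))
        return F.concatenate(*outputs, axis=1)
    return wrapped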
Example #7
def test_save_load_multi_datasets(tmpdir, datasets_o, datasets_m):
    ctx = get_extension_context('cpu', device_id=0, type_config='float')
    nn.set_default_context(ctx)

    batch_size = 64
    x = nn.Variable([batch_size, 1, 28, 28])
    Affine = PF.affine(x, 1, name='Affine')
    Sigmoid = F.sigmoid(Affine)

    target = nn.Variable([batch_size, 1])
    target.data.fill(1)
    BinaryCrossEntropy = F.binary_cross_entropy(Sigmoid, target)

    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())
    solver.set_learning_rate(5e-4)

    contents = {
        'global_config': {
            'default_context': ctx
        },
        'training_config': {
            'max_epoch': 100,
            'iter_per_epoch': 23,
            'save_best': True,
            'monitor_interval': 10
        },
        'networks': [{
            'name': 'Main',
            'batch_size': batch_size,
            'outputs': {
                'BinaryCrossEntropy': BinaryCrossEntropy
            },
            'names': {
                'x': x
            }
        }, {
            'name': 'MainValidation',
            'batch_size': batch_size,
            'outputs': {
                'BinaryCrossEntropy': BinaryCrossEntropy
            },
            'names': {
                'x': x
            }
        }, {
            'name': 'MainRuntime',
            'batch_size': batch_size,
            'outputs': {
                'Sigmoid': Sigmoid
            },
            'names': {
                'x': x
            }
        }],
        'datasets': [{
            'name': 'dataset1',
            'uri': 'DATASET_TRAINING1',
            'cache_dir': 'here_it_is',
            'shuffle': True,
            'batch_size': batch_size,
            'no_image_normalization': False,
            'variables': {
                'x': x,
                'BinaryCrossEntropy': BinaryCrossEntropy
            }
        }, {
            'name': 'dataset2',
            'uri': 'DATASET_TRAINING2',
            'cache_dir': 'here_it_is',
            'shuffle': True,
            'batch_size': batch_size,
            'no_image_normalization': False,
            'variables': {
                'x': x,
                'BinaryCrossEntropy': BinaryCrossEntropy
            },
        }],
        'optimizers': [{
            'name': 'optimizer',
            'solver': solver,
            'network': 'Main',
            'dataset': datasets_o,
            'weight_decay': 0,
            'lr_decay': 1,
            'lr_decay_interval': 1,
            'update_interval': 1
        }],
        'monitors': [{
            'name': 'train_error',
            'network': 'MainValidation',
            'dataset': datasets_m
        }, {
            'name': 'valid_error',
            'network': 'MainValidation',
            'dataset': datasets_m
        }],
        'executors': [{
            'name': 'Executor',
            'network': 'MainRuntime',
            'data': ['x'],
            'output': ['Sigmoid']
        }]
    }

    tmpdir.ensure(dir=True)
    tmppath = tmpdir.join('testsave.nnp')
    nnp_file = tmppath.strpath
    nnabla.utils.save.save(nnp_file, contents)
    nnabla.utils.load.load(nnp_file)
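
    # Optional extra check (sketch, not part of the original test): reload the
    # saved file with NnpLoader and run the runtime network once. The network
    # and variable names match the `contents` dict above; the random input is
    # only illustrative.
    import numpy as np
    from nnabla.utils.nnp_graph import NnpLoader
    nnp = NnpLoader(nnp_file)
    net = nnp.get_network('MainRuntime', batch_size=batch_size)
    x_in = net.inputs['x']
    y_out = net.outputs['Sigmoid']
    x_in.d = np.random.rand(*x_in.shape)
    y_out.forward()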
Example #8
def prediction(x, dim):
    with nn.parameter_scope("enc"):
        enc = F.relu(PF.affine(x, dim))
    with nn.parameter_scope("dec"):
        dec = F.sigmoid(PF.affine(enc, 784))
    return dec


# In[44]:

# build graph.
batch_size = 256
encode_dim = 32
nn.clear_parameters()

image = nn.Variable(shape=(batch_size, 784))  # for train.
pred = prediction(image, encode_dim)
loss = F.mean(F.binary_cross_entropy(pred, image))

vimage = nn.Variable(shape=(batch_size, 784))  # for test.
vpred = prediction(vimage, encode_dim)

# In[45]:

# setup solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

# In[46]:

# training.
data = data_iterator_mnist(batch_size, True)


def loss_func(pred, target):
    return F.mean(F.binary_cross_entropy(pred, target))
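
Example #8's training loop is cut off; a minimal sketch of how it could continue with the graph, solver, and data iterator defined above (the iteration count and the pixel scaling are assumptions):

for i in range(1000):
    batch, _ = data.next()
    # Flatten to (batch_size, 784) and scale to [0, 1] if the iterator
    # returns raw pixel values.
    image.d = batch.reshape(batch_size, 784) / 255.0
    loss.forward()
    solver.zero_grad()
    loss.backward()
    solver.update()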