def transformer(train=True, dropout_ratio=0.1):
    x = nn.Variable((batch_size, max_len))
    t = nn.Variable((batch_size, 1))
    mask = get_mask(x)

    with nn.parameter_scope('embedding_layer'):
        # h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
        h = token_embedding(x, vocab_size, embedding_size)
    h = position_encoding(h)

    if train:
        h = F.dropout(h, p=dropout_ratio)

    for i in range(hopping_num):
        with nn.parameter_scope(f'encoder_hopping_{i}'):
            h = residual_normalization_wrapper(multihead_self_attention)(
                h, head_num, mask=mask, train=train, dropout_ratio=dropout_ratio)
            h = residual_normalization_wrapper(positionwise_feed_forward)(
                h, train=train, dropout_ratio=dropout_ratio)

    with nn.parameter_scope('output_layer'):
        y = F.sigmoid(PF.affine(h[:, 0, :], 1))

    accuracy = F.mean(F.equal(F.round(y), t))
    loss = F.mean(F.binary_cross_entropy(y, t))
    return x, y, t, accuracy, loss
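# The snippets in this section rely on a `get_mask` helper whose definition is not
# shown. A minimal sketch of what it is assumed to do (an illustration, not the
# original implementation): mark non-padding token ids (id != 0) with 1.0 and keep a
# trailing singleton axis so the mask broadcasts over the embedding dimension.
import nnabla.functions as F


def get_mask(x):
    # x: (batch_size, max_len) token ids; id 0 is assumed to be the padding id.
    batch_size, max_len = x.shape
    return F.reshape(F.greater_scalar(x, 0), (batch_size, max_len, 1))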
def build_self_attention_model(train=True):
    x = nn.Variable((batch_size, max_len))
    t = nn.Variable((batch_size, 1))
    mask = get_mask(x)
    attention_mask = (F.constant(1, shape=mask.shape) - mask) * F.constant(
        np.finfo(np.float32).min, shape=mask.shape)

    with nn.parameter_scope('embedding'):
        h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
    with nn.parameter_scope('forward'):
        h_f = lstm(h, hidden_size, mask=mask, return_sequences=True,
                   return_state=False)
    with nn.parameter_scope('backward'):
        h_b = lstm(h[:, ::-1, ], hidden_size, mask=mask, return_sequences=True,
                   return_state=False)[:, ::-1, ]
    h = F.concatenate(h_f, h_b, axis=2)

    if train:
        h = F.dropout(h, p=dropout_ratio)

    with nn.parameter_scope('da'):
        a = F.tanh(time_distributed(PF.affine)(h, da))
        if train:
            a = F.dropout(a, p=dropout_ratio)
    with nn.parameter_scope('r'):
        a = time_distributed(PF.affine)(a, r)
        if train:
            a = F.dropout(a, p=dropout_ratio)

    a = F.softmax(a + attention_mask, axis=1)
    m = F.batch_matmul(a, h, transpose_a=True)

    with nn.parameter_scope('output_mlp'):
        output = F.relu(PF.affine(m, output_mlp_size))
        if train:
            output = F.dropout(output, p=dropout_ratio)
    with nn.parameter_scope('output'):
        y = F.sigmoid(PF.affine(output, 1))

    accuracy = F.mean(F.equal(F.round(y), t))
    loss = F.mean(F.binary_cross_entropy(
        y, t)) + attention_penalty_coef * frobenius(
            F.batch_matmul(a, a, transpose_a=True) - batch_eye(batch_size, r))
    return x, t, accuracy, loss
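# The attention penalty above uses two helpers, `frobenius` and `batch_eye`, that are
# not shown. A hedged sketch of plausible implementations (an assumption, not the
# original code): the penalty ||A^T A - I||_F^2 encourages the r attention heads to
# attend to different positions.
import numpy as np
import nnabla as nn
import nnabla.functions as F


def frobenius(x):
    # Squared Frobenius norm per sample, averaged over the batch.
    return F.mean(F.sum(x ** 2, axis=(1, 2)))


def batch_eye(batch_size, size):
    # Identity matrix tiled along the batch axis: (batch_size, size, size).
    eye = np.tile(np.eye(size, dtype=np.float32), (batch_size, 1, 1))
    return nn.Variable.from_numpy_array(eye)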
def gan_loss(x, target):
    return F.mean(F.binary_cross_entropy(x, target))
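# Example usage of `gan_loss` (an illustrative sketch): discriminator outputs on real
# samples are pushed toward ones, outputs on generated samples toward zeros.
# `d_real` and `d_fake` are placeholders standing in for D(real_images) and D(G(z)).
import nnabla as nn
import nnabla.functions as F

batch_size = 64
d_real = nn.Variable((batch_size, 1))  # placeholder: D(real_images)
d_fake = nn.Variable((batch_size, 1))  # placeholder: D(G(z))
ones = F.constant(1, shape=(batch_size, 1))
zeros = F.constant(0, shape=(batch_size, 1))
d_loss = gan_loss(d_real, ones) + gan_loss(d_fake, zeros)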
h = PF.embed(x, vocab_size, embedding_size)
h = F.mean(h, axis=1)
h = expand_dims(h, axis=-1)  # (batch_size, embedding_size, 1)

t = nn.Variable([batch_size, 1])
t_neg = nn.Variable([batch_size, k])
with nn.parameter_scope('W_out'):
    _t = PF.embed(t, vocab_size, embedding_size)          # (batch_size, 1, embedding_size)
    _t_neg = PF.embed(t_neg, vocab_size, embedding_size)  # (batch_size, k, embedding_size)

t_score = F.sigmoid(F.reshape(F.batch_matmul(_t, h), shape=(batch_size, 1)))
t_neg_score = F.sigmoid(
    F.reshape(F.batch_matmul(_t_neg, h), shape=(batch_size, k)))

t_loss = F.binary_cross_entropy(t_score, F.constant(1, shape=(batch_size, 1)))
t_neg_loss = F.binary_cross_entropy(t_neg_score,
                                    F.constant(0, shape=(batch_size, k)))

loss = F.mean(F.sum(t_loss, axis=1) + F.sum(t_neg_loss, axis=1))

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t, t_neg], loss=loss, solver=solver)
trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)

with open('vectors.txt', 'w') as f:
    f.write('{} {}\n'.format(vocab_size - 1, embedding_size))
    with nn.parameter_scope('W_in'):
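# `expand_dims` above is a helper whose definition is not included in the snippet.
# A minimal sketch of what it is assumed to do, implemented with F.reshape: insert a
# singleton axis at the given position (axis=-1 appends one at the end).
import nnabla.functions as F


def expand_dims(x, axis):
    shape = list(x.shape)
    if axis < 0:
        axis = len(shape) + axis + 1
    shape.insert(axis, 1)
    return F.reshape(x, tuple(shape))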
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_aspect('equal')
plt.imshow(image.reshape(28, 28), cmap='Greys_r')
plt.show()

#show_image()

#%%
for i in range(10000):
    ## Fake image
    z = nn.Variable.from_numpy_array(np.random.randn(batch_size, z_dim))
    fake_images = G(z)
    predict = D(fake_images)
    fake_loss = F.mean(F.binary_cross_entropy(predict, zeros))
    D_solver.zero_grad()
    fake_loss.forward()
    fake_loss.backward(clear_buffer=True)
    D_solver.update()

    ## Real image
    true_images = nn.Variable.from_numpy_array(mnist_next_batch())
    predict = D(true_images)
    real_loss = F.mean(F.binary_cross_entropy(predict, ones))
    D_solver.zero_grad()
    real_loss.forward()
    real_loss.backward(clear_buffer=True)
    D_solver.update()
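# The loop above only updates the discriminator. A hedged sketch of the generator
# step that a standard GAN loop would add inside the same iteration, assuming a
# separate `G_solver` holding only the generator parameters (an assumption, not part
# of the original snippet): the generator is trained so that D labels its samples as real.
z = nn.Variable.from_numpy_array(np.random.randn(batch_size, z_dim))
fake_images = G(z)
predict = D(fake_images)
g_loss = F.mean(F.binary_cross_entropy(predict, ones))
G_solver.zero_grad()
g_loss.forward()
g_loss.backward(clear_buffer=True)
G_solver.update()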
def global_average_pooling_1d(x, mask):
    # Average only over non-padded time steps (the input is already masked).
    count = F.sum(mask, axis=1)
    global_average_pooled = F.sum(x, axis=1) / count
    return global_average_pooled


x = nn.Variable((batch_size, max_len))
t = nn.Variable((batch_size, 1))
mask = get_mask(x)

with nn.parameter_scope('embedding'):
    h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
h = global_average_pooling_1d(h, mask)

with nn.parameter_scope('output'):
    y = F.sigmoid(PF.affine(h, 1))

accuracy = F.mean(F.equal(F.round(y), t))
loss = F.mean(F.binary_cross_entropy(y, t))

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t],
                  loss=loss,
                  metrics={
                      'cross entropy': loss,
                      'accuracy': accuracy
                  },
                  solver=solver)
trainer.run(train_data_iter, dev_data_iter, epochs=5, verbose=1)
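# `time_distributed` above (also used in the earlier snippets) wraps a parametric
# function so that it is applied independently at every time step. A hedged sketch of
# a plausible implementation (an assumption, not the original helper): merge the
# batch and time axes, apply the wrapped function, then split them again.
import nnabla.functions as F


def time_distributed(func):
    def wrapper(x, *args, **kwargs):
        batch_size, length = x.shape[0], x.shape[1]
        h = F.reshape(x, (batch_size * length,) + x.shape[2:])
        h = func(h, *args, **kwargs)
        return F.reshape(h, (batch_size, length) + h.shape[1:])
    return wrapper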
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S
import nnabla.utils.save
import nnabla.utils.load
from nnabla.ext_utils import get_extension_context


def test_save_load_multi_datasets(tmpdir, datasets_o, datasets_m):
    ctx = get_extension_context('cpu', device_id=0, type_config='float')
    nn.set_default_context(ctx)

    batch_size = 64
    x = nn.Variable([batch_size, 1, 28, 28])
    Affine = PF.affine(x, 1, name='Affine')
    Sigmoid = F.sigmoid(Affine)

    target = nn.Variable([batch_size, 1])
    target.data.fill(1)
    BinaryCrossEntropy = F.binary_cross_entropy(Sigmoid, target)

    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())
    solver.set_learning_rate(5e-4)

    contents = {
        'global_config': {
            'default_context': ctx
        },
        'training_config': {
            'max_epoch': 100,
            'iter_per_epoch': 23,
            'save_best': True,
            'monitor_interval': 10
        },
        'networks': [{
            'name': 'Main',
            'batch_size': batch_size,
            'outputs': {
                'BinaryCrossEntropy': BinaryCrossEntropy
            },
            'names': {
                'x': x
            }
        }, {
            'name': 'MainValidation',
            'batch_size': batch_size,
            'outputs': {
                'BinaryCrossEntropy': BinaryCrossEntropy
            },
            'names': {
                'x': x
            }
        }, {
            'name': 'MainRuntime',
            'batch_size': batch_size,
            'outputs': {
                'Sigmoid': Sigmoid
            },
            'names': {
                'x': x
            }
        }],
        'datasets': [{
            'name': 'dataset1',
            'uri': 'DATASET_TRAINING1',
            'cache_dir': 'here_it_is',
            'shuffle': True,
            'batch_size': batch_size,
            'no_image_normalization': False,
            'variables': {
                'x': x,
                'BinaryCrossEntropy': BinaryCrossEntropy
            }
        }, {
            'name': 'dataset2',
            'uri': 'DATASET_TRAINING2',
            'cache_dir': 'here_it_is',
            'shuffle': True,
            'batch_size': batch_size,
            'no_image_normalization': False,
            'variables': {
                'x': x,
                'BinaryCrossEntropy': BinaryCrossEntropy
            },
        }],
        'optimizers': [{
            'name': 'optimizer',
            'solver': solver,
            'network': 'Main',
            'dataset': datasets_o,
            'weight_decay': 0,
            'lr_decay': 1,
            'lr_decay_interval': 1,
            'update_interval': 1
        }],
        'monitors': [{
            'name': 'train_error',
            'network': 'MainValidation',
            'dataset': datasets_m
        }, {
            'name': 'valid_error',
            'network': 'MainValidation',
            'dataset': datasets_m
        }],
        'executors': [{
            'name': 'Executor',
            'network': 'MainRuntime',
            'data': ['x'],
            'output': ['Sigmoid']
        }]
    }

    tmpdir.ensure(dir=True)
    tmppath = tmpdir.join('testsave.nnp')
    nnp_file = tmppath.strpath
    nnabla.utils.save.save(nnp_file, contents)
    nnabla.utils.load.load(nnp_file)
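# A saved .nnp can also be reloaded for inference with NnpLoader. A hedged usage
# sketch: the file path and the .inputs/.outputs key names follow the variable names
# registered in `contents` above, and should be treated as assumptions here.
import numpy as np
from nnabla.utils.nnp_graph import NnpLoader

nnp_file = 'testsave.nnp'  # path to an .nnp produced as in the test above
nnp = NnpLoader(nnp_file)
runtime = nnp.get_network('MainRuntime', batch_size=64)
x_in = runtime.inputs['x']
y_out = runtime.outputs['Sigmoid']
x_in.d = np.random.rand(*x_in.shape)  # dummy input for illustration
y_out.forward()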
def prediction(x, dim):
    enc = F.relu(PF.affine(x, dim))
    with nn.parameter_scope("dec"):
        dec = F.sigmoid(PF.affine(enc, 784))
    return dec


# In[44]:

# build graph.
batch_size = 256
encode_dim = 32

nn.clear_parameters()
image = nn.Variable(shape=(batch_size, 784))  # for train.
pred = prediction(image, encode_dim)
loss = F.mean(F.binary_cross_entropy(pred, image))

vimage = nn.Variable(shape=(batch_size, 784))  # for test.
vpred = prediction(vimage, encode_dim)


# In[45]:

# setup solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())


# In[46]:

# training.
data = data_iterator_mnist(batch_size, True)
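# The snippet stops right after creating the MNIST data iterator. A hedged sketch of
# a training loop that could follow, assuming `data_iterator_mnist` yields uint8
# images in [0, 255] that must be flattened to 784 and scaled to [0, 1] to serve as
# the binary cross entropy targets:
for i in range(1000):
    img, _ = data.next()
    image.d = img.reshape(batch_size, 784) / 255.0
    solver.zero_grad()
    loss.forward()
    loss.backward()
    solver.update()
    if i % 100 == 0:
        print(i, float(loss.d))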
def loss_func(pred, target):
    return F.mean(
        F.binary_cross_entropy(pred, target))