Example #1
 def test_readcell(self):
     i = nd.normal(shape=(b, m))
     w = nd.normal(shape=(b, n))
     memory = nd.normal(shape=(n, m))
     read = ReadCell([m, 1, 1, 3, 1], n)
     read.initialize()
     read.hybridize()
     r, wr = read(i, memory, w)
     self.assertEqual(r.shape, (b, m), msg='')
     self.assertEqual(wr.shape, (b, n), msg='')
Example #2
 def test_writecell(self):
     i = nd.normal(shape=(b, m))
     w = nd.normal(shape=(b, n))
     memory = nd.normal(shape=(n, m))
     write = WriteCell([m, 1, 1, 3, 1, m, m], n)
     write.initialize()
     write.hybridize()
     memory, ww = write(i, memory, w)
     self.assertEqual(memory.shape, (n, m), msg='')
     self.assertEqual(ww.shape, (b, n), msg='')
Example #3
    def forward(self, original_idx, paraphrase_idx):
        '''
        Forward pass of the whole model. original_idx and paraphrase_idx both
        use layout NT; the embedding layer adds the "C" (channel) dimension.
        '''
        # ENCODER part
        mean, logv, last_state = self.encoder(original_idx, paraphrase_idx)
        # sample epsilon from N(0, 1) with shape (batch_size, latent_size)
        z = nd.normal(loc=0,
                      scale=1,
                      shape=(original_idx.shape[0], self.latent_size),
                      ctx=model_ctx)
        latent_input = mean + z * nd.exp(
            0.5 * logv)  # exp(0.5 * logv) turns the log-variance into a std dev

        # DECODER part
        # the KL divergence is taken between the approximate posterior
        # N(mean, exp(logv)) and the prior N(0, 1); the negation turns
        # kl_div's output into a loss to be minimized
        kl_loss = -self.kl_div(mean, logv)
        # first paraphrase_input should be the <bos> token
        last_idx = paraphrase_idx[:, 0:1]
        ce_loss = 0
        # decode the sample
        for pos in range(paraphrase_idx.shape[-1] - 1):
            vocab_output, last_state = self.decoder(last_state, last_idx,
                                                    latent_input)
            # compare against the next target token; the leading <bos> is never predicted
            ce_loss = ce_loss + self.ce_loss(
                vocab_output, paraphrase_idx[:, pos + 1:pos + 2])
            last_idx = vocab_output.argmax(axis=-1, keepdims=True)
        return kl_loss, ce_loss
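
self.kl_div is defined elsewhere in the model. For a diagonal-Gaussian posterior N(mean, exp(logv)) against a standard-normal prior, the closed-form term most VAE implementations use looks roughly like the sketch below; the method name, signature, and the choice to return the negative KL (so the caller's negation yields a positive loss) are assumptions, not the original code.

    def kl_div(self, mean, logv):
        # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) over the latent dims,
        # i.e. minus KL(N(mean, exp(logv)) || N(0, 1)); the caller negates it
        return 0.5 * nd.sum(1 + logv - mean * mean - nd.exp(logv), axis=1)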
Example #4
 def choose_action_train(self, state):
     state = nd.array([state], ctx=self.ctx)
     action = self.main_actor_network(state)
     # no noise clip
     noise = nd.normal(loc=0, scale=self.explore_noise, shape=action.shape, ctx=self.ctx)
     action += noise
     clipped_action = self.action_clip(action).squeeze()
     return clipped_action
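
self.action_clip is not part of the snippet; a minimal sketch of what such a helper usually does, assuming the agent stores the valid range in an action_bound attribute (that attribute name is an assumption):

 def action_clip(self, action):
     # clip the (noisy) action into the valid range [-action_bound, action_bound]
     return nd.clip(action, a_min=-self.action_bound, a_max=self.action_bound)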
Example #5
def load_data_polynomial(true_w, true_b, num_train=5000, num_test=1000):
    """
    """
    features = nd.normal(shape=(num_train + num_test, 1))
    poly_features = [nd.power(features, i) for i in range(1, len(true_w) + 1)]
    poly_features = nd.concat(*poly_features)
    labels = nd.dot(poly_features, true_w) + true_b
    labels += nd.random.normal(scale=0.1, shape=labels.shape)
    return features, poly_features, labels
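
A short usage sketch for the loader above; the coefficient values are illustrative, not from the original script:

true_w = nd.array([1.2, -3.4, 5.6])   # weights for a degree-3 polynomial
true_b = 5.0
features, poly_features, labels = load_data_polynomial(true_w, true_b)
print(poly_features.shape, labels.shape)   # (6000, 3) (6000,)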
Example #6
def normal():
    """
    Each element is sampled at random from a normal distribution with mean 0
    and standard deviation 1; also checks a.norm() == nd.sqrt(nd.power(a, 2).sum()).
    :return:
    """
    n = nd.normal(0, 1, shape=(2, 2))
    logger.info(n)

    a = nd.array([1, 2, 3, 4])
    print(a.norm())
    print(nd.sqrt(nd.power(a, 2).sum()))
Example #7
    def __init__(self, **kwargs):
        super(ncf, self).__init__(**kwargs)
        with self.name_scope():
            self.emb_u = nn.Embedding(
                nb_users, EMBEDDING_DIM_USER)  #,weight_initializer=)
            self.emb_u.weight.set_data(
                nd.normal(shape=(nb_users, EMBEDDING_DIM_USER)))
            self.emb_u.weight.grad_req = 'null'

            self.emb_u2 = nn.Embedding(
                nb_users, EMBEDDING_DIM_USER)  #,weight_initializer=)

            self.emb_p = nn.Embedding(
                nb_photos, EMBEDDING_DIM_PHOTO)  #,weight_initializer=)
            self.emb_p.weight.set_data(
                nd.normal(shape=(nb_photos, EMBEDDING_DIM_PHOTO)))
            self.emb_p.weight.grad_req = 'null'

            self.bn = nn.BatchNorm()
            self.flatten = nn.Flatten()
            self.dropout = nn.Dropout(rate=0.25)
            self.dense1 = nn.Dense(units=128, activation='relu')
            self.dense2 = nn.Dense(units=1, activation='sigmoid')
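
The class above only shows its constructor; a forward pass is not part of the snippet. One plausible sketch, assuming ncf is a HybridBlock that takes user and photo indices and simply concatenates the three embeddings (all of this is an assumption, not the original model):

    def hybrid_forward(self, F, user_idx, photo_idx):
        # look up the frozen and trainable user embeddings plus the photo embedding,
        # concatenate them, and score the pair with the small MLP defined above
        u = self.flatten(self.emb_u(user_idx))
        u2 = self.flatten(self.emb_u2(user_idx))
        p = self.flatten(self.emb_p(photo_idx))
        x = F.concat(u, u2, p, dim=1)
        x = self.dropout(self.bn(x))
        return self.dense2(self.dense1(x))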
Example #8
    def update(self):
        self.total_train_steps += 1
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory_buffer.sample(self.batch_size)

        # --------------optimize the critic network--------------------
        with autograd.record():
            # choose next action according to target policy network
            next_action_batch = self.target_actor_network(next_state_batch)
            noise = nd.normal(loc=0, scale=self.policy_noise, shape=next_action_batch.shape, ctx=self.ctx)
            # with noise clip
            noise = nd.clip(noise, a_min=-self.noise_clip, a_max=self.noise_clip)
            next_action_batch = next_action_batch + noise
            clipped_action = self.action_clip(next_action_batch)

            # get target q value
            target_q_value1 = self.target_critic_network1(next_state_batch, clipped_action)
            target_q_value2 = self.target_critic_network2(next_state_batch, clipped_action)
            target_q_value = nd.minimum(target_q_value1, target_q_value2).squeeze()
            target_q_value = reward_batch + (1.0 - done_batch) * (self.gamma * target_q_value)

            # get current q value
            current_q_value1 = self.main_critic_network1(state_batch, action_batch)
            current_q_value2 = self.main_critic_network2(state_batch, action_batch)
            loss = gloss.L2Loss()

            value_loss1 = loss(current_q_value1, target_q_value.detach())
            value_loss2 = loss(current_q_value2, target_q_value.detach())

        self.main_critic_network1.collect_params().zero_grad()
        value_loss1.backward()
        self.critic1_optimizer.step(self.batch_size)

        self.main_critic_network2.collect_params().zero_grad()
        value_loss2.backward()
        self.critic2_optimizer.step(self.batch_size)

        # ---------------optimize the actor network-------------------------
        if self.total_train_steps % self.policy_update == 0:
            with autograd.record():
                pred_action_batch = self.main_actor_network(state_batch)
                actor_loss = -nd.mean(self.main_critic_network1(state_batch, pred_action_batch))

            self.main_actor_network.collect_params().zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step(1)

            self.soft_update(self.target_actor_network, self.main_actor_network)
            self.soft_update(self.target_critic_network1, self.main_critic_network1)
            self.soft_update(self.target_critic_network2, self.main_critic_network2)
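
self.soft_update is referenced but not shown; a common Polyak-averaging implementation looks like the sketch below, assuming the mixing rate lives in a self.tau attribute (the attribute name is an assumption):

    def soft_update(self, target_network, main_network):
        # Polyak averaging: target <- tau * main + (1 - tau) * target
        target_params = target_network.collect_params()
        main_params = main_network.collect_params()
        for t_key, m_key in zip(target_params, main_params):
            updated = (self.tau * main_params[m_key].data()
                       + (1.0 - self.tau) * target_params[t_key].data())
            target_params[t_key].set_data(updated)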
Example #9
    def test_ntmcell(self):
        prev_wr = [nd.normal(shape=(b, n)), nd.normal(shape=(b, n))]
        prev_ww = nd.normal(shape=(b, n))
        prev_content = [nd.normal(shape=(b, m)), nd.normal(shape=(b, m))]
        prev_state = nd.normal(shape=(b, m))
        x = nd.normal(shape=(b, m))
        memory = nd.normal(shape=(n, m))

        ntmcell = NTMCell()
        ntmcell.initialize()
        o, memory, (wr, ww, content, state) = ntmcell(
            x, memory, (prev_wr, prev_ww, prev_content, prev_state))

        self.assertEqual(o.shape, (b, 125))
        self.assertEqual(memory.shape, (n, m))
        for ewr in wr:
            self.assertEqual(ewr.shape, (b, n))
        self.assertEqual(ww.shape, (b, n))
        for ec in content:
            self.assertEqual(ec.shape, (b, m))
        self.assertEqual(state.shape, (b, 125))
Example #10
def get_random_data(size, ctx):
    x = nd.normal(0, 1, shape=(size, 10), ctx=ctx)
    y = x.sum(axis=1) > 3
    return x, y
Example #11
    # paraphrase_idx = nd.array(paraphrase_idx, ctx=model_ctx).expand_dims(axis=0)
    pred = model.predict(original_idx,
                         sample,
                         bos=vocab['<bos>'],
                         eos=vocab['<eos>'])
    return ' '.join(vocab.to_tokens(pred))


if __name__ == '__main__':
    if args.gen:
        with open('data/' + args.dataset + '/vocab.json', 'r') as f:
            vocab = nlp.Vocab.from_json(json.load(f))

        model = VAE_LSTM(emb_size=300, vocab_size=len(vocab))
        model.load_parameters(args.param, ctx=model_ctx)
        sample = nd.normal(loc=0, scale=1, shape=(1, 64), ctx=model_ctx)
        print('\033[33mOriginal: \033[34m%s\033[0m' % args.org_sts)
        print('\033[31mResult: \033[35m%s\033[0m' % generate(model, args.org_sts, \
                                                    sample, vocab, ctx=model_ctx))
    else:
        # load train, valid dataset
        train_dataset_str, valid_dataset_str = get_dataset_str(folder=args.dataset, \
                                                               length=args.nsample)
        # start from existing parameters
        if args.param:
            with open('data/' + args.dataset + '/vocab.json', 'r') as f:
                vocab = nlp.Vocab.from_json(json.load(f))
            # use this loaded vocab
            train_ld, valid_ld = get_dataloader(train_dataset_str, valid_dataset_str, \
                                                clip_length=args.seq_len, vocab=vocab, \
                                                batch_size=args.batch_size)
Example #12
import d2lzh as d2l
from mxnet import autograd, nd, init,gluon
from mxnet.gluon import loss as gloss, data as gdata, nn

# generate (or load) the dataset
n_train, n_test, n_feature = 20, 100, 200
true_w, true_b = nd.ones((n_feature, 1))*0.01, 0.5

# generate the labels y
features = nd.random.normal(scale=1, shape=(n_train+n_test, n_feature))
labels = nd.dot(features, true_w) + true_b
labels += nd.normal(scale=0.01, shape=labels.shape)

# build batched data iterators
batch_size = 1
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]
train_iter = gdata.DataLoader(gdata.ArrayDataset(train_features, train_labels), batch_size, shuffle=True)

# initialize the model parameters
def init_params():
    w = nd.random.normal(scale=1, shape=(n_feature, 1))
    b = nd.zeros(shape=(1,))
    w.attach_grad()
    b.attach_grad()
    return [w, b]

# define the L2 penalty
def l2_penalty(w):
    return (w**2).sum()/2                      # penalize only the weight parameters, not the bias
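
The snippet stops before the training loop; a minimal sketch of how l2_penalty is typically combined with the data loss inside autograd.record (the lambd and learning-rate values are illustrative):

def squared_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2

lambd = 3   # illustrative weight-decay strength
w, b = init_params()
for X, y in train_iter:
    with autograd.record():
        # add the L2 penalty on w to the per-sample squared loss
        l = squared_loss(nd.dot(X, w) + b, y) + lambd * l2_penalty(w)
    l.backward()
    d2l.sgd([w, b], 0.003, batch_size)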
Example #13
from mxnet import nd

x = nd.arange(12)
#print(x)
x = x.reshape(3, 4)
#print(x)
y = nd.zeros((2, 3, 4))
#print(y)
z = nd.ones((2, 8, 9))
#print(z)
z *= 3
#print(z)
k = nd.normal(0, 1, shape=(2, 3, 4))
#print(k)
k = k + y
#print(k)
#k = k/y
#print(k.exp())
a = nd.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
#b = nd.array([1, 2, 3, 4], [ 5, 6, 7, 8], [9, 10, 11, 12])
#a = nd.dot(b, a.T)
b = nd.arange(12)
b = b.reshape((4, 3))
#print(b.shape)
#print(a.shape)
#print(b)
e = b.T
#print(b)
#print(b.shape)
# matrix multiplication using the transpose
c = nd.dot(a, e)
Example #14
def net(X, w, b):
    return softmax(nd.dot(X.reshape(-1, feature_nums), w) + b)

def cross_entropy(y_hat,y):
    return -nd.pick(y_hat,y).log()

def sgd(params,lr,batch_size):
    for param in params:
        param[:]=param-lr*param.grad/batch_size

def eval_accuracy(y_hat,y):
    return (y_hat.argmax(axis=1)==y.astype('float32')).sum().asscalar()

epoch=5

lr=0.1
train_weights=nd.normal(scale=0.01,shape=(feature_nums,output_nums))
train_bias=nd.normal(scale=0.01,shape=(output_nums,))
train_weights.attach_grad()
train_bias.attach_grad()
for i in range(1,epoch+1):
    loss_total=0
    accuracy_total=0
    samples=0
    for X,y in train_iter:
        with autograd.record():
            loss_val = cross_entropy(net(X,train_weights,train_bias),y).sum()
        loss_val.backward()
        sgd([train_weights,train_bias],lr,batch_size)
        loss_total+=loss_val.asscalar()
        accuracy_total+=eval_accuracy(net(X,train_weights,train_bias),y)
        samples+=batch_size
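
softmax, feature_nums, output_nums, batch_size and train_iter come from earlier in the original script; a minimal sketch of the missing softmax helper, under its usual definition:

def softmax(X):
    # exponentiate and normalize each row so it sums to 1
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition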
Example #15
from mxnet import nd

x = nd.ones(shape=(3, 4))
y = nd.ones(shape=(3, 4))
nor = nd.normal(0, 1, shape=(3, 4))

print(x + y)
print(x * y)
print(nd.dot(x, y.T))

x = nd.arange(3).reshape((1, 3))
y = nd.arange(2).reshape((2, 1))
print(x.shape)
print(y.shape)
print(x + y)

print(x)
print(x.asnumpy())
Example #16
g_trainer = gluon.Trainer(
    g_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
d_trainer = gluon.Trainer(
    d_net.collect_params(), 'Adam', {'learning_rate': LEARNING_RATE, 'beta1': BETA, 'clip_gradient': CLIP_GRADIENT})
g_net.collect_params().zero_grad()
d_net.collect_params().zero_grad()
# define evaluation metric
metric = mx.metric.CustomMetric(facc)
# initialize labels
real_label = nd.ones(BATCH_SIZE, CTX)
fake_label = nd.zeros(BATCH_SIZE, CTX)

for epoch in range(NUM_EPOCHS):
    for i, (d, _) in enumerate(train_data):
        # update D
        data = d.as_in_context(CTX)
        noise = nd.normal(loc=0, scale=1, shape=(
            BATCH_SIZE, Z_DIM, 1, 1), ctx=CTX)
        with autograd.record():
            # train with real image
            output = d_net(data).reshape((-1, 1))
            errD_real = loss(output, real_label)
            metric.update([real_label, ], [output, ])

            # train with fake image
            fake_image = g_net(noise)
            output = d_net(fake_image.detach()).reshape((-1, 1))
            errD_fake = loss(output, fake_label)
            errD = errD_real + errD_fake
            errD.backward()
            metric.update([fake_label, ], [output, ])

        d_trainer.step(BATCH_SIZE)
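
The excerpt stops after the discriminator step; the matching generator update in a typical DCGAN loop, written as a sketch consistent with the variables above (not part of the excerpt), would be:

        # update G: push the discriminator to label generated images as real
        with autograd.record():
            fake_image = g_net(noise)
            output = d_net(fake_image).reshape((-1, 1))
            errG = loss(output, real_label)
            errG.backward()

        g_trainer.step(BATCH_SIZE)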
Example #17
    import numpy as np
    import mxnet as mx

    logging.basicConfig()
    logging.getLogger().setLevel(logging.DEBUG)

    batch_size = 128

    # define how many of each feature type to generate
    user_n_cont_features = 5
    user_n_cat_features = 1
    item_n_cat_features = 1
    item_n_cont_features = 100

    # generate data
    user_cont_features = nd.normal(shape=(batch_size, user_n_cont_features))
    user_embed_features = nd.array(
        np.random.randint(low=0,
                          high=10000,
                          size=(batch_size, user_n_cat_features)))

    item_cont_features = nd.normal(shape=(batch_size, item_n_cont_features))
    item_embed_features = nd.array(
        np.random.randint(low=0,
                          high=10000,
                          size=(batch_size, item_n_cat_features)))

    # compute total number of user/item categorical features
    user_cats = np.unique(user_embed_features.asnumpy()).shape[0]
    item_cats = np.unique(item_embed_features.asnumpy()).shape[0]
    logging.debug("{} total user categories & {} total item categories".format(
        user_cats, item_cats))
Example #18

# define dropout function
def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return X.zeros_like()
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob  # rescale so activations keep the same expected value


# define model parameters
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = nd.normal(scale=0.01, shape=(num_inputs, num_hiddens1))
b1 = nd.zeros(shape=[num_hiddens1])

W2 = nd.normal(scale=0.01, shape=(num_hiddens1, num_hiddens2))
b2 = nd.zeros(shape=(num_hiddens2))

W3 = nd.normal(scale=0.01, shape=(num_hiddens2, num_outputs))
b3 = nd.zeros(num_outputs)

params = [W1, b1, W2, b2, W3, b3]
for param in params:
    param.attach_grad()

# define dropout probability
drop_prob1, drop_prob2 = 0.2, 0.5
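
The script continues past this excerpt with the network definition; a minimal sketch of a forward pass that wires the dropout function and the parameters above together (the is_training flag is a simplification; the usual alternative is to check autograd.is_training()):

def net(X, is_training=True):
    # two-hidden-layer MLP with dropout applied after each hidden layer
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if is_training:
        H1 = dropout(H1, drop_prob1)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if is_training:
        H2 = dropout(H2, drop_prob2)
    return nd.dot(H2, W3) + b3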