def data_prepare(self):
    # language data
    self.__ld = LoadData(zip_file_name="jaychou_lyrics.zip", txt_file_name="jaychou_lyrics.txt")
    self.__ld.set_data()
    self.__char_to_idx, self.__idx_to_char = self.__ld.set_get_dict()
    self.__vocab_size = len(self.__char_to_idx)
    self.__corpus_indices = self.__ld.set_get_index()

    # neural network parameters
    input_dim = self.__vocab_size
    hidden_dim = self.__hidden_dim
    output_dim = self.__vocab_size
    std = .001

    # hidden layer
    self.__W_xh = nd.random_normal(scale=std, shape=(input_dim, hidden_dim), ctx=self.__ctx)
    self.__W_hh = nd.random_normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=self.__ctx)
    self.__b_h = nd.zeros(hidden_dim, ctx=self.__ctx)

    # output layer
    self.__W_hy = nd.random_normal(scale=std, shape=(hidden_dim, output_dim), ctx=self.__ctx)
    self.__b_y = nd.zeros(output_dim, ctx=self.__ctx)

    self.__params = [self.__W_xh, self.__W_hh, self.__b_h, self.__W_hy, self.__b_y]
def __init__(self, num_sample, num_local, rank, local_rank, name, embedding_size, prefix, gpu=True):
    self.num_sample = num_sample
    self.num_local = num_local
    self.rank = rank
    self.name = name
    self.embedding_size = embedding_size
    self.gpu = gpu
    self.prefix = prefix
    if gpu:
        self.weight = nd.random_normal(loc=0, scale=0.01,
                                       shape=(self.num_local, self.embedding_size),
                                       ctx=mx.gpu(local_rank))
    else:
        self.weight = nd.random_normal(loc=0, scale=0.01,
                                       shape=(self.num_local, self.embedding_size))
    self.weight_mom = nd.zeros_like(self.weight)
    self.weight_index_sampler = WeightIndexSampler(num_sample, num_local, rank, name)
def init_w_b(num_inputs, num_outputs, kv_url):
    # init params
    w = nd.random_normal(shape=(num_inputs, num_outputs))
    b = nd.random_normal(shape=num_outputs)
    # push params to kvstore
    push([w, b], kv_url, False)
def train_section_class_classififer(section):
    data_loader, test_loader, data_size, num_outputs = train_data.get_data_loader(section)
    W = nd.random_normal(shape=(FEATURE_COUNT, num_outputs), ctx=model_context)
    b = nd.random_normal(shape=num_outputs, ctx=model_context)
    params = [W, b]
    for param in params:
        param.attach_grad()

    num_batches = data_size / train_data.BATCH_SIZE
    loss_sequence = []
    for epoch in range(EPOCHS):
        cumulative_loss = 0
        for index, (data, label) in enumerate(data_loader):
            data = data.as_in_context(model_context)
            label = label.as_in_context(model_context).reshape((-1, 1))
            with autograd.record():
                output = net(data, W, b)
                loss = squared_loss(output, label)
            loss.backward()
            SGD(params, LEARNING_RATE)
            cumulative_loss += loss.asscalar()
        print("Cumulative loss: %s" % (cumulative_loss / num_batches))
        loss_sequence.append(cumulative_loss)
    plot(loss_sequence)
def gen_dataset():
    MAX_DOC_LENGTH = 100
    X_train = nd.random_normal(shape=(1000, MAX_DOC_LENGTH))
    # The original assigned `nd.random.rand` (an unresolved attribute, not sampled
    # values) and the constant 0 for the labels; random uniform labels are
    # substituted here so the function returns usable ndarrays.
    y_train = nd.random.uniform(shape=(1000,))
    X_test = nd.random_normal(shape=(100, MAX_DOC_LENGTH))
    y_test = nd.random.uniform(shape=(100,))
    return (X_train, y_train), (X_test, y_test)
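# A minimal sketch of consuming the dummy dataset above with Gluon's data utilities.
# The batch size and the use of ArrayDataset/DataLoader are illustrative choices,
# not taken from the original snippet.
import mxnet as mx
from mxnet import gluon

(X_train, y_train), (X_test, y_test) = gen_dataset()
train_iter = gluon.data.DataLoader(gluon.data.ArrayDataset(X_train, y_train),
                                   batch_size=32, shuffle=True)
for data, label in train_iter:
    # each batch: data is (32, MAX_DOC_LENGTH), label is (32,)
    print(data.shape, label.shape)
    break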
def __init__(self):
    self.w0 = nd.random_normal(shape=(1, 1), scale=0.01, dtype='float64')
    self.w = nd.random_normal(shape=(features, 1), scale=0.01, dtype='float64')
    self.bw = nd.random_normal(shape=(int((features * (features - 1)) / 2), 1), scale=0.0001, dtype='float64')
    self.params = [self.w, self.w0, self.bw]
    for param in self.params:
        param.attach_grad()
def get_parameters():
    # parameters for INPUT gate
    W_xi = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hi = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_i = nd.zeros(shape=config.hidden_dim)
    # parameters for FORGET gate
    W_xf = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hf = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_f = nd.zeros(shape=config.hidden_dim)
    # parameters for OUTPUT gate
    W_xo = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_ho = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_o = nd.zeros(shape=config.hidden_dim)
    # parameters for memory cell
    W_xc = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hc = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_c = nd.zeros(shape=config.hidden_dim)
    # output layer
    W_hy = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.output_dim))
    b_y = nd.zeros(shape=config.output_dim)

    parameters = [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hy, b_y]
    for parameter in parameters:
        parameter.attach_grad()
    return parameters
def test_linear_regresion_gluon(self):
    num_inputs = 2
    num_outputs = 1
    num_examples = 10000

    def real_fn(X):
        return 2 * X[:, 0] - 3.4 * X[:, 1] + 4.2

    X = nd.random_normal(shape=(num_examples, num_inputs))
    noise = 0.01 * nd.random_normal(shape=(num_examples, ))
    y = real_fn(X) + noise
    batch_size = 4
    train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y), batch_size=batch_size, shuffle=True)

    net = gluon.nn.Dense(1)
    net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=model_ctx)
    square_loss = gluon.loss.L2Loss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001})

    epochs = 1
    loss_sequence = []
    num_batches = num_examples / batch_size
    for e in range(epochs):
        cumulative_loss = 0
        # inner loop
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            label = label.as_in_context(model_ctx)
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            trainer.step(batch_size)
            cumulative_loss += nd.mean(loss).asscalar()
        print("Epoch %s, loss: %s" % (e, cumulative_loss / num_examples))
        loss_sequence.append(cumulative_loss)

    params = net.collect_params()  # this returns a ParameterDict
    print('The type of "params" is a ', type(params))
    # A ParameterDict is a dictionary of Parameter class objects;
    # therefore, here is how we can read off the parameters from it.
    for param in params.values():
        print(param.name, param.data())
def get_params():
    # input gate parameters
    W_xi = nd.random_normal(scale=std, shape=(input_dim, hidden_dim), ctx=ctx)
    W_hi = nd.random_normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=ctx)
    b_i = nd.zeros(hidden_dim, ctx=ctx)
    # forget gate parameters
    W_xf = nd.random_normal(scale=std, shape=(input_dim, hidden_dim), ctx=ctx)
    W_hf = nd.random_normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=ctx)
    b_f = nd.zeros(hidden_dim, ctx=ctx)
    # output gate parameters
    W_xo = nd.random_normal(scale=std, shape=(input_dim, hidden_dim), ctx=ctx)
    W_ho = nd.random_normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=ctx)
    b_o = nd.zeros(hidden_dim, ctx=ctx)
    # candidate memory cell parameters
    W_xc = nd.random_normal(scale=std, shape=(input_dim, hidden_dim), ctx=ctx)
    W_hc = nd.random_normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=ctx)
    b_c = nd.zeros(hidden_dim, ctx=ctx)
    # output layer
    W_hy = nd.random_normal(scale=std, shape=(hidden_dim, output_dim), ctx=ctx)
    b_y = nd.zeros(output_dim, ctx=ctx)

    params = [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hy, b_y]
    for param in params:
        param.attach_grad()
    return params
def test_linear_regresion(self):
    num_inputs = 2
    num_outputs = 1
    num_examples = 10000

    def real_fn(X):
        return 2 * X[:, 0] - 3.4 * X[:, 1] + 4.2

    X = nd.random_normal(shape=(num_examples, num_inputs), ctx=data_ctx)
    noise = .1 * nd.random_normal(shape=(num_examples, ), ctx=data_ctx)
    y = real_fn(X) + noise
    batch_size = 4
    train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y), batch_size=batch_size, shuffle=True)

    w = nd.random_normal(shape=(num_inputs, num_outputs), ctx=model_ctx)
    b = nd.random_normal(shape=num_outputs, ctx=model_ctx)
    params = [w, b]
    for param in params:
        param.attach_grad()

    def net(X):
        return mx.nd.dot(X, w) + b

    def square_loss(yhat, y):
        return nd.mean((yhat - y) ** 2)

    def SGD(params, lr):
        for param in params:
            param[:] = param - lr * param.grad

    epochs = 10
    learning_rate = .0001
    num_batches = num_examples / batch_size
    for e in range(epochs):
        cumulative_loss = 0
        # inner loop
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(model_ctx)
            label = label.as_in_context(model_ctx).reshape((-1, 1))
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            SGD(params, learning_rate)
            cumulative_loss += loss.asscalar()
        print(cumulative_loss / num_batches)

    print(w)
    print(b)
def _get_critic_output(self, net, s, a):
    w1_s = nd.random_normal(shape=(self.s_dim, 30))
    w1_a = nd.random_normal(shape=(self.a_dim, 30))
    b1 = nd.zeros((1, 30))
    params = [w1_s, w1_a, b1]
    for param in params:
        param.attach_grad()
    critic_input = nd.relu(nd.dot(s, w1_s) + nd.dot(a, w1_a) + b1)
    output = net(critic_input)
    return output
def get_params():
    W_xh = nd.random_normal(scale=std, shape=(input_dim, hidden_dim), ctx=ctx)
    W_hh = nd.random_normal(scale=std, shape=(hidden_dim, hidden_dim), ctx=ctx)
    b_h = nd.zeros(hidden_dim, ctx=ctx)
    W_hy = nd.random_normal(scale=std, shape=(hidden_dim, output_dim), ctx=ctx)
    b_y = nd.zeros(output_dim, ctx=ctx)
    params = [W_xh, W_hh, b_h, W_hy, b_y]
    for param in params:
        param.attach_grad()
    return params
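# A minimal sketch of how the parameters returned by get_params() above are
# typically combined in one plain RNN step:
#   H_t = tanh(X_t W_xh + H_{t-1} W_hh + b_h),  Y_t = H_t W_hy + b_y.
# This helper is illustrative and not part of the original snippet.
from mxnet import nd

def rnn_step(X, H, W_xh, W_hh, b_h, W_hy, b_y):
    H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)  # new hidden state
    Y = nd.dot(H, W_hy) + b_y                             # output logits
    return Y, H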
def review_network(net, use_symbol=False, timing=True, num_rep=1, dir_out='', print_model_size=False):
    """Inspect the network architecture & input - output.
    use_symbol: set True to inspect the network in details
    timing: set True to estimate inference time of the network
    num_rep: number of inference runs"""
    # from my_func import get_model_size
    shape = (6, 4, 16, 160, 160)
    if use_symbol:
        x1 = symbol.Variable('x1')
        x2 = symbol.Variable('x2')
        y = net(x1, x2)
        if print_model_size:
            get_model_size(y, to_print=False)
        viz.plot_network(y, shape={'x1': shape, 'x2': shape},
                         node_attrs={"fixedsize": "false"}).view('%sDenseMultipathNet' % dir_out)
    else:
        x1 = nd.random_normal(0.1, 0.02, shape=shape, ctx=ctx)
        x2 = nd.random_normal(0.1, 0.02, shape=shape, ctx=ctx)
        net.collect_params().initialize(initializer.Xavier(magnitude=2), ctx=ctx)
        net.hybridize(static_alloc=True, static_shape=True)
        if timing:
            s1 = time.time()
            y = net(x1, x2)
            y.wait_to_read()
            print("First run: %.5f" % (time.time() - s1))
            import numpy as np
            times = np.zeros(num_rep)
            for t in range(num_rep):
                x = nd.random_normal(0.1, 0.02, shape=shape, ctx=ctx)  # regenerated input (unused by the call below)
                s2 = time.time()
                y = net(x1, x2)
                y.wait_to_read()
                times[t] = time.time() - s2
            print("Run with hybrid network: %.5f" % times.mean())
        else:
            y = net(x1, x2)  # fixed: the original called net(x) with an undefined x
        print("Input size: ", x1.shape)
        print("Output size: ", y.shape)
def get_params():
    # hidden layer
    W_xh = nd.random_normal(scale=std, shape=(vocab_size, hidden_size))
    W_hh = nd.random_normal(scale=std, shape=(hidden_size, hidden_size))
    b_h = nd.zeros(hidden_size)
    # output layer
    W_hy = nd.random_normal(scale=std, shape=(hidden_size, vocab_size))
    b_y = nd.zeros(vocab_size)
    params = [W_xh, W_hh, b_h, W_hy, b_y]
    for param in params:
        param.attach_grad()
    return params
def __init__(self, state_size, action_size):
    self.state_size = state_size
    self.action_size = action_size
    self.epsilon = 0.8
    # Set the scale for weight initialization and choose the number of hidden units
    # in the fully-connected layer
    self.weight_scale = 0.01
    self.num_fc = 128
    self.num_outputs = action_size
    # Define the weights for the network
    self.W1 = nd.random_normal(shape=(20, 3, 3, 3), scale=self.weight_scale, ctx=ctx)
    self.b1 = nd.random_normal(shape=20, scale=self.weight_scale, ctx=ctx)
    self.W2 = nd.random_normal(shape=(50, 20, 5, 5), scale=self.weight_scale, ctx=ctx)
    self.b2 = nd.random_normal(shape=50, scale=self.weight_scale, ctx=ctx)
    self.W3 = nd.random_normal(shape=(36250, self.num_fc), scale=self.weight_scale, ctx=ctx)
    self.b3 = nd.random_normal(shape=128, scale=self.weight_scale, ctx=ctx)
    self.W4 = nd.random_normal(shape=(self.num_fc, self.num_outputs), scale=self.weight_scale, ctx=ctx)
    self.b4 = nd.random_normal(shape=self.num_outputs, scale=self.weight_scale, ctx=ctx)
    self.params = [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3, self.W4, self.b4]
    for param in self.params:
        param.attach_grad()
def __init__(self, e_dim):
    super(CrossCompress, self).__init__()
    self.e_dim = e_dim
    self.weight_vv = nd.random_normal(shape=(self.e_dim, 1))
    self.weight_ev = nd.random_normal(shape=(self.e_dim, 1))
    self.weight_ve = nd.random_normal(shape=(self.e_dim, 1))
    self.weight_ee = nd.random_normal(shape=(self.e_dim, 1))
    self.bias_v = nd.zeros(self.e_dim)
    self.bias_e = nd.zeros(self.e_dim)
    params = [self.weight_vv, self.weight_ev, self.weight_ve, self.weight_ee,
              self.bias_v, self.bias_e]
    for param in params:
        param.attach_grad()
def forward(self, x):
    # Because this encoder-decoder setup uses convolutional layers,
    # there is no need to flatten anything.
    # x.shape = (batch_size, n_channels, width, height)

    # Get the latent layer
    latent_layer = self.encoder(x)

    # Split the latent layer into latent means and latent log vars
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Compute the latent variable with the reparametrization trick applied
    eps = nd.random_normal(0, 1, shape=(x.shape[0], self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # Compute the KL divergence between the latent variable and a standard normal
    kl_div_loss = -0.5 * nd.sum(1 + latent_logvar - latent_mean * latent_mean - nd.exp(latent_logvar), axis=1)

    # Use the decoder to generate output
    x_hat = self.decoder(latent_z.reshape((x.shape[0], self.n_latent, 1, 1)))

    # Compute the pixel-by-pixel loss; this requires that x and x_hat be flattened
    x_flattened = x.reshape((x.shape[0], -1))
    x_hat_flattened = x_hat.reshape((x_hat.shape[0], -1))
    logloss = -nd.sum(x_flattened * nd.log(x_hat_flattened + 1e-10)
                      + (1 - x_flattened) * nd.log(1 - x_hat_flattened + 1e-10), axis=1)

    # Sum up the loss
    loss = kl_div_loss + logloss * self.pbp_weight
    return loss
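# A minimal training-loop sketch for a VAE whose forward() returns the per-sample
# loss, as above. `net`, `train_iter`, and `CTX` are assumed to exist, and the
# optimizer settings are illustrative only.
from mxnet import autograd, gluon

trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 1e-3})
for x, _ in train_iter:
    x = x.as_in_context(CTX)
    with autograd.record():
        loss = net(x)          # forward() returns KL + reconstruction loss per sample
    loss.backward()
    trainer.step(x.shape[0])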
def train(self):
    workers = [
        Worker(self.params, self.n_episode, self.in_queue, self.out_queue)
        for _ in range(self.hparams.n_threads)
    ]
    print(colored("===> Training Start with thread {}".format(self.hparams.n_threads), "yellow"))
    for worker in workers:
        worker.start()

    # @TODO:
    dummy_history = nd.random_normal(shape=(1, 4, 84, 84))
    self.actor(dummy_history)
    self.critic(dummy_history)

    is_alive = True
    while is_alive:
        if self.in_queue.empty() is False:
            self.update_model(self.in_queue.get())
            self.raise_weight()
        is_alive = False
        for worker in workers:
            is_alive = is_alive | worker.is_alive()

    for worker in workers:
        worker.join()
    print(colored("===> Training End", "yellow"))
    self.close()
def __init__(self, num_capsule, dim_vector, context=cpu, iter_routing=1, **kwargs):
    super(DigitCaps, self).__init__(**kwargs)
    self.num_capsule = num_capsule    # 10
    self.dim_vector = dim_vector      # 16
    self.iter_routing = iter_routing  # 3
    self.batch_size = 1
    self.input_num_capsule = 1152
    self.input_dim_vector = 8
    self.context = context
    self.routing_weight_initial = True
    if self.routing_weight_initial:
        self.routing_weight = nd.random_normal(
            shape=(1, self.input_num_capsule, self.num_capsule,
                   self.input_dim_vector, self.dim_vector),
            name='routing_weight').as_in_context(self.context)
        self.routing_weight_initial = False
        self.routing_weight.attach_grad()
    # (1, 1152, 10, 8, 16)
    self.W_ij = self.params.get(
        'weight',
        shape=(1, self.input_num_capsule, self.num_capsule,
               self.input_dim_vector, self.dim_vector))
def sample(self, mu, sigma):
    epsilon = nd.random_normal(shape=mu.shape, loc=0., scale=1., ctx=self.args.ctx)
    out = mu + sigma * epsilon
    return out
def forward(self, x):
    # x is input of shape (n_batch, n_channels, width, height)
    batch_size = x.shape[0]
    x = x.reshape(batch_size, -1)
    self.loss_net.batch_size = batch_size

    # Get the latent layer
    latent_vals = self.encoder(x)

    # Split the latent layer into latent means and latent log vars
    latent_mean = nd.split(latent_vals, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_vals, axis=1, num_outputs=2)[1]

    # Use the reparametrization trick to ensure differentiability of the latent
    # variable
    eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # Use the decoder to generate output
    x_hat = self.decoder(latent_z)
    self.x_hat = x_hat

    # Use the vgg loss net to compute the loss
    loss = self.loss_net(x, x_hat)
    return loss
def query(self, image_text_pairs):
    if self.pool_size == 0:
        return image_text_pairs
    ret_images = []
    ret_text_feats = []
    images, text_feats = image_text_pairs
    for i in range(images.shape[0]):
        image = nd.expand_dims(images[i], axis=0)
        text_feat = nd.expand_dims(text_feats[i], axis=0)
        if self.num_imgs < self.pool_size:
            self.num_imgs = self.num_imgs + 1
            self.images.append(image)
            self.text_feats.append(text_feat)
            ret_images.append(image)
            ret_text_feats.append(text_feat)
        else:
            p = nd.random_normal(0, 1, shape=(1, )).asscalar()
            if p < 0.5:
                random_index = nd.random_uniform(0, self.pool_size - 1, shape=(1, )).astype(np.uint8).asscalar()
                tmp_img = self.images[random_index].copy()
                tmp_text_feat = self.text_feats[random_index].copy()
                self.images[random_index] = image
                self.text_feats[random_index] = text_feat
                ret_images.append(tmp_img)
                ret_text_feats.append(tmp_text_feat)
            else:
                ret_images.append(image)
                ret_text_feats.append(text_feat)
    ret_images = nd.concat(*ret_images, dim=0)
    ret_text_feats = nd.concat(*ret_text_feats, dim=0)
    return [ret_images, ret_text_feats]
def generate(self, x):
    # Because forward() returns the loss values, we still need a method that returns
    # the generated image, which is basically the forward process, up to (not
    # including) the flattening of x_hat.
    # x should be image arrays (4-dimensional) but encoder should be able
    # to handle this, so it is not flattened here.

    # Use the encoder network to compute the values of latent layers
    latent_layer = self.encoder(x)

    # Split the latent layer into latent means and latent log vars
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Use the reparametrization trick to ensure differentiability of the latent
    # variable
    eps = nd.random_normal(loc=0, scale=1, shape=(x.shape[0], self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # Use the decoder to generate output, then reshape it into an image batch
    return self.decoder(latent_z).reshape(-1, self.n_out_channels, self.out_width, self.out_height)
def generate(self, x):
    # Repeat the process of forward, but stop at x_hat and return it
    # input x is an image and thus a 4-dimensional ndarray
    batch_size, n_channels_in, input_width, input_height = x.shape

    # First run it through the encoder
    x_flattened = x.reshape(batch_size, -1)
    latent_layer = self.encoder(x_flattened)

    # Split latent layer into latent mean and latent log variances
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Compute the latent variable's value using the reparametrization trick
    eps = nd.random_normal(loc=0, scale=1, shape=(batch_size, self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # At this point, also compute the KL divergence between the latent variable and
    # Gaussian(0, 1)
    KL_div_loss = -0.5 * nd.sum(1 + latent_logvar - latent_mean * latent_mean - nd.exp(latent_logvar), axis=1)

    # Run the latent variable through the decoder to get the flattened generated image
    x_hat_flattened = self.decoder(latent_z)

    # Inflate the flattened output to be fed into the discriminator
    x_hat = x_hat_flattened.reshape(batch_size, n_channels_in, input_width, input_height)
    return x_hat
def init_params():
    w = nd.random_normal(scale=1, shape=(num_input, 1))
    b = nd.zeros(shape=(1, ))
    params = [w, b]
    for param in params:
        param.attach_grad()
    return params
def mutation(self, weights, mutation_step):
    """Perform mutations on the given weights based on the mutation step size."""
    new_weights = []
    for layer in weights:
        weight_mutations = mutation_step * nd.random_normal(0, 1, shape=layer.shape)
        new_weights.append(layer + weight_mutations)
    return new_weights
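# A minimal usage sketch for mutation() above: perturb a copy of the current
# network weights with Gaussian noise scaled by mutation_step. `agent` and the
# weight shapes are hypothetical, introduced only for illustration.
from mxnet import nd

weights = [nd.random_normal(shape=(4, 16)), nd.random_normal(shape=(16,))]
mutated = agent.mutation(weights, mutation_step=0.02)
print([w.shape for w in mutated])  # shapes are preserved, values are perturbed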
def random_counter(num, K, ctx, d=100, ohkw={}):
    nrow, ncol = K, (num - 1) // K + 1
    cond_col = nd.arange(nrow, ctx=ctx).reshape([1, nrow])
    noise = nd.random_normal(shape=(num, d), ctx=ctx)
    cond = cond_col.tile([ncol, 1]).one_hot(K, **ohkw).reshape([ncol * nrow, K])[:num]
    return noise, cond
def getfake(samples, dimensions, epsilon):
    wfake = nd.random_normal(shape=(dimensions))  # fake weight vector for separation
    bfake = nd.random_normal(shape=(1))           # fake bias
    wfake = wfake / nd.norm(wfake)                # rescale to unit length
    # making some linearly separable data, simply by choosing the labels accordingly
    X = nd.zeros(shape=(samples, dimensions))
    Y = nd.zeros(shape=(samples))
    i = 0
    while (i < samples):
        tmp = nd.random_normal(shape=(1, dimensions))
        margin = nd.dot(tmp, wfake) + bfake
        if (nd.norm(tmp).asscalar() < 3) & (abs(margin.asscalar()) > epsilon):
            X[i, :] = tmp[0]
            Y[i] = 1 if margin.asscalar() > 0 else -1
            i += 1
    return X, Y
def get_parameters():
    # parameters for UPDATE gate
    W_xz = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hz = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_z = nd.zeros(shape=config.hidden_dim)
    # parameters for RESET gate
    W_xr = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hr = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_r = nd.zeros(shape=config.hidden_dim)
    # parameters for candidate hidden state
    W_xh = nd.random_normal(scale=config.std, shape=(config.input_dim, config.hidden_dim))
    W_hh = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.hidden_dim))
    b_h = nd.zeros(shape=config.hidden_dim)
    # output layer
    W_hy = nd.random_normal(scale=config.std, shape=(config.hidden_dim, config.output_dim))
    b_y = nd.zeros(shape=config.output_dim)

    parameters = [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hy, b_y]
    for parameter in parameters:
        parameter.attach_grad()
    return parameters
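# A minimal sketch of how the GRU parameters above are typically combined in one
# step: update gate Z, reset gate R, candidate state H_tilde, new state H.
# This helper is illustrative and not part of the original snippet.
from mxnet import nd

def gru_step(X, H, W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hy, b_y):
    Z = nd.sigmoid(nd.dot(X, W_xz) + nd.dot(H, W_hz) + b_z)          # update gate
    R = nd.sigmoid(nd.dot(X, W_xr) + nd.dot(H, W_hr) + b_r)          # reset gate
    H_tilde = nd.tanh(nd.dot(X, W_xh) + nd.dot(R * H, W_hh) + b_h)   # candidate state
    H = Z * H + (1 - Z) * H_tilde                                    # new hidden state
    Y = nd.dot(H, W_hy) + b_y                                        # output
    return Y, H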
def get_params():
    w_xh = nd.random_normal(scale=std, shape=(vocab_size, hidden_size), ctx=ctx)
    w_hh = nd.random_normal(scale=std, shape=(hidden_size, hidden_size), ctx=ctx)
    b_h = nd.zeros(hidden_size, ctx=ctx)
    w_hy = nd.random_normal(scale=std, shape=(hidden_size, vocab_size), ctx=ctx)
    b_y = nd.zeros(vocab_size, ctx=ctx)
    params = [w_xh, w_hh, b_h, w_hy, b_y]
    for p in params:
        p.attach_grad()
    return params
def get_inception_score_gl(G, ctx):
    all_samples = []
    for i in range(10):
        samples_100 = nd.random_normal(0, 1, shape=(100, nz, 1, 1), ctx=ctx)
        all_samples.append(G(samples_100).as_in_context(mx.cpu()).asnumpy())
    all_samples = np.concatenate(all_samples, axis=0)
    # all_samples = np.add(np.multiply(all_samples, 0.5), 0.5)
    all_samples = all_samples.reshape((-1, 3, 64, 64))
    return icc(list(all_samples), resize=True, splits=10)
def getfake(samples, dimensions, epsilon):
    wfake = nd.random_normal(shape=(dimensions))  # fake weight vector for separation
    bfake = nd.random_normal(shape=(1))           # fake bias
    wfake = wfake / nd.norm(wfake)                # rescale to unit length
    # making some linearly separable data, simply by choosing the labels accordingly
    X = nd.zeros(shape=(samples, dimensions))
    Y = nd.zeros(shape=(samples))
    i = 0
    while (i < samples):
        tmp = nd.random_normal(shape=(1, dimensions))
        margin = nd.dot(tmp, wfake) + bfake
        if (nd.norm(tmp).asscalar() < 3) & (abs(margin.asscalar()) > epsilon):
            X[i, :] = tmp
            Y[i] = 2 * (margin > 0) - 1
            i += 1
    return X, Y
def random_uniform(num, K, ctx, d=100, ohkw={}):
    ncol, nrow = (num - 1) // K + 1, K
    noise_row = nd.random_normal(shape=(ncol, 1, d), ctx=ctx)
    cond_col = nd.random.uniform(0, K, shape=(1, nrow), ctx=ctx).floor()
    noise = noise_row.tile([1, nrow]).reshape([ncol * nrow, d])[:num]
    cond = cond_col.tile([ncol, 1]).one_hot(K, **ohkw).reshape([ncol * nrow, K])[:num]
    return noise, cond
def get_fake(samples, dimensions, epsilon):
    wfake = nd.random_normal(shape=(dimensions))
    bfake = nd.random_normal(shape=(1))
    wfake = wfake / nd.norm(wfake)
    X = nd.zeros(shape=(samples, dimensions))
    Y = nd.zeros(shape=(samples))
    i = 0
    while i < samples:
        tmp = nd.random_normal(shape=(1, dimensions))
        margin = nd.dot(tmp, wfake) + bfake
        # fixed: the original compared abs(margin.asscalar() > epsilon) and called
        # the non-existent margin.ascalar()
        if (nd.norm(tmp).asscalar() < 3) and (abs(margin.asscalar()) > epsilon):
            X[i, :] = tmp
            Y[i] = 1 if margin.asscalar() > 0 else -1
            i += 1
    return X, Y
def _test():
    anchors = [[33, 48, 50, 108, 127, 96],
               [78, 202, 178, 179, 130, 295],
               [332, 195, 228, 326, 366, 359]]
    strides = [8, 16, 32]
    generator = YOLOv3TargetGenerator(20, strides, anchors)
    img = nd.random_normal(shape=(3, 416, 416))
    gt_box = nd.array([[50, 50, 100., 100, 1], [0, 150, 100, 200, 2]])
    args = generator(img, gt_box)
    nd.save('out', list(args))
    print(args)
def grow(num, K, ctx, d=100, ohkw={}):
    nrow, ncol = K, (num - 1) // K + 1
    noise_one = nd.random_normal(shape=(1, 1, d), ctx=ctx)
    noise = noise_one.tile([ncol, nrow]).reshape([ncol * nrow, d])[:num]
    onval = ohkw.get('on_value', 1.0)
    offval = ohkw.get('off_value', -1.0)
    cond_col_d = nd.arange(nrow, ctx=ctx).reshape([1, nrow]).tile([ncol, 1])
    cond_col = cond_col_d.one_hot(K, **ohkw)
    alpha = (linspace(offval, onval, ncol, end=True, ctx=ctx).reshape([ncol, 1, 1])
             * cond_col_d.one_hot(K)
             + offval * cond_col_d.one_hot(K, off_value=1., on_value=0.))
    cond = alpha.reshape([ncol * nrow, K])[:num]
    return noise, cond
def transform(num, K, ctx, d=100, ohkw={}):
    nrow, ncol = K, (num - 1) // K + 1
    noise_one = nd.random_normal(shape=(1, 1, d), ctx=ctx)
    noise = noise_one.tile([ncol, nrow]).reshape([ncol * nrow, d])[:num]
    onval = ohkw.get('on_value', 1.0)
    offval = ohkw.get('off_value', -1.0)
    cond_col_ds = nd.arange(nrow, ctx=ctx).reshape([1, nrow]).tile([ncol, 1])
    cond_col_dt = cond_col_ds[:, list(range(1, nrow)) + [0]]
    cond_col_s = cond_col_ds.one_hot(K)
    cond_col_t = cond_col_dt.one_hot(K)
    alpha = linspace(offval, onval, ncol, end=True, ctx=ctx).reshape([ncol, 1, 1]) * cond_col_t
    beta = linspace(onval, offval, ncol, end=True, ctx=ctx).reshape([ncol, 1, 1]) * cond_col_s
    offvals = (offval * cond_col_ds.one_hot(K, off_value=1., on_value=0.)
               * cond_col_dt.one_hot(K, off_value=1., on_value=0.))
    cond = (beta + alpha + offvals).reshape([ncol * nrow, K])[:num]
    return noise, cond
def lstm(_inputs, initial_state_h, initial_state_c, *parameters):
    # NOTE: the original snippet starts mid-function; this header and the parameter
    # unpacking are reconstructed from the call in __main__ below and from the
    # parameter order returned by get_parameters().
    [W_xi, W_hi, b_i,
     W_xf, W_hf, b_f,
     W_xo, W_ho, b_o,
     W_xc, W_hc, b_c,
     W_hy, b_y] = parameters
    H = initial_state_h
    C = initial_state_c

    _outputs = []
    for X in _inputs:
        # compute INPUT gate from input and last/initial hidden state
        input_gate = nd.sigmoid(nd.dot(X, W_xi) + nd.dot(H, W_hi) + b_i)
        # compute FORGET gate from input and last/initial hidden state
        forget_gate = nd.sigmoid(nd.dot(X, W_xf) + nd.dot(H, W_hf) + b_f)
        # compute OUTPUT gate from input and last/initial hidden state
        output_gate = nd.sigmoid(nd.dot(X, W_xo) + nd.dot(H, W_ho) + b_o)
        # compute memory cell candidate from input and last/initial hidden state
        memory_cell_candidate = nd.tanh(nd.dot(X, W_xc) + nd.dot(H, W_hc) + b_c)
        # compute memory cell from last memory cell and memory cell candidate
        C = forget_gate * C + input_gate * memory_cell_candidate
        # compute hidden state from output gate and memory cell
        H = output_gate * nd.tanh(C)
        # compute output from hidden state
        Y = nd.dot(H, W_hy) + b_y
        _outputs.append(Y)
    return _outputs, H, C


if __name__ == '__main__':
    initial_state_h = nd.zeros(shape=(config.batch_size, config.hidden_dim))
    initial_state_c = nd.zeros(shape=(config.batch_size, config.hidden_dim))
    dump_data = [nd.random_normal(shape=(config.batch_size, config.input_dim))
                 for _ in range(config.num_steps)]
    parameters = get_parameters()
    _outputs, final_state, memory_cell = lstm(dump_data, initial_state_h, initial_state_c, *parameters)
    print(_outputs, final_state, memory_cell)
labels = one_hots(time_numerical[1:seq_length * num_samples + 1])
train_label = labels.reshape((num_batches, batch_size, seq_length, vocab_size))
train_label = nd.swapaxes(train_label, 1, 2)

########################
# allocate parameters
########################
num_inputs = vocab_size
num_hidden = 256
num_outputs = vocab_size

########################
# Weights connecting the inputs to the hidden layer
########################
Wxg = nd.random_normal(shape=(num_inputs, num_hidden), ctx=ctx) * .01
Wxi = nd.random_normal(shape=(num_inputs, num_hidden), ctx=ctx) * .01
Wxf = nd.random_normal(shape=(num_inputs, num_hidden), ctx=ctx) * .01
Wxo = nd.random_normal(shape=(num_inputs, num_hidden), ctx=ctx) * .01

########################
# Recurrent weights connecting the hidden layer across time steps
########################
Whg = nd.random_normal(shape=(num_hidden, num_hidden), ctx=ctx) * .01
Whi = nd.random_normal(shape=(num_hidden, num_hidden), ctx=ctx) * .01
Whf = nd.random_normal(shape=(num_hidden, num_hidden), ctx=ctx) * .01
Who = nd.random_normal(shape=(num_hidden, num_hidden), ctx=ctx) * .01

########################
# Bias vector for hidden layer
########################
labels = one_hots(time_numerical[1:seq_length * num_samples + 1])
train_label = labels.reshape((num_batches, batch_size, seq_length, vocab_size))
train_label = nd.swapaxes(train_label, 1, 2)

########################
# allocate parameters
########################
num_inputs = vocab_size
num_hidden = 256
num_outputs = vocab_size

########################
# Weights connecting the inputs to the hidden layer
########################
Wxz = nd.random_normal(shape=(num_inputs, num_hidden), ctx=ctx) * .01
Wxr = nd.random_normal(shape=(num_inputs, num_hidden), ctx=ctx) * .01
Wxh = nd.random_normal(shape=(num_inputs, num_hidden), ctx=ctx) * .01

########################
# Recurrent weights connecting the hidden layer across time steps
########################
Whz = nd.random_normal(shape=(num_hidden, num_hidden), ctx=ctx) * .01
Whr = nd.random_normal(shape=(num_hidden, num_hidden), ctx=ctx) * .01
Whh = nd.random_normal(shape=(num_hidden, num_hidden), ctx=ctx) * .01

########################
# Bias vector for hidden layer
########################
bz = nd.random_normal(shape=num_hidden, ctx=ctx) * .01
br = nd.random_normal(shape=num_hidden, ctx=ctx) * .01
# plt.show()

# autograd
import mxnet as mx
from mxnet import nd, autograd

mx.random.seed(1)

num_inputs = 2
num_outputs = 1
num_examples = 10000
X = nd.random_normal(shape=(num_examples, num_inputs))
y = 2 * X[:, 0] - 3.4 * X[:, 1] + 4.2 + .01 * nd.random_normal(shape=(num_examples,))

batch_size = 4
train_data = mx.io.NDArrayIter(X, y, batch_size, shuffle=True)  # stochastic batches

batch = train_data.next()
print(batch.data[0])
print(batch.label[0])

# At the end of an epoch, reset() reshuffles the data
counter = 0
train_data.reset()
for batch in train_data:
    counter += 1  # loop body assumed: count the batches seen in one pass
def fully_random_uniform(num, K, ctx, d=100, ohkw={}):
    noise = nd.random_normal(shape=(num, d), ctx=ctx)
    # fixed: the original built `cond_flat` but returned an undefined `cond`
    cond = nd.random.uniform(0, K, shape=num, ctx=ctx).floor().one_hot(K, **ohkw)
    return noise, cond
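# A minimal usage sketch for the noise/condition helpers above: draw latent noise
# plus one-hot conditions and feed them to a conditional generator. Concatenating
# noise and cond along dim=1, the cpu context, and `netG` are assumptions made for
# illustration, not something stated in the original snippets.
import mxnet as mx
from mxnet import nd

ctx = mx.cpu()                               # illustrative context
noise, cond = fully_random_uniform(num=64, K=10, ctx=ctx)
gen_input = nd.concat(noise, cond, dim=1)    # shape: (64, d + K) = (64, 110)
# fake = netG(gen_input)                     # netG: an assumed conditional generator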