def forward(self, x):
    # Because this encoder-decoder setup uses convolutional layers,
    # there is no need to flatten anything.
    # x.shape = (batch_size, n_channels, width, height)

    # Get the latent layer
    latent_layer = self.encoder(x)

    # Split the latent layer into latent means and latent log vars
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Compute the latent variable with the reparametrization trick applied
    eps = nd.random_normal(0, 1, shape=(x.shape[0], self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # Compute the KL divergence between the latent variable and a standard normal
    kl_div_loss = -0.5 * nd.sum(
        1 + latent_logvar - latent_mean * latent_mean - nd.exp(latent_logvar),
        axis=1)

    # Use the decoder to generate output
    x_hat = self.decoder(latent_z.reshape((x.shape[0], self.n_latent, 1, 1)))

    # Compute the pixel-by-pixel loss; this requires that x and x_hat be flattened
    x_flattened = x.reshape((x.shape[0], -1))
    x_hat_flattened = x_hat.reshape((x_hat.shape[0], -1))
    logloss = -nd.sum(
        x_flattened * nd.log(x_hat_flattened + 1e-10)
        + (1 - x_flattened) * nd.log(1 - x_hat_flattened + 1e-10),
        axis=1)

    # Sum up the loss
    loss = kl_div_loss + logloss * self.pbp_weight
    return loss
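# A minimal, self-contained check of the closed-form KL term used above, assuming a
# diagonal Gaussian q(z|x) = N(mu, diag(exp(logvar))) measured against N(0, I):
#   KL = -0.5 * sum(1 + logvar - mu^2 - exp(logvar))
# The names `mu` and `logvar` below are illustrative only, not taken from the source.
from mxnet import nd

mu = nd.array([[0.0, 0.5]])
logvar = nd.array([[0.0, -1.0]])
kl = -0.5 * nd.sum(1 + logvar - mu * mu - nd.exp(logvar), axis=1)
print(kl)  # a small positive value; exactly zero only when mu = 0 and logvar = 0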
def hybrid_forward(self, F, X, *args, **kwargs):
    # Perform the neural network pass
    X = self.linear_1(X)
    X = self.linear_2(X)
    X = self.linear_3(X)
    X = self.linear_4(X)

    # Extract mixture coefficients according to formula 25 in Bishop
    z_alpha = X[:, :self.n_components]
    z_alpha_exp = nd.exp(z_alpha)
    alpha = (z_alpha_exp / nd.sum(z_alpha_exp))[0]

    # Extract variance according to formula 26 in Bishop
    z_sigma = X[:, self.n_components:2 * self.n_components]
    sigma = nd.exp(z_sigma)[0]

    # Extract mu according to formula 27 in Bishop
    mu = nd.reshape(X[:, 2 * self.n_components:],
                    (self.n_components, self.t_dim))

    # Create one Gaussian per mixture component
    distributions = [
        MultivariateGaussian(
            mu[i],
            nd.linalg.potrf(sigma[i] * nd.eye(self.t_dim)))
        for i in range(self.n_components)
    ]

    # Create the mixture model
    p_t_X = MixtureDistribution(alpha, distributions)
    return p_t_X
def hybrid_forward(self, F, past_target, future_target):
    # compute network output
    net_output = self.nn(past_target)

    # (batch, prediction_length * nn_features) -> (batch, prediction_length, nn_features)
    net_output = net_output.reshape(0, self.prediction_length, -1)

    # project network output to distribution parameters domain
    distr_args = self.proj_distr_args(net_output)

    # compute distribution
    distr = self.distr_output.distribution(distr_args)

    # negative log-likelihood
    loss = distr.loss(future_target)

    # custom quantile-based loss
    if self.distr_output_type == "Gaussian":
        alpha = self.alpha
        quantile_high = distr.quantile(nd.array([0.995]))[0]
        quantile_low = distr.quantile(nd.array([0.005]))[0]
        future_high = future_target - quantile_high
        future_low = quantile_low - future_target
        loss1 = nd.exp(future_high) * alpha
        loss2 = nd.exp(future_low) * alpha
        loss = loss1 + loss2 + loss

    return loss
def generate(self, x):
    # Repeat the process of forward(), but stop at x_hat and return it.
    # Input x is an image and thus a 4-dimensional ndarray.
    batch_size, n_channels_in, input_width, input_height = x.shape

    # First run it through the encoder
    x_flattened = x.reshape(batch_size, -1)
    latent_layer = self.encoder(x_flattened)

    # Split the latent layer into latent mean and latent log variances
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Compute the latent variable's value using the reparametrization trick
    eps = nd.random_normal(loc=0, scale=1,
                           shape=(batch_size, self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # At this point, also compute the KL divergence between the latent variable
    # and Gaussian(0, 1) (not used further in this method)
    KL_div_loss = -0.5 * nd.sum(
        1 + latent_logvar - latent_mean * latent_mean - nd.exp(latent_logvar),
        axis=1)

    # Run the latent variable through the decoder to get the flattened generated image
    x_hat_flattened = self.decoder(latent_z)

    # Inflate the flattened output to be fed into the discriminator
    x_hat = x_hat_flattened.reshape(batch_size, n_channels_in,
                                    input_width, input_height)
    return x_hat
def softmax(x):
    if x.ndim == 2:
        # Batch case: transpose so classes run along axis 0, subtract the
        # per-sample max for numerical stability, normalize, and transpose back.
        x = x.T
        x = x - nd.max(x, axis=0)
        y = nd.exp(x) / nd.sum(nd.exp(x), axis=0)
        return y.T

    x = x - nd.max(x)  # avoid overflow
    return nd.exp(x) / nd.sum(nd.exp(x))
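# Illustrative usage of the softmax above (the input values are made-up examples):
# it handles both a 1-D score vector and a 2-D batch of score rows.
from mxnet import nd

scores = nd.array([1.0, 2.0, 3.0])
print(softmax(scores))           # probabilities that sum to 1

batch_scores = nd.array([[1.0, 2.0, 3.0], [0.0, 0.0, 0.0]])
print(softmax(batch_scores))     # each row sums to 1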
def _postprocess(self, data):
    data = data[0]
    softmax = nd.exp(data) / nd.sum(nd.exp(data))[0]
    values = {
        val: float(int(softmax[0][i].asnumpy() * 1000) / 1000.0)
        for i, val in enumerate(self.labels)
    }
    index = int(nd.argmax(data, axis=1).asnumpy()[0])
    predicted = self.labels[index]
    return {'predicted': predicted, 'confidence': values}
def triplet_hard_loss(net, data, label):
    label = label.reshape(-1, 1)
    label_mat = nd.equal(label, label.T).astype('float32')
    vec = net(data)

    # Squared Euclidean distance matrix between all pairs of embeddings
    dist_self = nd.sum(nd.square(vec), axis=1, keepdims=True)
    dist_mat = nd.broadcast_add(dist_self, dist_self.T) - 2 * nd.dot(vec, vec.T)

    # Soft-max over positive-pair distances and soft-min over negative-pair distances
    p_min = nd.log(nd.sum(label_mat * nd.exp(dist_mat), axis=1))
    p_max = nd.log(nd.sum((1 - label_mat) * nd.exp(-dist_mat), axis=1))

    loss = nd.relu(p_min + p_max + 1)
    return loss
def forward(self, x, first_cycle=False):
    # Input x is an image and thus a 4-dimensional ndarray
    batch_size, n_channels_in, input_width, input_height = x.shape

    # First run it through the encoder
    x_flattened = x.reshape(batch_size, -1)
    latent_layer = self.encoder(x_flattened)

    # Split the latent layer into latent mean and latent log variances
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Compute the latent variable's value using the reparametrization trick
    eps = nd.random_normal(loc=0, scale=1,
                           shape=(batch_size, self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # At this point, also compute the KL divergence between the latent variable
    # and Gaussian(0, 1)
    KL_div_loss = -0.5 * nd.sum(
        1 + latent_logvar - latent_mean * latent_mean - nd.exp(latent_logvar),
        axis=1)

    # Run the latent variable through the decoder to get the flattened generated image
    x_hat_flattened = self.decoder(latent_z)

    # Inflate the flattened output to be fed into the discriminator
    x_hat = x_hat_flattened.reshape(batch_size, n_channels_in,
                                    input_width, input_height)

    # Content loss is given by the discriminator (a ResNet). In later training the
    # discriminator is fed genuine images labeled 1 and generated images labeled 0,
    # so a higher output indicates higher confidence that an image is real;
    # therefore we minimize the negative of its output.
    content_loss = -nd.sigmoid(self.discriminator(x_hat)).reshape(-1)

    # For the first training cycle the ResNet is completely untrained, so we do not
    # use it as a content-loss metric; instead we use the log loss.
    if first_cycle:
        content_loss = -nd.sum(
            x_flattened * nd.log(x_hat_flattened + 1e-10)
            + (1 - x_flattened) * nd.log(1 - x_hat_flattened + 1e-10),
            axis=1)

    # Loss is the sum of the KL divergence and the content loss
    loss = KL_div_loss + content_loss
    return loss
def hybrid_forward(self, F, pred, label, sample_weight=None):
    label = _reshape_like(F, label, pred)
    if not self._from_sigmoid:
        # Numerically stable sigmoid cross-entropy computed from logits
        max_val = F.relu(-pred)
        loss = pred - pred * label + max_val + F.log(
            F.exp(-max_val) + F.exp(-pred - max_val))
    else:
        # Focal-style weighting; `ctx` and `batch_ratios` are expected to be
        # defined in the enclosing scope
        p = mx.nd.array(1 / (1 + nd.exp(-pred)), ctx=ctx)
        weights = nd.exp(label + (1 - label * 2) * batch_ratios)
        gamma = 2
        w_p, w_n = nd.power(1. - p, gamma), nd.power(p, gamma)
        loss = -(w_p * F.log(p + 1e-12) * label
                 + w_n * F.log(1. - p + 1e-12) * (1. - label))
        loss *= weights
    return F.mean(loss, axis=self._batch_axis, exclude=True)
def refine_bbox_nd(bbox, bbox_delta, im_info=None, means=None, stds=None):
    xmin, ymin, xmax, ymax = nd.split(data=bbox, num_outputs=4, axis=1)
    bbox_width = xmax - xmin + 1.
    bbox_height = ymax - ymin + 1.
    center_x = 0.5 * (xmin + xmax)
    center_y = 0.5 * (ymin + ymax)

    bbox_delta_reshape = nd.Reshape(data=bbox_delta, shape=(0, -1, 4))
    dx, dy, dw, dh = nd.split(data=bbox_delta_reshape, num_outputs=4,
                              axis=2, squeeze_axis=1)
    if (means is not None) and (stds is not None):
        dx = dx * stds[0] + means[0]
        dy = dy * stds[1] + means[1]
        dw = dw * stds[2] + means[2]
        dh = dh * stds[3] + means[3]

    refine_center_x = nd.broadcast_add(lhs=center_x,
                                       rhs=nd.broadcast_mul(lhs=bbox_width, rhs=dx))
    refine_center_y = nd.broadcast_add(lhs=center_y,
                                       rhs=nd.broadcast_mul(lhs=bbox_height, rhs=dy))
    refined_width = nd.broadcast_mul(lhs=bbox_width, rhs=nd.exp(dw))
    refined_height = nd.broadcast_mul(lhs=bbox_height, rhs=nd.exp(dh))
    w_offset = 0.5 * (refined_width - 1.)
    h_offset = 0.5 * (refined_height - 1.)
    refined_xmin = nd.expand_dims(refine_center_x - w_offset, axis=1)
    refined_ymin = nd.expand_dims(refine_center_y - h_offset, axis=1)
    refined_xmax = nd.expand_dims(refine_center_x + w_offset, axis=1)
    refined_ymax = nd.expand_dims(refine_center_y + h_offset, axis=1)

    refined_bbox = nd.concat(refined_xmin, refined_ymin,
                             refined_xmax, refined_ymax, dim=1)
    if im_info is not None:
        # assume im_info is [[height, width, scale]] with shape (1, 3)
        im_hw = nd.slice_axis(im_info, axis=1, begin=0, end=2)
        im_wh = nd.reverse(im_hw, axis=1)
        im_wh = im_wh - 1.
        im_wh = nd.tile(data=im_wh, reps=(1, 2))
        im_wh = nd.Reshape(im_wh, shape=(1, 4, 1))
        refined_bbox = nd.broadcast_minimum(lhs=refined_bbox, rhs=im_wh)
        refined_bbox = nd.broadcast_maximum(lhs=refined_bbox,
                                            rhs=nd.zeros_like(refined_bbox))
    # print refined_bbox.debug_str()
    return refined_bbox
def dynamic_range_decompression(x, c=1):
    """
    Invert dynamic range compression.

    Parameters
    ----------
    c : compression factor used to compress
    """
    return nd.exp(x) / c
def forward(self, x):
    # x is input of shape (n_batch, n_channels, width, height)
    batch_size = x.shape[0]
    x = x.reshape(batch_size, -1)
    self.loss_net.batch_size = batch_size

    # Get the latent layer
    latent_vals = self.encoder(x)

    # Split the latent layer into latent means and latent log vars
    latent_mean = nd.split(latent_vals, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_vals, axis=1, num_outputs=2)[1]

    # Use the reparametrization trick to ensure differentiability of the latent
    # variable
    eps = nd.random_normal(loc=0, scale=1,
                           shape=(batch_size, self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # Use the decoder to generate output
    x_hat = self.decoder(latent_z)
    self.x_hat = x_hat

    # Use the VGG loss net to compute the loss
    loss = self.loss_net(x, x_hat)
    return loss
def test_periodic_kernel(x1, x2, amplitude, length_scale, exact) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    if batch_size > 1:
        x1 = nd.tile(x1, reps=(batch_size, 1, 1))
        x2 = nd.tile(x2, reps=(batch_size, 1, 1))
        for i in range(1, batch_size):
            x1[i, :, :] = (i + 1) * x1[i, :, :]
            x2[i, :, :] = (i - 3) * x2[i, :, :]
    else:
        x1 = x1.reshape(batch_size, history_length_1, num_features)
        x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    frequency = 1 / 24 * nd.ones_like(length_scale)
    periodic = PeriodicKernel(amplitude, length_scale, frequency)
    exact = amplitude * nd.exp(
        -2 * nd.sin(frequency * math.pi * nd.sqrt(exact)) ** 2 / length_scale ** 2)
    res = periodic.kernel_matrix(x1, x2)
    assert nd.norm(exact - res) < tol
def forward(self, pred, target):
    """
    pred is the output probability; target is the multi-class set label
    """
    batch, dim = pred.shape
    # Pairwise score differences, shape (batch, dim, dim)
    dist = nd.broadcast_minus(pred.reshape(batch, dim, 1),
                              pred.reshape(batch, 1, dim))
    pos = mxnet.nd.greater(target, 0).reshape(batch, dim, 1)
    neg = mxnet.nd.equal(target, 0).reshape(batch, 1, dim)
    pos.detach()
    neg.detach()
    # Equivalent explicit-matrix formulation:
    # pos_matrix = mxnet.nd.concat(*([pos] * dim), dim=2)
    # neg_matrix = mxnet.nd.concat(*([neg] * dim), dim=1)
    # loss_matrix = nd.log(1 + nd.sum(pos_matrix * neg_matrix * nd.exp(-dist)))
    loss_matrix = nd.log(
        1 + nd.sum(nd.broadcast_mul(pos, neg) * nd.exp(-dist)))
    return loss_matrix
def backward(self, grad_loss):
    pred, target, pos_mask, neg_mask, loss = self.saved_tensors
    fac = -1 / loss
    grad_input = mxnet.nd.zeros_like(pred)

    # Make one-hot vectors for the positive and negative label indices
    one_hot_pos, one_hot_neg = [], []
    for i in range(grad_input.shape[0]):  # loop over the batch
        pos = np.array([j for j, pos in enumerate(pos_mask[i]) if pos != 0])  # filter pos
        neg = np.array([j for j, neg in enumerate(neg_mask[i]) if neg != 0])  # filter neg
        one_hot_pos.append(mxnet.nd.one_hot(nd.array(pos), pred.shape[1]))
        one_hot_neg.append(mxnet.nd.one_hot(nd.array(neg), pred.shape[1]))

    # Accumulate the gradient over each (positive, negative) label pair
    for i in range(grad_input.shape[0]):  # for each prediction/label sample instance
        for dum_j, phot in enumerate(one_hot_pos[i]):
            for dum_k, nhot in enumerate(one_hot_neg[i]):
                grad_input[i] += (phot - nhot) * nd.exp(-pred[i] * (phot - nhot))

    # This is the grad input
    grad_input = grad_input * grad_loss * fac
    return grad_input, mx.nd.ones(target.shape[0], ctx=target.context)
def evaluate_accuracy(data_loader, model, kclasses):
    if data_loader is None:
        return (0, 0)

    n_accum = 0.0
    accuracy_accum = 0.0
    for i, (data, label) in enumerate(data_loader):
        data = data.as_in_context(mx.cpu())
        label = label.as_in_context(mx.cpu())
        log_c = model(data)

        # standard practice classification accuracy
        accuracy = (nd.argmax(log_c, axis=1).astype('int32')
                    == label.astype('int32')).astype('float32')
        accuracy_accum += nd.sum(accuracy)
        n_accum += len(label)

    avg_accuracy = (accuracy_accum / n_accum).asscalar()
    # maximum value in classification vector
    avg_c_max = nd.exp(nd.max(log_c, axis=1)).mean().asscalar()
    return (avg_accuracy, avg_c_max)
def adaptOptimization(self, data_loader, n_epochs, domainTests):
    # Unsupervised in Target Domain
    tracelog = TrainerTraceLog()
    cumulative_points = 0
    for e in range(n_epochs):
        epoch_loss = 0
        # Target domain, without labels
        for i, (tgt_data, _) in enumerate(data_loader['target']):
            tgt_data = tgt_data.as_in_context(self.context)
            n_points = tgt_data.shape[0]
            with autograd.record():
                logP_c = self.model(tgt_data)
                loss = nd.mean(self.crossEntropy(nd.exp(logP_c), logP_c))  # entropy
            loss.backward()
            self.stepTrainer()
            epoch_loss += loss.asscalar()
            cumulative_points += n_points

        # log at completion of epoch
        tracelog.logPerformanceData(self, domainTests, e, cumulative_points,
                                    epoch_loss / (1 + i))
    return tracelog
def forward(self, C):
    # Input C is of shape (batch_size, num_matches * emb_size, num_candidates),
    # with num_matches=3, num_candidates=2 in our case
    exp_C = nd.exp(nd.dot(C.transpose(axes=(0, 2, 1)), self.V.data()))
    # L(A_i | P, Q) = -log(exp(V^T C_i) / sum_j exp(V^T C_j))
    L = -nd.log(exp_C / nd.sum(exp_C, axis=-1, keepdims=True))
    return L
def poisson(self, n, lam):
    r"""
    The continuous approximation, using :math:`n! = \Gamma\left(n+1\right)`,
    to the probability mass function of the Poisson distribution evaluated
    at :code:`n` given the parameter :code:`lam`.

    Example:

        >>> import pyhf
        >>> pyhf.set_backend(pyhf.tensor.mxnet_backend())
        >>> pyhf.tensorlib.poisson(5., 6.)
        <BLANKLINE>
        [0.16062315]
        <NDArray 1 @cpu(0)>

    Args:
        n (Number or Tensor): The value at which to evaluate the approximation
            to the Poisson distribution p.m.f. (the observed number of events)
        lam (Number or Tensor): The mean of the Poisson distribution p.d.f.
            (the expected number of events)

    Returns:
        MXNet NDArray: Value of the continuous approximation to Poisson(n|lam)
    """
    n = self.astensor(n)
    lam = self.astensor(lam)
    # This is currently copied directly from PyTorch's source until a better
    # way can be found to do this in MXNet
    # https://github.com/pytorch/pytorch/blob/39520ffec15ab7e97691fed048de1832e83785e8/torch/distributions/poisson.py#L59-L63
    return nd.exp((nd.log(lam) * n) - lam - nd.gammaln(n + 1.0))
def old_update(self, b_s, b_a, b_r, b_logpac):
    b_s = nd.array(b_s, ctx=self.args.ctx).reshape((-1, self.observation_dim))
    b_a = nd.array(b_a, ctx=self.args.ctx).reshape((-1, self.action_dim))
    b_r = nd.array(b_r, ctx=self.args.ctx).reshape((-1, 1))
    b_oldpi_log_prob = nd.array(b_logpac, ctx=self.args.ctx).reshape(
        (-1, self.action_dim))

    with autograd.record():
        # Value loss
        v_pred, mu, sigma = self.net(b_s)
        advantage = b_r - v_pred
        vf_loss = nd.mean(nd.square(advantage))

        # Detach from the computation graph
        advantage = advantage.detach()

        # Action loss (clipped surrogate)
        pi_log_prob = self.net.log_prob(b_a, mu, sigma)
        ratio = nd.exp(pi_log_prob - b_oldpi_log_prob)
        surr1 = ratio * advantage
        surr2 = nd.clip(ratio, 1.0 - self.args.clip_param,
                        1.0 + self.args.clip_param) * advantage
        actor_loss = -nd.mean(nd.minimum(surr1, surr2))
        entropy = self.net.entropy(sigma)

        # Total (maximize entropy to encourage exploration)
        loss = vf_loss * self.args.value_coefficient + actor_loss \
            - entropy * self.args.entropy_coefficient

    loss.backward()
    self.trainer.step(b_s.shape[0])
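# A toy illustration of the clipped surrogate objective computed above; the values
# of `ratio`, `advantage` and `clip_param` below are made up, not taken from the source.
from mxnet import nd

ratio = nd.array([0.5, 1.0, 1.5])        # exp(log pi_new - log pi_old)
advantage = nd.array([1.0, -1.0, 2.0])
clip_param = 0.2
surr1 = ratio * advantage
surr2 = nd.clip(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantage
actor_loss = -nd.mean(nd.minimum(surr1, surr2))  # pessimistic (clipped) bound
print(actor_loss)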
def softmax(y_linear):
    # The exponent of a non-positive value is always between 0 and 1.
    exp = nd.exp(y_linear - nd.max(y_linear))
    # Find the total sum of all the exponents
    norms = nd.sum(exp, axis=0, exclude=True).reshape((-1, 1))
    # Divide each exponent by the total so that the results sum to 1.
    return exp / norms
def forward(self, original_idx, paraphrase_idx):
    '''
    Forward pass of the whole model; original_idx and paraphrase_idx are both of
    layout NT, to which the embedding layer adds a "C" (channel) axis.
    '''
    # ENCODER part
    mean, logv, last_state = self.encoder(original_idx, paraphrase_idx)

    # Sample from the Gaussian distribution N(0, 1), of shape (batch_size, latent_size)
    z = nd.normal(loc=0, scale=1,
                  shape=(original_idx.shape[0], self.latent_size), ctx=model_ctx)
    latent_input = mean + z * nd.exp(0.5 * logv)  # exp() makes the std dev positive

    # DECODER part
    # The KL divergence is calculated between N(0, 1) and the distribution obtained
    # via the reparameterization trick; negated since we want it to be small
    kl_loss = -self.kl_div(mean, logv)

    # The first paraphrase input should be the <bos> token
    last_idx = paraphrase_idx[:, 0:1]
    ce_loss = 0

    # Decode the sample
    for pos in range(paraphrase_idx.shape[-1] - 1):
        vocab_output, last_state = self.decoder(last_state, last_idx, latent_input)
        # Only compare against the label we predict; the first token is <bos>
        # and is ignored
        ce_loss = ce_loss + self.ce_loss(vocab_output,
                                         paraphrase_idx[:, pos + 1:pos + 2])
        last_idx = vocab_output.argmax(axis=-1, keepdims=True)

    return kl_loss, ce_loss
def softmax(y_linear):
    # Subtracting the row-wise max stabilizes the scores before exponentiation:
    # it prevents overflow for large scores without changing the resulting
    # probabilities.
    exp = nd.exp(y_linear - nd.max(y_linear, axis=1).reshape((-1, 1)))
    norms = nd.sum(exp, axis=1).reshape((-1, 1))
    return exp / norms
def get_label_dist(self, ele, azi, sigma=0.1):
    '''
    Reference: https://en.wikipedia.org/wiki/Great-circle_distance

    Parameters
    ----------
    ele : float
        angle of elevation in rad
    azi : float
        angle of azimuth in rad

    Returns
    -------
    class_label : int
        maximum-likelihood class
    class_label_distribution : mxnet.ndarray
        probability of each class
    '''
    # Central angle (great-circle distance) between the query direction and each class label
    cos_ang = np.arccos(
        math.sin(ele) * np.sin(_deg_2_rad(self.ele_label))
        + math.cos(ele) * np.cos(_deg_2_rad(self.ele_label))
        * np.cos(azi - _deg_2_rad(self.azi_label)))
    cos_ang = np.expand_dims(cos_ang, axis=0)
    cos_ang_gaussian = nd.exp(-nd.array(cos_ang) ** 2 / sigma)
    cos_ang_gaussian_softmax = cos_ang_gaussian / sum(cos_ang_gaussian[0])
    class_label = np.argmin(cos_ang)
    return class_label, cos_ang_gaussian_softmax
def test_radial_basis_function_kernel(x1, x2, amplitude, length_scale, exact) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    if batch_size > 1:
        x1 = nd.tile(x1, reps=(batch_size, 1, 1))
        x2 = nd.tile(x2, reps=(batch_size, 1, 1))
        for i in range(1, batch_size):
            x1[i, :, :] = (i + 1) * x1[i, :, :]
            x2[i, :, :] = (i - 3) * x2[i, :, :]
    else:
        x1 = x1.reshape(batch_size, history_length_1, num_features)
        x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    rbf = RBFKernel(amplitude, length_scale)
    exact = amplitude * nd.exp(-0.5 * exact / length_scale ** 2)
    res = rbf.kernel_matrix(x1, x2)
    assert nd.norm(exact - res) < tol
def hybrid_forward(self, F, x, min_timescale=1.0, max_timescale=1e4):
    r"""Implement forward computation.

    Parameters
    ----------
    x : NDArray
        input tensor with shape `(batch_size, sequence_length, hidden_size)`

    Returns
    -------
    NDArray
        output tensor with shape `(batch_size, sequence_length, hidden_size)`
    """
    length = x.shape[1]
    channels = x.shape[2]
    position = nd.array(range(length))
    num_timescales = channels // 2
    log_timescale_increment = (
        math.log(float(max_timescale) / float(min_timescale))
        / (num_timescales - 1))
    inv_timescales = min_timescale * \
        nd.exp(nd.array(range(num_timescales)) * -log_timescale_increment)
    scaled_time = F.expand_dims(position, 1) * F.expand_dims(inv_timescales, 0)
    signal = F.concat(F.sin(scaled_time), F.cos(scaled_time), dim=1)
    signal = F.reshape(signal, shape=(1, length, channels))
    return x + signal.as_in_context(x.context)
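# A standalone sketch of the sinusoidal signal built above, using assumed toy sizes
# (length=4, channels=6) just to show the exp/sin/cos steps and the resulting shape.
import math
from mxnet import nd

length, channels = 4, 6
min_timescale, max_timescale = 1.0, 1e4
num_timescales = channels // 2
position = nd.arange(length)
log_timescale_increment = (math.log(max_timescale / min_timescale)
                           / (num_timescales - 1))
inv_timescales = min_timescale * nd.exp(
    nd.arange(num_timescales) * -log_timescale_increment)
scaled_time = nd.expand_dims(position, 1) * nd.expand_dims(inv_timescales, 0)
signal = nd.concat(nd.sin(scaled_time), nd.cos(scaled_time), dim=1)
print(signal.shape)  # (4, 6), broadcast-added to (batch, 4, 6) inputs in the layer above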
def test_periodic_kernel_compute(x1, x2, amplitude, length_scale, frequency) -> None:
    tol = 1e-5
    batch_size = amplitude.shape[0]
    history_length_1 = x1.shape[0]
    history_length_2 = x2.shape[0]
    num_features = x1.shape[1]
    x1 = x1.reshape(batch_size, history_length_1, num_features)
    x2 = x2.reshape(batch_size, history_length_2, num_features)
    amplitude = amplitude.reshape(batch_size, 1, 1)
    length_scale = length_scale.reshape(batch_size, 1, 1)
    frequency = frequency.reshape(batch_size, 1, 1)
    periodic = PeriodicKernel(amplitude, length_scale, frequency)
    exact = nd.zeros((batch_size, history_length_1, history_length_2))
    for i in range(history_length_1):
        for j in range(history_length_2):
            val = 2 * (
                nd.sin(frequency * math.pi * (x1[:, i, :] - x2[:, j, :]))
                / length_scale
            ) ** 2
            exact[:, i, j] = (amplitude * nd.exp(-val)).reshape(-1)
    res = periodic.kernel_matrix(x1, x2)
    assert nd.norm(res - exact) < tol
def generate(self, x):
    # forward() returns the loss values, so we still need a method that returns the
    # generated image; this repeats the forward process up to (but not including)
    # the flattening of x_hat.
    # x should be a 4-dimensional image array, but the encoder can handle that
    # directly, so there is no need to flatten it here.

    # Use the encoder network to compute the values of the latent layers
    latent_layer = self.encoder(x)

    # Split the latent layer into latent means and latent log vars
    latent_mean = nd.split(latent_layer, axis=1, num_outputs=2)[0]
    latent_logvar = nd.split(latent_layer, axis=1, num_outputs=2)[1]

    # Use the reparametrization trick to ensure differentiability of the latent
    # variable
    eps = nd.random_normal(loc=0, scale=1,
                           shape=(x.shape[0], self.n_latent), ctx=CTX)
    latent_z = latent_mean + nd.exp(0.5 * latent_logvar) * eps

    # Use the decoder to generate output and reshape it to image dimensions
    return self.decoder(latent_z).reshape(-1, self.n_out_channels,
                                          self.out_width, self.out_height)
def sample(match, cls_pred, iou, ratio=3, min_sample=0, threshold=0.5, do=True):
    if do is False:
        ones = nd.ones_like(match)
        sample = nd.where(match > -0.5, ones, ones * -1)
        return sample

    sample = nd.zeros_like(match)
    num_pos = nd.sum(match > -0.5, axis=-1)
    require_neg = ratio * num_pos
    neg_mask = nd.where(match < -0.5, nd.max(iou, axis=-1) < threshold, sample)
    max_neg = neg_mask.sum(axis=-1)
    num_neg = nd.minimum(max_neg, nd.maximum(require_neg, min_sample)).astype('int')

    # Negative-class score via a numerically stable log-sum-exp
    neg_prob = cls_pred[:, :, 0]
    max_value = nd.max(cls_pred, axis=-1, keepdims=True)
    score = max_value[:, :, 0] - neg_prob + nd.log(
        nd.sum(nd.exp(cls_pred - max_value), axis=-1))
    score = nd.where(neg_mask, score, nd.zeros_like(score))

    # Keep all positives and the hardest negatives
    argmax = nd.argsort(score, axis=-1, is_ascend=False)
    sample = nd.where(match > -0.5, nd.ones_like(sample), sample)
    for i, num in enumerate(num_neg):
        sample[i, argmax[i, :num.asscalar()]] = -1
    return sample
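# The negative-class score above is a numerically stable log-sum-exp minus the
# background logit; a tiny check of the log-sum-exp identity on made-up logits:
from mxnet import nd

x = nd.array([[2.0, 1.0, 0.5]])
max_value = nd.max(x, axis=-1, keepdims=True)
lse = max_value[:, 0] + nd.log(nd.sum(nd.exp(x - max_value), axis=-1))
print(lse)  # equals log(exp(2.0) + exp(1.0) + exp(0.5)), computed without overflow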
def test_exponent_logarithm_operators():
    a = 2 * nd.ones(shape=LARGE_X)
    # exponent
    result = nd.exp(a)
    assert result[-1] == 7.389056
    assert result.shape == a.shape
    # exponent minus 1
    result = nd.expm1(a)
    assert result[-1] == 6.389056
    assert result.shape == a.shape
    # log2
    result = nd.log2(a)
    assert result[-1] == 1
    assert result.shape == a.shape
    # log10
    result = nd.log10(a)
    assert result[-1] == 0.30103
    assert result.shape == a.shape
    # log1p
    result = nd.log1p(a)
    assert result[-1] == 1.0986123
    assert result.shape == a.shape
    # log
    result = nd.log(a)
    assert result[-1] == 0.6931472
    assert result.shape == a.shape
def softmax(y_linear, temperature=1.0):
    # Subtract the max for numerical stability, then scale by the temperature
    lin = (y_linear - nd.max(y_linear)) / temperature
    exp = nd.exp(lin)
    partition = nd.sum(exp, axis=0, exclude=True).reshape((-1, 1))
    return exp / partition
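# Illustrative effect of the temperature parameter on the softmax above (the input
# scores are made up): higher temperatures flatten the distribution toward uniform,
# lower temperatures sharpen it toward the arg-max.
from mxnet import nd

scores = nd.array([[1.0, 2.0, 3.0]])
print(softmax(scores, temperature=1.0))
print(softmax(scores, temperature=10.0))   # nearly uniform
print(softmax(scores, temperature=0.1))    # nearly one-hot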