def get_srme_log(net, X_train, Y_train):
    """Root of (2x the) mean squared error between log-predictions and log-targets.

    Predictions are clipped to [1, inf) first so the logarithm is well-defined.
    (Name keeps the original's "srme" spelling so callers are unaffected.)
    """
    n = X_train.shape[0]
    preds = nd.clip(net(X_train), 1, float('inf'))
    sq_err = square_loss(nd.log(preds), nd.log(Y_train))
    return np.sqrt(2 * nd.sum(sq_err).asscalar() / n)
def get_rmse_log(net, X_train, y_train):
    """RMSE between the logs of clipped predictions and the logs of the labels."""
    # Clip below at 1 so nd.log never sees non-positive values.
    clipped = nd.clip(net(X_train), 1, float('inf'))
    total = nd.sum(square_loss(nd.log(clipped), nd.log(y_train))).asscalar()
    return np.sqrt(2 * total / X_train.shape[0])
def test_convdraw_loss_kl_term_2():
    """KL term between two non-standard diagonal Gaussians (batch of 3)."""
    ctx = mx.cpu()
    mu_q = nd.array([[1., 2.], [0., 1.], [2., 3.]], ctx=ctx)
    sd_q = nd.array([[1., 0.5], [0.5, 0.5], [2., 1.]], ctx=ctx)
    mu_p = nd.array([[1., 1.], [-0.5, 0.7], [1., 2.3]], ctx=ctx)
    sd_p = nd.array([[0.7, 0.5], [1., 0.3], [1.5, 1.1]], ctx=ctx)
    # Distributions are encoded as [mean, log-sd] concatenated on dim 1.
    q = nd.concat(mu_q, nd.log(sd_q), dim=1)
    p = nd.concat(mu_p, nd.log(sd_p), dim=1)
    kl_term = ConvDRAWLossKLTerm(latent_dim=2)
    val = kl_term(q, p)
    expected = np.array([2.163733219326574, 1.3212104456828437, 0.5344416978024983])
    assert val.shape == (3,)
    assert np.allclose(expected, val.asnumpy())
def test_convdraw_loss_kl_term_1():
    """KL term against a standard-normal prior (mu=0, sd=1) for a batch of 3."""
    ctx = mx.cpu()
    mu_q = nd.array([[1., 2.], [0., 1.], [2., 3.]], ctx=ctx)
    sd_q = nd.array([[1., 0.5], [0.5, 0.5], [2., 1.]], ctx=ctx)
    mu_p = nd.zeros((3, 2), ctx=ctx)
    sd_p = nd.ones((3, 2), ctx=ctx)
    # Distributions are encoded as [mean, log-sd] concatenated on dim 1.
    q = nd.concat(mu_q, nd.log(sd_q), dim=1)
    p = nd.concat(mu_p, nd.log(sd_p), dim=1)
    kl_term = ConvDRAWLossKLTerm(latent_dim=2)
    val = kl_term(q, p)
    expected = np.array([2.8181471805599454, 1.1362943611198906, 7.306852819440055])
    assert val.shape == (3,)
    assert np.allclose(expected, val.asnumpy())
def test_convdraw_loss():
    """Full ConvDRAW loss: per-step KL terms plus a constant, scaled fit term."""
    ctx = mx.cpu()
    # steps x batch x latent (2 x 3 x 2)
    mu_q = nd.array([[[1., 2.], [0., 1.], [2., 3.]],
                     [[1.5, 0.4], [1.0, 0.7], [1.2, 0.8]]], ctx=ctx)
    sd_q = nd.array([[[1., 0.5], [0.5, 0.5], [2., 1.]],
                     [[0.4, 1.], [0.8, 0.8], [1.5, 2.]]], ctx=ctx)
    mu_p = nd.array([[[1., 1.], [-0.5, 0.7], [1., 2.3]],
                     [[0.5, 1.2], [1.5, 0.8], [1.0, 0.5]]], ctx=ctx)
    sd_p = nd.array([[[0.7, 0.5], [1., 0.3], [1.5, 1.1]],
                     [[0.2, 0.4], [0.6, 0.6], [1.0, 0.3]]], ctx=ctx)
    q = nd.concat(mu_q, nd.log(sd_q), dim=2)
    p = nd.concat(mu_p, nd.log(sd_p), dim=2)
    # fit_loss is constant, so expected fit contribution is 1.0 * input_dim * scale.
    convdraw_loss = ConvDRAWLoss(fit_loss=lambda y, x: 1.0,
                                 input_dim=4,
                                 latent_shape=(1, 2, 1),
                                 input_cost_scale=0.5)
    mock_x = nd.zeros((3, 4), ctx=ctx)
    mock_y = nd.zeros((3, 4), ctx=ctx)
    val = convdraw_loss(mock_x, q, p, mock_y)
    expected_kl = (
        np.array([2.163733219326574, 1.3212104456828437, 0.5344416978024983])
        + np.array([17.015562057495117, 0.5635244846343994, 20.56463623046875]))
    expected_fit = 1.0 * 4 * 0.5
    assert val.shape == (3,)
    assert np.allclose(expected_fit + expected_kl, val.asnumpy())
def get_rmse_log(net, X_train, y_train):
    """Gets root mse between the logarithms of the prediction and the truth."""
    num_train = X_train.shape[0]
    # Anything below 1 is clipped up to 1 before taking logs.
    preds = nd.clip(net(X_train), 1, float('inf'))
    mean_sq = nd.sum(square_loss(nd.log(preds), nd.log(y_train))).asscalar() / num_train
    return np.sqrt(2 * mean_sq)
def KL(self, other_prob):
    """Closed-form KL divergence KL(self || other_prob) between factorized Gaussians.

    `self` plays the role of the variational posterior q and `other_prob` the
    prior p. Raises ValueError when a closed form is unavailable (non-conjugate
    pair) or when the two distributions have different shapes/support.
    """
    if not self.is_conjugate(other_prob):
        raise ValueError("KL cannot be computed in closed form.")
    # Both the number of parameter groups and every per-group shape must match.
    if (not len(self.shapes) == len(other_prob.shapes)) or \
            (not np.all(np.array([s == o for s, o in zip(self.shapes, other_prob.shapes)]))):
        raise ValueError(
            "KL cannot be computed: The 2 distributions have different support"
        )
    # Expand any shared parameters to one entry per group before iterating.
    raw_params_ext_var_posterior = self._replicate_shared_parameters()
    # Posterior sigmas come from rho via the (softplus-style) transform_rhos.
    sigmas_var_posterior = transform_rhos(
        raw_params_ext_var_posterior[RHO])
    raw_params_ext_prior = other_prob._replicate_shared_parameters()
    out = 0.0
    for ii in range(len(self.shapes)):
        means_p = raw_params_ext_prior[MEAN][ii]
        var_p = raw_params_ext_prior["sigma"][ii]**2
        means_q = raw_params_ext_var_posterior[MEAN][ii]
        var_q = sigmas_var_posterior[ii]**2
        inc_means = (means_q - means_p)
        prec_p = 1.0 / var_p
        # Standard Gaussian KL: 0.5 * (var_q/var_p + (mu_q-mu_p)^2/var_p - 1
        #                              + log var_p - log var_q), element-wise.
        temp = 0.5 * (var_q * prec_p + (
            (inc_means**2) * prec_p) - 1.0 + nd.log(var_p) - nd.log(var_q))
        if temp.shape == (1, 1):
            # If parameters are shared, multiply by the number of variables
            temp = temp * (self.shapes[ii][0] * self.shapes[ii][1])
        out = out + nd.sum(temp)
    return out
def check_KL(self):
    """KL divergence between the data distribution and the RBM's model distribution.

    Computed exactly over the enumerated visible states `self.enum_states`
    against their empirical probabilities `self.prob_states`.
    """
    hidden_act = nd.dot(self.enum_states, self.W) + self.hb
    visible_term = nd.dot(self.enum_states, self.vb)
    # -log sigmoid(-a) = softplus(a): per-state hidden free-energy contribution.
    hidden_term = nd.sum(-nd.log(nd.sigmoid(-hidden_act)), axis=1)
    model_prob = nd.softmax(visible_term + hidden_term)
    kl = nd.sum(self.prob_states * nd.log(self.prob_states / model_prob))
    return kl.asnumpy()[0]
def hybrid_forward(self, F, action, prob, advantage):
    """Actor loss: advantage-weighted policy log-likelihood plus an entropy term."""
    # Probability assigned to the chosen action (one-hot `action` selects it).
    action_prob = F.sum(action * prob, axis=1)
    # Policy-gradient term: negative log-likelihood weighted by the advantage.
    policy_loss = -F.sum(F.log(action_prob + 1e-10) * advantage)
    # sum p*log(p) is the *negative* entropy of the policy distribution.
    neg_entropy = F.sum(F.sum(prob * F.log(prob + 1e-10), axis=1))
    # Combine the two terms with a fixed 0.01 entropy coefficient.
    return policy_loss + 0.01 * neg_entropy
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """One actor-critic training step; returns (per-parameter gradients, batch size).

    Gradients are computed locally and returned (batchnorm params excluded),
    presumably so an external aggregator applies them — TODO confirm against caller.
    """
    batch_size = s_batch.shape[0]
    # Down-weight frequently-taken actions: rarer actions get a larger
    # (softmax-normalized) backprop rate below.
    action_indx = np.argmax(a_batch_one_hot,axis=1).tolist()
    action_stats = [action_indx.count(action_indx[i]) for i in range(batch_size)]
    action_bp_rate = (1 - np.array(action_stats)/float(batch_size))**2
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))
    self.actorcritic.collect_params().zero_grad()
    self.reset_noise()
    with mx.autograd.record():
        # loss_vec collects auxiliary (e.g. regularization) losses from the forward pass.
        loss_vec = []
        probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)
        loss = 0.
        for element in loss_vec:
            loss = loss + element
        # 1e-5 guards the logs against zero probabilities.
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1)+1e-5)
        entropy = -nd.sum(nd.sum(data=probs*nd.log(probs+1e-5), axis=1), axis=0)
        top_decision_entropy = -nd.sum(nd.sum(data=top_decisions*nd.log(top_decisions+1e-5), axis=1), axis=0)
        entropy_loss = - entropy
        # NOTE(review): top_decision_entropy_loss is computed but only used by the
        # commented-out loss variant below.
        top_decision_entropy_loss = - top_decision_entropy
        actorloss = -nd.sum(action_bp_rate*(logprob*advantage_batch), axis=0)
        criticloss = nd.sum(action_bp_rate*nd.square(values-V_trace_batch), axis=0)
        # actorloss = -nd.sum(logprob*advantage_batch, axis=0)
        # criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
        # NOTE(review): the auxiliary `loss` accumulated above is overwritten here,
        # so loss_vec does not contribute to the gradient — confirm intended.
        loss = actorloss + 0.3*criticloss + 0.001*entropy_loss
        # loss = actorloss + 0.3*criticloss + 0.0001*top_decision_entropy_loss
    loss.backward()
    grads_list = []
    for name, value in self.actorcritic.collect_params().items():
        # Skip batchnorm parameters when exporting gradients.
        if name.find('batchnorm') < 0:
            grads_list.append(value.grad())
    return grads_list, batch_size
def test_draw_loss():
    """DRAW loss over two steps; fit term is zeroed so only the KL terms remain."""
    ctx = mx.cpu()
    # num_steps=2; layout is steps x batch x latent.
    mu = nd.zeros((2, 3, 2))
    mu[0] = nd.array([[1., 2.], [0., 1.], [2., 3.]], ctx=ctx)
    mu[1] = nd.array([[-1., 1.], [-2., 1.5], [2., -1.]], ctx=ctx)
    sd = nd.zeros((2, 3, 2))
    sd[0] = nd.array([[1., 0.5], [0.5, 0.5], [2., 1.]], ctx=ctx)
    sd[1] = nd.array([[2., 0.2], [1.5, 1.5], [.4, 3.]], ctx=ctx)
    # q is [mean, log-sd] concatenated on the latent dimension.
    qs = nd.concat(mu, nd.log(sd), dim=2)
    draw_loss = DRAWLoss(fit_loss=lambda y, x: 0.0, input_dim=1, latent_dim=2)
    mock_x = nd.ones((3, 1))
    mock_y = nd.ones((3, 1))
    val = draw_loss(mock_x, qs, mock_y)
    expected = (
        np.array([2.8181471805599454, 1.1362943611198906, 7.306852819440055])
        + np.array([2.9362907318741547, 3.564069783783671, 5.897678443206045]))
    assert val.shape == (3, )
    assert np.allclose(expected, val.asnumpy())
def my_loss(data, nc, ns, nq):
    """Prototypical-network-style loss over an episode embedding batch.

    Args:
        data: embeddings; the first nc*ns rows are support examples
              (nc classes x ns shots), the remaining nc*nq rows are queries.
        nc: number of classes in the episode.
        ns: support shots per class.
        nq: query examples per class.

    Returns:
        (loss, label): scalar mean loss and the nearest-centre index per query.

    Fix: removed the unreachable code that followed the original `return`
    (it contained `loss1 is np.nan`, which is always False since `is` tests
    identity, and a bogus `raise StopIteration`), along with the `weight`/
    `weight2` tensors that only that dead code used. Reachable behavior is
    unchanged.
    """
    data = data.astype('float64')
    # Class centres: mean of each class's support embeddings (epsilon for stability).
    cls_data = nd.reshape(data[0:nc * ns], (nc, ns, -1))
    cls_center = nd.mean(cls_data, axis=1) + 1e-10
    # Squared Euclidean distance of every query to every class centre.
    data_center_dis = nd.norm(data[nc * ns:].expand_dims(axis=1)
                              - cls_center.expand_dims(axis=0),
                              axis=2) ** 2
    # NOTE(review): this sums log-softmax over *all* classes instead of picking
    # the target class — kept as-is to preserve the original behavior.
    log_probs = nd.log_softmax(-data_center_dis, axis=1)
    loss = nd.sum(-nd.sum(log_probs, axis=1))
    label = nd.argmin(data_center_dis, axis=1)
    return loss / (nc * nq), label
def inference_g(self, observed_arr):
    '''
    Inference with generator.

    Args:
        observed_arr:       `mxnet.ndarray` of observed data points.

    Returns:
        Tuple data.
        - re-parametric data.
        - encoded data points.
        - re-encoded data points.
    '''
    # Encode, decode, then re-encode the reconstruction.
    encoded_arr = self.model.encoder(observed_arr)
    decoded_arr = self.model.decoder(encoded_arr)
    re_encoded_arr = self.re_encoder_model(decoded_arr)
    # Anomaly score: exp of the mean squared encode/re-encode discrepancy,
    # used below as a per-sample scale for the Gaussian noise.
    anomaly_arr = nd.square(encoded_arr - re_encoded_arr)
    anomaly_arr = nd.expand_dims(nd.exp(anomaly_arr.mean(axis=1)), axis=1)
    mean_arr = nd.expand_dims(decoded_arr.mean(axis=1), axis=1)
    gauss_arr = nd.random.normal_like(data=observed_arr, loc=0, scale=3.0)
    # Re-parameterization: mean + noise * scale.
    re_param_arr = mean_arr + (gauss_arr * anomaly_arr)
    # NOTE(review): a standard VAE KL term is -0.5*(1 + log(sigma^2) - mu^2 - sigma^2);
    # here `mean_arr` enters linearly and `anomaly_arr` with a + sign — confirm intended.
    kl_arr = -0.5 * (1 + nd.log(anomaly_arr) - mean_arr + anomaly_arr)
    re_param_arr = re_param_arr + kl_arr
    return re_param_arr, encoded_arr, re_encoded_arr
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """Actor-critic step with Pop-Art style value normalization.

    Updates the running moment estimates (`presigma`, `mu`, `sigma`) from the
    V-trace targets, forwards them (old and new) to the network, and returns
    (per-parameter gradients excluding batchnorm, batch size).
    """
    batch_size = s_batch.shape[0]
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    # Snapshot the pre-update statistics: the network needs both old and new
    # values to rescale its output layer (Pop-Art).
    sigma_prime = copy.deepcopy(self.sigma)
    mu_prime = copy.deepcopy(self.mu)
    # NOTE(review): `presigma` is updated with the *first* moment (mean of V)
    # and `mu` with the *second* moment (mean of V^2) — the names suggest the
    # opposite; confirm against the Pop-Art update being implemented.
    self.presigma = (1-self.beta)*self.presigma + self.beta*np.sum(np.array(V_trace))/(np.array(V_trace).shape[0])
    self.mu = (1-self.beta)*self.mu + self.beta*np.sum((np.array(V_trace))**2)/(np.array(V_trace).shape[0])
    self.sigma = math.sqrt(self.presigma-self.mu**2)
    pop_art_hyper = self.sigma, sigma_prime, self.mu, mu_prime
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    self.reset_noise()
    self.actorcritic.collect_params().zero_grad()
    with mx.autograd.record():
        # loss_vec collects auxiliary losses emitted by the forward pass.
        loss_vec = []
        probs, values = self.actorcritic.forward(s_batch, pop_art_hyper, loss_vec)
        loss = 0.
        for element in loss_vec:
            loss = loss + element
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1))
        # NOTE(review): entropyloss is computed but not added to the loss below.
        entropyloss = -nd.sum(nd.sum(data=probs*nd.log(probs), axis=1), axis=0)
        actorloss = -nd.sum(logprob*advantage_batch, axis=0)
        criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
        # Overwrites the accumulated auxiliary loss — confirm intended.
        loss = actorloss + criticloss
    loss.backward()
    grads_list = []
    for name, value in self.actorcritic.collect_params().items():
        # Batchnorm parameters are excluded from the exported gradients.
        if name.find('batchnorm') < 0:
            grads_list.append(value.grad())
    return grads_list, batch_size
def goodness_of_function_loss_function(self):
    """Softmax cross-entropy of the cached batch logits against the batch labels.

    Computes the softmax by hand and stores each intermediate on `self`;
    returns the per-sample negative log-likelihood.
    NOTE(review): exp of raw logits can overflow for large values — a
    log-softmax formulation would be numerically safer.
    """
    # Exponentiate so every value is > 0.
    self.__batch_y_hat_exp = nd.exp(self.__batch_y_hat)
    # Partition function for normalizing into probabilities.
    self.__batch_y_hat_partition = self.__batch_y_hat_exp.sum(
        axis=1, keepdims=True)
    self.__batch_y_hat_exp_divided_partition = self.__batch_y_hat_exp / self.__batch_y_hat_partition
    # Pick the probability of each true label and take the negative log.
    return -nd.log(
        nd.pick(self.__batch_y_hat_exp_divided_partition, self.__batch_y))
def train_update(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """Actor-critic step that applies the update locally via `self.trainer.step`.

    Same loss as the gradient-returning `train` variant: advantage-weighted
    policy loss + 0.3 * critic loss + 0.001 * entropy loss, with per-action
    backprop rates that down-weight frequently taken actions.
    """
    batch_size = s_batch.shape[0]
    # Rarer actions in the batch receive a larger (softmax-normalized) weight.
    action_indx = np.argmax(a_batch_one_hot, axis=1).tolist()
    action_stats = [action_indx.count(action_indx[i]) for i in range(batch_size)]
    action_bp_rate = (1 - np.array(action_stats) / float(batch_size)) ** 2
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))
    self.actorcritic.collect_params().zero_grad()
    self.reset_noise()
    with mx.autograd.record():
        # loss_vec collects auxiliary losses emitted by the forward pass.
        loss_vec = []
        probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)
        loss = 0.
        for element in loss_vec:
            loss = loss + element
        # 1e-5 guards the logs against zero probabilities.
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1) + 1e-5)
        entropy = -nd.sum(nd.sum(data=probs * nd.log(probs + 1e-5), axis=1), axis=0)
        top_decision_entropy = -nd.sum(nd.sum(data=top_decisions * nd.log(top_decisions + 1e-5), axis=1), axis=0)
        entropy_loss = - entropy
        # NOTE(review): only used by the commented-out loss variant below.
        top_decision_entropy_loss = - top_decision_entropy
        actorloss = -nd.sum(action_bp_rate * (logprob * advantage_batch), axis=0)
        criticloss = nd.sum(action_bp_rate * nd.square(values - V_trace_batch), axis=0)
        # actorloss = -nd.sum(logprob*advantage_batch, axis=0)
        # criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
        # NOTE(review): the accumulated auxiliary `loss` is overwritten here.
        loss = actorloss + 0.3 * criticloss + 0.001 * entropy_loss
        # loss = actorloss + 0.3*criticloss + 0.0001*top_decision_entropy_loss
    loss.backward()
    self.trainer.step(batch_size=batch_size, ignore_stale_grad=True)
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """Plain actor-critic step: actor loss + critic loss, gradients returned.

    Returns (per-parameter gradients excluding batchnorm, batch size).
    """
    batch_size = s_batch.shape[0]
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    self.actorcritic.collect_params().zero_grad()
    with mx.autograd.record():
        # loss_vec collects auxiliary losses emitted by the forward pass.
        loss_vec = []
        probs, values = self.actorcritic(s_batch, loss_vec)
        loss = 0.
        for element in loss_vec:
            loss = loss + element
        # No epsilon guard: log(0) yields -inf if an action has zero probability.
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1))
        # NOTE(review): entropyloss is computed but never added to the loss.
        entropyloss = -nd.sum(nd.sum(data=probs * nd.log(probs), axis=1), axis=0)
        actorloss = -nd.sum(logprob * advantage_batch, axis=0)
        criticloss = nd.sum(nd.square(values - V_trace_batch), axis=0)
        # Overwrites the accumulated auxiliary loss — confirm intended.
        loss = actorloss + criticloss
    loss.backward()
    grads_list = []
    for name, value in self.actorcritic.collect_params().items():
        # Batchnorm parameters are excluded from the exported gradients.
        if name.find('batchnorm') < 0:
            grads_list.append(value.grad())
    return grads_list, batch_size
def _forward_alg(self, feats, lens_):
    """CRF forward algorithm (batched): log partition function per sequence.

    Args:
        feats: emission scores, shape (batch, seq_len, tagset_size).
        lens_: per-sequence lengths used to pick each sequence's final step.

    Returns:
        alpha: log-sum-exp over all tag paths, one value per batch element.
    """
    batch_size = feats.shape[0]
    tagset_size = feats.shape[2]
    length = feats.shape[1]
    # Start with every tag at -10000 except START_TAG at 0 (log-space one-hot).
    init_alphas = nd.full((self.tagset_size, ), -10000.)
    init_alphas[self.tag_dictionary.get_idx_for_item(START_TAG)] = 0.
    forward_var_list = [init_alphas.tile((feats.shape[0], 1))]
    transitions = self.transitions.data().expand_dims(0).tile(
        (feats.shape[0], 1, 1))
    for i in range(feats.shape[1]):
        emit_score = feats[:, i, :]
        # Score of arriving at each tag from each previous tag:
        # emission + transition + previous forward variable.
        tag_var = \
            emit_score.expand_dims(2).tile((1, 1, transitions.shape[2])) + \
            transitions + \
            forward_var_list[i].expand_dims(2).tile((1, 1, transitions.shape[2])).transpose([0, 2, 1])
        # Numerically stable log-sum-exp over the previous-tag axis.
        max_tag_var = nd.max(tag_var, axis=2)
        new_tag_var = tag_var - max_tag_var.expand_dims(2).tile(
            (1, 1, transitions.shape[2]))
        agg_ = nd.log(nd.sum(nd.exp(new_tag_var), axis=2))
        # NOTE(review): nd.full with an ndarray `val` — presumably broadcasts
        # max_tag_var + agg_ into the given shape; confirm this is not a
        # roundabout copy that could be `max_tag_var + agg_` directly.
        forward_var_list.append(
            nd.full((feats.shape[0], feats.shape[2]),
                    val=max_tag_var + agg_))
    # Select each sequence's forward variable at its own final timestep.
    forward_var = nd.stack(*forward_var_list)[
        lens_, nd.array(list(range(feats.shape[0])), dtype='int32'), :]
    # Add the transition into STOP_TAG and reduce over tags.
    terminal_var = forward_var + \
        self.transitions.data()[self.tag_dictionary.get_idx_for_item(STOP_TAG)].expand_dims(0).tile((
            forward_var.shape[0], 1))
    alpha = log_sum_exp_batch(terminal_var)
    return alpha
def test_vae_loss_kl_term():
    """VAE KL term against the implicit standard-normal prior, batch of 3."""
    ctx = mx.cpu()
    mu = nd.array([[1., 2.], [0., 1.], [2., 3.]], ctx=ctx)
    sd = nd.array([[1., 0.5], [0.5, 0.5], [2., 1.]], ctx=ctx)
    # q is [mean, log-sd] concatenated on dim 1.
    q = nd.concat(mu, nd.log(sd), dim=1)
    kl_term = VAELossKLTerm(latent_dim=2)
    val = kl_term(q)
    expected = np.array([2.8181471805599454, 1.1362943611198906, 7.306852819440055])
    assert val.shape == (3,)
    assert np.allclose(expected, val.asnumpy())
def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
    """Two-pass actor-critic step: actor and critic losses backpropagated separately.

    zero_grad is called once, so the two backward passes accumulate into the
    same gradient buffers. Returns (gradients excluding batchnorm, batch size).
    """
    batch_size = s_batch.shape[0]
    s_batch = copy.deepcopy(s_batch)
    a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
    V_trace_batch = copy.deepcopy(V_trace)
    advantage_batch = copy.deepcopy(advantage)
    s_batch = nd.array(s_batch, ctx=CTX)
    a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
    V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
    advantage_batch = nd.array(advantage_batch, ctx=CTX)
    self.actorcritic.collect_params().zero_grad()
    # Pass 1: actor loss only.
    with mx.autograd.record():
        loss_vec = []
        probs, _ = self.actorcritic(s_batch, loss_vec)
        # No epsilon guard: log(0) yields -inf for zero-probability actions.
        logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1))
        actorloss = -nd.sum(logprob*advantage_batch, axis=0)
    actorloss.backward()
    # Pass 2: critic loss only (forward pass is repeated).
    with mx.autograd.record():
        loss_vec = []
        _, values = self.actorcritic(s_batch, loss_vec)
        criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
    criticloss.backward()
    grads_list = []
    for name, value in self.actorcritic.collect_params().items():
        # Batchnorm parameters are excluded from the exported gradients.
        if name.find('batchnorm') < 0:
            grads_list.append(value.grad())
    return grads_list, batch_size
def check_status(self, input, epoch):
    """Print RBM training diagnostics for the current epoch to stdout.

    Runs one Gibbs half-step from the data, then reports reconstruction error,
    cross-entropy, free energy and (when available) the exact and
    renormalization-group KL divergences. Returns None.
    """
    n_sample = input.shape[0]
    # One positive phase + one Gibbs step to obtain a reconstruction.
    ph_prob, ph_sample = self.sample_h_given_v(input)
    nv_prob, nv_sample, nh_prob, nh_sample = self.gibbs_hvh(ph_sample)
    # Mean squared reconstruction error per sample.
    error = nd.sum((input - nv_sample)**2) / n_sample
    # use logsoftmax if nan
    cross = -nd.mean(nd.sum(input * nd.log(nv_prob), axis=1))
    freeE = self.get_free_energy(input)
    sys.stdout.write("Training: ")
    sys.stdout.write("epoch= %d " % epoch)
    sys.stdout.write("cross= %f " % cross.asnumpy()[0])
    sys.stdout.write("error= %f " % error.asnumpy()[0])
    sys.stdout.write("freeE= %f " % freeE.asnumpy()[0])
    # Exact KL is only computable when the visible states are enumerated.
    if self.enum_states is not None:
        sys.stdout.write("KL= %f " % self.check_KL())
    if self.prob_RGs is not None:
        sys.stdout.write("rgKL= %f " % self.check_rgKL(nv_sample))
    sys.stdout.write("\n")
    return
def cross_entropy(y_, y):
    """Per-sample negative log-likelihood of the target indices `y` under `y_`."""
    log_probs = nd.log(y_)
    return -nd.pick(log_probs, y)
def cross_entropy(yhat, y):
    """Cross-entropy per sample: -log of the predicted probability at index y."""
    picked = nd.pick(nd.log(yhat), y)
    return -picked
def cross_entropy(yhat, y):
    """Negative log of the predicted probability at each target index `y`."""
    nll = nd.pick(nd.log(yhat), y)
    return -nll
def logsigmoid(val):
    """Numerically stable log(sigmoid(val)) via the max-shift (log-sum-exp) trick."""
    shift = nd.maximum(0., -val)
    # exp(-shift) + exp(-val-shift) = e^shift * (1 + e^-val), shifted to avoid overflow.
    denom = nd.exp(-shift) + nd.exp(-val - shift)
    return -(shift + nd.log(denom))
def get_free_energy(self, v):
    """Mean RBM free energy over the batch: F(v) = -v.vb - sum_j softplus((v.W + hb)_j).

    Fix: the naive softplus log(1 + exp(x)) overflows to inf for large positive
    activations; max(x, 0) + log1p(exp(-|x|)) is algebraically identical and
    stays finite for all x (this addresses the original's "how to prevent
    scale" concern).
    """
    x = nd.dot(v, self.W) + self.hb
    vt = nd.dot(v, self.vb)
    # Stable softplus, summed over hidden units.
    softplus = nd.maximum(x, 0) + nd.log1p(nd.exp(-nd.abs(x)))
    ht = nd.sum(softplus, axis=1)
    fe = -ht - vt  # free energy per sample
    return nd.mean(fe)
def cross_entropy(yhat, y):
    """Cross-entropy of soft targets `y` against predictions `yhat`.

    Sums over every axis except the first (batch) axis via exclude=True.
    """
    elementwise = y * nd.log(yhat)
    return -nd.sum(elementwise, axis=0, exclude=True)
def getRmseLog(net, x_train, y_train):
    """RMSE in log space; predictions are clipped to [1, inf) before the log."""
    clipped = nd.clip(net(x_train), 1, float('inf'))
    sse = nd.sum(square_loss(nd.log(clipped), nd.log(y_train))).asscalar()
    return np.sqrt(2 * sse / x_train.shape[0])
def log_sum_exp(vec):
    """Numerically stable log(sum(exp(vec))) using the max-shift trick."""
    shift = nd.max(vec).asscalar()
    shifted_sum = nd.sum(nd.exp(vec - shift))
    return nd.log(shifted_sum) + shift
def cross_entropy(yhat, y):
    """Cross-entropy where `y` holds a single target index per sample.

    Returns -log(yhat[y]) for each sample (nd.pick selects the entry keyed by y).
    """
    log_yhat = nd.log(yhat)
    return -nd.pick(log_yhat, y)
def get_rmse_log(net, X_train, y_train):
    """Gets root mse between the logarithms of the prediction and the truth."""
    # Clip predictions into [1, inf) so the log is defined.
    safe_preds = nd.clip(net(X_train), 1, float('inf'))
    log_errors = square_loss(nd.log(safe_preds), nd.log(y_train))
    return np.sqrt(2 * nd.sum(log_errors).asscalar() / X_train.shape[0])
def softplus(x):
    """Numerically stable softplus: log(1 + exp(x)).

    Fix: the naive form overflows to inf for large positive x (exp(x) exceeds
    the float range around x ~ 88 in float32). max(x, 0) + log1p(exp(-|x|)) is
    algebraically identical and finite for all x.
    """
    return nd.maximum(x, 0.) + nd.log1p(nd.exp(-nd.abs(x)))
def log_gaussian(x, mu, sigma):
    """Sum of element-wise log-densities of N(x | mu, sigma^2)."""
    norm_const = -0.5 * np.log(2.0 * np.pi)
    quad = (x - mu) ** 2 / (2 * sigma ** 2)
    return nd.sum(norm_const - nd.log(sigma) - quad)
def cross_entropy(yhat, y):
    """Negative log-likelihood: `yhat` are predicted probabilities, `y` true labels."""
    # Select each sample's probability at its true label, then negate the log.
    chosen = nd.pick(nd.log(yhat), y)
    return -chosen