def proto_loss(embedding, nc, ns, nq):
    # Prototypical-network loss: the first nc*ns rows of `embedding` are
    # support samples (ns per class), the remaining nc*nq rows are queries.
    embedding = embedding.astype('float64')
    cls_data = nd.reshape(embedding[0:nc * ns], (nc, ns, -1))
    cls_data.attach_grad()
    # class prototypes: mean of each class's support embeddings
    cls_center = nd.mean(cls_data, axis=1)
    # squared Euclidean distance from every query to every prototype
    data_center_dis = nd.norm(embedding[nc * ns:].expand_dims(axis=1)
                              - cls_center.expand_dims(axis=0),
                              axis=2) ** 2
    # print(nd.max(data_center_dis).asscalar())

    weight = nd.zeros((nc * nq, nc), ctx=embedding.context, dtype='float64')
    pick_vec = nd.zeros((nc * nq), ctx=embedding.context)
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1
        pick_vec[i * nq:i * nq + nq] = i
    """
    temp = nd.SoftmaxOutput(-data_center_dis, label)
    temp = nd.log(temp) * weight
    temp = nd.sum(-temp, axis=1)
    predict = nd.argmin(data_center_dis, axis=1)
    return -temp * nd.log(temp), predict
    """
    # cross entropy over negative distances: pick each query's true class
    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.pick(temp1, index=pick_vec, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label

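# A hedged usage sketch for proto_loss above; the episode sizes and the
# random embedding are assumptions of mine, not from the source. The rows
# must be ordered as [all support samples grouped by class, then queries].
from mxnet import nd

nc, ns, nq, dim = 5, 3, 2, 16
embedding = nd.random.normal(shape=(nc * ns + nc * nq, dim))
loss, label = proto_loss(embedding, nc, ns, nq)
print(loss.asscalar(), label.shape)      # mean query loss, (nc*nq,)
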
def fit_f(net, batch_data, loss_function, *args, **kwargs):
    state, action, reward = batch_data
    loss = []
    # select the predicted value of the action actually taken in each sample
    pred = nd.pick(net(state), action)
    for _, loss_f in loss_function.items():
        loss.append(loss_f(pred, reward))
    return sum(loss)

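# A toy illustration of the nd.pick step in fit_f; the Q-values below are
# made-up stand-ins for net(state). pick selects, per row, the entry at
# the index given by `action`.
from mxnet import nd

q_values = nd.array([[0.2, 0.9, 0.4],
                     [0.6, 0.1, 0.3]])
action = nd.array([1, 0])
print(nd.pick(q_values, action))         # [0.9, 0.6]
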
def goodness_of_function_loss_function(self):
    # exponentiate so that all values are > 0
    self.__batch_y_hat_exp = nd.exp(self.__batch_y_hat)
    # compute the partition term used to normalize into probabilities
    self.__batch_y_hat_partition = self.__batch_y_hat_exp.sum(
        axis=1, keepdims=True)
    self.__batch_y_hat_exp_divided_partition = \
        self.__batch_y_hat_exp / self.__batch_y_hat_partition
    return -nd.log(
        nd.pick(self.__batch_y_hat_exp_divided_partition, self.__batch_y))

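# An illustrative check (names below are mine): the exp / partition / pick
# pipeline above equals picking from nd.log_softmax directly, which is the
# numerically safer formulation.
from mxnet import nd

scores = nd.array([[2.0, 1.0, 0.1]])
labels = nd.array([0])
probs = nd.exp(scores) / nd.exp(scores).sum(axis=1, keepdims=True)
manual = -nd.log(nd.pick(probs, labels))
stable = -nd.pick(nd.log_softmax(scores), labels)
print(manual, stable)                    # both ≈ [0.417]
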
def hybrid_forward(self, F, pred, label, valid_length):
    # shift by one so that position t predicts token t + 1
    pred = pred[:, :-1, :]
    label = label[:, 1:]
    valid_length = valid_length - 1
    if not self._from_logits:
        pred = F.log_softmax(pred, self._axis)
    # F.squeeze keeps the block hybridizable
    loss = F.squeeze(
        -F.pick(pred, label, axis=self._axis, keepdims=True), axis=2)
    # zero out positions beyond each sequence's valid length
    loss = F.SequenceMask(loss.swapaxes(0, 1),
                          sequence_length=valid_length,
                          use_sequence_length=True).swapaxes(0, 1)
    return F.mean(loss, axis=self._batch_axis, exclude=True)

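# A small demo of the SequenceMask step (the toy loss and lengths are
# mine): positions past each sample's valid length are zeroed, with the
# data in time-major layout during the call.
from mxnet import nd

loss = nd.ones((2, 4))                   # (batch, time) per-token losses
valid_length = nd.array([2, 4])
masked = nd.SequenceMask(loss.swapaxes(0, 1),
                         sequence_length=valid_length,
                         use_sequence_length=True).swapaxes(0, 1)
print(masked)                            # row 0: [1, 1, 0, 0]; row 1: all 1s
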
def hybrid_forward(self, F, pred, label, sample_weight=None):
    if not self._from_logits:
        pred = F.log_softmax(pred, axis=self._axis)
    if self._sparse_label:
        if self._size_average:
            valid_label_map = (label != self._ignore_label).astype('float32')
            loss = -(F.pick(pred, label, axis=self._axis, keepdims=True)
                     * valid_label_map)
        else:
            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
            loss = F.where(
                label.expand_dims(axis=self._axis) == self._ignore_label,
                F.zeros_like(loss), loss)
    else:
        label = _reshape_like(F, label, pred)
        loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
    loss = _apply_weighting(F, loss, self._weight, sample_weight)
    if self._size_average:
        # note: valid_label_map is only defined in the sparse-label branch,
        # so size averaging assumes sparse labels
        return F.mean(loss, axis=self._batch_axis, exclude=True) * \
            valid_label_map.size / F.sum(valid_label_map)
    else:
        return F.mean(loss, axis=self._batch_axis, exclude=True)

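# A standalone sketch of the ignore-label mechanics above (shapes and
# values are mine): picked log-probabilities are zeroed wherever the label
# equals the ignore index, so those positions contribute no loss.
from mxnet import nd

pred = nd.log_softmax(nd.random.normal(shape=(1, 3, 4)), axis=1)  # (N, C, W)
label = nd.array([[0, 2, -1, 1]])                                 # -1 = ignore
loss = -nd.pick(pred, label, axis=1, keepdims=True)
loss = nd.where(label.expand_dims(axis=1) == -1, nd.zeros_like(loss), loss)
print(loss)                              # third position is exactly 0
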
def evaluate(test_net, test_data, args):
    exe_num = len(test_net._context)
    curr_states = test_net.get_states(merge_multi_context=False)
    # Set the state to zero when a new epoch begins
    for state_id in range(len(curr_states)):
        for exe_id in range(exe_num):
            curr_states[state_id][exe_id][:] = 0
    test_net.set_states(curr_states)
    total_nll = 0.0
    for i, start in enumerate(range(0, test_data.shape[0] - 1, args.bptt)):
        start = i * args.bptt
        data_batch_npy = np.take(test_data,
                                 np.arange(start, start + args.bptt),
                                 axis=0, mode="clip")
        target_batch_npy = np.take(test_data,
                                   np.arange(start + 1, start + 1 + args.bptt),
                                   axis=0, mode="clip")
        if start + args.bptt > test_data.shape[0]:
            valid_seq_len = test_data.shape[0] - start
        else:
            valid_seq_len = args.bptt
        test_net.forward(data_batch=mx.io.DataBatch(data=[mx.nd.array(data_batch_npy)]),
                         is_train=False)
        outputs = test_net.get_outputs(merge_multi_context=False)
        local_nll = 0.0
        for exe_id in range(exe_num):
            logits = outputs[0][exe_id]
            # per-token negative log-likelihood of the target words
            nll = -nd.pick(nd.log_softmax(logits),
                           nd.array(target_batch_npy, ctx=logits.context),
                           axis=-1).asnumpy()
            local_nll += nll[:valid_seq_len, :].mean() * valid_seq_len
        total_nll += local_nll / exe_num
        # carry the RNN states over to the next BPTT segment
        for out_id in range(1, len(outputs)):
            for exe_id in range(exe_num):
                curr_states[out_id - 1][exe_id] = outputs[out_id][exe_id]
        test_net.set_states(states=curr_states)
    avg_nll = total_nll / test_data.shape[0]
    return avg_nll

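# The np.take(..., mode="clip") calls above let the final BPTT chunk run
# past the end of the data without an index error; out-of-range indices
# just repeat the last element (toy data is mine), and the overrun part
# is discarded via valid_seq_len.
import numpy as np

data = np.arange(5)
print(np.take(data, np.arange(3, 7), mode="clip"))   # [3 4 4 4]
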
def hybrid_forward(self, F, x, y, ignore_label=-1.):
    output = F.softmax(x)
    # pt: predicted probability of the true class
    pt = F.pick(output, y, axis=self._axis)
    mask = y != ignore_label
    loss = -self._alpha * ((1 - pt) ** self._gamma) * F.log(pt) * mask
    return F.sum(loss) / F.sum(mask)

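# A numeric illustration of the focal term (the alpha/gamma values are
# mine): (1 - pt) ** gamma shrinks the loss of well-classified examples,
# so the confident pt = 0.9 case contributes far less than pt = 0.3.
from mxnet import nd

pt = nd.array([0.9, 0.3])
alpha, gamma = 0.25, 2.0
print(-alpha * ((1 - pt) ** gamma) * nd.log(pt))     # ≈ [0.00026, 0.1475]
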
def loss(self, y_hat, y):
    return -nd.pick(y_hat, y).log()

def cross_entropy(self, label):
    return -nd.pick(nd.log(self.pro_act), label)

def cross_entropy(yhat, y):
    return -nd.pick(nd.log(yhat), y)

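# A minimal check of the pick-based cross entropy above (the batch is
# mine): pick grabs the probability each row assigns to its true class,
# and the loss is -log of that probability.
from mxnet import nd

yhat = nd.array([[0.1, 0.7, 0.2],
                 [0.8, 0.1, 0.1]])
y = nd.array([1, 0])
print(cross_entropy(yhat, y))            # [-log(0.7), -log(0.8)] ≈ [0.357, 0.223]
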
import mxnet.ndarray as nd

# nd.pick needs an index argument as well as the data; with 2-D data it
# picks data[i, index[i]] along the last axis.
arr = [i for i in range(100)]
data = nd.array(arr).reshape((10, 10))
index = nd.arange(10)            # pick the diagonal: element i of row i
print(nd.pick(data, index))

def cross_entropy(yhat, y):
    ret = -nd.pick(nd.log(yhat), y)
    # print("loss inf 1: " + str(yhat))
    # print("loss inf 2: " + str(y))
    return ret

def cross_entropy(yhat, y):
    return -nd.log(nd.pick(yhat, y, axis=1, keepdims=True))

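# Unlike the earlier variants, keepdims=True above returns shape (n, 1)
# rather than (n,); a quick shape check (inputs are mine):
from mxnet import nd

yhat = nd.array([[0.2, 0.8], [0.5, 0.5]])
y = nd.array([1, 0])
print(nd.pick(yhat, y, axis=1, keepdims=True).shape)  # (2, 1)
print(nd.pick(yhat, y, axis=1).shape)                 # (2,)
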
def focal_loss(yhat, y, alpha, beta=2.0):
    # per-class weights, broadcast across the batch
    alpha = alpha.reshape((1, -1))
    alpha_matrix = alpha.broadcast_to(shape=yhat.shape)
    # Pt: predicted probability of the true class
    Pt = nd.pick(yhat, y, axis=1, keepdims=True)
    return (-nd.pick(alpha_matrix, y, axis=1, keepdims=True)
            * ((1.0 - Pt) ** beta) * nd.log(Pt))

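# A hedged usage example for focal_loss (the alpha weights and batch are
# mine): alpha holds one weight per class, so each sample is scaled by
# the weight of its true class.
from mxnet import nd

yhat = nd.array([[0.7, 0.2, 0.1],
                 [0.1, 0.1, 0.8]])
y = nd.array([0, 2])
alpha = nd.array([0.25, 0.5, 0.25])
print(focal_loss(yhat, y, alpha))        # shape (2, 1), one loss per sample
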
def cross_entropy(yhat, y):
    # nd.pick(input, index)[i] = input[i, index[i]]
    return -nd.pick(nd.log(yhat), y)

def cross_entropy(yhat, y):
    # yhat is the prediction, y is the ground-truth label
    # note the negative sign: cross entropy is the negative log-probability
    return -nd.pick(nd.log(yhat), y)

def Plus_cross_entroy(yhat, y):
    return -nd.pick(nd.log(yhat), y)

def cross_entropy(yhat, y):
    # cross entropy: since yvec is one-hot (a single 1), pick returns
    # the log value at the index given by y
    return -nd.pick(nd.log(yhat), y)

def cross_entropy(y_hat, y):
    return -nd.pick(y_hat.log(), y)

def cross_entropy(y_, y):
    return -nd.pick(nd.log(y_), y)

def cross_entropy(yhat, y):
    # pick selects nd.log(yhat) at index y; e.g. with one-hot label
    # [0, 0, 0, 1, 0, 0] it picks the log value at index 3
    return -nd.pick(nd.log(yhat), y)