Example #1
    def _forward_alg(self, feats):
        # Do the forward algorithm to compute the partition function
        alphas = [[0.] * self.tagset_size]
        alphas[0][self.tag2idx[START_TAG]] = 1
        alphas = nd.array(alphas)

        # Iterate through the sentence
        for feat in feats:
            alphas_t = []  # The forward variables at this timestep
            for next_tag in range(self.tagset_size):
                # broadcast the emission score: it is the same regardless of
                # the previous tag
                emit_score = feat[next_tag].reshape((1, -1))
                # the ith entry of trans_score is the score of transitioning to
                # next_tag from i
                trans_score = self.transitions.data()[next_tag].reshape(
                    (1, -1))
                # The ith entry of next_tag_var is the (probability-space)
                # value for the edge (i -> next_tag)
                next_tag_var = alphas * nd.exp(trans_score + emit_score)
                # The forward variable for this tag is the sum over all
                # incoming edges (nsum is a helper defined in the original file)
                alphas_t.append(nsum(next_tag_var))
            alphas = nd.concat(*alphas_t, dim=0).reshape((1, -1))
        terminal_var = alphas * nd.exp(
            self.transitions.data()[self.tag2idx[STOP_TAG]])
        alpha = log_sum(terminal_var)
        return alpha
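`nsum` and `log_sum` are helpers defined elsewhere in the original file. Reading this snippet in probability space (alphas starts at 1 for START_TAG and is multiplied by exponentiated scores), hypothetical stand-ins consistent with the code above could look like:

from mxnet import nd

def nsum(vec):
    # sum over all incoming edges for one tag (probability space)
    return nd.sum(vec).reshape((1,))

def log_sum(vec):
    # convert the accumulated probability-space score back to log space
    return nd.log(nd.sum(vec))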
Example #2
def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precision):
    student_mean = student_outputs[0]
    student_var = student_outputs[1]
    grad_mean = nd.exp(-student_var) * (student_mean - teacher_pred)

    grad_var = (1 - nd.exp(-student_var) * (nd.square(student_mean - teacher_pred)
                                            + 1.0 / teacher_noise_precision)) / 2
    return [grad_mean, grad_var]
Example #3
 def check_unary_func(x):
     f_exp         = lambda x: nd.exp(x)
     f_exp_grad    = lambda x: [nd.exp(x)]
     autograd_assert(x, func=f_exp, grad_func=f_exp_grad)
     f_half        = lambda x: x/2
     f_half_grad   = lambda x: [nd.ones(x.shape) * 0.5]
     autograd_assert(x, func=f_half, grad_func=f_half_grad)
     f_square      = lambda x: x**2
     f_square_grad = lambda x: [2*x]
     autograd_assert(x, func=f_square, grad_func=f_square_grad)
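`autograd_assert` is a test helper from the original file that compares `grad_func` against automatic differentiation. A minimal standalone version of the exp check, written directly against `mxnet.autograd` (a sketch, not the original helper):

from mxnet import nd, autograd

x = nd.uniform(shape=(4, 5))
x.attach_grad()
with autograd.record():
    y = nd.exp(x)
y.backward()                                  # head gradient defaults to ones
print(nd.max(nd.abs(x.grad - nd.exp(x))))     # expect ~0, since d/dx exp(x) = exp(x)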
Example #4
 def check_unary_func(x):
     f_exp = lambda x: nd.exp(x)
     f_exp_grad = lambda x: [nd.exp(x)]
     autograd_assert(x, func=f_exp, grad_func=f_exp_grad)
     f_half = lambda x: x / 2
     f_half_grad = lambda x: [nd.ones(x.shape) * 0.5]
     autograd_assert(x, func=f_half, grad_func=f_half_grad)
     f_square = lambda x: x**2
     f_square_grad = lambda x: [2 * x]
     autograd_assert(x, func=f_square, grad_func=f_square_grad)
Example #5
def regression_student_grad(student_outputs, teacher_pred,
                            teacher_noise_precision):
    student_mean = student_outputs[0]
    student_var = student_outputs[1]
    grad_mean = nd.exp(-student_var) * (student_mean - teacher_pred)

    grad_var = (1 - nd.exp(-student_var) *
                (nd.square(student_mean - teacher_pred) +
                 1.0 / teacher_noise_precision)) / 2
    return [grad_mean, grad_var]
Example #6
def test_unary_func():
    x = nd.uniform(shape=(4, 5))
    f_exp         = lambda x: nd.exp(x)
    f_exp_grad    = lambda x: [nd.exp(x)]
    autograd_assert(x, func=f_exp, grad_func=f_exp_grad)
    f_half        = lambda x: x/2
    f_half_grad   = lambda x: [nd.ones(x.shape) * 0.5]
    autograd_assert(x, func=f_half, grad_func=f_half_grad)
    f_square      = lambda x: x**2
    f_square_grad = lambda x: [2*x]
    autograd_assert(x, func=f_square, grad_func=f_square_grad)
Example #7
def test_unary_func():
    x = nd.uniform(shape=(4, 5))
    f_exp         = lambda x: nd.exp(x)
    f_exp_grad    = lambda x: [nd.exp(x)]
    autograd_assert(x, func=f_exp, grad_func=f_exp_grad)
    f_half        = lambda x: x/2
    f_half_grad   = lambda x: [nd.ones(x.shape) * 0.5]
    autograd_assert(x, func=f_half, grad_func=f_half_grad)
    f_square      = lambda x: x**2
    f_square_grad = lambda x: [2*x]
    autograd_assert(x, func=f_square, grad_func=f_square_grad)
Example #8
def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precision):
    student_mean = student_outputs[0]
    student_var = student_outputs[1]
    grad_mean = nd.exp(-student_var) * (student_mean - teacher_pred)

    grad_var = (1 - nd.exp(-student_var) * (nd.square(student_mean - teacher_pred)
                                            + 1.0 / teacher_noise_precision)) / 2
    # print student_mean
    # print teacher_pred
    # print grad_mean.asnumpy(), grad_var.asnumpy()
    # ch = raw_input()
    return [grad_mean, grad_var]
Example #9
    def inference_g(self, observed_arr):
        '''
        Inference with generator.

        Args:
            observed_arr:       `mxnet.ndarray` of observed data points.
        
        Returns:
            Tuple data.
            - re-parametric data.
            - encoded data points.
            - re-encoded data points.
        '''
        encoded_arr = self.model.encoder(observed_arr)
        decoded_arr = self.model.decoder(encoded_arr)
        re_encoded_arr = self.re_encoder_model(decoded_arr)

        anomaly_arr = nd.square(encoded_arr - re_encoded_arr)
        anomaly_arr = nd.expand_dims(nd.exp(anomaly_arr.mean(axis=1)), axis=1)
        mean_arr = nd.expand_dims(decoded_arr.mean(axis=1), axis=1)
        gauss_arr = nd.random.normal_like(data=observed_arr, loc=0, scale=3.0)

        re_param_arr = mean_arr + (gauss_arr * anomaly_arr)

        kl_arr = -0.5 * (1 + nd.log(anomaly_arr) - mean_arr + anomaly_arr)
        re_param_arr = re_param_arr + kl_arr

        return re_param_arr, encoded_arr, re_encoded_arr
Example #10
def my_loss(data, nc, ns, nq):
    data = data.astype('float64')
    cls_data = nd.reshape(data[0:nc * ns], (nc, ns, -1))
    cls_center = nd.mean(cls_data, axis=1) + 1e-10
    data_center_dis = nd.norm(data[nc * ns:].expand_dims(axis=1) -
                              cls_center.expand_dims(axis=0),
                              axis=2)**2

    weight = nd.zeros((nc * nq, nc), ctx=data.context, dtype='float64')
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1
    weight2 = 1 - weight

    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.sum(temp1, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label

    # NOTE: the code below this return is unreachable; it is an earlier
    # variant of the loss kept in the original source.
    loss1 = nd.sum(data_center_dis * weight)

    temp = nd.sum(nd.exp(-data_center_dis), axis=1)
    loss2 = nd.sum(nd.log(temp))

    if loss1 is np.nan or loss2 is np.nan:
        raise StopIteration

    return (loss1 + loss2) / (nc * nq), label
Example #11
    def forward(self, signal: nd.NDArray, teacher_forcing_prob: float,
                latent_space_override: nd.NDArray = None):
        """

        Args:
            signal: Sin signal (m, signal_length), m - num of signals (batch_size)
            teacher_forcing_prob: The probability of activating the teacher forcing
            latent_space_override: The override value for the latent space.

        Returns:
            SinBAEOutput(ls_mean, ls_log_var, ls_val, reconstructed_sig).
        """
        sig_embedding = self.encoder(signal)  # (m,s), s - dim of the encoder embedding

        # Posterior of the latent space
        # Gaussian variance must be positive, therefore using log variance parametrization
        ls_mean, ls_log_var = self.latent_space(sig_embedding).split(axis=1, num_outputs=2)
        ls_std = nd.exp(ls_log_var * 0.5)  # nd.exp is elementwise; it takes no axis argument

        # Sampling from the unit gaussian instead of sampling from the latent space posterior
        # allow for gradient flow via latent_space_mean / latent_space_log_var parameters
        # z = (x-mu)/std, thus: x = mu + z*std
        normal_sample = nd.random_normal(0, 1, shape=ls_mean.shape)
        ls_val = ls_mean + ls_std * normal_sample

        if isinstance(latent_space_override, nd.NDArray):
            ls_val = latent_space_override

        length = signal.shape[1]
        reconstructed_sig = self.decoder(ls_val, length, signal, teacher_forcing_prob)  # (m,length)

        return SinBAEOutput(ls_mean, ls_log_var, ls_val, reconstructed_sig)
Example #12
 def reparametrize(self, mu, logvar):
     '''
     mu is a number and logvar is a ndarray
     '''
     std = nd.exp(0.5 * logvar)
     eps = nd.random_normal(loc=0, scale=1,
                            shape=std.shape).as_in_context(ctx)
     return mu + eps * std
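A standalone sketch of the same reparametrization trick (hypothetical shapes; in a VAE, mu and logvar would come from the encoder). Because the randomness enters only through eps, the sample stays differentiable with respect to mu and logvar:

from mxnet import nd

mu = nd.zeros((2, 3))
logvar = nd.zeros((2, 3))                      # std = exp(0.5 * logvar) = 1
eps = nd.random.normal(loc=0, scale=1, shape=mu.shape)
z = mu + eps * nd.exp(0.5 * logvar)            # one reparametrized sample per row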
Example #13
def softmax(X):
    # X.shape = (256, 10)
    exp = nd.exp(X)
    # exp is a matrix; sum over each row while keeping axis 1,
    # i.e. return a matrix of shape (nrows, 1)
    # partition.shape = (256, 1)
    partition = exp.sum(axis=1, keepdims=True)
    # a[i,j] = exp[i,j] / partition[i,1]
    a = exp / partition
    return a
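A minimal usage sketch (hypothetical shapes, not from the original file): after this normalization every row should be a probability distribution, i.e. non-negative entries summing to 1.

from mxnet import nd

X = nd.random.normal(shape=(4, 10))
probs = softmax(X)
print(probs.sum(axis=1))   # expect a vector of (approximately) ones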
Example #14
    def goodness_of_function_loss_function(self):
        # Take the exponential so that every value is > 0
        self.__batch_y_hat_exp = nd.exp(self.__batch_y_hat)
        # Compute the partition term used to normalize into probabilities
        self.__batch_y_hat_partition = self.__batch_y_hat_exp.sum(
            axis=1, keepdims=True)
        self.__batch_y_hat_exp_divided_partition = self.__batch_y_hat_exp / self.__batch_y_hat_partition

        return -nd.log(
            nd.pick(self.__batch_y_hat_exp_divided_partition, self.__batch_y))
Example #15
def test1():
    x = nd.zeros((3, 4))
    print(x)
    print(nd.ones((4, 4)))
    print(nd.array([[1, 2, 3], [4, 5, 6]]))
    tmp1 = nd.random_normal(0, 1, shape=(3, 4))
    print(tmp1)
    print(tmp1.shape)
    print(tmp1.size)

    print(x + tmp1)

    print(nd.exp(tmp1))
Example #16
def refine_anchor_generator(arm_anchor_boxes,arm_loc_preds):
    '''
        function: decode the ARM location predictions against the prior anchor boxes to produce refined anchors
        input:
            arm_anchor_boxes: shape (1,h*w*num_anchors[:layers],4)
            arm_loc_preds: shape (batch,h*w*num_loc_pred[:layers])
    '''
    batch_size = arm_loc_preds.shape[0]
    arm_anchor_boxes = nd.concat(*[arm_anchor_boxes]*batch_size,dim=0) #(batch,h*w*num_anchors[:layers],4)
    arm_anchor_boxes_bs = nd.split(data=arm_anchor_boxes,axis=2,num_outputs=4)#(batch,all_anchors,1)*4
    
    al = arm_anchor_boxes_bs[0] # top-left x
    at = arm_anchor_boxes_bs[1] # top-left y
    ar = arm_anchor_boxes_bs[2] # bottom-right x
    ab = arm_anchor_boxes_bs[3] # bottom-right y
    aw = ar - al
    ah = ab - at
    ax = (al+ar)/2.0
    ay = (at+ab)/2.0
    arm_loc_preds = nd.reshape(data=arm_loc_preds,shape=(0,-1,4)) #(batch,h*w*num_anchors[:layers],4)
    arm_loc_preds_bs = nd.split(data=arm_loc_preds,axis=2,num_outputs=4)
    ox_preds = arm_loc_preds_bs[0]
    oy_preds = arm_loc_preds_bs[1]
    ow_preds = arm_loc_preds_bs[2]
    oh_preds = arm_loc_preds_bs[3]
    ## TODO: RCNN Paper object   
    ox = ox_preds * aw * 0.1 + ax
    oy = oy_preds * ah * 0.1 + ay
    ow = nd.exp(ow_preds * 0.2) * aw 
    oh = nd.exp(oh_preds * 0.2) * ah 

    out0 = ox - ow / 2.0
    out1 = oy - oh / 2.0
    out2 = ox + ow / 2.0
    out3 = oy + oh / 2.0

    refine_anchor = nd.concat(out0,out1,out2,out3,dim=2)
    # refine_anchor = nd.split(data=refine_anchor,axis=0,num_outputs=batch_size)
    return refine_anchor # (batch,h*w*num_anchors[:layers],4)
Example #17
File: yolo.py Project: z01nl1o02/toy-yolo
    def cvt_output_for_predict(self, pred):  # how to interpret the net output according to format_groundtruth()
        predCls,predObj, XYWH = self.format_net_output(pred)
        batchSize,height,width,boxNum,_= XYWH.shape
        X,Y,W,H = XYWH.split(num_outputs=4, axis=-1)
        #pdb.set_trace()
        DY = nd.tile(nd.arange(0,height,repeat=width*boxNum, ctx=XYWH.context).reshape((1,height,width,boxNum,1)), (batchSize,1,1,1,1) )
        DX = nd.tile(nd.arange(0,width,repeat=boxNum,ctx=XYWH.context).reshape((1,1,width,boxNum,1)),(batchSize,height,1,1,1))
        X = (X + DX) / width
        Y = (Y + DY) / height
        #pdb.set_trace()
        W = nd.exp(W) - 1
        H = nd.exp(H) - 1

        
        W = nd.clip(W,0,1)
        H = nd.clip(H,0,1)
        X = nd.clip(X,0,1)
        Y = nd.clip(Y,0,1)
        left = X
        top = Y
        right = nd.clip(left + W,0,1)
        bottom = nd.clip(top + H, 0, 1)
        corners = nd.concat(left,top,right,bottom,dim=-1) #nms requiring corner format
        return predCls, predObj, corners
Example #18
    def _forward_alg(self, feats, lens_):

        batch_size = feats.shape[0]
        tagset_size = feats.shape[2]
        length = feats.shape[1]

        init_alphas = nd.full((self.tagset_size, ), -10000.)
        init_alphas[self.tag_dictionary.get_idx_for_item(START_TAG)] = 0.

        forward_var_list = [init_alphas.tile((feats.shape[0], 1))]
        transitions = self.transitions.data().expand_dims(0).tile(
            (feats.shape[0], 1, 1))

        for i in range(feats.shape[1]):
            emit_score = feats[:, i, :]

            tag_var = \
                emit_score.expand_dims(2).tile((1, 1, transitions.shape[2])) + \
                transitions + \
                forward_var_list[i].expand_dims(2).tile((1, 1, transitions.shape[2])).transpose([0, 2, 1])

            max_tag_var = nd.max(tag_var, axis=2)

            new_tag_var = tag_var - max_tag_var.expand_dims(2).tile(
                (1, 1, transitions.shape[2]))

            agg_ = nd.log(nd.sum(nd.exp(new_tag_var), axis=2))

            forward_var_list.append(max_tag_var + agg_)

            # cloned = forward_var.clone()
            # forward_var[:, i + 1, :] = max_tag_var + agg_

            # forward_var = cloned

        forward_var = nd.stack(*forward_var_list)[
            lens_,
            nd.array(list(range(feats.shape[0])), dtype='int32'), :]

        terminal_var = forward_var + \
                       self.transitions.data()[self.tag_dictionary.get_idx_for_item(STOP_TAG)].expand_dims(0).tile((
                           forward_var.shape[0], 1))

        alpha = log_sum_exp_batch(terminal_var)

        return alpha
Example #19
    def forward(self, is_train=False):
        """Run forward on the current executor."""
        #self.curr_execgrp.forward(is_train=is_train)

        self.get_each_gpu_label()

        # l2-norm forward
        self.weight_norm = nd.L2Normalization(self.weight, mode='instance')

        # fc forward
        no_bias = True
        if no_bias:
            nd.FullyConnected(data=self.data_batch,
                              weight=self.weight_norm,
                              no_bias=True,
                              num_hidden=self.classes,
                              out=self.fc_output)
        else:
            nd.FullyConnected(data=self.data_batch,
                              weight=self.weight_norm,
                              bias=self.bias,
                              num_hidden=self.classes,
                              out=self.fc_output)
        # margin forward
        self.get_each_gpu_label()
        if self.data_of_cur_gpu.size > 0:
            margin_temp = self.fc_output[self.data_of_cur_gpu,
                                         self.label_of_cur_gpu]
            self.pick_fc_of_cur_gpu = margin_temp.copy()
            tem_data = self.margin_loss(self.pick_fc_of_cur_gpu)
            self.fc_output[self.data_of_cur_gpu,
                           self.label_of_cur_gpu] = tem_data[:]
        else:
            self.pick_fc_of_cur_gpu = None

        # softmax forward
        # first allreduce sum
        sum_fc = nd.sum(nd.exp(self.fc_output), axis=1)
        sum_fc = self.allreduce('global_sum_fc', sum_fc)
        assert len(sum_fc) > 0, "rank:{}, sum_fc".format(self.rank)
        self.global_sum_fc[:] = sum_fc[:]
        # second allreduce max
        max_fc = nd.max(self.fc_output, axis=1)
        max_fc = self.allreduce('global_max_fc',
                                max_fc,
                                op=perseus.PerseusOp.Max)
        assert len(max_fc) > 0, "rank:{}, max_fc".format(self.rank)
        self.global_max_fc[:] = max_fc[:]
Example #20
def loss_function(recon_x, x, mu, logvar):
    """
    recon_x: generating images
    x: origin images
    mu: latent mean
    logvar: latent log variance
    """
    BCE = reconstruction_function(recon_x, x)  # mse loss
    BCE = nd.sum(BCE)
    # KLD = 0.5 * sum(mu^2 + sigma^2 - log(sigma^2) - 1)
    KLD_element = (nd.power(mu, 2) + nd.exp(logvar)) * (-1) + 1 + logvar
    KLD = nd.sum(KLD_element) * (-0.5)
    # KLD_element = nd.exp(logvar) + nd.power(mu, 2) - logvar - 1
    # KLD = nd.sum(KLD_element) * 0.5
    # KL divergence
    return BCE + KLD
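As a quick sanity check of this closed-form KL term (a hypothetical snippet, assuming the standard normal prior): with mu = 0 and logvar = 0 the divergence should be exactly zero.

from mxnet import nd

mu = nd.zeros((2, 4))
logvar = nd.zeros((2, 4))
KLD_element = (nd.power(mu, 2) + nd.exp(logvar)) * (-1) + 1 + logvar
print(nd.sum(KLD_element) * (-0.5))   # expect 0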
Example #21
    def calc_loss(self, signal: nd.NDArray, teacher_forcing_prob: float) -> (float, float):
        """
        Compute the reconstruction loss and the KL-divergence loss for the given signal.

        Args:
            signal: Sin signal: (m, signal_length), m - num of signals (batch_size)
            teacher_forcing_prob: TODO

        Returns: L2 Loss between input and decoded signals, KLD loss

        """

        decoded_signal_output = self(signal, teacher_forcing_prob)

        latent_space_mean = decoded_signal_output.latent_space_mean
        latent_space_log_var = decoded_signal_output.latent_space_log_var

        l2_loss = self.l2loss(signal, decoded_signal_output.decoded_signal)
        negative_kld = 0.5 * nd.sum(
            1 + latent_space_log_var - latent_space_mean ** 2 - nd.exp(latent_space_log_var), axis=1)

        return l2_loss, -negative_kld
Example #22
    def rbf_kernels(self, x: NDArray, y: NDArray):
        """
        Computes exp(-c ||x - y||^2).
        ||x - y||^2 = x . x + y . y - 2 x . y
        Compute each term separately. x are the original features, y are the features used for similarity
        """

        cross_products = nd.dot(x, y)

        x_products = nd.sum(sqr(x), axis=1, keepdims=True)
        x_products = nd.broadcast_axis(x_products, axis=1, size=y.shape[1])

        y_products = nd.sum(sqr(y), axis=0, keepdims=True)
        y_products = nd.broadcast_axis(y_products, axis=0, size=x.shape[0])

        sqr_difs = x_products + y_products - 2 * cross_products
        print(nd.mean(x_products), nd.mean(y_products),
              nd.mean(cross_products))
        print(nd.mean(sqr_difs))
        res = nd.exp(-0.05 * sqr_difs)
        print(res.shape)
        return res
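The expansion in the docstring, ||x - y||^2 = x.x + y.y - 2 x.y, can be checked directly on small arrays (a hypothetical check that uses nd.square in place of the file's sqr helper; here y holds one reference point per column, matching the axis choices above):

from mxnet import nd

x = nd.random.uniform(shape=(3, 4))        # 3 samples with 4 features each
y = nd.random.uniform(shape=(4, 2))        # 2 reference points as columns
expanded = (nd.sum(nd.square(x), axis=1, keepdims=True)
            + nd.sum(nd.square(y), axis=0, keepdims=True)
            - 2 * nd.dot(x, y))
direct = nd.sum(nd.square(x.expand_dims(2) - y.expand_dims(0)), axis=1)
print(nd.max(nd.abs(expanded - direct)))   # expect a value near 0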
Example #23
File: hack_bdqn.py Project: WowCZ/strac
def softplus(x):
    return nd.log(1. + nd.exp(x))
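For large positive x, nd.exp(x) overflows and this direct form returns inf. A numerically stable variant (a sketch, not from the original project) uses the identity softplus(x) = max(x, 0) + log(1 + exp(-|x|)):

from mxnet import nd

def softplus_stable(x):
    # equivalent to log(1 + exp(x)) but never exponentiates a large positive number
    return nd.maximum(x, 0.) + nd.log(1. + nd.exp(-nd.abs(x)))

print(softplus(nd.array([100.])), softplus_stable(nd.array([100.])))   # inf vs. ~100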
Example #24
 def get_free_energy(self, v):
     x = nd.dot(v, self.W) + self.hb
     vt = nd.dot(v, self.vb)
     ht = nd.sum(nd.log(1.0 + nd.exp(x)), axis=1)
     fe = -ht - vt  #free energy, how to prevent scale
     return nd.mean(fe)
Example #25
    if i >= burn_in:
        if 0 == i%thinning_interval:
            if (i+1) % (total_iter_num/sample_num) == 0:
                sgld_sample_list.append(copy_param(teacher_exe))
            # print student_exe.grad_arrays
            # print student_params
            # print student_params_grad
            # ch = raw_input()
            X_student_batch = X_batch + numpy.random.normal(0, 0.05, X_batch.shape)
            teacher_exe.arg_dict['data'][:] = X_student_batch
            teacher_exe.forward(is_train=False)
            teacher_exe.outputs[0].wait_to_read()
            teacher_pred = teacher_exe.outputs[0]
            student_exe.arg_dict['data'][:] = X_student_batch
            student_exe.forward(is_train=True)
            print(numpy.hstack((X_batch*X_batch*X_batch, teacher_exe.outputs[0].asnumpy(),
                                student_exe.outputs[0].asnumpy(), nd.exp(student_exe.outputs[1]).asnumpy())))
            print('Student Loss:', student_loss(student_exe.outputs[0], student_exe.outputs[1],
                                                teacher_pred, teacher_noise_precision))
            student_exe.backward(student_grad(student_exe.outputs[0], student_exe.outputs[1],
                                              teacher_pred, teacher_noise_precision))
            for k in student_params:
                student_updater(k, student_params_grad[k], student_params[k])


distilled_sgld_mse, distilled_sgld_ret = \
    pred_test(testing_data=testing_data, exe=student_exe, save_path='toy-1d-distilled-sgld.txt')

sgld_mse, sgld_ret = \
    pred_test(testing_data=testing_data, exe=teacher_exe, param_list=sgld_sample_list,
              save_path='toy-1d-sgld.txt')
Example #26
def student_grad(student_mean, student_var, teacher_pred, teacher_noise_precision):
    grad_mean = nd.exp(-student_var) * (student_mean - teacher_pred)

    grad_var = (1 - nd.exp(-student_var) * (nd.square(student_mean - teacher_pred)
                                                  + 1 / teacher_noise_precision))/2
    return [grad_mean, grad_var]
Example #27
def student_loss(student_mean, student_var, teacher_pred, teacher_noise_precision):
    return (0.5 * (student_var + nd.exp(-student_var) * (nd.square(teacher_pred - student_mean)
                                                         + 1 / teacher_noise_precision))).asnumpy()[
        0]
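The student_grad expressions above (Example #26) look like the analytic derivatives of this loss; a small autograd cross-check (a sketch with hypothetical scalar inputs and a noise precision of 10) would be:

from mxnet import nd, autograd

mean, var = nd.array([0.3]), nd.array([0.2])
teacher, precision = nd.array([1.0]), 10.0
mean.attach_grad()
var.attach_grad()
with autograd.record():
    loss = 0.5 * (var + nd.exp(-var) * (nd.square(teacher - mean) + 1 / precision))
loss.backward()
print(mean.grad, nd.exp(-var) * (mean - teacher))                                      # should match
print(var.grad, (1 - nd.exp(-var) * (nd.square(mean - teacher) + 1 / precision)) / 2)  # should match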
Example #28
def log_sum_exp(vec):
    max_score = nd.max(vec).asscalar()
    return nd.log(nd.sum(nd.exp(vec - max_score))) + max_score
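A quick check of the max-shift trick (a hypothetical snippet): subtracting the maximum before exponentiating does not change the result mathematically, but it keeps nd.exp in a range where float32 does not overflow.

from mxnet import nd

vec = nd.array([1000., 1001., 1002.])
print(log_sum_exp(vec))                   # ~1002.41
print(nd.log(nd.sum(nd.exp(vec))))        # the naive version overflows to inf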
Example #29
def softmax(X):  # softmax normalizes arbitrary inputs into valid probability values
    exp = nd.exp(X)  # exponentiate
    # sum over each row
    partition = exp.sum(axis=1, keepdims=True)  # keepdims=True keeps the 2-D shape
    return exp / partition
Example #30
File: model.py Project: renhongkai/DKVMN
 def forward(self, is_train, req, in_data, out_data, aux):
     self.assign(out_data[0], req[0], 1.0 / (1.0 + nd.exp(- in_data[0])))
Example #31
File: utils.py Project: luoyancn/mxnetai
def softmax(x):  # softmax converts raw outputs into probabilities
    exp = nd.exp(x)  # exp turns every entry of x into a positive number
    partition = exp.sum(axis=1, keepdims=True)  # sum along axis 1, i.e. across each row
    return exp / partition
Example #32
def exp(input):
    return nd.exp(input)
Example #33
File: ndarray.py Project: tinylcy/dl
x = nd.array([[1, 2], [3, 4]])
print(x)

# Create a random array; each element is randomly sampled. Commonly used to initialize model parameters
y = nd.random_normal(0, 1, shape=(3, 4))
print(y)
print(y.shape)
print(y.size)

x = nd.random_normal(0, 1, shape=(3, 4))
print(x)
print(x + y)
print(x * y)
# Exponentiation.
print(nd.exp(y))
# Matrix product with the transpose
print(nd.dot(x, y.T))

# Broadcasting
a = nd.arange(3).reshape((3, 1))
b = nd.arange(2).reshape((1, 2))
print('a:', a)
print('b:', b)
print('a+b:', a + b)

# Conversion to and from NumPy
x = np.ones((2, 3))
y = nd.array(x)
z = y.asnumpy()
print([z, y])
Example #34
def softmax(X):
    exp = nd.exp(X)
    partition = exp.sum(axis = 1, keepdims=True) # return (nrows, 1) matrix
    return exp / partition
Example #35
def softmax(X):
    exp = nd.exp(X)
    # exp is a matrix; sum over each row while keeping axis 1,
    # i.e. return a matrix of shape (nrows, 1)
    partition = exp.sum(axis=1, keepdims=True)
    return exp / partition
Example #36
def logsigmoid(val):
    max_elem = nd.maximum(0., -val)
    z = nd.exp(-max_elem) + nd.exp(-val - max_elem)
    return -(max_elem + nd.log(z))
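A similar shift keeps this log-sigmoid finite for very negative inputs, where the naive nd.log(nd.sigmoid(val)) underflows (a hypothetical check):

from mxnet import nd

val = nd.array([-200., 0., 200.])
print(logsigmoid(val))              # approx. [-200, -0.693, 0]
print(nd.log(nd.sigmoid(val)))      # the first entry underflows to -inf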
Example #37
def softmax(x):
    exp = nd.exp(x)
    partition = exp.sum(axis=1, keepdims=True)
    return exp / partition
Example #38
def softmax(X):
    exp = nd.exp(X)
    partition = exp.sum(axis=1, keepdims=True)
    return exp / partition
Example #39
def evaluate_accuracy(data_iterator, net, W, b):
    acc = 0.
    for data, label in data_iterator:
        output = net(data, W, b)
        acc += accuracy(output, label)
    return acc / len(data_iterator)


if __name__ == '__main__':
    X = nd.random_normal(shape=(2, 5))
    X_prob = softmax(X)

    print(X)
    print(X_prob)
    print(nd.exp(X[0][0]) / (nd.exp(X[0][0]) + nd.exp(X[1][0])))

    # 1. Data
    mnist_train = gluon.data.vision.FashionMNIST(train=True,
                                                 transform=transform)
    mnist_test = gluon.data.vision.FashionMNIST(train=False,
                                                transform=transform)
    batch_size = 256
    train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
    test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

    # 2. Model (a linear model): W, b
    num_inputs = 28 * 28
    num_outputs = 10
    W = nd.random_normal(shape=(num_inputs, num_outputs))
    b = nd.random_normal(shape=num_outputs)
Example #40
 def forward(self, is_train, req, in_data, out_data, aux):
     self.assign(out_data[0], req[0], 1.0 / (1.0 + nd.exp(- in_data[0])))
Example #41
def softmax(X):
    X_exp = nd.exp(X)
    partition = X_exp.sum(axis=0, keepdims=True)
    return X_exp / partition