Example #1
    def forward(self, cls_pred, box_pred, cls_target, box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, box_pred, cls_target, box_target = [_as_list(x) \
            for x in (cls_pred, box_pred, cls_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [nd.sum(cp * 0) + nd.sum(bp * 0) for cp, bp in zip(cls_pred, box_pred)]
            return sum_losses, cls_losses, box_losses


        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        sum_losses = []
        for cp, bp, ct, bt in zip(*[cls_pred, box_pred, cls_target, box_target]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(self._min_hard_negatives, pos.sum(axis=1)
                                              * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss, nd.zeros_like(cls_loss))
            cls_losses.append(nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho, box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1])

        return sum_losses, cls_losses, box_losses
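The double argsort above ranks negatives by descending loss so that only the hardest ones survive the mask. A minimal sketch with hypothetical values (one image, five anchors, negative_mining_ratio = 3) illustrates the selection:

from mxnet import nd

cls_loss = nd.array([[0.2, 1.5, 0.9, 0.1, 2.3]])   # per-anchor cross entropy
pos = nd.array([[1, 0, 0, 0, 0]])                  # anchor 0 is the only positive
# negatives are scored by -loss, so the hardest negatives get the smallest ranks
rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
k = (pos.sum(axis=1) * 3).expand_dims(-1)          # keep 3 negatives per positive
hard_negative = rank < k
print(hard_negative.asnumpy())                     # [[0. 1. 1. 0. 1.]] -> the three largest-loss negatives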
 def mse(o, y):
     return nd.square(o - y).sum() * 0.5 / self.X.shape[0], nd.argmax(
         o, axis=1)
Example #3
 def grad_grad_op(x):
     return x/nd.sqrt((nd.square(x)+1)**3)
 def forward(self, fg, bg, pred, mask, merg):
     c = fg * pred + (1 - pred) * bg
     dis = mask * (c - merg)
     l = nd.sqrt(self.eps + nd.square(dis).sum(0))
     return l
def score(gradient, v, f):
    if 2 * f + 2 > v.shape[1]:
        f = int(math.floor((v.shape[1] - 2) / 2.0))
    num_neighbours = v.shape[1] - 2 - f
    sorted_distance = nd.square(v - gradient).sum(axis=0).sort()
    return nd.sum(sorted_distance[1:(1 + num_neighbours)]).asscalar()
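A hypothetical call, scoring a 10-dimensional gradient against five candidate gradients stored column-wise in v (f is the assumed number of Byzantine workers):

import math
from mxnet import nd

gradient = nd.random.normal(shape=(10, 1))
v = nd.random.normal(shape=(10, 5))   # five candidate gradients, one per column
# sums the squared distances to the nearest candidates, skipping the very closest one
print(score(gradient, v, f=1))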
 def forward(self, x):
     x = nd.sigmoid(nd.sum(nd.square(x), 1))
     return x
Example #7
def garchLLH(y, par):
    h = garchSim(nd.square(y), par)
    T = y.shape[0]
    llh = -0.5 * (T - 1) * math.log(
        2 * math.pi) - 0.5 * nd.sum(nd.log(h) + (y / nd.sqrt(h))**2)
    return llh.asscalar()
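garchSim is not shown in this excerpt; a minimal sketch of a GARCH(1,1) variance recursion consistent with the call above, assuming par = (omega, alpha, beta), could look like:

from mxnet import nd

def garchSim_sketch(y_sq, par):
    # hypothetical stand-in for garchSim: y_sq holds the squared observations
    omega, alpha, beta = par
    h = nd.zeros_like(y_sq)
    h[0] = nd.mean(y_sq)                                   # start from the sample variance
    for t in range(1, y_sq.shape[0]):
        h[t] = omega + alpha * y_sq[t - 1] + beta * h[t - 1]
    return h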
Example #8
    def unlabeled_train_op_mmd_combine(self, update_enc=True):
        '''
        Trains the MMD model
        '''
        batch_size = self.args['batch_size']
        model_ctx = self.model_ctx
        eps = 1e-10

        # Retrieve data
        docs = self.data.get_documents(key='train')

        y_true = np.random.dirichlet(np.ones(self.ndim_y) *
                                     self.args['dirich_alpha'],
                                     size=batch_size)
        y_true = nd.array(y_true, ctx=model_ctx)

        with autograd.record():
            ### reconstruction phase ###
            y_onehot_u = self.Enc(docs)
            y_onehot_u_softmax = nd.softmax(y_onehot_u)
            if self.args['latent_noise'] > 0:
                y_noise = np.random.dirichlet(np.ones(self.ndim_y) *
                                              self.args['dirich_alpha'],
                                              size=batch_size)
                y_noise = nd.array(y_noise, ctx=model_ctx)
                y_onehot_u_softmax = (
                    1 - self.args['latent_noise']
                ) * y_onehot_u_softmax + self.args['latent_noise'] * y_noise
            x_reconstruction_u = self.Dec(y_onehot_u_softmax)

            logits = nd.log_softmax(x_reconstruction_u)
            loss_reconstruction = nd.mean(nd.sum(-docs * logits, axis=1))
            loss_total = loss_reconstruction * self.args['recon_alpha']

            ### mmd phase ###
            if self.args['adverse']:
                y_fake = self.Enc(docs)
                y_fake = nd.softmax(y_fake)
                loss_mmd = mmd_loss(y_true,
                                    y_fake,
                                    ctx_model=model_ctx,
                                    t=self.args['kernel_alpha'])
                loss_total = loss_total + loss_mmd

            if self.args['l2_alpha'] > 0:
                loss_total = loss_total + self.args['l2_alpha'] * nd.mean(
                    nd.sum(nd.square(y_onehot_u), axis=1))

            loss_total.backward()

        self.optimizer_enc.step(1)
        self.optimizer_dec.step(1)  # self.m.args['batch_size']

        latent_max = nd.zeros(self.args['ndim_y'], ctx=model_ctx)
        for max_ind in nd.argmax(y_onehot_u, axis=1):
            latent_max[max_ind] += 1.0
        latent_max /= batch_size
        latent_entropy = nd.mean(
            nd.sum(-y_onehot_u_softmax * nd.log(y_onehot_u_softmax + eps),
                   axis=1))
        latent_v = nd.mean(y_onehot_u_softmax, axis=0)
        dirich_entropy = nd.mean(nd.sum(-y_true * nd.log(y_true + eps),
                                        axis=1))

        if self.args['adverse']:
            loss_mmd_return = loss_mmd.asscalar()
        else:
            loss_mmd_return = 0.0
        return (nd.mean(loss_reconstruction).asscalar(), loss_mmd_return,
                latent_max.asnumpy(), latent_entropy.asscalar(),
                latent_v.asnumpy(), dirich_entropy.asscalar())
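mmd_loss is not defined in this excerpt; a minimal sketch of a Gaussian-kernel MMD between the Dirichlet samples y_true and the encoder outputs y_fake, with t standing in for the kernel_alpha bandwidth, might look like:

from mxnet import nd

def mmd_loss_sketch(y_true, y_fake, t=1.0):
    # hypothetical stand-in for mmd_loss; the real implementation is not shown above
    def gaussian_kernel(a, b):
        # pairwise squared distances between rows of a and b
        aa = nd.sum(nd.square(a), axis=1, keepdims=True)   # (n, 1)
        bb = nd.sum(nd.square(b), axis=1, keepdims=True)   # (m, 1)
        sq_dist = aa + bb.T - 2.0 * nd.dot(a, b.T)         # (n, m)
        return nd.exp(-sq_dist / (2.0 * t))
    k_tt = nd.mean(gaussian_kernel(y_true, y_true))
    k_ff = nd.mean(gaussian_kernel(y_fake, y_fake))
    k_tf = nd.mean(gaussian_kernel(y_true, y_fake))
    return k_tt + k_ff - 2.0 * k_tf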
Example #9
            data_list = mx.gluon.utils.split_and_load(data,
                                                      ctx_list=devices,
                                                      even_split=False)
            lmks_list = mx.gluon.utils.split_and_load(lmks,
                                                      ctx_list=devices,
                                                      even_split=False)
            angs_list = mx.gluon.utils.split_and_load(angs,
                                                      ctx_list=devices,
                                                      even_split=False)
            cate_list = mx.gluon.utils.split_and_load(cate,
                                                      ctx_list=devices,
                                                      even_split=False)
            loss_list = []

            with mx.autograd.record():
                for data, lmks, angs, cate in zip(data_list, lmks_list,
                                                  angs_list, cate_list):
                    lmks_regs = net(data)
                    lmks_regs = nd.Flatten(lmks_regs)

                    lmks_loss = nd.square(lmks_regs - lmks)
                    lmks_loss = nd.sum(lmks_loss, axis=1)

                    #angs_loss = 1 - mx.nd.cos((angs_regs - angs))
                    #angs_loss = mx.nd.sum(angs_loss, axis=1)

                    loss = lmks_loss

                    #if with_angle:
                    #    loss = loss * angs_loss

                    if with_category:
                        loss = loss * cate

                    loss_list.append(loss)
Example #10
def RecLoss(rec_x, x):
    x_reshape = x.reshape((0, -1))
    diff = nd.square(x_reshape - rec_x)
    return nd.mean(nd.sum(diff, axis=1))
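A hypothetical usage, comparing a batch of images against already-flattened reconstructions:

from mxnet import nd

x = nd.random.uniform(shape=(4, 1, 28, 28))   # batch of 4 single-channel images
rec_x = nd.random.uniform(shape=(4, 784))     # reconstructions, already flattened
print(RecLoss(rec_x, x).asscalar())           # mean per-sample squared reconstruction error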
def flowdr(dem_fill,NoData,rows,cols,ctx,switch):
    ingrid = np.indices((rows, cols))
    # ingrid[0] holds the row indices, ingrid[1] the column indices
    ingridxmx=nd.array(ingrid[1],ctx[0]).reshape((1,1,rows, cols))
    ingridymx=nd.array(ingrid[0],ctx[0]).reshape((1,1,rows, cols))
    dem_fillmx=nd.array(dem_fill,ctx[0])
    demmx=dem_fillmx.reshape((1,1,rows, cols))
    res=1
    l=[0,1,2,3,4,5,6,7,0]
    direct=[1,2,4,8,16,32,64,128]
    direct_d=[[1,3],[2,6],[4,12],[8,24],[16,48],[32,96],[64,192],[128,129]]
    weight=[None]*8
    weight1=[None]*8
    convx=[None]*8
    convy=[None]*8
    convz=[None]*8
    runlen=[x*res for x in [1,ma.pow(2,0.5),1,ma.pow(2,0.5),1,ma.pow(2,0.5),1,ma.pow(2,0.5)]]# run length of each facet pair, scaled by cell resolution
    n = [[[] for x in range(3)] for x in range(8)]#create list to store normal vectors for each facet
    s = [None]*8
    d = [None]*8

    weight[0] = nd.array([[0, 0, 0], [0, 1, -1], [0, 0, 0]], gpu(0))
    weight[1] = nd.array([[0, 0, -1], [0, 1, 0], [0, 0, 0]], gpu(0))
    weight[2] = nd.array([[0, -1, 0], [0, 1, 0], [0, 0, 0]], gpu(0))
    weight[3] = nd.array([[-1, 0, 0], [0, 1, 0], [0, 0, 0]], gpu(0))
    weight[4] = nd.array([[0, 0, 0], [-1, 1, 0], [0, 0, 0]], gpu(0))
    weight[5] = nd.array([[0, 0, 0], [0, 1, 0], [-1, 0, 0]], gpu(0))
    weight[6] = nd.array([[0, 0, 0], [0, 1, 0], [0, -1, 0]], gpu(0))
    weight[7] = nd.array([[0, 0, 0], [0, 1, 0], [0, 0, -1]], gpu(0))
    
    weight1[0] = nd.array([[0, 0, 0], [0, 1, -10], [0, 0, 0]], gpu(0))
    weight1[1] = nd.array([[0, 0, -10], [0, 1, 0], [0, 0, 0]], gpu(0))
    weight1[2] = nd.array([[0, -10, 0], [0, 1, 0], [0, 0, 0]], gpu(0))
    weight1[3] = nd.array([[-10, 0, 0], [0, 1, 0], [0, 0, 0]], gpu(0))
    weight1[4] = nd.array([[0, 0, 0], [-10, 1, 0], [0, 0, 0]], gpu(0))
    weight1[5] = nd.array([[0, 0, 0], [0, 1, 0], [-10, 0, 0]], gpu(0))
    weight1[6] = nd.array([[0, 0, 0], [0, 1, 0], [0, -10, 0]], gpu(0))
    weight1[7] = nd.array([[0, 0, 0], [0, 1, 0], [0, 0, -10]], gpu(0))

    d0=nd.zeros((rows, cols),ctx[0],dtype='float32')
    dd=nd.zeros((rows, cols),ctx[0],dtype='float32')
    d_flat=nd.zeros((rows, cols),ctx[0],dtype='float32')
    flat=nd.zeros((rows, cols),ctx[0],dtype='float32')
    dep=nd.zeros((rows, cols),ctx[0],dtype='float32')
    high=nd.zeros((rows, cols),ctx[0],dtype='float32')
    fd=nd.zeros((rows, cols),ctx[0],dtype='float32')-999
    d_compact=nd.zeros((rows, cols),ctx[0],dtype='float32')-1

    for i in range(0,8):
        w=weight[i].reshape((1, 1, 3, 3))
        convz[i] = nd.Convolution(data=demmx, weight=w, kernel=(3,3), no_bias=True, num_filter=1,pad=(1,1),cudnn_tune='off')
        convz[i]=convz[i][0,0,:,:]
        if switch==1 or switch==3:
            convx[i] = nd.Convolution(data=ingridxmx, weight=w, kernel=(3,3), no_bias=True, num_filter=1,pad=(1,1),cudnn_tune='off')
            convy[i] = nd.Convolution(data=ingridymx, weight=w, kernel=(3,3), no_bias=True, num_filter=1,pad=(1,1),cudnn_tune='off')        
            convx[i]=convx[i][0,0,:,:]
            convy[i]=convy[i][0,0,:,:]
        
    if switch==1 or switch==3:
        for p in range(0,8):#8 facets from N-NE clockwise
            l0=l[p]
            l1=l[p+1]
            d[l0]=d0-999#Nodata value
            dmax=d0-999
            smax=d0-999
            n[l0][0]= convz[l0]*convy[l1]-convz[l1]*convy[l0]#nx
            n[l0][1]= convz[l0]*convx[l1]-convz[l1]*convx[l0]#ny
            n[l0][2]= convy[l0]*convx[l1]-convy[l1]*convx[l0]#nz
            #make boolean mask to determine direction d and slope s
            d[l0]=nd.where(condition=((n[l0][0]==0)*(n[l0][1]>=0)),x=d0,y=d[l0])

            d[l0]=nd.where(condition=((n[l0][0]==0)*(n[l0][1]<0)),x=d0+ma.pi,y=d[l0])

            d[l0]=nd.where(condition=(n[l0][0]>0),x=ma.pi/2-nd.arctan(n[l0][1]/n[l0][0]),y=d[l0])

            d[l0]=nd.where(condition=(n[l0][0]<0),x=3*ma.pi/2-nd.arctan(n[l0][1]/n[l0][0]),y=d[l0])


            d[l0]=nd.where(condition=((convz[l0]<=0)*(convz[l1]<=0)),x=dmax,y=d[l0])

            s[l0]=-nd.tan(nd.arccos(n[l0][2]/(nd.sqrt(nd.square(n[l0][0])+nd.square(n[l0][1])+nd.square(n[l0][2])))))#slope of the triangular facet
            s[l0]=nd.where(condition=((convz[l0]<=0)*(convz[l1]<=0)),x=smax,y=s[l0])
            #Modify the scenario when the steepest slope is outside the 45 range of each facet
            dmax=nd.where(condition=((convz[l0]/runlen[l0]>=convz[l1]/runlen[l0])*(convz[l0]>0)),x=d0+ma.pi*l0/4,y=dmax)
            dmax=nd.where(condition=((convz[l0]/runlen[l0]<convz[l1]/runlen[l0])*(convz[l1]>0)),x=d0+ma.pi*(l0+1)/4,y=dmax)

            smax=nd.where(condition=((convz[l0]>=convz[l1])*(convz[l0]>0)),x=convz[l0]/runlen[l0],y=smax)
            smax=nd.where(condition=((convz[l0]<convz[l1])*(convz[l1]>0)),x=convz[l1]/runlen[l1],y=smax)
            d[l0]=nd.where(condition=((d[l0]<ma.pi*l0/4)+(d[l0]>ma.pi*l1/4)),x=dmax,y=d[l0])

            s[l0]=nd.where(condition=((d[l0]<ma.pi*l0/4)+(d[l0]>ma.pi*l1/4)),x=smax,y=s[l0])

            if switch==1:

                #flat and depressions indicator grid    

                flat=(convz[l0]==0)+flat
                dep=(convz[l0]<0)+dep
                high=(convz[l0]>0)+high

        for q in range(0,8):#check if the 45 degree range angles need to be maintained, otherwise delete (set to NoData)
            l0=l[q]
            l1=l[q+1]
            l2=l[q-1]
            dmax=d0-999
            if q==0:
                dmax=nd.where(condition=(d[0]==d[1]),x=d[0],y=dmax)
                dmax=nd.where(condition=(d[0]==d[7]),x=d[0],y=dmax)
                d[0]=nd.where(condition=((d[0]==ma.pi*l0/4)+(d[0]==ma.pi*l1/4)),x=dmax,y=d[0])
            else:
                dmax=nd.where(condition=(d[l0]==d[l1]),x=d[l0],y=dmax)
                dmax=nd.where(condition=(d[l0]==d[l2]),x=d[l0],y=dmax)
                d[l0]=nd.where(condition=((d[l0]==ma.pi*l0/4)+(d[l0]==ma.pi*l1/4)),x=dmax,y=d[l0])
    #Check if flat or surface depression area, then label with -1 or -10 respectively

    if switch==1:

        fd=nd.where(condition=(flat==8),x=d0-2,y=fd)#flats

        fd=nd.where(condition=(dep>=1)*(high==0),x=d0-3,y=fd)#high edge

        high_zero=nd.where(condition=(high==0),x=d0+1,y=d0)
    
    
    for j in range (0,8):
        if switch==1 or switch==2:
            d_flat=nd.where(condition=(convz[j]==0),x=d0+direct[j],y=d0)+d_flat
        
        if switch==1:
            flat_near=nd.where(condition=(convz[j]==0),x=d0+5,y=d0)
            dd1=high_zero+flat_near
            w=weight1[j].reshape((1, 1, 3, 3))
            dd1=dd1.reshape((1,1,rows, cols))
            conv_near= nd.Convolution(data=dd1, weight=w, kernel=(3,3), no_bias=True, num_filter=1,pad=(1,1),cudnn_tune='off')
            conv_near= conv_near[0,0,:,:]
            dd=nd.where(condition=(conv_near==-5)+(conv_near==-59)+(conv_near==-54)+(conv_near==-4),x=d0+1,y=d0)+dd

        if switch==1 or switch==3:
            d_compact=nd.where(condition=(d[j]==ma.pi*j/4),x=d0+direct_d[j][0],y=d_compact)
            d_compact=nd.where(condition=(d[j]>j*ma.pi/4)*(d[j]<(j+1)*ma.pi/4),x=d0+direct_d[j][1],y=d_compact)

    if switch==1 or switch==3:
        d_compact=nd.where(condition=(dem_fillmx==d0+NoData),x=d0-999,y=d_compact)#NoData        
    
    if switch==1:
        fd=nd.where(condition=(dd>=1)*(high>=1),x=d0-1,y=fd)#low edge
        fd=nd.where(condition=(dep==8),x=d0-10,y=fd)#lowest points in depressions
        return (fd.asnumpy(),d_compact.asnumpy(),d_flat.asnumpy())

    if switch==2:
        return (d_flat.asnumpy())
    if switch==3:
        return (d_compact.asnumpy())
Example #12
 def Norm(x):
     return nd.sqrt(nd.sum(nd.square(x), axis=1, keepdims=True))
Example #13
def _variance(a: nd.NDArray) -> nd.NDArray:
  """Compute variance of a of shape [n_samples, ...]."""
  mean = nd.mean(a, 0, keepdims=True)
  return nd.mean(nd.square(a - mean), 0)
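A hypothetical usage, computing the per-feature (population) variance over the sample axis:

from mxnet import nd

a = nd.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])   # 3 samples, 2 features
print(_variance(a).asnumpy())                        # [2.6667 2.6667]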
Example #14
def CapLoss(y_pred, y_true):
    L = y_true * nd.square(nd.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * nd.square(nd.maximum(0., y_pred - 0.1))
    return nd.mean(nd.sum(L, 1))
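A hypothetical usage of the capsule margin loss, with one sample and three capsule lengths:

from mxnet import nd

y_pred = nd.array([[0.95, 0.30, 0.05]])    # capsule lengths
y_true = nd.array([[1.0, 0.0, 0.0]])       # one-hot target
print(CapLoss(y_pred, y_true).asscalar())  # 0.5 * max(0, 0.30 - 0.1)**2 = 0.02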
Example #15
def total_variation_loss(x):
    """ regularize convolutional masks (not currently in use) """
    a = nd.square(x[:, :, :-1, :-1] - x[:, :, 1:, :-1])
    b = nd.square(x[:, :, :-1, :-1] - x[:, :, :-1, 1:])
    return nd.sum(nd.mean(nd.power(a + b, 1.25), axis=(2,3)))
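A hypothetical usage, penalizing spatial roughness of a small batch of single-channel masks:

from mxnet import nd

x = nd.random.uniform(shape=(2, 1, 8, 8))   # batch of 2 masks in NCHW layout
print(total_variation_loss(x).asscalar())   # scalar roughness penalty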
Example #16
 def diffusion_kernel(a, tmpt, dim):
     # return (4 * np.pi * tmpt)**(-dim / 2) * nd.exp(- nd.square(nd.arccos(a)) / tmpt)
     return nd.exp(-nd.square(nd.arccos(a)) / tmpt)
Example #17
def loss(predictions, targets):
    # return -nd.mean(targets * nd.log(predictions))
    # return -nd.mean((targets * nd.log(predictions)) + ((1 - targets) * nd.log(1 - predictions)))
    return nd.mean(nd.square(predictions - targets))
Example #18
def loss_fn(y_pred, y):
    return nd.mean(nd.square(y_pred - y))
Example #19
    def fit(self, num_steps=1):
        """
    Fit the models

    Returns:
      Loss Functions (Q1-mse, Q2-mse, alpha-entropy, Policy-kl)
    """
        logger_data = {k: [] for k in ["LossPi", "LossQ1", "LossQ2", "LossV"]}
        for step in range(num_steps):
            # sample a batch from memory
            minibatch = self.memory.sample(self.batch_size)
            obs = nd.array(minibatch["obs"], self.ctx)
            acts = nd.array(minibatch["act"], self.ctx)
            rewards = nd.array(minibatch["rew"], self.ctx)
            next_obs = nd.array(minibatch["next_obs"], self.ctx)
            nonterm = nd.array(minibatch["nt"], self.ctx)

            lr = self.lr(self.steps) * self.lrmult

            # update the policy function
            with autograd.record():
                _mu, _pi, _logp_pi = self.policy(obs)
                _obspi = nd.concat(obs, _pi, dim=-1)
                _q1_pi = self.qfn1(_obspi)
                pi_loss = nd.mean(self.alpha * _logp_pi - _q1_pi)
                pi_loss.backward()
            self.mu.update(lr)
            self.logstd.update(lr)
            self.policy_base.update(lr)

            # update the value functions
            logp_pi = nd.stop_gradient(_logp_pi)
            obspi = nd.stop_gradient(_obspi)
            obsact = nd.concat(obs, acts, dim=-1)
            q1_pi = self.qfn1(obspi)
            q2_pi = self.qfn2(obspi)
            min_q_pi = nd.minimum(q1_pi, q2_pi)
            v_targ = self.vfn_targ(next_obs)
            q_backup = nd.stop_gradient(rewards +
                                        self.gamma * nonterm * v_targ)
            v_backup = nd.stop_gradient(min_q_pi - self.alpha * logp_pi)
            with autograd.record():
                _q1 = self.qfn1(obsact)
                _q2 = self.qfn2(obsact)
                _v = self.vfn(obs)

                q1_loss = 0.5 * nd.mean(nd.square(q_backup - _q1))
                q2_loss = 0.5 * nd.mean(nd.square(q_backup - _q2))
                v_loss = 0.5 * nd.mean(nd.square(v_backup - _v))
                total_loss = q1_loss + q2_loss + v_loss
                total_loss.backward()
            self.qfn1.update(lr)
            self.qfn2.update(lr)
            self.vfn.update(lr)

            # update the target network
            for i in range(len(self.vfn.weights)):
                self.vfn_targ.weights[i][:] = \
                    self.polyak * self.vfn_targ.weights[i][:] + \
                    (1 - self.polyak) * self.vfn.weights[i][:]

            logger_data["LossPi"].append(pi_loss.asnumpy()[0])
            logger_data["LossQ1"].append(q1_loss.asnumpy()[0])
            logger_data["LossQ2"].append(q2_loss.asnumpy()[0])
            logger_data["LossV"].append(v_loss.asnumpy()[0])
        return logger_data
 def forward(self, x):
     x = nd.sqrt(nd.sum(nd.square(x), 1))
     return x
Example #21
 def neglogp(action, mean, logstd):
     assert (mean.shape[-1] == logstd.shape[-1])
     std = nd.exp(logstd) + 1e-8
     return 0.5 * nd.sum(nd.square((action - mean) / std), axis=-1) \
         + 0.5 * np.log(2.0 * np.pi) * action.shape[-1] \
         + nd.sum(logstd, axis=-1)
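A hypothetical usage with a 2-dimensional diagonal Gaussian (zero mean, unit std); the result is the negative log-density of the action:

import numpy as np
from mxnet import nd

action = nd.array([[0.5, -0.2]])
mean = nd.zeros((1, 2))
logstd = nd.zeros((1, 2))                       # std = exp(0) = 1
print(neglogp(action, mean, logstd).asnumpy())  # ~0.5*(0.25 + 0.04) + log(2*pi) ~= 1.98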
Example #22
     if len(grads_list) >= args.nworkers:
         accumulate_param = 0
         accumulate_grad = 0
         if model_idx != len(params_prev_list) - 1:
             grads_prev = grads_list[-1]
             params_prev = params_prev_list[-1]
         else:
             grads_prev = grads_list[-2]
             params_prev = params_prev_list[-2]
         for param, param_prev, grad_prev in zip(
                 net.collect_params().values(), params_prev,
                 grads_prev):
             if param.grad_req != 'null':
                 grad_current = param.grad()
                 param_current = param.data()
                 accumulate_param = accumulate_param + nd.square(
                     param_current - param_prev).sum()
                 accumulate_grad = accumulate_grad + nd.square(
                     grad_current - grad_prev).sum()
         lips = math.sqrt(accumulate_grad.asscalar()) / math.sqrt(
             accumulate_param.asscalar())
         if lips <= np.quantile(lips_list, quantile_q):
             byz_flag = False
             accept_counter = accept_counter + 1
         nd.waitall()
     else:
         byz_flag = False
         accept_counter = accept_counter + 1
 elif args.byz_test == 'zeno++':
     zeno_max_delay = args.zeno_delay
     zeno_rho = args.rho
     zeno_epsilon = args.epsilon
Example #23
def squash(x, axis):

    s_squared_norm = nd.sum(nd.square(x), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / nd.sqrt(s_squared_norm +
                                                            1e-5)
    return scale * x
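A hypothetical usage of squash, showing that vector lengths are pushed into (0, 1): long vectors saturate just below 1 while short vectors shrink towards 0:

from mxnet import nd

x = nd.array([[3.0, 4.0], [0.1, 0.0]])                   # two capsule vectors
v = squash(x, axis=-1)
print(nd.sqrt(nd.sum(nd.square(v), axis=-1)).asnumpy())  # ~[0.96, 0.01]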
    def _update_params(self, accumulated_grads):
        # scale gradients by lot size, add noise, and update the parameters
        for param_name, param in self._params.items():
            # average the clipped gradients and then add noise to each averaged gradient
            param_grad_update = (accumulated_grads[param_name] / self._hyperparams['lot_size']) + \
                                mx.random.normal(0, self._hyperparams['sigma'], param.shape, ctx=self._model_ctx)

            # update biased first moment estimate
            self._m[param_name] = self._hyperparams['beta_1'] * self._m[param_name] + (1 - self._hyperparams['beta_1']) * param_grad_update

            # update biased second raw moment estimate
            self._v[param_name] = self._hyperparams['beta_2'] * self._v[param_name] + (1 - self._hyperparams['beta_2']) * nd.square(param_grad_update)

            # compute bias-corrected first moment estimate
            m_hat = self._m[param_name] / (1 - nd.power(self._hyperparams['beta_1'], self._step + 1))

            # compute bias-corrected second raw moment estimate
            v_hat = self._v[param_name] / (1 - nd.power(self._hyperparams['beta_2'], self._step + 1))

            # update params with ADAM
            param[:] = param - self._hyperparams['lr'] * m_hat / (nd.sqrt(v_hat) + 1e-8)
Example #25
def Squash(vector, axis):
    norm = nd.sum(nd.square(vector), axis, keepdims=True)
    v_j = norm / (1 + norm) / nd.sqrt(norm) * vector
    return v_j
def my_l2_loss(X, Y):
    num_instances = X.shape[0]
    return nd.sum(nd.square(X - Y)) / (2 * num_instances)
Example #27
 def forward(self, x):
     out = nd.sqrt(nd.sum(nd.square(x), self.axis))
     return out
Example #28
    def forward(self, cls_pred, ori_pred, box_pred, cls_target, ori_target,
                box_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        cls_pred, ori_pred, box_pred, cls_target, ori_target, box_target = [_as_list(x) \
            for x in (cls_pred, ori_pred, box_pred, cls_target, ori_target, box_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, op, bp, ct, bt in zip(
                *[cls_pred, ori_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1 and self._min_hard_negatives < 1:
            # no positive samples and no hard negatives, return dummy losses
            cls_losses = [nd.sum(cp * 0) for cp in cls_pred]
            ori_losses = [nd.sum(op * 0) for op in ori_pred]
            box_losses = [nd.sum(bp * 0) for bp in box_pred]
            sum_losses = [
                nd.sum(cp * 0) + nd.sum(op * 0) + nd.sum(bp * 0)
                for cp, op, bp in zip(cls_pred, ori_pred, box_pred)
            ]
            return sum_losses, cls_losses, ori_losses, box_losses

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        ori_losses = []
        box_losses = []
        sum_losses = []
        for cp, op, bp, ct, ot, bt in zip(*[
                cls_pred, ori_pred, box_pred, cls_target, ori_target, box_target
        ]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(
                self._min_hard_negatives,
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / max(1., num_pos_all))

            pred = nd.log_softmax(op, axis=-1)
            pos = ot > 0
            ori_loss = -nd.pick(pred, ot, axis=-1, keepdims=False)
            rank = (ori_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < nd.maximum(
                self._min_hard_negatives,
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            ori_loss = nd.where((pos + hard_negative) > 0, ori_loss,
                                nd.zeros_like(ori_loss))
            ori_losses.append(
                nd.sum(ori_loss, axis=0, exclude=True) / max(1., num_pos_all))

            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho,
                                box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / max(1., num_pos_all))
            # combine classification, orientation and box terms into one per-device loss
            sum_losses.append(cls_losses[-1] + ori_losses[-1] +
                              self._lambd * box_losses[-1])

        return sum_losses, cls_losses, ori_losses, box_losses
Example #29
    def forward(self, cls_pred, box_pred, coef_center_pred, coef_pred,
                cls_target, box_target, coef_center_target, coef_target):
        """Compute loss in entire batch across devices."""
        # require results across different devices at this time
        # print(cls_pred[0].shape, box_pred[0].shape, coef_center_pred[0].shape)
        cls_pred, box_pred, coef_center_pred, coef_pred, cls_target, box_target, coef_center_target, coef_target = [_as_list(x) \
            for x in (cls_pred, box_pred, coef_center_pred, coef_pred, cls_target, box_target, coef_center_target, coef_target)]
        # cross device reduction to obtain positive samples in entire batch
        num_pos = []
        for cp, bp, ct, bt in zip(
                *[cls_pred, box_pred, cls_target, box_target]):
            pos_samples = (ct > 0)
            num_pos.append(pos_samples.sum())
        num_pos_all = sum([p.asscalar() for p in num_pos])
        if num_pos_all < 1:
            # no positive samples found, return dummy losses
            return nd.zeros((1, )), nd.zeros((1, )), nd.zeros((1, )), nd.zeros(
                (1, )), nd.zeros((1, ))

        # compute element-wise cross entropy loss and sort, then perform negative mining
        cls_losses = []
        box_losses = []
        coef_center_losses = []
        coef_losses = []
        sum_losses = []
        for cp, bp, coefcp, coefp, ct, bt, coefct, coeft in zip(*[
                cls_pred, box_pred, coef_center_pred, coef_pred, cls_target,
                box_target, coef_center_target, coef_target
        ]):
            pred = nd.log_softmax(cp, axis=-1)
            pos = ct > 0
            cls_loss = -nd.pick(pred, ct, axis=-1, keepdims=False)
            rank = (cls_loss * (pos - 1)).argsort(axis=1).argsort(axis=1)
            hard_negative = rank < (
                pos.sum(axis=1) * self._negative_mining_ratio).expand_dims(-1)
            # mask out if not positive or negative
            cls_loss = nd.where((pos + hard_negative) > 0, cls_loss,
                                nd.zeros_like(cls_loss))
            cls_losses.append(
                nd.sum(cls_loss, axis=0, exclude=True) / num_pos_all)

            # print(bp.shape, bt.shape)
            bp = _reshape_like(nd, bp, bt)
            box_loss = nd.abs(bp - bt)
            box_loss = nd.where(box_loss > self._rho,
                                box_loss - 0.5 * self._rho,
                                (0.5 / self._rho) * nd.square(box_loss))
            # box loss only apply to positive samples
            box_loss = box_loss * pos.expand_dims(axis=-1)
            box_losses.append(
                nd.sum(box_loss, axis=0, exclude=True) / num_pos_all)

            coefcp = _reshape_like(nd, coefcp, coefct)
            coef_center_loss = nd.abs(coefcp - coefct)
            coef_center_loss = nd.where(coef_center_loss > self._rho,
                                        coef_center_loss - 0.5 * self._rho,
                                        (0.5 / self._rho) *
                                        nd.square(coef_center_loss))
            coef_center_loss = coef_center_loss * pos.expand_dims(axis=-1)
            coef_center_losses.append(
                nd.sum(coef_center_loss, axis=0, exclude=True) / num_pos_all)

            coefp = _reshape_like(nd, coefp, coeft)
            coef_loss = nd.abs(coefp - coeft)
            coef_loss = nd.where(coef_loss > self._rho,
                                 coef_loss - 0.5 * self._rho,
                                 (0.5 / self._rho) * nd.square(coef_loss))
            coef_loss = coef_loss * pos.expand_dims(axis=-1)
            coef_losses.append(
                nd.sum(coef_loss, axis=0, exclude=True) / num_pos_all)

            sum_losses.append(cls_losses[-1] + self._lambd * box_losses[-1] +
                              coef_losses[-1] + coef_center_losses[-1])

        return sum_losses, cls_losses, box_losses, coef_center_losses, coef_losses