Example #1
import mxnet.ndarray as F

def batch_pix_accuracy(output, target, ignore_bg=False):
    """PixAcc"""
    # inputs are NDArrays: output is 4D (N, C, H, W) class scores, target is 3D (N, H, W) labels
    predict = F.argmax(output, 1)
    target = target.astype(predict.dtype)
    if ignore_bg:
        pixel_labeled = (target > 0).sum().asscalar()
        pixel_correct = (F.equal(predict, target) *
                         (target > 0.0)).sum().asscalar()
    else:
        pixel_labeled = (target >= 0).sum().asscalar()
        pixel_correct = (F.equal(predict, target) *
                         (target >= 0.0)).sum().asscalar()
    return pixel_correct, pixel_labeled
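
A quick usage sketch (hypothetical tensors, shapes as in the comment above):

import mxnet.ndarray as F

# 1x2x1x2 scores: argmax over axis 1 predicts classes [1, 0]
output = F.array([[[[0.1, 0.9]], [[0.9, 0.1]]]])
target = F.array([[[1.0, 0.0]]])           # 1x1x2 ground-truth labels
correct, labeled = batch_pix_accuracy(output, target)
print(correct / labeled)                   # 1.0 -- both pixels correct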
Example #2
import mxnet.ndarray as F

def batch_pix_accuracy(output, target):
    """PixAcc"""
    # inputs are NDArrays: output is 4D (N, C, H, W) class scores, target is 3D (N, H, W) labels
    predict = F.argmax(output, 1) + 1
    target = target.astype(predict.dtype) + 1
    pixel_labeled = (target > 0).sum().asscalar()
    pixel_correct = (F.equal(predict, target)*(target > 0)).sum().asscalar()
    return pixel_correct, pixel_labeled
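
The `+ 1` shift maps an ignore label encoded as -1 onto 0, so `target > 0` drops it from both counts. A toy check (hypothetical values):

import mxnet.ndarray as F

output = F.array([[[[0.1]], [[0.9]]]])     # 1x2x1x1 scores, argmax gives class 1
target = F.array([[[-1.0]]])               # -1 marks an ignored pixel
print(batch_pix_accuracy(output, target))  # (0.0, 0.0): the pixel is not counted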
Example #3
    def hybrid_forward(self, F, fts, ys, ftt, yt):
        """
        Semantic Alignment Loss
        :param F: Function
        :param yt: label for the target domain [N]
        :param ftt: features for the target domain [N, K]
        :param ys: label for the source domain [M]
        :param fts: features for the source domain [M, K]
        :return:
        """
        if self._fn:
            # Normalize ft
            fts = F.L2Normalization(fts, mode='instance')
            ftt = F.L2Normalization(ftt, mode='instance')

        fts_rpt = F.broadcast_to(fts.expand_dims(axis=0),
                                 shape=(self._bs_tgt, self._bs_src,
                                        self._embed_size))
        ftt_rpt = F.broadcast_to(ftt.expand_dims(axis=1),
                                 shape=(self._bs_tgt, self._bs_src,
                                        self._embed_size))

        dists = F.sum(F.square(ftt_rpt - fts_rpt), axis=2)

        yt_rpt = F.broadcast_to(yt.expand_dims(axis=1),
                                shape=(self._bs_tgt,
                                       self._bs_src)).astype('int32')
        ys_rpt = F.broadcast_to(ys.expand_dims(axis=0),
                                shape=(self._bs_tgt,
                                       self._bs_src)).astype('int32')

        y_same = F.equal(yt_rpt, ys_rpt).astype('float32')
        y_diff = F.not_equal(yt_rpt, ys_rpt).astype('float32')

        intra_cls_dists = dists * y_same
        inter_cls_dists = dists * y_diff

        max_dists = F.max(dists, axis=1, keepdims=True)
        max_dists = F.broadcast_to(max_dists,
                                   shape=(self._bs_tgt, self._bs_src))
        revised_inter_cls_dists = F.where(y_same, max_dists, inter_cls_dists)

        max_intra_cls_dist = F.max(intra_cls_dists, axis=1)
        min_inter_cls_dist = F.min(revised_inter_cls_dists, axis=1)

        loss = F.relu(max_intra_cls_dist - min_inter_cls_dist + self._margin)

        return loss
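
The broadcasting pattern above is the core trick: `expand_dims` plus `broadcast_to` turns [N, K] and [M, K] feature matrices into an [N, M] matrix of pairwise squared distances. A minimal standalone sketch (toy shapes, variable names of my choosing):

import mxnet.ndarray as F

N, M, K = 4, 3, 8                       # target batch, source batch, embed size
ftt = F.random.uniform(shape=(N, K))    # target features
fts = F.random.uniform(shape=(M, K))    # source features

# [M,K] -> [1,M,K] -> [N,M,K]  and  [N,K] -> [N,1,K] -> [N,M,K]
fts_rpt = F.broadcast_to(fts.expand_dims(axis=0), shape=(N, M, K))
ftt_rpt = F.broadcast_to(ftt.expand_dims(axis=1), shape=(N, M, K))
dists = F.sum(F.square(ftt_rpt - fts_rpt), axis=2)
print(dists.shape)                      # (4, 3): squared distance of every pair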
Example #4
    # Excerpt from a class; assumes module-level imports:
    # import numpy as np; from mxnet import ndarray as nd
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        arcface_module_outputs = []
        for i, _module in enumerate(self._arcface_modules):
          #_fc7 = _module.get_outputs(merge_multi_context=True)[0]
          out = _module.get_outputs(merge_multi_context=True)
          #print(out[0].shape)
          #print(out[1].shape)
          arcface_module_outputs.append(out)
          _fc7 = out[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0
        total_eloss = []
        celoss_verbose = 1000
        if self._iter%celoss_verbose==0:
          fc7_celoss = self.get_ndarray(tmp_ctx, 'test_fc7_celoss', (self._batch_size,))
          fc7_celoss[:] = 0.0

        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          #print(fc7_outs[i].shape, onehot_label.shape)

          if self._iter%celoss_verbose==0:
            _ce_loss = fc7_outs[i] * onehot_label
            _ce_loss = nd.sum(_ce_loss, axis=1)
            fc7_celoss += _ce_loss.as_in_context(tmp_ctx)
          fc7_outs[i] -= onehot_label

          out = arcface_module_outputs[i]
          out_grads = [fc7_outs[i]]
          for j in range(1, len(out)):
              eloss = out[j]
              #print('eloss%d:'%j, eloss.shape)
              #print(out_grads[0].shape)
              #egrad_shape = (out_grads[0].shape[0], eloss.shape[0])
              egrad_shape = eloss.shape
              egrad = self.get_ndarray(fc7_outs[i].context, 'egrad%d'%j, egrad_shape)
              #egrad[:][:] = 1.0/egrad_shape[0]
              egrad[:] = 1.0
              out_grads.append(egrad)
              if self._iter%self._verbose==0:
                  total_eloss.append(np.mean(eloss.asnumpy()))

          _module.backward(out_grads = out_grads)
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad

        if self._iter%self._verbose==0 and len(total_eloss)>0:
          print('{eloss}', self._iter, np.mean(total_eloss))
        #if self._iter%self._verbose==0:
        if self._iter%celoss_verbose==0:
          ce_loss = nd.log(fc7_celoss) * -1.0
          ce_loss = nd.mean(ce_loss)
          print('CELOSS,%d,%f' % (self._iter, ce_loss.asscalar()))

        global_fc1_grad = local_fc1_grad
        self._curr_module.backward(out_grads = [global_fc1_grad])
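
The heart of this routine is a numerically stable softmax computed across class shards that live on different devices: subtract the global max, exponentiate, normalize by the global sum, and the cross-entropy gradient is simply softmax minus one-hot. A single-device sketch of the math (toy sizes, hypothetical variable names):

import mxnet.ndarray as nd

batch, classes_per_shard, num_shards = 2, 5, 2
shards = [nd.random.uniform(shape=(batch, classes_per_shard))
          for _ in range(num_shards)]
label = nd.array([1, 7])                 # global class ids in [0, 10)

# 1) global max over all shards for numerical stability
global_max = nd.max(nd.concat(*[nd.max(s, axis=1, keepdims=True)
                                for s in shards], dim=1),
                    axis=1, keepdims=True)
# 2) shifted exponentials and the global normalizer
exps = [nd.exp(nd.broadcast_sub(s, global_max)) for s in shards]
global_sum = nd.concat(*[nd.sum(e, axis=1, keepdims=True) for e in exps],
                       dim=1).sum(axis=1, keepdims=True)
# 3) per-shard softmax; an out-of-range label one-hots to all zeros,
#    so each shard subtracts only the classes it owns
for i, e in enumerate(exps):
    prob = nd.broadcast_div(e, global_sum)
    onehot = nd.one_hot(label - i * classes_per_shard, depth=classes_per_shard)
    grad = prob - onehot                 # fed into that shard's backward()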
Example #5
from mxnet import nd

def array_equal(a, b):
    return nd.equal(a, b).asnumpy().all()
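
For example:

print(array_equal(nd.array([1, 2]), nd.array([1, 2])))  # True
print(array_equal(nd.array([1, 2]), nd.array([1, 3])))  # False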
Example #6
    # Excerpt from a class; assumes module-level imports:
    # import mxnet as mx; from mxnet import ndarray as nd
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        for i, _module in enumerate(self._arcface_modules):
          _fc7 = _module.get_outputs(merge_multi_context=True)[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0

        loss = nd.zeros(shape=(self._batch_size), ctx=self._ctx_cpu)
        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          
          #for debug
          loss -= (mx.nd.sum(mx.nd.log(fc7_outs[i]) * onehot_label, axis=1)).as_in_context(self._ctx_cpu)
          fc7_outs[i] -= onehot_label
          _module.backward(out_grads = [fc7_outs[i]])
          print('for debug, fc7 outs max is ', i, mx.nd.max(fc7_outs[i]))
          print('for debug, fc7 outs min is ', i, mx.nd.min(fc7_outs[i]))
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad
          print('for debug, global fc1_grad max is ', i, mx.nd.max(ctx_fc1_grad))
          print('for debug, ctx fc1 grad shape, ', ctx_fc1_grad.shape)

        global_fc1_grad = local_fc1_grad
        #  global_fc1_grad = mx.nd.clip(local_fc1_grad, a_min=-15, a_max=15)
        print('for debug, after clip global fc1_grad max is ', mx.nd.max(global_fc1_grad))
        self._curr_module.backward(out_grads = [global_fc1_grad])
        # for debug
        return mx.nd.sum(loss)
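
The "for debug" loss accumulated above is ordinary cross-entropy read off the softmax outputs. A toy check of the formula:

import mxnet as mx

probs = mx.nd.array([[0.7, 0.2, 0.1]])            # softmax output for one sample
onehot = mx.nd.array([[1.0, 0.0, 0.0]])           # label 0
ce = -mx.nd.sum(mx.nd.log(probs) * onehot, axis=1)
print(ce.asscalar())                              # ~0.357 = -log(0.7)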
Example #7
    # Excerpt from a class; assumes module-level imports:
    # import math; import numpy as np; from mxnet import ndarray as nd
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        ## ============= forward classifier layer ===========
        fc7_outs = []
        for i, _module in enumerate(self._arcface_modules):
            _fc7 = _module.get_outputs(merge_multi_context=True)[0]
            fc7_outs.append(_fc7)

        ctx_max = map(
            lambda fc7_out: nd.max(fc7_out, axis=1, keepdims=True).
            as_in_context(self._ctx_single_gpu), fc7_outs)
        local_fc7_max = nd.max(nd.concat(*ctx_max, dim=1),
                               axis=1,
                               keepdims=True)
        fc7_exps = list(
            map(
                lambda fc7_out: nd.exp(fc7_out - local_fc7_max.as_in_context(
                    fc7_out.context)), fc7_outs))
        ctx_sum = map(
            lambda fc7_out: nd.sum(fc7_out, axis=1, keepdims=True).
            as_in_context(self._ctx_single_gpu), fc7_exps)
        exp_sum = nd.sum(nd.concat(*ctx_sum, dim=1), axis=1, keepdims=True)
        softmax_outs = list(
            map(
                lambda fc7_exp: nd.broadcast_div(
                    fc7_exp, exp_sum.as_in_context(fc7_exp.context)),
                fc7_exps))

        onehot_device_labels = [
            nd.one_hot((self.global_label).as_in_context(device) -
                       self._ctx_class_start[i],
                       depth=self._ctx_num_classes,
                       on_value=1.0,
                       off_value=0.0) for i, device in enumerate(self._context)
        ]

        ## ============= verbose train accuracy and loss ===========
        if self._iter % self._verbose == 0:
            local_label = self.global_label - self._local_class_start

            fc7_pred = self.parall_argmax(softmax_outs, self._ctx_single_gpu)
            _pred = nd.equal(fc7_pred, local_label).asnumpy()

            loss = self.parall_loss(softmax_outs, onehot_device_labels,
                                    self._ctx_single_gpu).asscalar()
            assert not math.isnan(loss)

            self.logger.info(
                '[Iter {}] train acc : {}, total loss : {}'.format(
                    self._iter, np.mean(_pred), loss))

        ## ============= backward large weight classifier layer with gradient ===========
        local_fc1_grad = self.get_ndarray_by_shape(
            self._ctx_single_gpu, 'local_fc1_grad',
            (self._batch_size, self._emb_size))
        local_fc1_grad[:, :] = 0.0
        for i, _module in enumerate(self._arcface_modules):
            _module.backward(
                out_grads=[softmax_outs[i] - onehot_device_labels[i]])
            ctx_fc1_grad = self.get_ndarray_by_v_arr(
                self._ctx_single_gpu, 'ctx_fc1_grad_%d' % i,
                _module.get_input_grads()[0])
            local_fc1_grad += ctx_fc1_grad

        ## ============= backward backbone ===============
        global_fc1_grad = local_fc1_grad
        self._backbone_module.backward(out_grads=[global_fc1_grad])
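
`parall_argmax` and `parall_loss` are helpers defined elsewhere in the class; the sketch below is my guess at what a cross-shard argmax looks like, not the repo's actual implementation:

import mxnet.ndarray as nd

def sharded_argmax(shard_outs, classes_per_shard, ctx):
    # best score and local index within each shard, gathered onto one device
    maxs = nd.concat(*[nd.max(s, axis=1, keepdims=True).as_in_context(ctx)
                       for s in shard_outs], dim=1)
    idxs = nd.concat(*[nd.argmax(s, axis=1).expand_dims(axis=1).as_in_context(ctx)
                       for s in shard_outs], dim=1)
    best_shard = nd.argmax(maxs, axis=1)               # which shard wins
    local_idx = nd.pick(idxs, best_shard, axis=1)      # winner's local class id
    return best_shard * classes_per_shard + local_idx  # back to a global class id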