Example 1
    def hybrid_forward(self, F, fts, ys, ftt, yt):
        """
        Semantic Alignment Loss
        :param F: Function
        :param fts: features for the source domain [M, K]
        :param ys: label for the source domain [M]
        :param ftt: features for the target domain [N, K]
        :param yt: label for the target domain [N]
        :return: per-target-sample hinge loss [N]
        """
        if self._fn:
            # Normalize ft
            fts = F.L2Normalization(fts, mode='instance')
            ftt = F.L2Normalization(ftt, mode='instance')

        fts_rpt = F.broadcast_to(fts.expand_dims(axis=0),
                                 shape=(self._bs_tgt, self._bs_src,
                                        self._embed_size))
        ftt_rpt = F.broadcast_to(ftt.expand_dims(axis=1),
                                 shape=(self._bs_tgt, self._bs_src,
                                        self._embed_size))

        dists = F.sum(F.square(ftt_rpt - fts_rpt), axis=2)

        yt_rpt = F.broadcast_to(yt.expand_dims(axis=1),
                                shape=(self._bs_tgt,
                                       self._bs_src)).astype('int32')
        ys_rpt = F.broadcast_to(ys.expand_dims(axis=0),
                                shape=(self._bs_tgt,
                                       self._bs_src)).astype('int32')

        y_same = F.equal(yt_rpt, ys_rpt).astype('float32')
        y_diff = F.not_equal(yt_rpt, ys_rpt).astype('float32')

        intra_cls_dists = dists * y_same
        inter_cls_dists = dists * y_diff

        max_dists = F.max(dists, axis=1, keepdims=True)
        max_dists = F.broadcast_to(max_dists,
                                   shape=(self._bs_tgt, self._bs_src))
        revised_inter_cls_dists = F.where(y_same, max_dists, inter_cls_dists)

        max_intra_cls_dist = F.max(intra_cls_dists, axis=1)
        min_inter_cls_dist = F.min(revised_inter_cls_dists, axis=1)

        loss = F.relu(max_intra_cls_dist - min_inter_cls_dist + self._margin)

        return loss
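
A minimal usage sketch for the loss above. The class name SemanticAlignmentLoss and its constructor arguments are assumptions chosen to match the _bs_src, _bs_tgt, _embed_size, _margin and _fn attributes read in hybrid_forward; only the call pattern is illustrated.

from mxnet import nd

M, N, K = 8, 8, 64                        # source batch, target batch, embedding size
fts = nd.random.normal(shape=(M, K))      # source-domain features
ftt = nd.random.normal(shape=(N, K))      # target-domain features
ys = nd.array([0, 1, 2, 3, 0, 1, 2, 3])   # source labels
yt = nd.array([0, 0, 1, 1, 2, 2, 3, 3])   # target labels

# Hypothetical constructor matching the attributes used above.
loss_fn = SemanticAlignmentLoss(bs_src=M, bs_tgt=N, embed_size=K, margin=1.0, fn=True)
loss = loss_fn(fts, ys, ftt, yt)          # one hinge value per target sample, shape (N,)
print(loss.shape)
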
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        data = in_data[0]
        rois = in_data[1]
        BS, C, H, W = data.shape
        N = rois.shape[0]
        dout = out_grad[0]
        ddata = nd.zeros_like(data)

        rois = rois.asnumpy()
        for i in range(N):
            roi = rois[i]
            batch_id = roi[0].astype(np.int64)
            x1, y1, x2, y2 = roi[1:] * self.spatial_scale
            x1, y1, x2, y2 = np.floor(x1), np.floor(y1), np.ceil(x2), np.ceil(
                y2)
            x1, y1, x2, y2 = np.clip(x1, 0, W), np.clip(y1, 0, H), np.clip(
                x2, 0, W), np.clip(y2, 0, H)
            x1, y1, x2, y2 = x1.astype(np.int64), y1.astype(
                np.int64), x2.astype(np.int64), y2.astype(np.int64)
            if x1 >= x2 or y1 >= y2:
                continue
            h = y2 - y1
            w = x2 - x1
            # (C, h, w)
            roi_data = data[batch_id, :, y1:y2, x1:x2]
            # (h*w, C)
            roi_data = roi_data.reshape((C, -1)).transpose((1, 0))
            # (h*w, C, 1)
            roi_data = roi_data.reshape((0, 0, 1))
            # (h*w, C, C)
            out_product = nd.batch_dot(roi_data, roi_data.transpose((0, 2, 1)))
            # (C, C)
            if self.type == "max":
                reduce_product = nd.max(out_product, axis=0)
                max_mask = out_product == reduce_product
                # max_index = nd.argmax(out_product, axis=0)
                # max_index = max_index.reshape((C * C))
                # d_max = nd.eye(h*w)[max_index].transpose((1, 0)).reshape((h*w, C, C))
                dout_product = nd.stack(*[dout[i]
                                          for _ in range(h * w)]) * max_mask
            elif self.type == "mean":
                dout_product = nd.stack(*[dout[i]
                                          for _ in range(h * w)]) / (h * w)
            else:
                raise NotImplementedError()

            droi_data = []
            for j in range(C):
                droi_data.append(
                    nd.sum(dout_product[:, j, :] * roi_data[:, :, 0], axis=1) +
                    nd.sum(dout_product[:, :, j] * roi_data[:, :, 0], axis=1))
            droi_data = nd.stack(*droi_data, axis=1)  # (hw, C)
            droi_data = droi_data.transpose((1, 0)).reshape((C, h, w))
            ddata[batch_id, :, y1:y2, x1:x2] = droi_data
        self.assign(in_grad[0], req[0], ddata)
        self.assign(in_grad[1], req[1], nd.zeros_like(in_data[1]))
Example 3
    def hybrid_forward(self, F, x):
        x = self.first_conv(x)
        x = self.feature(x)
        x = self.conv_last(x)
        x = self.globalpool(x)
        x = self.LastSE(x)
        x = x.reshape(-1, 1280)

        x = self.fc(x)
        x = self.dropout(x)
        x = self.classifier(x)
        x = F.max(x)
        return x
def rgb_to_lab(image_srgb, ctx=None):

    if ctx is None:
        raise ValueError("ctx can not be None")

    if image_srgb is None:
        raise ValueError("image_srgb can not be None")

    with mx.Context(ctx):

        srgb = __check_image(image_srgb)

        if nd.max(srgb).asscalar() > 1:
            srgb = __normalize_rgb_image(srgb)

        srgb_pixels = nd.reshape(srgb, [-1, 3])

        linear_mask = nd.cast(srgb_pixels <= 0.04045, dtype='float32')
        exponential_mask = nd.cast(srgb_pixels > 0.04045, dtype='float32')
        rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask
        rgb_to_xyz = nd.array([
            #    X        Y          Z
            [0.412453, 0.212671, 0.019334],  # R
            [0.357580, 0.715160, 0.119193],  # G
            [0.180423, 0.072169, 0.950227],  # B
        ])
        xyz_pixels = nd.linalg_gemm2(rgb_pixels, rgb_to_xyz)

        # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
        # convert to fx = f(X/Xn), fy = f(Y/Yn), fz = f(Z/Zn)
        # normalize for D65 white point
        xyz_normalized_pixels = nd.multiply(xyz_pixels, nd.array([1 / 0.950456, 1.0, 1 / 1.088754]))

        epsilon = 6 / 29
        linear_mask = nd.cast(xyz_normalized_pixels <= (epsilon ** 3), dtype='float32')
        exponential_mask = nd.cast(xyz_normalized_pixels > (epsilon ** 3), dtype='float32')
        fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon ** 2) + 4 / 29) * linear_mask + \
                        (xyz_normalized_pixels ** (1 / 3)) * exponential_mask

        # convert to lab
        fxfyfz_to_lab = nd.array([
            #  l       a       b
            [0.0, 500.0, 0.0],  # fx
            [116.0, -500.0, 200.0],  # fy
            [0.0, 0.0, -200.0],  # fz
        ])
        lab_pixels = nd.linalg_gemm2(fxfyfz_pixels, fxfyfz_to_lab) + nd.array([-16.0, 0.0, 0.0])

        return nd.reshape(lab_pixels, srgb.shape)
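
A usage sketch for rgb_to_lab, assuming mxnet is imported as mx/nd and the private __check_image / __normalize_rgb_image helpers exist in the same module: convert a random H x W x 3 sRGB image to CIELAB on the CPU.

import mxnet as mx
from mxnet import nd

image = nd.random.uniform(low=0.0, high=1.0, shape=(64, 64, 3))  # sRGB values in [0, 1]
lab = rgb_to_lab(image, ctx=mx.cpu())
print(lab.shape)  # same shape as the input, channels now L, a, b
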
    def forward(self, is_train=False):
        """Run forward on the current executor."""
        #self.curr_execgrp.forward(is_train=is_train)

        self.get_each_gpu_label()

        # l2-norm forward
        self.weight_norm = nd.L2Normalization(self.weight, mode='instance')

        # fc forward
        no_bias = True
        if no_bias:
            nd.FullyConnected(data=self.data_batch,
                              weight=self.weight_norm,
                              no_bias=True,
                              num_hidden=self.classes,
                              out=self.fc_output)
        else:
            nd.FullyConnected(data=self.data_batch,
                              weight=self.weight_norm,
                              bias=self.bias,
                              num_hidden=self.classes,
                              out=self.fc_output)
        # margin forward
        self.get_each_gpu_label()
        if self.data_of_cur_gpu.size > 0:
            margin_temp = self.fc_output[self.data_of_cur_gpu,
                                         self.label_of_cur_gpu]
            self.pick_fc_of_cur_gpu = margin_temp.copy()
            tem_data = self.margin_loss(self.pick_fc_of_cur_gpu)
            self.fc_output[self.data_of_cur_gpu,
                           self.label_of_cur_gpu] = tem_data[:]
        else:
            self.pick_fc_of_cur_gpu = None

        # softmax forward
        # first allreduce sum
        sum_fc = nd.sum(nd.exp(self.fc_output), axis=1)
        sum_fc = self.allreduce('global_sum_fc', sum_fc)
        assert len(sum_fc) > 0, "rank:{}, sum_fc".format(self.rank)
        self.global_sum_fc[:] = sum_fc[:]
        # second allreduce max
        max_fc = nd.max(self.fc_output, axis=1)
        max_fc = self.allreduce('global_max_fc',
                                max_fc,
                                op=perseus.PerseusOp.Max)
        assert len(max_fc) > 0, "rank:{}, max_fc".format(self.rank)
        self.global_max_fc[:] = max_fc[:]
Example 6
    def _forward_alg(self, feats, lens_):

        batch_size = feats.shape[0]
        tagset_size = feats.shape[2]
        length = feats.shape[1]

        init_alphas = nd.full((self.tagset_size, ), -10000.)
        init_alphas[self.tag_dictionary.get_idx_for_item(START_TAG)] = 0.

        forward_var_list = [init_alphas.tile((feats.shape[0], 1))]
        transitions = self.transitions.data().expand_dims(0).tile(
            (feats.shape[0], 1, 1))

        for i in range(feats.shape[1]):
            emit_score = feats[:, i, :]

            tag_var = \
                emit_score.expand_dims(2).tile((1, 1, transitions.shape[2])) + \
                transitions + \
                forward_var_list[i].expand_dims(2).tile((1, 1, transitions.shape[2])).transpose([0, 2, 1])

            max_tag_var = nd.max(tag_var, axis=2)

            new_tag_var = tag_var - max_tag_var.expand_dims(2).tile(
                (1, 1, transitions.shape[2]))

            agg_ = nd.log(nd.sum(nd.exp(new_tag_var), axis=2))

            forward_var_list.append(max_tag_var + agg_)  # shape (batch_size, tagset_size)

            # cloned = forward_var.clone()
            # forward_var[:, i + 1, :] = max_tag_var + agg_

            # forward_var = cloned

        forward_var = nd.stack(*forward_var_list)[
            lens_,
            nd.array(list(range(feats.shape[0])), dtype='int32'), :]

        terminal_var = forward_var + \
                       self.transitions.data()[self.tag_dictionary.get_idx_for_item(STOP_TAG)].expand_dims(0).tile((
                           forward_var.shape[0], 1))

        alpha = log_sum_exp_batch(terminal_var)

        return alpha
def make_grid(image_tensor, rows):
    cols = image_tensor.shape[0] // rows
    if image_tensor.ndim == 2:
        image_tensor = image_tensor.reshape(-1, 1, 28, 28)
    if image_tensor.ndim != 4:
        raise ValueError(f"Image tensor has wrong dimension. Expected 4, actual {image_tensor.ndim}")
    n, c, h, w = image_tensor.shape
    image_tensor = (image_tensor + 1) / 2
    assert nd.max(image_tensor) <= 1
    assert nd.min(image_tensor) >= 0
    grid = image_tensor.reshape(rows, cols, c, h, w)
    grid = grid.transpose(axes=(0, 3, 1, 4, 2))
    grid = grid.reshape(rows * h, cols * w, c).asnumpy()
    if grid.ndim == 3 and grid.shape[2] == 1:
        grid = grid.squeeze()
    return grid
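
A usage sketch for make_grid: tile a batch of images in [-1, 1] (for example GAN samples) into a single 2-D array suitable for plotting.

from mxnet import nd

fake_images = nd.random.uniform(low=-1.0, high=1.0, shape=(16, 1, 28, 28))
grid = make_grid(fake_images, rows=4)  # numpy array, squeezed to (4*28, 4*28) for single-channel input
print(grid.shape)
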
def bilinear_roi_pooling(data, rois, spatial_scale, type="max"):
    """

    :param data: (BS, C, H, W)
    :param rois: (N, 5)
    :param spatial_scale: float
    :param type:
    :return:
    """
    assert isinstance(spatial_scale, float)
    BS, C, H, W = data.shape
    N = rois.shape[0]
    out_data = []
    rois = rois.asnumpy()
    for i in range(N):
        roi = rois[i]
        batch_id = roi[0].astype(np.int64)
        x1, y1, x2, y2 = roi[1:] * spatial_scale
        x1, y1, x2, y2 = np.floor(x1), np.floor(y1), np.ceil(x2), np.ceil(y2)
        x1, y1, x2, y2 = np.clip(x1, 0,
                                 W), np.clip(y1, 0,
                                             H), np.clip(x2, 0,
                                                         W), np.clip(y2, 0, H)
        x1, y1, x2, y2 = x1.astype(np.int64), y1.astype(np.int64), x2.astype(
            np.int64), y2.astype(np.int64)
        if x1 >= x2 or y1 >= y2:
            out_data.append(
                nd.zeros((C, C), ctx=data.context, dtype=data.dtype))
            continue
        # (C, h, w)
        roi_data = data[batch_id, :, y1:y2, x1:x2]
        # (h*w, C)
        roi_data = roi_data.reshape((C, -1)).transpose((1, 0))
        # (h*w, C, 1)
        roi_data = roi_data.reshape((0, 0, 1))
        # (h*w, C, C)
        out_product = nd.batch_dot(roi_data, roi_data.transpose((0, 2, 1)))
        # (C, C)
        if type == "max":
            reduce_product = nd.max(out_product, axis=0)
        elif type == "mean":
            reduce_product = nd.mean(out_product, axis=0)
        else:
            raise NotImplementedError()
        out_data.append(reduce_product)
    out_data = nd.stack(*out_data)
    return out_data
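
A usage sketch for bilinear_roi_pooling with dummy inputs; each RoI row is (batch_id, x1, y1, x2, y2) in image coordinates and is mapped onto the feature map by spatial_scale.

from mxnet import nd

data = nd.random.normal(shape=(2, 64, 32, 32))  # (BS, C, H, W) feature map
rois = nd.array([[0, 0, 0, 64, 64],             # box on image 0
                 [1, 16, 16, 128, 128]])        # box on image 1
pooled = bilinear_roi_pooling(data, rois, spatial_scale=1.0 / 16, type="max")
print(pooled.shape)                             # (2, 64, 64): one C x C matrix per RoI
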
Example 9
    def update(self, data, batch_size, episode_num, discount_factor):

        with autograd.record():
            observations = nd.zeros((batch_size, 1, 128, 128))
            actions = nd.zeros(batch_size)
            rewards = nd.zeros_like(actions)
            next_obs = nd.zeros_like(observations)
            dones = nd.zeros_like(actions)

            for i in range(batch_size):
                observations[i] = data[i].obs
                actions[i] = data[i].action
                rewards[i] = data[i].reward
                next_obs[i] = data[i].next_obs
                dones[i] = data[i].done

            actions = actions.reshape((-1, 1))
            rewards = rewards.reshape((-1, 1))
            dones = dones.reshape((-1, 1))

            print('observations:', observations.shape)
            print('actions:', actions.shape)
            print('rewards:', rewards.shape)
            print('next observations:', next_obs.shape)
            print('dones:', dones.shape)

            not_dones = nd.array(np.logical_not(dones.asnumpy()).astype('int8'))

            with autograd.predict_mode():
                # (batch,) -> (batch, 1) so it lines up with rewards and not_dones
                next_max_action_values = nd.max(self.model(next_obs), 1).reshape((-1, 1))
            target = rewards + discount_factor * next_max_action_values * not_dones
            del next_max_action_values

            obs_values = self.model(observations)

            obs_actions_values = nd.zeros_like(actions)
            for i in range(len(obs_actions_values)):
                obs_actions_values[i] = obs_values[i][actions[i]]
            del obs_values

            loss = self.loss(obs_actions_values, target)
        loss.backward()
        self.trainer.step(batch_size, True)

        return loss
Example 10
 def collect(self, name, arr):
     """Callback function for collecting min and max values from an NDArray."""
     name = py_str(name)
     if self.include_layer is not None and not self.include_layer(name):
         return
     handle = ctypes.cast(arr, NDArrayHandle)
     arr = NDArray(handle, writable=False)
     min_range = ndarray.min(arr).asscalar()
     max_range = ndarray.max(arr).asscalar()
     if name in self.min_max_dict:
         cur_min_max = self.min_max_dict[name]
         self.min_max_dict[name] = (min(cur_min_max[0], min_range),
                                    max(cur_min_max[1], max_range))
     else:
         self.min_max_dict[name] = (min_range, max_range)
     if self.logger is not None:
         self.logger.info("Collecting layer %s min_range=%f, max_range=%f" %
                          (name, min_range, max_range))
Example 11
    def _predict_scores_batch(self, sentences: List[Sentence]):
        all_feats, tags, lengths = self.forward(sentences)

        overall_score = 0
        all_tags_seqs = []

        for feats in all_feats:
            # viterbi to get tag_seq
            if self.use_crf:
                score, tag_seq = self.viterbi_decode(feats)
            else:
                score = nd.max(feats, 1)
                tag_seq = nd.argmax(feats, 1)
                tag_seq = list(tag_seq.asnumpy())

            # overall_score += score
            all_tags_seqs.extend(tag_seq)

        return overall_score, all_tags_seqs
Example 12
def _quantize_params(qsym, params, th_dict):
    """Given a quantized symbol and a dict of params that have not been quantized,
    generate quantized params. Currently only supports quantizing the arg_params
    with names of `weight` or `bias`, not aux_params. If `qsym` contains symbols
    that are excluded from being quantized, their corresponding params will
    not be quantized, but saved together with quantized params of the symbols that
    have been quantized.
    
    Parameters
    ----------
    qsym : Symbol
        Quantized symbol from FP32 symbol.
    params : dict of str->NDArray
    th_dict: dict of min/max pairs of layers' output
    """
    inputs_name = qsym.list_arguments()
    quantized_params = {}
    for name in inputs_name:
        if name.endswith(('weight_quantize', 'bias_quantize')):
            original_name = name[:-len('_quantize')]
            param = params[original_name]
            # pylint: disable=unbalanced-tuple-unpacking
            val, vmin, vmax = ndarray.contrib.quantize(
                data=param,
                min_range=ndarray.min(param),
                max_range=ndarray.max(param),
                out_type='int8')
            quantized_params[name] = val
            quantized_params[name + '_min'] = vmin
            quantized_params[name + '_max'] = vmax
        elif name in params:
            quantized_params[name] = params[name]
        elif name.endswith(('_min')):
            output = name[:-len('_min')]
            if output in th_dict:
                quantized_params[name] = ndarray.array([th_dict[output][0]])
        elif name.endswith(('_max')):
            output = name[:-len('_max')]
            if output in th_dict:
                quantized_params[name] = ndarray.array([th_dict[output][1]])
    return quantized_params
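
For reference, a standalone sketch of the ndarray.contrib.quantize call used above: it maps one FP32 tensor to int8 and returns the float min/max range that is stored alongside it for later dequantization.

from mxnet import ndarray

param = ndarray.random.normal(shape=(4, 4))
qval, qmin, qmax = ndarray.contrib.quantize(data=param,
                                            min_range=ndarray.min(param),
                                            max_range=ndarray.max(param),
                                            out_type='int8')
print(qval.dtype, qmin.asscalar(), qmax.asscalar())
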
 def forward(self, is_train, req, in_data, out_data, aux):
     data = in_data[0]
     rois = in_data[1]
     BS, C, H, W = data.shape
     N = rois.shape[0]
     out = []
     rois = rois.asnumpy()
     for i in range(N):
         roi = rois[i]
         batch_id = roi[0].astype(np.int64)
         x1, y1, x2, y2 = roi[1:] * self.spatial_scale
         x1, y1, x2, y2 = np.floor(x1), np.floor(y1), np.ceil(x2), np.ceil(
             y2)
         x1, y1, x2, y2 = np.clip(x1, 0, W), np.clip(y1, 0, H), np.clip(
             x2, 0, W), np.clip(y2, 0, H)
         x1, y1, x2, y2 = x1.astype(np.int64), y1.astype(
             np.int64), x2.astype(np.int64), y2.astype(np.int64)
         if x1 >= x2 or y1 >= y2:
             out.append(nd.zeros((C, C), ctx=data.context,
                                 dtype=data.dtype))
             continue
         # (C, h, w)
         roi_data = data[batch_id, :, y1:y2, x1:x2]
         # (h*w, C)
         roi_data = roi_data.reshape((C, -1)).transpose((1, 0))
         # (h*w, C, 1)
         roi_data = roi_data.reshape((0, 0, 1))
         # (h*w, C, C)
         out_product = nd.batch_dot(roi_data, roi_data.transpose((0, 2, 1)))
         if self.type == "max":
             reduce_product = nd.max(out_product, axis=0)
         elif self.type == "mean":
             reduce_product = nd.mean(out_product, axis=0)
         else:
             raise NotImplementedError()
         out.append(reduce_product)
     out = nd.stack(*out)
     self.assign(out_data[0], req[0], out)
Example 14
    def predict_multi(self, imgs):
        loader = DataLoader(imgs.as_in_context(self.ctx),
                            self.batch_size,
                            last_batch='keep')
        max_sims = []
        labels = []
        features = []
        cls_center = nd.L2Normalization(self.cls_center)
        for data in loader:

            data_batch = mx.io.DataBatch(data=(data, ),
                                         pad=self.batch_size - data.shape[0])
            self.model.forward(data_batch, is_train=False)
            embeddings = self.model.get_outputs()[0]
            features.append(embeddings)
            embeddings = nd.L2Normalization(embeddings, mode='instance')

            if self.cls_center is not None:
                temp1 = embeddings.expand_dims(axis=1)
                temp2 = cls_center.expand_dims(axis=0)
                dis_mat = nd.sum(temp1 * temp2, axis=2)
                max_sim = nd.max(dis_mat, axis=1)
                label = nd.argmax(dis_mat, axis=1)

                labels += list(label.asnumpy())
                max_sims += list(max_sim.asnumpy())
            else:
                label = None

        features = nd.concatenate(features, axis=0)
        if self.label_map is not None:
            labels = [self.label_map[int(x)] for x in labels]

        return (max_sims, labels), features
Example 15
    def backward_sample(self, total_feature, label):
        this_rank_classes = int(self.memory_bank.num_sample)
        local_index, unique_sorted_global_label = self.memory_bank.sample(
            label)

        # Get local index
        _mapping_dict = {}
        local_sampled_class = local_index + self.rank * self.memory_bank.num_local
        global_label_set = set(unique_sorted_global_label)
        for idx, absolute_label in enumerate(local_sampled_class):
            if absolute_label in global_label_set:
                _mapping_dict[
                    absolute_label] = idx + self.rank * self.memory_bank.num_sample

        label_list = list(label.asnumpy())
        mapping_label = []
        for i in range(len(label_list)):
            absolute_label = label_list[i]
            if absolute_label in _mapping_dict.keys():
                mapping_label.append(_mapping_dict[absolute_label])
            else:
                mapping_label.append(-1)

        mapping_label = nd.array(mapping_label, dtype=np.int32)

        # Get weight
        local_index = nd.array(local_index)
        local_index = self.get_ndarray2(self.gpu, "local_index", local_index)
        sample_weight, sample_weight_mom = self.memory_bank.get(local_index)

        # Sync to gpu
        if self.memory_bank.gpu:
            _data = self.get_ndarray2(self.gpu, "data_%d" % self.rank,
                                      total_feature)
            _weight = self.get_ndarray2(self.gpu, 'weight_%d' % self.rank,
                                        sample_weight)
            _weight_mom = self.get_ndarray2(self.gpu,
                                            'weight_mom_%d' % self.rank,
                                            sample_weight_mom)
        else:
            _data = self.get_ndarray2(self.gpu, "data_%d" % self.rank,
                                      total_feature)
            _weight = self.get_ndarray2(self.gpu, 'weight_%d' % self.rank,
                                        sample_weight)
            _weight_mom = self.get_ndarray2(self.gpu,
                                            'weight_mom_%d' % self.rank,
                                            sample_weight_mom)

        # Attach grad
        _data.attach_grad()
        _weight.attach_grad()

        # Convert label
        _label = self.get_ndarray2(self.gpu, 'mapping_label_%d' % self.rank,
                                   mapping_label)
        _label = _label - int(self.rank * self.memory_bank.num_sample)
        _fc7, _one_hot = self.fc7_model.forward(_data,
                                                _weight,
                                                mapping_label=_label,
                                                depth=this_rank_classes)

        # Sync max
        max_fc7 = nd.max(_fc7, axis=1, keepdims=True)
        max_fc7 = nd.reshape(max_fc7, -1)

        total_max_fc7 = self.get_ndarray(context=self.gpu,
                                         name='total_max_fc7',
                                         shape=(max_fc7.shape[0], self.size),
                                         dtype='float32')
        total_max_fc7[:] = 0
        total_max_fc7[:, self.rank] = max_fc7
        hvd.allreduce_(total_max_fc7, average=False)

        global_max_fc7 = self.get_ndarray(context=self.gpu,
                                          name='global_max_fc7',
                                          shape=(max_fc7.shape[0], 1),
                                          dtype='float32')
        nd.max(total_max_fc7, axis=1, keepdims=True, out=global_max_fc7)

        # Calculate exp(logits)
        _fc7_grad = nd.broadcast_sub(_fc7, global_max_fc7)
        _fc7_grad = nd.exp(_fc7_grad)

        # Calculate sum
        sum_fc7 = nd.sum(_fc7_grad, axis=1, keepdims=True)
        global_sum_fc7 = hvd.allreduce(sum_fc7, average=False)

        # Calculate grad
        _fc7_grad = nd.broadcast_div(_fc7_grad, global_sum_fc7)

        # Calculate loss
        tmp = _fc7_grad * _one_hot
        tmp = nd.sum(tmp, axis=1, keepdims=True)
        tmp = self.get_ndarray2(self.gpu, 'ctx_loss', tmp)
        tmp = hvd.allreduce(tmp, average=False)
        global_loss = -nd.mean(nd.log(tmp + 1e-30))

        _fc7_grad = _fc7_grad - _one_hot

        # Backward
        _fc7.backward(out_grad=_fc7_grad)

        # Update center
        _weight_grad = _weight.grad
        self.memory_optimizer.update(weight=_weight,
                                     grad=_weight_grad,
                                     state=_weight_mom,
                                     learning_rate=self.memory_lr)
        if self.memory_bank.gpu:
            self.memory_bank.set(index=local_index,
                                 updated_weight=_weight,
                                 updated_weight_mom=_weight_mom)
        else:
            self.memory_bank.set(index=local_index,
                                 updated_weight=self.get_ndarray2(
                                     mx.cpu(), "cpu_weight_%d" % self.rank,
                                     _weight),
                                 updated_weight_mom=self.get_ndarray2(
                                     mx.cpu(), "cpu_weight_mom_%d" % self.rank,
                                     _weight_mom))
        return _data.grad, global_loss
Example 16
def log_sum_exp_batch(vecs):
    maxi = nd.max(vecs, 1)
    maxi_bc = maxi.expand_dims(1).tile((1, vecs.shape[1]))
    recti_ = nd.log(nd.sum(nd.exp(vecs - maxi_bc), 1))
    return maxi + recti_
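
A quick numeric check of log_sum_exp_batch against the direct, non-stabilized computation on a small batch.

from mxnet import nd

vecs = nd.array([[1.0, 2.0, 3.0],
                 [10.0, 10.0, 10.0]])
print(log_sum_exp_batch(vecs))               # stabilized log-sum-exp per row
print(nd.log(nd.sum(nd.exp(vecs), axis=1)))  # same values computed directly
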
Example 17
def test_max():
    print("test max")
    tmp_dir = DIR + "max/"

    os.makedirs(tmp_dir + "0/", exist_ok=True)
    shape = np.random.randint(low=2, high=10, size=(4))
    print(shape)
    a = np.random.randint(low=-127, high=127, size=shape)
    np.save(tmp_dir + "0/in_0.npy", a.astype("int32"))
    params = {'axis': [1, 3]}
    save_dict(params, tmp_dir + "0/attr.txt")
    b = nd.max(nd.array(a), **params)
    np.save(tmp_dir + "0/out_0.npy", b.asnumpy().astype("int32"))
    #    print(b.asnumpy().astype("int32").flatten())

    os.makedirs(tmp_dir + "1/", exist_ok=True)
    shape = np.random.randint(low=2, high=10, size=(4))
    print(shape)
    a = np.random.randint(low=-127, high=127, size=shape)
    np.save(tmp_dir + "1/in_0.npy", a.astype("int32"))
    params = {}
    save_dict(params, tmp_dir + "1/attr.txt")
    b = nd.max(nd.array(a), **params)
    np.save(tmp_dir + "1/out_0.npy", b.asnumpy().astype("int32"))
    #    print(b.asnumpy().astype("int32").flatten())

    os.makedirs(tmp_dir + "2/", exist_ok=True)
    shape = np.random.randint(low=2, high=10, size=(4))
    print(shape)
    a = np.random.randint(low=-127, high=127, size=shape)
    np.save(tmp_dir + "2/in_0.npy", a.astype("int32"))
    params = {'axis': [0]}
    save_dict(params, tmp_dir + "2/attr.txt")
    b = nd.max(nd.array(a), **params)
    np.save(tmp_dir + "2/out_0.npy", b.asnumpy().astype("int32"))
    #    print(b.asnumpy().astype("int32").flatten())

    os.makedirs(tmp_dir + "3/", exist_ok=True)
    shape = np.random.randint(low=2, high=10, size=(4))
    print(shape)
    a = np.random.randint(low=-127, high=127, size=shape)
    np.save(tmp_dir + "3/in_0.npy", a.astype("int32"))
    params = {'axis': [2]}
    save_dict(params, tmp_dir + "3/attr.txt")
    b = nd.max(nd.array(a), **params)
    np.save(tmp_dir + "3/out_0.npy", b.asnumpy().astype("int32"))
    #    print(b.asnumpy().astype("int32").flatten())

    os.makedirs(tmp_dir + "4/", exist_ok=True)
    shape = np.random.randint(low=2, high=10, size=(4))
    print(shape)
    a = np.random.randint(low=-127, high=127, size=shape)
    np.save(tmp_dir + "4/in_0.npy", a.astype("int32"))
    params = {'axis': [3]}
    save_dict(params, tmp_dir + "4/attr.txt")
    b = nd.max(nd.array(a), **params)
    np.save(tmp_dir + "4/out_0.npy", b.asnumpy().astype("int32"))
    #    print(b.asnumpy().astype("int32").flatten())

    os.makedirs(tmp_dir + "5/", exist_ok=True)
    shape = np.random.randint(low=2, high=10, size=(4))
    print(shape)
    a = np.random.randint(low=-127, high=127, size=shape)
    np.save(tmp_dir + "5/in_0.npy", a.astype("int32"))
    params = {'axis': [1, 2, 3]}
    save_dict(params, tmp_dir + "5/attr.txt")
    b = nd.max(nd.array(a), **params)
    np.save(tmp_dir + "5/out_0.npy", b.asnumpy().astype("int32"))
    #   print(b.asnumpy().astype("int32").flatten())

    os.makedirs(tmp_dir + "6/", exist_ok=True)
    shape = np.random.randint(low=2, high=10, size=(4))
    print(shape)
    a = np.random.randint(low=-127, high=127, size=shape)
    np.save(tmp_dir + "6/in_0.npy", a.astype("int32"))
    params = {'axis': [0, 1, 2, 3]}
    save_dict(params, tmp_dir + "6/attr.txt")
    b = nd.max(nd.array(a), **params)
    np.save(tmp_dir + "6/out_0.npy", b.asnumpy().astype("int32"))
Example 18
    def backward(self, total_feature, label):
        memory_bank = self.memory_bank
        assert memory_bank.num_local == memory_bank.num_sample, "pass"

        _data = self.get_ndarray2(self.gpu, "data_%d" % self.rank,
                                  total_feature)
        # Attach grad
        _data.attach_grad()
        memory_bank.weight.attach_grad()

        # Convert label
        _label = self.get_ndarray2(self.gpu, 'label_%d' % self.rank, label)
        _label = _label - int(self.rank * memory_bank.num_local)
        _fc7, _one_hot = self.fc7_model.forward(_data,
                                                memory_bank.weight,
                                                mapping_label=_label,
                                                depth=memory_bank.num_local)

        # Sync max
        max_fc7 = nd.max(_fc7, axis=1, keepdims=True)
        max_fc7 = nd.reshape(max_fc7, -1)

        total_max_fc7 = self.get_ndarray(context=self.gpu,
                                         name='total_max_fc7',
                                         shape=(max_fc7.shape[0], self.size),
                                         dtype='float32')
        total_max_fc7[:] = 0
        total_max_fc7[:, self.rank] = max_fc7
        hvd.allreduce_(total_max_fc7, average=False)

        global_max_fc7 = self.get_ndarray(context=self.gpu,
                                          name='global_max_fc7',
                                          shape=(max_fc7.shape[0], 1),
                                          dtype='float32')
        nd.max(total_max_fc7, axis=1, keepdims=True, out=global_max_fc7)

        # Calculate exp(logits)
        _fc7_grad = nd.broadcast_sub(_fc7, global_max_fc7)
        _fc7_grad = nd.exp(_fc7_grad)

        # Calculate sum
        sum_fc7 = nd.sum(_fc7_grad, axis=1, keepdims=True)
        global_sum_fc7 = hvd.allreduce(sum_fc7, average=False)

        # Calculate prob
        _fc7_grad = nd.broadcast_div(_fc7_grad, global_sum_fc7)

        # Calculate loss
        tmp = _fc7_grad * _one_hot
        tmp = nd.sum(tmp, axis=1, keepdims=True)
        tmp = self.get_ndarray2(self.gpu, 'ctx_loss', tmp)
        tmp = hvd.allreduce(tmp, average=False)
        global_loss = -nd.mean(nd.log(tmp + 1e-30))

        # Calculate fc7 grad
        _fc7_grad = _fc7_grad - _one_hot

        # Backward
        _fc7.backward(out_grad=_fc7_grad)

        # Update center
        _weight_grad = memory_bank.weight.grad
        self.memory_optimizer.update(weight=memory_bank.weight,
                                     grad=_weight_grad,
                                     state=memory_bank.weight_mom,
                                     learning_rate=self.memory_lr)

        return _data.grad, global_loss
Example 19
    def forward(self, inputs, loss=None, training=True, commtype='average'):
        assert len(inputs) == self.slots + 1

        if self.non_local_mode:
            return self.forward_non_local(inputs, loss, training)
        if self.message_embedding:
            return self.forward_message_embedding(inputs, loss, training)

        local_drop_vec = nd.ones_like(inputs[0])
        local_drop_vec = self.local_dropout_op(local_drop_vec)
        for i in range(self.slots):
            inputs[i] = inputs[i] * local_drop_vec
        inputs[-1] = self.global_dropout_op(inputs[-1])

        # local_share_vec = []
        # local_private_vec = []
        # if self.concrete_share_rate:
        #     raise ValueError('no share_private!!!')
        #     for i in range(self.slots):
        #         proba = nd.sigmoid(data=self.share_rate[i].data())
        #         proba = nd.broadcast_axis(data=proba, axis=(0, 1), size=inputs[0].shape)
        #         u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=inputs[0].shape, ctx=CTX)
        #         local_share_vec.append(nd.sigmoid(10. * (
        #             nd.log(proba) - nd.log(1. - proba) +
        #             nd.log(u_vec) - nd.log(1. - u_vec)
        #         )))
        #         local_private_vec.append(1. - local_share_vec[i])
        #         # print 'proba:', proba
        #         # print 'dropout_regularizer:', self.dropout_regularizer
        #         if loss is not None:
        #             loss.append(
        #                 self.dropout_regularizer * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))
        #     if random.random() < 0.01:
        #         for i in range(self.slots):
        #             proba = nd.sigmoid(data=self.share_rate[i].data())
        #             print proba.asnumpy(),
        #         print ''
        # else:
        #     local_share_vec = [nd.ones_like(inputs[0]), ] * self.slots
        #     local_private_vec = [nd.zeros_like(inputs[0]), ] * self.slots
        # local_share_vec = (1. - self.private_rate) * nd.Dropout(
        #     nd.ones(shape=(inputs[0].shape[0], self.local_units)), p=self.private_rate, mode='always')
        # local_private_vec = 1. - local_share_vec

        comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        if self.use_comm and self.topo_learning_mode:
            proba = nd.sigmoid(self.topo.data())

            if random.random() < 1e-2:
                print('---------------------------------------------')
                print(proba.asnumpy())
                print('---------------------------------------------')

            u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
            comm_rate = nd.sigmoid(10. * (
                nd.log(proba) - nd.log(1. - proba) +
                nd.log(u_vec) - nd.log(1. - u_vec)
            ))
            if loss is not None:
                loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

        results = []
        for i in range(self.slots):
            results.append(self.local_share_trans.forward(inputs[i], training=training))
        results.append(self.global_trans.forward(inputs[-1], training=training))

        if self.use_comm:
            if self.topo_learning_mode:
                assert self.concrete_share_rate is False
                for i in range(self.slots):
                    tmp = nd.zeros_like(results[i])
                    norm = nd.zeros_like(comm_rate[0][0])
                    for j in range(self.slots):
                        if i != j:
                            tmp = tmp + self.local2local_share_comm(inputs[j], training=training) * comm_rate[j][i]
                            norm = norm + comm_rate[j][i]
                    # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                    tmp = tmp + self.global2local_comm(inputs[-1], training=training) * comm_rate[-1][i]
                    norm = norm + comm_rate[-1][i]
                    if nd.sum(norm) > 1e-5:
                        results[i] = results[i] + tmp / norm

                tmp = nd.zeros_like(results[-1])
                norm = nd.zeros_like(comm_rate[0][0])
                for j in range(self.slots):
                    tmp = tmp + self.local2global_comm(inputs[j], training=training) * comm_rate[j][-1]
                    norm = norm + comm_rate[j][-1]
                if nd.sum(norm) > 1e-5:
                    results[-1] = results[-1] + tmp / norm
            else:
                if commtype == 'average':
                    for i in range(self.slots):
                        tmp = nd.zeros_like(results[i])
                        for j in range(self.slots):
                            if j != i:
                                tmp = tmp + self.local2local_share_comm.forward(inputs[j], training=training)
                        tmp = tmp + self.global2local_comm.forward(inputs[-1], training=training)
                        results[i] = results[i] + (tmp / float(self.slots))

                    tmp = nd.zeros_like(results[-1])
                    for i in range(self.slots):
                        tmp = tmp + self.local2global_comm.forward(inputs[i], training=training)
                    results[-1] = results[-1] + (tmp / float(self.slots))
                elif commtype == 'maxpooling':
                    for i in range(self.slots):
                        tmp = []
                        for j in range(self.slots):
                            if j != i:
                                tmp.append(self.local2local_share_comm.forward(inputs[j], training=training))
                        tmp.append(self.global2local_comm.forward(inputs[-1], training=training))

                        for k in range(len(tmp)):
                            tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

                        tmp = nd.concat(*tmp, dim=1)
                        maxcomm = nd.max(tmp, axis=1)
                        results[i] = results[i] + maxcomm

                    tmp = []
                    for i in range(self.slots):
                        tmp.append(self.local2global_comm.forward(inputs[i], training=training))
                    for k in range(len(tmp)):
                        tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

                    tmp = nd.concat(*tmp, dim=1)
                    maxcomm = nd.max(tmp, axis=1)
                    results[-1] = results[-1] + maxcomm

        if self.block_mode:
            assert self.local_in_units == self.local_units
            assert self.global_in_units == self.global_units

            for i in range(self.slots):
                results[i] = self.yz_weight_local(results[i], training=training) + inputs[i]
            results[-1] = self.yz_weight_global(results[-1], training=training) + inputs[-1]

        return results
Example 20
    def forward(self, input_vec, loss=None, training=True):
        # print('************* ' + str(input_vec.shape[1]) + ' *************')
        # print('############# ' + str(input_vec.shape) + ' #############')
        assert input_vec.shape[1] == self.input_dimension

        # get inputs for every slot(including global)
        inputs = {}
        for slot in self.slots:
            inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
        input_global = []
        for seg in self.global_dimension:
            input_global.append(input_vec[:, seg[0]:seg[1]])
        inputs['global'] = nd.concat(*input_global, dim=1)

        layer = []
        # inputs -> first_hidden_layer
        if (not self.sort_input_vec) and self.state_feature != 'dip':
            layer.append([])
            for slot in self.slots:
                layer[0].append(self.input_trans[slot](inputs[slot]))
            layer[0].append(self.input_trans['global'](inputs['global']))
        elif self.state_feature == 'dip':
            sorted_inputs = []
            for slot in self.slots:
                sorted_inputs.append(inputs[slot])
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))
        elif self.sort_input_vec:
            sorted_inputs = []
            for slot in self.slots:
                tmp = inputs[slot][:, :-2].sort(is_ascend=False)
                if tmp.shape[1] < 20:
                    tmp = nd.concat(tmp, nd.zeros((tmp.shape[0], 20 - tmp.shape[1]), ctx=CTX), dim=1)
                else:
                    tmp = nd.slice_axis(tmp, axis=1, begin=0, end=20)
                sorted_inputs.append(nd.concat(tmp, inputs[slot][:, -2:], dim=1))
            sorted_inputs.append(inputs['global'])
            layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))

        # hidden_layers
        for i in range(self.hidden_layers - 1):
            if self.recurrent_mode is False:
                # equal to 'layer.append(self.ma_trans[i](layer[-1], loss))'
                layer.append(self.ma_trans[i](layer[i], loss))
            else:
                layer.append(self.ma_trans(layer[i], loss))

        if self.share_last_layer is False:
            # dropout of last hidden layer
            for j in range(len(self.slots)):
                layer[-1][j] = self.local_out_drop_op.forward(layer[-1][j])
            layer[-1][-1] = self.global_out_drop_op.forward(layer[-1][-1])

            # last_hidden_layer -> outputs
            outputs = []
            slotv_probs = []
            slotqs = []
            slot_probs = []
            top_decision = []
            for i in range(len(self.slots) + 1):
                if self.use_dueling is False:
                    outputs.append(self.output_trans[i](layer[-1][i]))
                else:
                    if i < len(self.slots):
                        cur_slotv_prob = self.output_trans_local_valueP.forward(layer[-1][i], training=training)
                        cur_slotv_prob = nd.softmax(cur_slotv_prob)
                    else:
                        cur_slotv_prob = self.output_trans_global_valueP.forward(layer[-1][i], training=training)
                        cur_slotv_prob = nd.softmax(cur_slotv_prob)

                    if self.dueling_share_last:
                        if i < len(self.slots):
                            cur_slotq = self.output_trans_local_slotQ.forward(layer[-1][i], training=training)
                            cur_slot_prob = self.output_trans_local_slotP.forward(layer[-1][i], training=training).reshape(-1,1)
                            cur_slotv_prob = cur_slotv_prob*cur_slot_prob
                            # cur_slot_prob = nd.softmax(cur_slot_prob)
                            if self.shared_last_layer_use_bias:
                                cur_slotq = cur_slotq + nd.slice(self.value_bias_local.data(), begin=(i, ), end=(i + 1, ))
                        else:
                            cur_slotq = self.output_trans_global_slotQ.forward(layer[-1][i], training=training)
                            cur_slot_prob = self.output_trans_global_slotP.forward(layer[-1][i], training=training).reshape(-1,1)
                            cur_slotv_prob = cur_slotv_prob*cur_slot_prob
                            # cur_slot_prob = nd.softmax(cur_slot_prob)

                        top_decision.append(cur_slot_prob)
                    else:
                        cur_slotq = self.output_trans_value[i](layer[-1][i])

                    slotv_probs.append(cur_slotv_prob)
                    slot_probs.append(cur_slot_prob)
                    slotqs.append(cur_slotq)

            # batch_slotv_probs_list = []
            # slot_prob_softmax = nd.softmax(nd.concat(*slot_probs, dim=1))
            # slot_prob_split = nd.split(slot_prob_softmax, axis=1, num_outputs=len(self.slots)+1)
            # assert len(slotv_probs) == len(self.slots)+1
            # for i in range(len(slotv_probs)):
            #     tmp = slot_prob_split[i].reshape(-1,1)*slotv_probs[i]
            #     batch_slotv_probs_list.append(tmp)
            batch_slot_prob = nd.softmax(nd.concat(*slot_probs, dim=1))
            batch_slot_slotq = nd.concat(*slotqs, dim=1)
            batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))
            batch_top_decision = nd.softmax(nd.concat(*top_decision,dim=1))

            # print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@')
            # print(batch_slotv_prob)
            # print(batch_slot_prob.shape)
            # print(batch_slot_slotq.shape)
            # print(batch_slotv_prob.shape)

            prob = batch_slotv_prob
            value = nd.max(batch_slot_slotq, axis=1)
            top_decision = batch_top_decision

            # CTname = threading.currentThread().getName()
            # print(CTname+' top decision is : ')
            # print(top_decision)

        return prob, value, top_decision
Example 21
def softmax(y_linear):
    exp = nd.exp(y_linear - nd.max(y_linear))
    partition = nd.sum(exp, axis=0, exclude=True).reshape((-1, 1))
    return exp / partition
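
A usage sketch for this softmax: the sum with exclude=True reduces over every axis except 0, so each row of a (batch, classes) input is normalized to a probability distribution.

from mxnet import nd

y_linear = nd.array([[1.0, 2.0, 3.0],
                     [0.0, 0.0, 0.0]])
probs = softmax(y_linear)
print(probs)
print(nd.sum(probs, axis=1))  # both rows sum to 1
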
Example 22
def transform_softmax(x):
    max_of_dim1 = nd.max(x, axis=1, keepdims=True)
    return (nd.exp(x - max_of_dim1).T /
            nd.exp(x - max_of_dim1).sum(axis=1, keepdims=True).T).T
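
A usage sketch for transform_softmax: subtracting the per-row maximum keeps the exponentials numerically stable, so rows that differ only by a constant offset produce the same probabilities.

from mxnet import nd

x = nd.array([[100.0, 101.0, 102.0],
              [-1.0, 0.0, 1.0]])
p = transform_softmax(x)
print(p)              # the two rows are identical: the constant offset cancels
print(p.sum(axis=1))  # each row sums to 1
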
Example 23
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        arcface_module_outputs = []
        for i, _module in enumerate(self._arcface_modules):
          #_fc7 = _module.get_outputs(merge_multi_context=True)[0]
          out = _module.get_outputs(merge_multi_context=True)
          #print(out[0].shape)
          #print(out[1].shape)
          arcface_module_outputs.append(out)
          _fc7 = out[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0
        total_eloss = []
        celoss_verbose = 1000
        if self._iter%celoss_verbose==0:
          fc7_celoss = self.get_ndarray(tmp_ctx, 'test_fc7_celoss', (self._batch_size,))
          fc7_celoss[:] = 0.0

        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          #print(fc7_outs[i].shape, onehot_label.shape)

          if self._iter%celoss_verbose==0:
            _ce_loss = fc7_outs[i] * onehot_label
            _ce_loss = nd.sum(_ce_loss, axis=1)
            fc7_celoss += _ce_loss.as_in_context(tmp_ctx)
          fc7_outs[i] -= onehot_label

          out = arcface_module_outputs[i]
          out_grads = [fc7_outs[i]]
          for j in range(1, len(out)):
              eloss = out[j]
              #print('eloss%d:'%j, eloss.shape)
              #print(out_grads[0].shape)
              #egrad_shape = (out_grads[0].shape[0], eloss.shape[0])
              egrad_shape = eloss.shape
              egrad = self.get_ndarray(fc7_outs[i].context, 'egrad%d'%j, egrad_shape)
              #egrad[:][:] = 1.0/egrad_shape[0]
              egrad[:][:] = 1.0
              out_grads.append(egrad)
              if self._iter%self._verbose==0:
                  total_eloss.append(np.mean(eloss.asnumpy()))

          _module.backward(out_grads = out_grads)
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad

        if self._iter%self._verbose==0 and len(total_eloss)>0:
          print('{eloss}', self._iter, np.mean(total_eloss))
        #if self._iter%self._verbose==0:
        if self._iter%celoss_verbose==0:
          ce_loss = nd.log(fc7_celoss) * -1.0
          ce_loss = nd.mean(ce_loss)
          print('CELOSS,%d,%f'% (self._iter, ce_loss.asnumpy()))

        global_fc1_grad = local_fc1_grad
        self._curr_module.backward(out_grads = [global_fc1_grad])
Example 24
    def forward(self, inputs, loss=None, training=True, commtype='average', topo='FC'):
        assert len(inputs) == self.slots + 1

        local_drop_vec = nd.ones_like(inputs[0])
        local_drop_vec = self.local_dropout_op(local_drop_vec)
        for i in range(self.slots):
            inputs[i] = inputs[i] * local_drop_vec
        inputs[-1] = self.global_dropout_op(inputs[-1])

        if topo == 'FC':
            comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
        elif topo == 'FUC':
            comm_rate = nd.zeros(shape=(self.slots + 1, self.slots + 1))
        elif topo == 'Master':
            comm_rate = nd.ones(shape=(self.slots + 1, self.slots + 1))
            for i in range(self.slots):
                for j in range(self.slots):
                    comm_rate[i][j] = 0

        if self.use_comm and self.topo_learning_mode:
            proba = nd.sigmoid(self.topo.data())

            if random.random() < 1e-2:
                print('---------------------------------------------')
                print(proba.asnumpy())
                print('---------------------------------------------')

            u_vec = nd.random_uniform(low=1e-5, high=1. - 1e-5, shape=(self.slots + 1, self.slots + 1))
            comm_rate = nd.sigmoid(10. * (
                    nd.log(proba) - nd.log(1. - proba) +
                    nd.log(u_vec) - nd.log(1. - u_vec)
            ))
            if loss is not None:
                loss.append(4e-4 * nd.sum(proba * nd.log(proba) + (1. - proba) * nd.log(1. - proba)))

        results = []
        for i in range(self.slots):
            results.append(self.local_share_trans.forward(inputs[i], training=training))
        results.append(self.global_trans.forward(inputs[-1], training=training))

        if commtype == 'average':
            for i in range(self.slots):
                tmp = nd.zeros_like(results[i])
                norm = nd.zeros_like(comm_rate[0][0])
                for j in range(self.slots):
                    if i != j:
                        tmp = tmp + self.local2local_share_comm.forward(nd.concat(inputs[j], dim=1),
                                                                        training=training) * comm_rate[j][i]
                        norm = norm + comm_rate[j][i]
                # results[i] = results[i] + self.global2local_comm(inputs[-1]) * comm_rate[-1][i]
                tmp = tmp + self.global2local_comm.forward(nd.concat(inputs[-1], dim=1), training=training) * \
                      comm_rate[-1][i]
                norm = norm + comm_rate[-1][i]
                if nd.sum(norm) > 1e-5:
                    results[i] = results[i] + tmp / norm

            tmp = nd.zeros_like(results[-1])
            norm = nd.zeros_like(comm_rate[0][0])
            for j in range(self.slots):
                tmp = tmp + self.local2global_comm.forward(nd.concat(inputs[j], dim=1), training=training) * \
                      comm_rate[j][-1]
                norm = norm + comm_rate[j][-1]
            if nd.sum(norm) > 1e-5:
                results[-1] = results[-1] + tmp / norm

        elif commtype == 'maxpooling':
            for i in range(self.slots):
                tmp = []
                for j in range(self.slots):
                    if j != i:
                        tmp.append(self.local2local_share_comm.forward(inputs[j], training=training))
                tmp.append(self.global2local_comm.forward(inputs[-1], training=training))

                for k in range(len(tmp)):
                    tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

                tmp = nd.concat(*tmp, dim=1)
                maxcomm = nd.max(tmp, axis=1)
                results[i] = results[i] + maxcomm

            tmp = []
            for i in range(self.slots):
                tmp.append(self.local2global_comm.forward(inputs[i], training=training))
            for k in range(len(tmp)):
                tmp[k] = tmp[k].reshape((tmp[k].shape[0], 1, tmp[k].shape[1]))

            tmp = nd.concat(*tmp, dim=1)
            maxcomm = nd.max(tmp, axis=1)
            results[-1] = results[-1] + maxcomm

        return results
Example 25
    def forward(self, input_vec, loss=None, training=True):
        assert input_vec.shape[1] == self.input_dimension

        # get inputs for every slot(including global)
        inputs = {}
        for slot in self.slots:
            inputs[slot] = input_vec[:, self.slot_dimension[slot][0]:self.slot_dimension[slot][1]]
        input_global = []
        for seg in self.global_dimension:
            input_global.append(input_vec[:, seg[0]:seg[1]])
        inputs['global'] = nd.concat(*input_global, dim=1)

        layer = []
        # inputs -> first_hidden_layer
        sorted_inputs = []
        for slot in self.slots:
            sorted_inputs.append(inputs[slot])
        sorted_inputs.append(inputs['global'])
        layer.append(self.input_trans.forward(sorted_inputs, loss, training=training))

        # hidden_layers
        for i in range(self.hidden_layers - 1):
            layer.append(self.ma_trans[i](layer[i], loss))

        if self.share_last_layer is False:
            # dropout of last hidden layer
            for j in range(len(self.slots)):
                layer[-1][j] = self.local_out_drop_op.forward(layer[-1][j])
            layer[-1][-1] = self.global_out_drop_op.forward(layer[-1][-1])

        # last_hidden_layer -> outputs
        slotv_probs = []
        slotqs = []
        slot_probs = []
        top_decision = []
        for i in range(len(self.slots) + 1):
            if i < len(self.slots):
                cur_slotv_prob = self.output_trans_local_valueP.forward(layer[-1][i], training=training)
            else:
                cur_slotv_prob = self.output_trans_global_valueP.forward(layer[-1][i], training=training)

            cur_slotv_prob_adv = cur_slotv_prob - nd.max(cur_slotv_prob, axis=1, keepdims=True)

            if i < len(self.slots):
                cur_slotq = self.output_trans_local_slotQ.forward(layer[-1][i], training=training)
                cur_slot_prob = self.output_trans_local_slotP.forward(layer[-1][i],
                                                                      training=training).reshape(-1, 1)
                if self.shared_last_layer_use_bias:
                    cur_slotq = cur_slotq + nd.slice(self.value_bias_local.data(), begin=(i,), end=(i + 1,))
            else:
                cur_slotq = self.output_trans_global_slotQ.forward(layer[-1][i], training=training)
                cur_slot_prob = self.output_trans_global_slotP.forward(layer[-1][i],
                                                                       training=training).reshape(-1, 1)
            cur_slotv_prob = cur_slot_prob + cur_slotv_prob_adv
            top_decision.append(cur_slot_prob)

            slotv_probs.append(cur_slotv_prob)
            slot_probs.append(cur_slot_prob)
            slotqs.append(cur_slotq)

        batch_slot_slotq = nd.concat(*slotqs, dim=1)
        batch_slotv_prob = nd.softmax(nd.concat(*slotv_probs, dim=1))
        batch_top_decision = nd.softmax(nd.concat(*top_decision, dim=1))

        prob = batch_slotv_prob
        value = nd.sum(batch_top_decision * batch_slot_slotq, axis=1)
        top_decision = batch_top_decision

        return prob, value, top_decision
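
# Hedged recap of the head above (toy numbers, illustrative only): each slot
# contributes one scalar "top" logit plus a vector of per-value advantages
# (logits shifted by their max). The flat action distribution is a softmax over
# (top logit + advantage) concatenated across slots, and the state value is the
# top-decision-weighted sum of the per-slot Q estimates.
from mxnet import nd

top_logits = [nd.array([[0.2]]), nd.array([[1.0]])]              # one scalar per slot
advantages = [nd.array([[0.0, -0.5]]), nd.array([[0.0, -1.2]])]  # per-value advantages
slot_qs = [nd.array([[0.7]]), nd.array([[0.3]])]                 # one Q per slot

prob = nd.softmax(nd.concat(*[t + a for t, a in zip(top_logits, advantages)], dim=1))
top_decision = nd.softmax(nd.concat(*top_logits, dim=1))
value = nd.sum(top_decision * nd.concat(*slot_qs, dim=1), axis=1)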
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        ## ============= forward classifier layer ===========
        fc7_outs = []
        for i, _module in enumerate(self._arcface_modules):
            _fc7 = _module.get_outputs(merge_multi_context=True)[0]
            fc7_outs.append(_fc7)

        # model-parallel softmax: shift each device's logits by the global max,
        # exponentiate, then normalize by the global sum gathered across devices
        ctx_max = [
            nd.max(fc7_out, axis=1,
                   keepdims=True).as_in_context(self._ctx_single_gpu)
            for fc7_out in fc7_outs
        ]
        local_fc7_max = nd.max(nd.concat(*ctx_max, dim=1),
                               axis=1,
                               keepdims=True)
        fc7_exps = [
            nd.exp(fc7_out - local_fc7_max.as_in_context(fc7_out.context))
            for fc7_out in fc7_outs
        ]
        ctx_sum = [
            nd.sum(fc7_exp, axis=1,
                   keepdims=True).as_in_context(self._ctx_single_gpu)
            for fc7_exp in fc7_exps
        ]
        exp_sum = nd.sum(nd.concat(*ctx_sum, dim=1), axis=1, keepdims=True)
        softmax_outs = [
            nd.broadcast_div(fc7_exp, exp_sum.as_in_context(fc7_exp.context))
            for fc7_exp in fc7_exps
        ]

        onehot_device_labels = [
            nd.one_hot((self.global_label).as_in_context(device) -
                       self._ctx_class_start[i],
                       depth=self._ctx_num_classes,
                       on_value=1.0,
                       off_value=0.0) for i, device in enumerate(self._context)
        ]

        ## ============= verbose train accuracy and loss ===========
        if self._iter % self._verbose == 0:
            local_label = self.global_label - self._local_class_start

            fc7_pred = self.parall_argmax(softmax_outs, self._ctx_single_gpu)
            # compare over the whole batch so np.mean below reports batch accuracy
            _pred = nd.equal(fc7_pred, local_label).asnumpy()

            loss = self.parall_loss(softmax_outs, onehot_device_labels,
                                    self._ctx_single_gpu).asscalar()
            assert not math.isnan(loss)

            self.logger.info(
                '[Iter {}] train acc : {}, total loss : {}'.format(
                    self._iter, np.mean(_pred), loss))

        ## ============= backward large weight classifier layer with gradient ===========
        local_fc1_grad = self.get_ndarray_by_shape(
            self._ctx_single_gpu, 'local_fc1_grad',
            (self._batch_size, self._emb_size))
        local_fc1_grad[:, :] = 0.0
        for i, _module in enumerate(self._arcface_modules):
            _module.backward(
                out_grads=[softmax_outs[i] - onehot_device_labels[i]])
            ctx_fc1_grad = self.get_ndarray_by_v_arr(
                self._ctx_single_gpu, 'ctx_fc1_grad_%d' % i,
                _module.get_input_grads()[0])
            local_fc1_grad += ctx_fc1_grad

        ## ============= backward backbone ===============
        global_fc1_grad = local_fc1_grad
        self._backbone_module.backward(out_grads=[global_fc1_grad])
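
# Hedged side note (not from the source repo): passing
# softmax_outs[i] - onehot_device_labels[i] as out_grads works because the gradient
# of softmax cross-entropy with respect to the logits is softmax(logits) - one_hot(label).
# A quick autograd check of that identity on a toy batch:
from mxnet import nd, autograd

logits = nd.random.normal(shape=(2, 5))
labels = nd.array([1, 3])
logits.attach_grad()
with autograd.record():
    loss = -nd.sum(nd.pick(nd.log_softmax(logits), labels, axis=1))
loss.backward()
manual = nd.softmax(logits) - nd.one_hot(labels, depth=5)
print(nd.max(nd.abs(logits.grad - manual)))  # ~0: the two gradients agree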
Example no. 27
def log_sum_exp(vec):
    max_score = nd.max(vec).asscalar()
    return nd.log(nd.sum(nd.exp(vec - max_score))) + max_score
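
# Hedged usage sketch (assumed, not from the source): the max-shift keeps exp()
# from overflowing while leaving the result unchanged, since
# log(sum(exp(v))) = m + log(sum(exp(v - m))) for any constant m.
from mxnet import nd

vec = nd.array([1000.0, 1001.0, 999.0])
# the naive nd.log(nd.sum(nd.exp(vec))) overflows to inf in float32
print(log_sum_exp(vec).asscalar())  # ~1001.41, finite thanks to the shift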
Example no. 28
    def hybrid_forward(self, F, preds, label):
        label = label.astype('float32')
        dist = F.sqrt(F.sum(F.square(preds), axis=1))

        # hinge term: elementwise max(margin - dist, 0); F.max would reduce over the batch
        return label * F.square(dist) + (1 - label) * F.square(
            F.maximum(self._m - dist, 0))
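
# Hedged standalone re-expression of the margin loss above, written against
# mxnet.ndarray with a dummy batch; `margin` stands in for self._m and all names
# here are illustrative, not from the source.
from mxnet import nd

def contrastive_loss(preds, label, margin=1.0):
    label = label.astype('float32')
    dist = nd.sqrt(nd.sum(nd.square(preds), axis=1))
    # similar pairs (label=1) are pulled toward zero distance,
    # dissimilar pairs (label=0) are pushed out to at least `margin`
    return label * nd.square(dist) + \
        (1 - label) * nd.square(nd.maximum(margin - dist, 0))

preds = nd.array([[0.3, 0.4], [1.2, 0.9]])  # e.g. pairwise feature differences
label = nd.array([1, 0])
print(contrastive_loss(preds, label))       # [0.25, 0.]; the second pair is already past the margin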
    def backward(self, out_grads=None):
        #print('in backward')
        assert self.binded and self.params_initialized
        #tmp_ctx = self._ctx_cpu
        tmp_ctx = self._ctx_single_gpu
        fc7_outs = []
        ctx_fc7_max = self.get_ndarray(tmp_ctx, 'ctx_fc7_max', (self._batch_size, len(self._context)))
        #local_fc7_max = nd.zeros( (self.global_label.shape[0],1), ctx=mx.cpu())
        for i, _module in enumerate(self._arcface_modules):
          _fc7 = _module.get_outputs(merge_multi_context=True)[0]
          fc7_outs.append(_fc7)
          _fc7_max = nd.max(_fc7, axis=1).as_in_context(tmp_ctx)
          ctx_fc7_max[:,i] = _fc7_max

        local_fc7_max = self.get_ndarray(tmp_ctx, 'local_fc7_max', (self._batch_size, 1))
        nd.max(ctx_fc7_max, axis=1, keepdims=True, out=local_fc7_max)
        global_fc7_max = local_fc7_max
        #local_fc7_sum = None
        local_fc7_sum = self.get_ndarray(tmp_ctx, 'local_fc7_sum', (self._batch_size,1))
        local_fc7_sum[:,:] = 0.0
        for i, _module in enumerate(self._arcface_modules):
          _max = self.get_ndarray2(fc7_outs[i].context, 'fc7_max', global_fc7_max)
          fc7_outs[i] = nd.broadcast_sub(fc7_outs[i], _max)
          fc7_outs[i] = nd.exp(fc7_outs[i])
          _sum = nd.sum(fc7_outs[i], axis=1, keepdims=True).as_in_context(tmp_ctx)
          local_fc7_sum += _sum
        global_fc7_sum = local_fc7_sum

        if self._iter%self._verbose==0:
          #_ctx = self._context[-1]
          _ctx = self._ctx_cpu
          _probs = []
          for i, _module in enumerate(self._arcface_modules):
            _prob = self.get_ndarray2(_ctx, '_fc7_prob_%d'%i, fc7_outs[i])
            _probs.append(_prob)
          fc7_prob = self.get_ndarray(_ctx, 'test_fc7_prob', (self._batch_size, self._ctx_num_classes*len(self._context)))
          nd.concat(*_probs, dim=1, out=fc7_prob)
          fc7_pred = nd.argmax(fc7_prob, axis=1)
          local_label = self.global_label - self._local_class_start
          #local_label = self.get_ndarray2(_ctx, 'test_label', local_label)
          _pred = nd.equal(fc7_pred, local_label)
          print('{fc7_acc}', self._iter, nd.mean(_pred).asnumpy()[0])


        #local_fc1_grad = []
        #fc1_grad_ctx = self._ctx_cpu
        fc1_grad_ctx = self._ctx_single_gpu
        local_fc1_grad = self.get_ndarray(fc1_grad_ctx, 'local_fc1_grad', (self._batch_size,self._emb_size))
        local_fc1_grad[:,:] = 0.0

        loss = nd.zeros(shape=(self._batch_size), ctx=self._ctx_cpu)
        for i, _module in enumerate(self._arcface_modules):
          _sum = self.get_ndarray2(fc7_outs[i].context, 'fc7_sum', global_fc7_sum)
          fc7_outs[i] = nd.broadcast_div(fc7_outs[i], _sum)
          a = i*self._ctx_num_classes
          b = (i+1)*self._ctx_num_classes
          _label = self.global_label - self._ctx_class_start[i]
          _label = self.get_ndarray2(fc7_outs[i].context, 'label', _label)
          onehot_label = self.get_ndarray(fc7_outs[i].context, 'label_onehot', (self._batch_size, self._ctx_num_classes))
          nd.one_hot(_label, depth=self._ctx_num_classes, on_value = 1.0, off_value = 0.0, out=onehot_label)
          
          #for debug
          loss -= (mx.nd.sum(mx.nd.log(fc7_outs[i]) * onehot_label, axis=1)).as_in_context(self._ctx_cpu)
          fc7_outs[i] -= onehot_label
          _module.backward(out_grads = [fc7_outs[i]])
          print('for debug, fc7 outs max is ', i, mx.nd.max(fc7_outs[i]))
          print('for debug, fc7 outs min is ', i, mx.nd.min(fc7_outs[i]))
          #ctx_fc1_grad = _module.get_input_grads()[0].as_in_context(mx.cpu())
          ctx_fc1_grad = self.get_ndarray2(fc1_grad_ctx, 'ctx_fc1_grad_%d'%i, _module.get_input_grads()[0])
          local_fc1_grad += ctx_fc1_grad
          print('for debug, global fc1_grad max is ', i, mx.nd.max(ctx_fc1_grad))
          print('for debug, ctx fc1 grad shape, ', ctx_fc1_grad.shape)

        global_fc1_grad = local_fc1_grad
        #  global_fc1_grad = mx.nd.clip(local_fc1_grad, a_min=-15, a_max=15)
        print('for debug, after clip global fc1_grad max is ', mx.nd.max(global_fc1_grad))
        self._curr_module.backward(out_grads = [global_fc1_grad])
        # for debug
        return mx.nd.sum(loss)
Example no. 30
def max(input, dim):
    return nd.max(input, axis=dim)