from mxnet import nd


def my_loss(data, nc, ns, nq):
    """Prototypical-network loss.

    The first nc * ns rows of `data` are support embeddings (ns shots for
    each of nc classes); the remaining nc * nq rows are query embeddings.
    """
    data = data.astype('float64')
    # Class prototypes: mean of each class's support embeddings.
    cls_data = nd.reshape(data[0:nc * ns], (nc, ns, -1))
    cls_center = nd.mean(cls_data, axis=1) + 1e-10
    # Squared Euclidean distance of every query embedding to every prototype.
    data_center_dis = nd.norm(data[nc * ns:].expand_dims(axis=1) -
                              cls_center.expand_dims(axis=0),
                              axis=2)**2

    # One-hot matrix marking the true class of each query sample.
    weight = nd.zeros((nc * nq, nc), ctx=data.context, dtype='float64')
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1

    # Mean negative log-likelihood of the true class under a softmax over
    # negative distances.  Equivalent to
    # (sum(data_center_dis * weight) + sum(log(sum(exp(-data_center_dis), axis=1)))) / (nc * nq).
    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.sum(temp1 * weight, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label
Example #2
def proto_loss(embedding, nc, ns, nq):
    """Prototypical-network loss computed from a batch of embeddings."""
    embedding = embedding.astype('float64')
    # Class prototypes: mean embedding of the ns support samples per class.
    cls_data = nd.reshape(embedding[0:nc * ns], (nc, ns, -1))
    cls_data.attach_grad()
    cls_center = nd.mean(cls_data, axis=1)
    # Squared Euclidean distance of each query sample to each prototype.
    data_center_dis = nd.norm(embedding[nc * ns:].expand_dims(axis=1) -
                              cls_center.expand_dims(axis=0),
                              axis=2) ** 2

    # One-hot weights and the true-class index of each query sample.
    weight = nd.zeros((nc * nq, nc), ctx=embedding.context, dtype='float64')
    pick_vec = nd.zeros((nc * nq), ctx=embedding.context)
    for i in range(0, nc):
        weight[i * nq:i * nq + nq, i] = 1
        pick_vec[i * nq:i * nq + nq] = i

    # Negative log-likelihood of the true class under a softmax over negative
    # distances, averaged over the nc * nq query samples.
    temp1 = nd.log_softmax(-data_center_dis, axis=1)
    temp2 = nd.pick(temp1, index=pick_vec, axis=1)
    temp3 = nd.sum(-temp2)
    label = nd.argmin(data_center_dis, axis=1)
    return temp3 / (nc * nq), label
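A minimal usage sketch (not from the original source): both my_loss and proto_loss expect the first nc * ns rows of the embedding batch to be support samples and the remaining nc * nq rows to be query samples. The shapes below are made up for illustration.

# Hypothetical 5-way / 1-shot episode with 15 query samples per class
# and 64-dimensional embeddings.
from mxnet import nd

nc, ns, nq, dim = 5, 1, 15, 64
embedding = nd.random.normal(shape=(nc * ns + nc * nq, dim))
loss, predicted = proto_loss(embedding, nc, ns, nq)
print(loss.asscalar(), predicted.shape)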
Example #3
 def hybrid_forward(self, F, x, y, ignore_label):
     output = F.log_softmax(x)
     # One-hot label matrix built column by column.
     label_matrix = mx.nd.zeros(output.shape, ctx=output.context)
     for i in range(label_matrix.shape[1]):
         label_matrix[:, i] = (y == i)
     ignore_unit = (y == ignore_label)
     loss = -F.sum(output * label_matrix, axis=1)
     # Average over the samples whose label is not the ignore label.
     return F.sum(loss) / (output.shape[0] - F.sum(ignore_unit))
Example #4
 def hybrid_forward(self, F, pred, label, valid_length):
     # Predict token t+1 from position t: drop the last prediction and the
     # first label, and shorten the valid lengths accordingly.
     pred = pred[:, :-1, :]
     label = label[:, 1:]
     valid_length = valid_length - 1
     if not self._from_logits:
         pred = F.log_softmax(pred, self._axis)
     # Negative log-likelihood of each target token.
     loss = mx.nd.squeeze(
         -F.pick(pred, label, axis=self._axis, keepdims=True), axis=2)
     # Mask out positions beyond each sequence's valid length.
     loss = F.SequenceMask(loss.swapaxes(0, 1),
                           sequence_length=valid_length,
                           use_sequence_length=True).swapaxes(0, 1)
     return F.mean(loss, axis=self._batch_axis, exclude=True)
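The masked negative-log-likelihood pattern above can be exercised with plain nd ops outside the class; the shapes and labels below are invented for illustration.

# Sketch: per-token NLL for next-token prediction, masked beyond each
# sequence's valid length.
from mxnet import nd

pred = nd.random.normal(shape=(2, 4, 5))        # (batch, seq_len, vocab)
label = nd.array([[1, 2, 3, 0], [2, 2, 0, 0]])  # token ids
valid_length = nd.array([4, 2])
logp = nd.log_softmax(pred[:, :-1, :], axis=-1)
loss = -nd.pick(logp, label[:, 1:], axis=-1)
loss = nd.SequenceMask(loss.swapaxes(0, 1),
                       sequence_length=valid_length - 1,
                       use_sequence_length=True).swapaxes(0, 1)
print(nd.mean(loss, axis=0, exclude=True))      # one value per sequence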
Example #5
    def sample(self, batch_size=1, with_details=False, with_entropy=False):
        """
        Returns
        -------
        configs : list of dict
            list of configurations
        """
        inputs = self.static_inputs[batch_size]
        hidden = self.static_init_hidden[batch_size]

        actions = []
        entropies = []
        log_probs = []

        for idx in range(len(self.num_tokens)):
            logits, hidden = self.forward(inputs,
                                          hidden,
                                          idx,
                                          is_embed=(idx == 0))

            probs = F.softmax(logits, axis=-1)
            log_prob = F.log_softmax(logits, axis=-1)
            entropy = -(log_prob *
                        probs).sum(1, keepdims=False) if with_entropy else None

            action = mx.random.multinomial(probs, 1)
            ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                              action.astype('float32'))
            selected_log_prob = F.gather_nd(log_prob, ind)

            actions.append(action[:, 0])
            entropies.append(entropy)
            log_probs.append(selected_log_prob)

            # Feed the sampled action (offset into the shared token range)
            # back as the next input, detached from the autograd graph.
            inputs = action[:, 0] + sum(self.num_tokens[:idx])
            inputs = inputs.detach()

        configs = []
        for idx in range(batch_size):
            config = {}
            for i, action in enumerate(actions):
                choice = action[idx].asscalar()
                k, space = self.spaces[i]
                config[k] = int(choice)
            configs.append(config)

        if with_details:
            entropies = F.stack(*entropies,
                                axis=1) if with_entropy else entropies
            return configs, F.stack(*log_probs, axis=1), entropies
        else:
            return configs
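The stack/gather_nd combination above picks, for every row of the batch, the log-probability of the action that was just sampled. A standalone sketch of that indexing pattern (values invented for illustration):

from mxnet import nd

log_prob = nd.log_softmax(nd.random.normal(shape=(4, 3)), axis=-1)
action = nd.array([[0], [2], [1], [2]])          # sampled action per row
ind = nd.stack(nd.arange(4, ctx=action.context), action[:, 0])
print(nd.gather_nd(log_prob, ind))               # one log-prob per row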
Example #6
    def sample(self, batch_size=1, with_details=False, with_entropy=False):
        # self-attention
        x = self.embedding(batch_size).reshape(-3, 0)  # b x action x h
        kshape = (batch_size, self.num_total_tokens, self.hidden_size)
        vshape = (batch_size, self.num_total_tokens, 1)
        querry = self.querry(x).reshape(*kshape)  # b x actions x h
        key = self.key(x).reshape(*kshape)  # b x actions x h
        value = self.value(x).reshape(*vshape)  # b x actions x 1
        atten = mx.nd.linalg_gemm2(querry, key,
                                   transpose_b=True).softmax(axis=1)
        alphas = mx.nd.linalg_gemm2(atten, value).squeeze(axis=-1)

        actions = []
        entropies = []
        log_probs = []
        for idx in range(len(self.num_tokens)):
            i0 = sum(self.num_tokens[:idx])
            i1 = sum(self.num_tokens[:idx + 1])
            logits = alphas[:, i0:i1]

            probs = F.softmax(logits, axis=-1)
            log_prob = F.log_softmax(logits, axis=-1)

            entropy = -(log_prob *
                        probs).sum(1, keepdims=False) if with_entropy else None

            action = mx.random.multinomial(probs, 1)
            ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                              action.astype('float32'))
            selected_log_prob = F.gather_nd(log_prob, ind)

            actions.append(action[:, 0])
            entropies.append(entropy)
            log_probs.append(selected_log_prob)

        configs = []
        for idx in range(batch_size):
            config = {}
            for i, action in enumerate(actions):
                choice = action[idx].asscalar()
                k, space = self.spaces[i]
                config[k] = int(choice)
            configs.append(config)

        if with_details:
            entropies = F.stack(*entropies,
                                axis=1) if with_entropy else entropies
            return configs, F.stack(*log_probs, axis=1), entropies
        else:
            return configs
Example #7
def evaluate(test_net, test_data, args):
    exe_num = len(test_net._context)
    curr_states = test_net.get_states(merge_multi_context=False)
    # Set the state to zero when a new epoch begins
    for state_id in range(len(curr_states)):
        for exe_id in range(exe_num):
            curr_states[state_id][exe_id][:] = 0
    test_net.set_states(curr_states)
    total_nll = 0.0
    for start in range(0, test_data.shape[0] - 1, args.bptt):
        data_batch_npy = np.take(test_data,
                                 np.arange(start, start + args.bptt),
                                 axis=0,
                                 mode="clip")
        target_batch_npy = np.take(test_data,
                                   np.arange(start + 1, start + 1 + args.bptt),
                                   axis=0,
                                   mode="clip")
        if start + args.bptt > test_data.shape[0]:
            valid_seq_len = test_data.shape[0] - start
        else:
            valid_seq_len = args.bptt
        test_net.forward(data_batch=mx.io.DataBatch(data=[mx.nd.array(data_batch_npy)]),
                         is_train=False)
        outputs = test_net.get_outputs(merge_multi_context=False)
        local_nll = 0.0
        for exe_id in range(exe_num):
            logits = outputs[0][exe_id]
            nll = - nd.pick(nd.log_softmax(logits), nd.array(target_batch_npy, ctx=logits.context),
                            axis=-1).asnumpy()
            local_nll += nll[:valid_seq_len, :].mean() * valid_seq_len
        total_nll += local_nll / exe_num
        for out_id in range(1, len(outputs)):
            for exe_id in range(exe_num):
                curr_states[out_id - 1][exe_id] = outputs[out_id][exe_id]
        test_net.set_states(states=curr_states)
    avg_nll = total_nll / test_data.shape[0]
    return avg_nll
Example #8
    def sample(self, batch_size=1, with_details=False, with_entropy=False):
        actions = []
        entropies = []
        log_probs = []

        for idx in range(len(self.num_tokens)):
            logits = self.decoders[idx](batch_size)

            probs = F.softmax(logits, axis=-1)
            log_prob = F.log_softmax(logits, axis=-1)

            entropy = -(log_prob *
                        probs).sum(1, keepdims=False) if with_entropy else None

            action = mx.random.multinomial(probs, 1)
            ind = mx.nd.stack(mx.nd.arange(probs.shape[0], ctx=action.context),
                              action.astype('float32'))
            selected_log_prob = F.gather_nd(log_prob, ind)

            actions.append(action[:, 0])
            entropies.append(entropy)
            log_probs.append(selected_log_prob)

        configs = []
        for idx in range(batch_size):
            config = {}
            for i, action in enumerate(actions):
                choice = action[idx].asscalar()
                k, space = self.spaces[i]
                config[k] = int(choice)
            configs.append(config)

        if with_details:
            entropies = F.stack(*entropies,
                                axis=1) if with_entropy else entropies
            return configs, F.stack(*log_probs, axis=1), entropies
        else:
            return configs
Example #9
 def hybrid_forward(self, F, pred, label, sample_weight=None):
     if not self._from_logits:
         pred = F.log_softmax(pred, axis=self._axis)
     if self._sparse_label:
         if self._size_average:
             valid_label_map = (label !=
                                self._ignore_label).astype('float32')
             loss = -(F.pick(pred, label, axis=self._axis, keepdims=True) *
                      valid_label_map)
         else:
             loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
             loss = F.where(
                 label.expand_dims(axis=self._axis) == self._ignore_label,
                 F.zeros_like(loss), loss)
     else:
         label = _reshape_like(F, label, pred)
         loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
     loss = _apply_weighting(F, loss, self._weight, sample_weight)
     if self._size_average and self._sparse_label:
         # Rescale the mean so it is taken only over labels that are not
         # the ignore label.
         return F.mean(loss, axis=self._batch_axis, exclude=True) * \
             valid_label_map.size / F.sum(valid_label_map)
     else:
         return F.mean(loss, axis=self._batch_axis, exclude=True)
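The sparse-label branch with the ignore label can be reproduced with plain nd ops; the values below are invented for illustration, with -1 standing in for the ignore label.

from mxnet import nd

pred = nd.random.normal(shape=(3, 4))
label = nd.array([2, -1, 0])
logp = nd.log_softmax(pred, axis=-1)
loss = -nd.pick(logp, label, axis=-1, keepdims=True)
# Zero out the loss of samples carrying the ignore label.
loss = nd.where(label.expand_dims(axis=-1) == -1,
                nd.zeros_like(loss), loss)
print(loss)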
Example #10
 def log_pdf(self, y):
     # Total log-likelihood of the one-hot targets y under a softmax over
     # the unnormalized mean.
     return nd.sum(
         nd.nansum(y * nd.log_softmax(self.unnormalized_mean),
                   axis=0,
                   exclude=True))
Example #11
 def softmax_cross_entropy(yhat_linear, y):
     return -nd.nansum(
         y * nd.log_softmax(yhat_linear), axis=0, exclude=True)
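A quick sanity check (values invented for illustration): for a one-hot target, the hand-rolled cross entropy above equals the negative log-probability of the correct class.

from mxnet import nd

yhat_linear = nd.array([[2.0, 0.5, -1.0]])
y = nd.one_hot(nd.array([0]), depth=3)
print(softmax_cross_entropy(yhat_linear, y))   # == -log_softmax(yhat_linear)[0, 0]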