Example #1
0
def argmax_crf1d(cost, xs):
    """Finds the best-scoring label path of a linear-chain CRF.

    A max-sum (Viterbi-style) forward pass is run over the steps of ``xs``
    while remembering the best predecessor of every label; the path is then
    recovered by following those predecessors backwards.

    Args:
        cost (Variable): Transition scores between labels. It is broadcast
            against ``alpha[..., None]``, so a square label-by-label matrix
            is expected.
        xs (list of Variable): Per-step label scores, one matrix per step
            with one row per sequence still running at that step. A step may
            have fewer rows than the previous one; the surplus rows are
            treated as sequences that have already ended.

    Returns:
        tuple: ``(score, path)``. ``score`` holds the best path score of
        every sequence (one entry per row of ``xs[0]``). ``path`` is a list
        with one index array per step, ``path[i]`` having as many entries as
        ``xs[i]`` has rows.
    """
    current = xs[0]
    ended = []          # per step: rows split off for finished sequences, or None
    backpointers = []   # per step: best previous label for every current label
    for emission in xs[1:]:
        n_active = emission.shape[0]
        leftover = None
        if current.shape[0] > n_active:
            # Fewer rows at this step: set the finished sequences aside.
            current, leftover = split_axis.split_axis(
                current, [n_active], axis=0)
        ended.append(leftover)

        expanded, transitions = broadcast.broadcast(current[..., None], cost)
        candidates = expanded + transitions
        backpointers.append(minmax.argmax(candidates, axis=1))
        current = minmax.max(candidates, axis=1) + emission

    # Backtrack from the last step, re-attaching finished sequences on the way.
    best = minmax.argmax(current, axis=1)
    path = [best.data]
    for pointer, leftover in zip(reversed(backpointers), reversed(ended)):
        best = select_item.select_item(pointer, best)
        if leftover is not None:
            best = concat.concat(
                [best, minmax.argmax(leftover, axis=1)], axis=0)
        path.append(best.data)
    path.reverse()

    score = minmax.max(current, axis=1)
    for leftover in reversed(ended):
        if leftover is not None:
            score = concat.concat(
                [score, minmax.max(leftover, axis=1)], axis=0)

    return score, path
Example #2
0
def argmax_crf1d(cost, xs):
    """Computes the label sequence that maximizes the score of the given CRF.

    Args:
        cost (Variable): A :math:`K \\times K` matrix which holds transition
            cost between two labels, where :math:`K` is the number of labels.
        xs (list of Variable): Input vector for each label.
            ``len(xs)`` denotes the length of the sequence,
            and each :class:`~chainer.Variable` holds a :math:`B \\times K`
            matrix, where :math:`B` is mini-batch size, :math:`K` is the number
            of labels.
            Note that :math:`B`\\ s in all the variables are not necessarily
            the same, i.e., it accepts the input sequences with different
            lengths: a step may hold fewer rows than the step before it.

    Returns:
        tuple: A tuple of :class:`~chainer.Variable` object ``s`` and a
        :class:`list` ``ps``.
        The shape of ``s`` is ``(B,)``, where ``B`` is the mini-batch size
        of ``xs[0]``. ``s[i]`` is the score of the best path found for the
        i-th sequence.
        ``ps`` is a list of :class:`numpy.ndarray` or
        :class:`cupy.ndarray` holding the selected label of every sequence
        at every step.
        ``len(ps)`` is equal to ``len(xs)``, and shape of each ``ps[i]`` is
        the mini-batch size of the corresponding ``xs[i]``. That means,
        ``ps[i].shape == xs[i].shape[0:1]``.
    """
    forward = xs[0]
    finished_rows = []
    best_prev = []
    for step_input in xs[1:]:
        rows = step_input.shape[0]
        if forward.shape[0] <= rows:
            finished_rows.append(None)
        else:
            # Sequences beyond ``rows`` have ended; keep their scores aside.
            forward, tail = split_axis.split_axis(forward, [rows], axis=0)
            finished_rows.append(tail)
        b_forward, b_cost = broadcast.broadcast(forward[..., None], cost)
        joint = b_forward + b_cost
        best_prev.append(minmax.argmax(joint, axis=1))
        forward = minmax.max(joint, axis=1) + step_input

    # Walk the back-pointers from the final step to the first one.
    labels = minmax.argmax(forward, axis=1)
    ps = [labels.data]
    for pointer, tail in zip(best_prev[::-1], finished_rows[::-1]):
        labels = select_item.select_item(pointer, labels)
        if tail is not None:
            tail_labels = minmax.argmax(tail, axis=1)
            labels = concat.concat([labels, tail_labels], axis=0)
        ps.append(labels.data)
    ps.reverse()

    # Best scores: still-running sequences first, then the finished ones.
    s = minmax.max(forward, axis=1)
    for tail in finished_rows[::-1]:
        if tail is not None:
            s = concat.concat([s, minmax.max(tail, axis=1)], axis=0)

    return s, ps
Example #3
0
def argmax_crf1d(cost, xs):
    """Finds the best-scoring label path of a linear-chain CRF.

    This variant contains no handling for batch sizes that change between
    steps, so every element of ``xs`` is presumably the same shape.

    Args:
        cost (Variable): Transition scores between labels; broadcast against
            ``alpha[..., None]``.
        xs (list of Variable): Per-step label scores, one matrix per step.

    Returns:
        tuple: ``(score, path)`` where ``score`` is the best path score per
        row and ``path`` is a list with one index array per step.
    """
    running = xs[0]
    backpointers = []
    for emission in xs[1:]:
        expanded, transitions = broadcast.broadcast(running[..., None], cost)
        candidates = expanded + transitions
        backpointers.append(minmax.argmax(candidates, axis=1))
        running = minmax.max(candidates, axis=1) + emission

    # Follow the stored predecessors from the last step back to the first.
    best = minmax.argmax(running, axis=1)
    path = [best.data]
    for pointer in backpointers[::-1]:
        best = select_item.select_item(pointer, best)
        path.append(best.data)
    path.reverse()

    score = minmax.max(running, axis=1)
    return score, path
Example #4
0
    def predict_labels(self, m, h, xp=np):
        """Score labels from ``m`` and ``h`` and pick the best one per row.

        Args:
            m: First input; concatenated with ``h`` along axis 1.
            h: Second input.
            xp: Array module in use. When it is ``cuda.cupy`` the predictions
                are copied to host memory.

        Returns:
            tuple: ``(scores, yl)`` where ``scores`` is the output of
            ``self.mlp_label`` and ``yl`` is a NumPy array of the argmax over
            axis 1 with ``-1`` prepended (presumably a placeholder for a
            leading dummy position; confirm against callers).
        """
        scores = self.mlp_label(F.concat((m, h), 1), per_element=False)

        predicted = minmax.argmax(scores, axis=1).data
        if xp is cuda.cupy:
            predicted = cuda.to_cpu(predicted)

        return scores, np.insert(predicted, 0, np.int32(-1))
Example #5
0
    def predict_pos(self, w, xp=np):
        """Score tags for ``w`` and pick the best one per row.

        Args:
            w: Input passed to ``self.unigram_embed``.
            xp: Array module in use. When it is ``cuda.cupy`` the predictions
                are copied to host memory.

        Returns:
            tuple: ``(scores, yp)`` where ``scores`` is the output of
            ``self.mlp_pos`` and ``yp`` is a NumPy array of the argmax over
            axis 1 with ``-1`` prepended (presumably a placeholder for a
            leading dummy position; confirm against callers).
        """
        embedded = self.unigram_embed(w)
        scores = self.mlp_pos(embedded, per_element=False)

        predicted = minmax.argmax(scores, axis=1).data
        if xp is cuda.cupy:
            predicted = cuda.to_cpu(predicted)

        return scores, np.insert(predicted, 0, np.int32(-1))
Example #6
0
    def predict_arcs(self, m, h, train=True, xp=np):
        """Score candidate arcs and pick the best-scoring column for each row.

        Args:
            m: First input to ``self.biaffine_arc`` (dropout is applied first).
            h: Second input to ``self.biaffine_arc`` (dropout is applied
                first).
            train (bool): Currently unused. A disabled code path once used it
                to replace non-tree predictions with an MST decode at
                inference time.
            xp: Array module in use. When it is ``cuda.cupy`` the predictions
                are copied to host memory.

        Returns:
            tuple: ``(scores, yh)`` where ``scores`` is the masked biaffine
            score matrix and ``yh`` is a NumPy array of the argmax over axis
            1, with ``self.n_dummy`` copies of ``constants.NO_PARENTS_ID``
            prepended.
        """
        arc_scores = self.biaffine_arc(
            F.dropout(m, self.pred_layers_dropout),
            F.dropout(h, self.pred_layers_dropout))
        scores = arc_scores + masking_matrix(len(m), self.n_dummy, xp=xp)

        yh = minmax.argmax(scores, axis=1).data
        if xp is cuda.cupy:
            yh = cuda.to_cpu(yh)

        # NOTE: cycle detection / MST decoding for ``train=False`` is not
        # performed here; the greedy argmax is returned as-is.

        # Prepend all dummy entries with a single insert instead of
        # reallocating the array once per dummy slot.
        if self.n_dummy > 0:
            no_parent = np.full(
                self.n_dummy, constants.NO_PARENTS_ID, dtype=np.int32)
            yh = np.insert(yh, 0, no_parent)

        return scores, yh
Example #7
0
    def act_and_merge_features(self,
                               xs,
                               ws,
                               vs,
                               ms,
                               gcs=None,
                               get_att_score=False):
        """Compute a chunk summary vector per item and append it to ``x``.

        The items of ``xs``, ``ws``, ``vs``, ``gcs`` and ``ms`` are processed
        in lockstep. For each item a summary ``a`` is built, either as
        ``w_weight @ w`` (when ``self.use_average``) or as the element-wise
        product of the concatenated ``v`` with ``v_weight`` (otherwise), and
        ``x`` and ``a`` are concatenated along axis 1.

        Args:
            xs: Per-item feature arrays/variables. The array module is taken
                from ``xs[0]``.
            ws: Per-item values used for averaging, or ``None``.
            vs: Per-item values used for concatenation, or ``None``. When both
                ``w`` and ``v`` are ``None`` a zero summary is used.
            ms: Per-item masks, read positionally: ``mask[0]`` (``mask_ij``),
                ``mask[1]`` (``mask_i``, attention only), ``mask[2]``
                (``mask_ik``, concatenation only) and ``mask[3]``
                (``ikj_table``, WCON pooling only).
            gcs: Optional per-item values. They are unpacked in the loop but
                not otherwise used in this method.
            get_att_score (bool): When true, ``ass`` is added to the result.

        Returns:
            tuple: ``(closs, pcs, hs)`` or, with ``get_att_score``,
            ``(closs, pcs, hs, ass)``. ``hs`` holds one merged feature
            variable per item. ``pcs`` holds argmax indices, appended only
            when ``self.use_attention`` is set or when the item had no ``w``
            and ``v`` (zeros in that case). ``closs`` is never updated in
            this method, so it is always returned as ``None``; ``ass`` is
            never filled and is always an empty list.
        """
        hs = []
        pcs = []
        ass = []  # attention scores (never populated: the code below is disabled)

        xp = cuda.get_array_module(xs[0])
        closs = chainer.Variable(xp.array(0, dtype='f'))

        if gcs is None:
            gcs = [None] * len(xs)
        for x, w, v, gc, mask in zip(xs, ws, vs, gcs, ms):
            # print('x', x.shape)
            if w is None and v is None:  # no words were found for devel/test data
                # Fall back to a zero summary vector and zero predictions.
                a = xp.zeros((len(x), self.chunk_embed_out_dim), dtype='f')
                pc = np.zeros(len(x), 'i')
                pcs.append(pc)
                h = F.concat((x, a), axis=1)  # (n, dt) @ (n, dc) => (n, dt+dc)
                hs.append(h)
                continue

            if w is not None:
                w = F.dropout(w, self.embed_dropout)

            ## calculate weight for w

            mask_ij = mask[0]
            if self.use_attention:  # wavg or wcon
                mask_i = mask[1]
                # print('w', w.shape)

                w_scores = self.biaffine(
                    F.dropout(x, self.biaffine_dropout),
                    F.dropout(w, self.biaffine_dropout))  # (n, m)
                w_scores = w_scores + mask_ij  # a masked element becomes 0 after softmax operation
                w_weight = F.softmax(w_scores)
                w_weight = w_weight * mask_i  # rows of chars without candidate words become zero vectors

                # print('ww', w_weight.shape, '\n', w_weight)

            elif self.chunk_pooling_type == constants.AVG:
                w_weight = self.normalize(mask_ij, xp=xp)

            # NOTE(review): w_weight is only assigned in the two branches
            # above; other configurations presumably never read it - confirm.

            if not self.use_concat and self.chunk_vector_dropout > 0:
                # Zero out whole rows of w_weight, each with probability
                # chunk_vector_dropout.
                mask_drop = xp.ones(w_weight.shape, dtype='f')
                for i in range(w_weight.shape[0]):
                    if self.chunk_vector_dropout > np.random.rand():
                        mask_drop[i] = xp.zeros(w_weight.shape[1], dtype='f')
                w_weight = w_weight * mask_drop

            ## calculate weight for v

            if self.use_concat:
                mask_ik = mask[2]
                n = x.shape[0]
                wd = self.chunk_embed_dim_merged  #w.shape[1]
                if self.chunk_pooling_type == constants.WCON:
                    ikj_table = mask[3]
                    # Gather, for every row i, the entries of w_weight[i]
                    # selected by ikj_table[i], then stack the rows.
                    v_weight0 = F.concat(
                        [
                            F.expand_dims(  # (n, m) -> (n, k)
                                F.get_item(w_weight[i], ikj_table[i]),
                                axis=0) for i in range(n)
                        ],
                        axis=0)
                    # print('mask_ik', mask_ik.shape, '\n', mask_ik)
                    # print('v_weight0', v_weight0.shape, '\n', v_weight0)
                    v_weight0 *= mask_ik
                    # print('ikj_table', ikj_table)

                else:
                    v_weight0 = mask_ik

                # Shape notes assume v_weight0 is (n, k) with
                # k == self.chunk_concat_num - TODO confirm.
                v_weight = F.transpose(v_weight0)  # (n, k) -> (k, n)
                v_weight = F.expand_dims(v_weight, 2)  # (k, n, 1)
                v_weight = F.broadcast_to(
                    v_weight, (self.chunk_concat_num, n, wd))  # (k,n,wd)
                # presumably joins the k slices along axis 1 => (n, k*wd);
                # verify against F.concat's handling of a single Variable
                v_weight = F.concat(v_weight, axis=1)

                if self.chunk_vector_dropout > 0:
                    # Zero out whole rows of v_weight, each with probability
                    # chunk_vector_dropout.
                    mask_drop = xp.ones(v_weight.shape, dtype='f')
                    for i in range(v_weight.shape[0]):
                        if self.chunk_vector_dropout > np.random.rand():
                            mask_drop[i] = xp.zeros(v_weight.shape[1],
                                                    dtype='f')
                    v_weight *= mask_drop

            ## calculate summary vector a
            if self.use_average:  # avg or wavg
                a = F.matmul(w_weight, w)  # (n, m) * (m, dc)  => (n, dc)

            else:  # con or wcon
                v = F.concat(v, axis=1)
                a = v * v_weight
                # print('a', a.shape, a)

            ## get predicted (attended) chunks
            if self.use_attention:  # wavg or wcon
                if self.chunk_pooling_type == constants.WAVG:
                    weight = w_weight
                else:
                    weight = v_weight0
                pc = minmax.argmax(weight, axis=1).data
                if xp is cuda.cupy:
                    pc = cuda.to_cpu(pc)
                pcs.append(pc)

            #     if get_att_score:
            #         ascore = minmax.max(weight, axis=1).data
            #         ass.append(ascore)

            #     ncand = [sum([1 if val >= 0 else 0 for val in raw]) for raw in _mask]
            #     print('pred', pc)
            #     print('gold', gc)
            #     print('ncand', ncand)
            #     print('weight', weight.shape, weight.data)
            #     print('weight')
            #     for i, e in enumerate(weight.data):
            #         print(i, e)

            h = F.concat((x, a), axis=1)  # (n, dt) @ (n, dc) => (n, dt+dc)

            hs.append(h)

        # closs is never modified above, so this check always succeeds and
        # closs is returned as None.
        if closs.data == 0:
            closs = None
        else:
            closs /= len(xs)

        if get_att_score:
            return closs, pcs, hs, ass
        else:
            return closs, pcs, hs