Example 1
def _log_det_jacobian(self, x, y):
    shape = x.shape
    scale = self.scale
    if isinstance(scale, numbers.Number):
        xp = cuda.get_array_module(x, y)
        result = exponential.log(basic_math.absolute(scale)) \
            * xp.ones(shape, dtype=x.dtype)
    else:
        result = exponential.log(basic_math.absolute(scale))
    if self.event_dim:
        result_size = result.shape[:-self.event_dim] + (-1,)
        result = sum_mod.sum(result.reshape(result_size), axis=-1)
        shape = shape[:-self.event_dim]
    return broadcast.broadcast_to(result, shape)
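The snippet above is a Chainer-style affine transform. As a rough standalone illustration of its scalar-scale branch (plain NumPy, variable names of my choosing, not part of the project): an elementwise map y = loc + scale * x has a diagonal Jacobian, so the log-determinant per element is just log|scale| broadcast to the shape of x.

import numpy as np

x = np.random.randn(3, 4).astype(np.float32)
scale = 2.5
# log|det J| for an elementwise affine map is log|scale| at every position
log_det = np.log(np.abs(scale)) * np.ones_like(x)
assert log_det.shape == x.shape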
Example 2
    def __init__(self,
                 delta_v=0.5,
                 delta_d=1.5,
                 max_embedding_dim=10,
                 norm=1,
                 alpha=1.0,
                 beta=1.0,
                 gamma=0.001):
        self.delta_v = delta_v
        self.delta_d = delta_d
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.max_embedding_dim = max_embedding_dim

        if self.max_embedding_dim <= 0:
            raise ValueError("Max number of embeddings has to be positive!")

        # Only the L1 or L2 norm is allowed
        if norm == 1:
            self.norm = lambda x, axis=None: c_sum(absolute(x), axis=axis)
        elif norm == 2:
            self.norm = lambda x, axis=None: sqrt(c_sum(x**2, axis=axis))
        else:
            raise ValueError("For discriminative loss, "
                             "norm can only be 1 or 2. "
                             "Obtained the value : {}".format(norm))
Example 3
def _kl_multivariatenormal_multivariatenormal(dist1, dist2):
    st = moveaxis.moveaxis(dist1.scale_tril, (-2, -1), (0, 1))
    diag = st[list(range(dist1.d)), list(range(dist1.d))]
    logdet1 = sum_mod.sum(exponential.log(basic_math.absolute(diag)), axis=0)

    st = moveaxis.moveaxis(dist2.scale_tril, (-2, -1), (0, 1))
    diag = st[list(range(dist2.d)), list(range(dist2.d))]
    logdet2 = sum_mod.sum(exponential.log(basic_math.absolute(diag)), axis=0)

    scale_tril_inv2 = _batch_triangular_inv(dist2.scale_tril.reshape(
        -1, dist2.d, dist2.d))
    trace = sum_mod.sum(matmul.matmul(
        scale_tril_inv2, dist1.scale_tril.reshape(-1, dist2.d, dist2.d)) ** 2,
        axis=(-1, -2)).reshape(dist1.batch_shape)

    mu = dist1.loc - dist2.loc
    mah = matmul.matmul(scale_tril_inv2, mu.reshape(-1, dist1.d, 1))
    mah = sum_mod.sum(mah ** 2, axis=-2).reshape(dist1.batch_shape)
    return logdet2 - logdet1 + 0.5 * trace + 0.5 * mah - 0.5 * dist1.d
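This implements the standard closed form for the KL divergence between two multivariate normals parametrized by Cholesky factors, KL(N(mu1, S1) || N(mu2, S2)) = log det L2 - log det L1 + tr(S2^-1 S1)/2 + (mu1 - mu2)^T S2^-1 (mu1 - mu2)/2 - d/2 with Si = Li Li^T; the squared Frobenius norm of L2^-1 L1 used above equals the trace term. A short NumPy check of that formula for a single pair of distributions (illustration only, variable names are mine):

import numpy as np

d = 3
rng = np.random.default_rng(0)
A1, A2 = rng.standard_normal((2, d, d))
S1 = A1 @ A1.T + d * np.eye(d)   # two random SPD covariances
S2 = A2 @ A2.T + d * np.eye(d)
mu1, mu2 = rng.standard_normal((2, d))
L1, L2 = np.linalg.cholesky(S1), np.linalg.cholesky(S2)

logdet1 = np.log(np.diag(L1)).sum()        # log det L1 = 0.5 * log det S1
logdet2 = np.log(np.diag(L2)).sum()
trace = np.trace(np.linalg.solve(S2, S1))  # tr(S2^-1 S1)
diff = mu1 - mu2
mah = diff @ np.linalg.solve(S2, diff)     # squared Mahalanobis distance
kl = logdet2 - logdet1 + 0.5 * trace + 0.5 * mah - 0.5 * d
print(kl)  # always >= 0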
Example 4
    def merge_representation(self, index, section, xs, ys):
        """
        Merge and average the context representation to prepare the input
        for next layer. If the prediction is 'O', its corresponding row of
        context representation of xs will be used as the input for next  
        layer, otherwise its corresponding row of ys will be seleted as the
        input for next layer.
        
        + index: merge index for predicts         
        + xs: context representation, input of bi_word_tag BiLSTM layer
        + ys: context representation, output of bi_word_tag BiLSTM layer
        e.g. predicts: B-Gene, I-Gene, O,B-protein,B-DNA
          index array:       
          [ 1, -1, -1, -1
            1, -1, -1, -1
           -1,  0, -1, -1
           -1, -1,  1, -1
           -1, -1, -1,  1 ]

          ys_index clip array:
          [ 1,  0,  0,  0
            1,  0,  0,  0
            0,  1,  0,  0
            0,  0,  1,  0
            0,  0,  0,  1 ]

          xs index(1-|index|) array: 
          [ 0,  0,  0,  0
            0,  0,  0,  0
            0,  1,  0,  0
            0,  0,  0,  0
            0,  0,  0,  0 ]
        """
        ys_index = index.copy()
        ys_index = F.clip(ys_index.astype('f'), 0., 1.0)
        ys = F.matmul(ys_index, F.vstack(ys), transa=True)
        xs_index = index.copy()
        xs_index = 1 - Fmat.absolute(xs_index.astype('f'))
        xs = F.matmul(xs_index, F.vstack(xs), transa=True)

        # Sum word vectors
        ys = Fmat.add(xs, ys)

        # Average word vectors for entity representation
        sum_index = F.sum(ys_index, axis=0)
        sum_index = F.clip(sum_index, 1.0, 1000000.0)
        sum_index = F.tile(sum_index, (ys.shape[1], 1))
        sum_index = F.transpose(sum_index)
        ys = Fmat.div(ys, sum_index)
        ys = F.split_axis(ys, section, axis=0)

        return ys
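To see the index trick from the docstring in isolation, here is a small NumPy sketch (illustrative only, not project code) using the five-prediction example: clip(index, 0, 1) sums the ys rows belonging to each merged entity, 1 - |index| copies the xs row for the 'O' prediction, and the per-column counts give the divisor for averaging.

import numpy as np

index = np.array([[ 1, -1, -1, -1],
                  [ 1, -1, -1, -1],
                  [-1,  0, -1, -1],
                  [-1, -1,  1, -1],
                  [-1, -1, -1,  1]], dtype=np.float32)
ys = np.arange(10, dtype=np.float32).reshape(5, 2)  # stand-in for BiLSTM outputs
xs = -np.ones((5, 2), dtype=np.float32)             # stand-in for BiLSTM inputs

ys_index = np.clip(index, 0.0, 1.0)
xs_index = 1.0 - np.abs(index)
merged = ys_index.T @ ys + xs_index.T @ xs
counts = np.clip(ys_index.sum(axis=0), 1.0, None)[:, None]
print(merged / counts)  # row 0: averaged Gene rows, row 1: the 'O' row from xs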
Example 5
def _logdet(self, x):
    st = moveaxis.moveaxis(x, (-2, -1), (0, 1))
    diag = st[list(range(self.d)), list(range(self.d))]
    logdet = sum_mod.sum(
        exponential.log(basic_math.absolute(diag)), axis=0)
    return logdet
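This reduction relies on the fact that for a triangular Cholesky factor L with Sigma = L L^T, half the log-determinant of Sigma equals the sum of log|L_ii|. A quick NumPy sanity check of that identity (standalone sketch, not from the project):

import numpy as np

A = np.random.randn(4, 4)
Sigma = A @ A.T + 4 * np.eye(4)   # random SPD matrix
L = np.linalg.cholesky(Sigma)
half_logdet = np.log(np.abs(np.diag(L))).sum()
assert np.allclose(half_logdet, 0.5 * np.linalg.slogdet(Sigma)[1])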