Example #1
    def forward(self, attention_hidden_state, memory, processed_memory,
                attention_weights_cat, mask):
        prev_attention = attention_weights_cat[:, 0]
        if attention_weights_cat.sum() == 0:
            # first step
            attention_weights = torch.zeros_like(prev_attention)
            attention_weights[:, 0] = 1.
        else:
            alignment = super().get_alignment_energies(attention_hidden_state,
                                                       processed_memory,
                                                       attention_weights_cat)
            if self.training:
                # soft:
                alignment = alignment + self.gaussian_noise(alignment)
                if mask is not None:
                    # fill inplace:
                    alignment = alignment.data.masked_fill_(
                        mask, self.score_mask_value)

                # p_select = self.sigmoid(alignment)
                log_p_select = self.logsigmoid(alignment)
                log_1_minus_p_select = self.logsigmoid(-alignment)
                log_cumprod_1_minus_p = torch.cumsum(log_1_minus_p_select,
                                                     dim=1)
                # log_cumprod_1_minus_p = self.log_safe_cumprod(1 - p_select)
                log_attention_weights_prev = torch.log(
                    torch.clamp(prev_attention, min=1e-10, max=1))
                log_attention_weights = log_p_select + log_cumprod_1_minus_p + torch.logcumsumexp(
                    log_attention_weights_prev - log_cumprod_1_minus_p, dim=1)
                attention_weights = torch.exp(
                    torch.clamp(log_attention_weights, max=1))
            else:
                # hard:
                above_threshold = (alignment > 0).float()  # threshold at 0 since sigmoid(0) = 0.5

                p_select = above_threshold * torch.cumsum(prev_attention,
                                                          dim=1)
                attention = p_select * self.exclusive_cumprod(1 - p_select)

                # Not attended => attend to the last encoder output.
                # Assumes encoder outputs are not padded (true at inference time).
                attended = attention.sum(dim=1)
                for batch_i in range(attention_weights_cat.shape[0]):
                    if not attended[batch_i]:
                        attention[batch_i, -1] = 1
                attention_weights = attention
        # apply attention:
        attention_context = torch.bmm(attention_weights.unsqueeze(1), memory)
        attention_context = attention_context.squeeze(1)
        return attention_context, attention_weights
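The training branch above evaluates the soft monotonic-attention recursion entirely in log space. Below is a minimal standalone sketch (the names p and alpha_prev are illustrative, not taken from the snippet) that checks the log-space torch.logcumsumexp form against the direct probability-space form on a toy tensor:

import torch

torch.manual_seed(0)
T = 6
p = torch.sigmoid(torch.randn(T))              # selection probabilities
alpha_prev = torch.softmax(torch.randn(T), 0)  # previous attention weights

# direct form p * cumprod(1 - p) * cumsum(alpha_prev / cumprod(1 - p));
# the cumulative product can underflow for long sequences
cp = torch.cumprod(1 - p, dim=0)
alpha_direct = p * cp * torch.cumsum(alpha_prev / cp, dim=0)

# same recursion in log space, as in the snippet above
log_cp = torch.log1p(-p).cumsum(dim=0)
log_alpha = torch.log(p) + log_cp + torch.logcumsumexp(
    torch.log(alpha_prev) - log_cp, dim=0)

print(torch.allclose(alpha_direct, torch.exp(log_alpha)))  # True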
Example #2
    def forward(self, log_h, y):
        log_h = log_h.flatten()

        durations, events = y.T

        # sort input
        durations, idx = durations.sort(descending=True)
        log_h = log_h[idx]
        events = events[idx]

        event_ind = events.nonzero().flatten()

        # numerator
        log_num = log_h[event_ind].mean()

        # logcumsumexp of events
        event_lcse = torch.logcumsumexp(log_h, dim=0)[event_ind]

        # number of events for each unique risk set
        _, tie_inverses, tie_count = torch.unique_consecutive(
            durations[event_ind], return_counts=True, return_inverse=True)

        # position of last event (lowest duration) of each unique risk set
        tie_pos = tie_count.cumsum(axis=0) - 1

        # logcumsumexp by tie for each event
        event_tie_lcse = event_lcse[tie_pos][tie_inverses]

        if self.method == "breslow":
            log_den = event_tie_lcse.mean()

        elif self.method == "efron":
            # based on https://bydmitry.github.io/efron-tensorflow.html

            # logsumexp of ties, duplicated within tie set
            tie_lse = scatter_logsumexp(log_h[event_ind], tie_inverses,
                                        dim=0)[tie_inverses]
            # multiply (add in log space) by the corrective factor
            aux = torch.ones_like(tie_inverses)
            aux[tie_pos[:-1] + 1] -= tie_count[:-1]
            event_id_in_tie = torch.cumsum(aux, dim=0) - 1
            discounted_tie_lse = (tie_lse + torch.log(event_id_in_tie) -
                                  torch.log(tie_count[tie_inverses]))

            # denominator
            log_den = log_substract(event_tie_lcse, discounted_tie_lse).mean()

        # loss is negative log likelihood
        return log_den - log_num
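In the Breslow branch above, torch.logcumsumexp over the log-hazards sorted by descending duration gives the log of each risk-set sum directly. A minimal sketch with made-up numbers (no ties, so the Breslow and Efron denominators coincide):

import torch

log_h = torch.tensor([0.2, -1.0, 0.7, 0.1])     # log hazards, already sorted
durations = torch.tensor([5.0, 3.0, 2.0, 1.0])  # by descending duration
events = torch.tensor([1, 0, 1, 1])             # 1 = event observed

# log of each risk-set sum, equal to log(cumsum(exp(log_h))) but stable
risk_log_sums = torch.logcumsumexp(log_h, dim=0)

# negative log partial likelihood over the events, as returned by the loss above
event_ind = events.nonzero().flatten()
loss = risk_log_sums[event_ind].mean() - log_h[event_ind].mean()
print(loss)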
Example #3
def partial_ll_loss(lrisks, tb, eb, eps=1e-2):

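    # tiny random jitter on the event times to break ties before sorting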
    tb = tb + eps * np.random.random(len(tb))
    sindex = np.argsort(-tb)

    tb = tb[sindex]
    eb = eb[sindex]

    lrisks = lrisks[sindex]  # lrisks = tf.gather(lrisks, sindex)
    # lrisksdenom = tf.math.cumulative_logsumexp(lrisks)
    lrisksdenom = torch.logcumsumexp(lrisks, dim=0)

    plls = lrisks - lrisksdenom
    pll = plls[eb == 1]

    pll = torch.sum(pll)  # pll = tf.reduce_sum(pll)

    return -pll
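A usage sketch for partial_ll_loss, assuming tb and eb are 1-D NumPy arrays of event times and event indicators and lrisks is a 1-D torch tensor of log-risk scores (the values below are made up):

import numpy as np
import torch

lrisks = torch.randn(6, requires_grad=True)     # log-risk scores from some model
tb = np.array([4.2, 1.1, 3.0, 2.5, 5.7, 0.9])   # event / censoring times
eb = np.array([1, 0, 1, 1, 0, 1])               # 1 = event observed, 0 = censored

loss = partial_ll_loss(lrisks, tb, eb)
loss.backward()
print(loss.item(), lrisks.grad.shape)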
Example #4
 def other_ops(self):
     a = torch.randn(4)
     b = torch.randn(4)
     c = torch.randint(0, 8, (5, ), dtype=torch.int64)
     e = torch.randn(4, 3)
     f = torch.randn(4, 4, 4)
     size = [0, 1]
     dims = [0, 1]
     return (
         torch.atleast_1d(a),
         torch.atleast_2d(a),
         torch.atleast_3d(a),
         torch.bincount(c),
         torch.block_diag(a),
         torch.broadcast_tensors(a),
         torch.broadcast_to(a, (4,)),
         # torch.broadcast_shapes(a),
         torch.bucketize(a, b),
         torch.cartesian_prod(a),
         torch.cdist(e, e),
         torch.clone(a),
         torch.combinations(a),
         torch.corrcoef(a),
         # torch.cov(a),
         torch.cross(e, e),
         torch.cummax(a, 0),
         torch.cummin(a, 0),
         torch.cumprod(a, 0),
         torch.cumsum(a, 0),
         torch.diag(a),
         torch.diag_embed(a),
         torch.diagflat(a),
         torch.diagonal(e),
         torch.diff(a),
         torch.einsum("iii", f),
         torch.flatten(a),
         torch.flip(e, dims),
         torch.fliplr(e),
         torch.flipud(e),
         torch.kron(a, b),
         torch.rot90(e),
         torch.gcd(c, c),
         torch.histc(a),
         torch.histogram(a),
         torch.meshgrid(a),
         torch.lcm(c, c),
         torch.logcumsumexp(a, 0),
         torch.ravel(a),
         torch.renorm(e, 1, 0, 5),
         torch.repeat_interleave(c),
         torch.roll(a, 1, 0),
         torch.searchsorted(a, b),
         torch.tensordot(e, e),
         torch.trace(e),
         torch.tril(e),
         torch.tril_indices(3, 3),
         torch.triu(e),
         torch.triu_indices(3, 3),
         torch.vander(a),
         torch.view_as_real(torch.randn(4, dtype=torch.cfloat)),
         torch.view_as_complex(torch.randn(4, 2)),
         torch.resolve_conj(a),
         torch.resolve_neg(a),
     )
Example #5
 print(torch.argmax(mat1, 1))  # row-wise
 print(torch.amin(mat1, 0))  # column-wise
 print(torch.amin(mat1, 1))  # row-wise
 print(torch.argmin(mat1))  # over all elements
 print(torch.argmin(mat1, 0))  # column-wise
 print(torch.argmin(mat1, 1))  # row-wise
 print(torch.argsort(mat1, 0))  # column-wise, returns the indices
 print(torch.argsort(mat1, 1))  # row-wise
 print(torch.topk(mat1, 2))
 # print(torch.msort(mat1))  # sorts along dim 0
 print(torch.kthvalue(mat1, 1, 0))
 print(torch.kthvalue(mat1, 1, 1))
 print(torch.logsumexp(mat1, 1))  # row-wise
 """cum"""
 print("cum function:")
 print(torch.logcumsumexp(x, dim=0))  # log of the cumulative sum of exp(x_i)
 print(torch.cummax(x, dim=0))
 print(torch.cummin(x, dim=0))
 print(torch.cumprod(x, dim=0))
 print(torch.cumsum(x, dim=0))
 """vec <> vec"""
 a = torch.tensor([9.7, float('nan'), 3.1, float('nan')])
 b = torch.tensor([-2.2, 0.5, float('nan'), float('nan')])
 c = torch.tensor([9.7, 1, 3.1, 4])
 d = torch.tensor([1.7, 1.2, 3.1, 2])
 print(torch.maximum(a, b))
 print(torch.minimum(a, b))
 print(torch.fmod(a, 2))
 print(torch.dist(c, d, 1))  # p-norm
 print(torch.norm(c))
 print(torch.div(c, d))
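As a quick check of the cumulative ops printed above, torch.logcumsumexp(x, dim=0) is the numerically stable form of log(cumsum(exp(x))); a minimal sketch:

import torch

x = torch.tensor([0.5, -1.0, 2.0, 0.0])
print(torch.logcumsumexp(x, dim=0))
print(torch.log(torch.cumsum(torch.exp(x), dim=0)))  # same values; the naive form can overflow for large x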