def forward(self, attention_hidden_state, memory, processed_memory,
            attention_weights_cat, mask):
    prev_attention = attention_weights_cat[:, 0]

    if attention_weights_cat.sum() == 0:
        # first step
        attention_weights = torch.zeros_like(prev_attention)
        attention_weights[:, 0] = 1.
    else:
        alignment = super().get_alignment_energies(
            attention_hidden_state, processed_memory, attention_weights_cat)

        if self.training:
            # soft:
            alignment = alignment + self.gaussian_noise(alignment)

            if mask is not None:
                # fill inplace:
                alignment = alignment.data.masked_fill_(
                    mask, self.score_mask_value)

            # p_select = self.sigmoid(alignment)
            log_p_select = self.logsigmoid(alignment)
            log_1_minus_p_select = self.logsigmoid(-alignment)
            log_cumprod_1_minus_p = torch.cumsum(log_1_minus_p_select, dim=1)
            # log_cumprod_1_minus_p = self.log_safe_cumprod(1 - p_select)

            log_attention_weights_prev = torch.log(
                torch.clamp(prev_attention, min=1e-10, max=1))
            log_attention_weights = log_p_select + log_cumprod_1_minus_p + torch.logcumsumexp(
                log_attention_weights_prev - log_cumprod_1_minus_p, dim=1)
            attention_weights = torch.exp(
                torch.clamp(log_attention_weights, max=1))
        else:
            # hard:
            above_threshold = (alignment > 0).float()  # zero because sigmoid!

            p_select = above_threshold * torch.cumsum(prev_attention, dim=1)
            attention = p_select * self.exclusive_cumprod(1 - p_select)

            # Not attended => attend at last encoder output
            # Assume that encoder outputs are not padded (this is true on inference)
            attended = attention.sum(dim=1)
            for batch_i in range(attention_weights_cat.shape[0]):
                if not attended[batch_i]:
                    attention[batch_i, -1] = 1

            attention_weights = attention

    # apply attention:
    attention_context = torch.bmm(attention_weights.unsqueeze(1), memory)
    attention_context = attention_context.squeeze(1)

    return attention_context, attention_weights
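# --- Added illustration (not part of the class above): a standalone sketch of what
# the soft training branch computes with torch.logcumsumexp, namely, in log space,
#   alpha[j] = p[j] * sum_{k<=j} prev[k] * prod_{l=k+1..j} (1 - p[l]),
# checked against a naive double loop. All names here (T, alignment, prev) are
# invented for the example.
import torch

torch.manual_seed(0)
T = 6
alignment = torch.randn(T)                   # unnormalised selection energies
prev = torch.softmax(torch.randn(T), dim=0)  # previous attention weights

log_p = torch.nn.functional.logsigmoid(alignment)      # log p[j]
log_1mp = torch.nn.functional.logsigmoid(-alignment)   # log (1 - p[j])
log_cumprod_1mp = torch.cumsum(log_1mp, dim=0)         # log prod_{l<=j} (1 - p[l])
log_prev = torch.log(prev.clamp(min=1e-10))

log_alpha = log_p + log_cumprod_1mp + torch.logcumsumexp(
    log_prev - log_cumprod_1mp, dim=0)
alpha = log_alpha.exp()

# naive reference for the same expression
p = alignment.sigmoid()
ref = torch.zeros(T)
for j in range(T):
    acc = 0.0
    for k in range(j + 1):
        acc += prev[k] * torch.prod(1 - p[k + 1:j + 1])
    ref[j] = p[j] * acc

print(torch.allclose(alpha, ref, atol=1e-5))  # expected: True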
def forward(self, log_h, y):
    log_h = log_h.flatten()

    durations, events = y.T

    # sort input
    durations, idx = durations.sort(descending=True)
    log_h = log_h[idx]
    events = events[idx]

    event_ind = events.nonzero().flatten()

    # numerator
    log_num = log_h[event_ind].mean()

    # logcumsumexp of events
    event_lcse = torch.logcumsumexp(log_h, dim=0)[event_ind]

    # number of events for each unique risk set
    _, tie_inverses, tie_count = torch.unique_consecutive(
        durations[event_ind], return_counts=True, return_inverse=True)

    # position of last event (lowest duration) of each unique risk set
    tie_pos = tie_count.cumsum(axis=0) - 1

    # logcumsumexp by tie for each event
    event_tie_lcse = event_lcse[tie_pos][tie_inverses]

    if self.method == "breslow":
        log_den = event_tie_lcse.mean()
    elif self.method == "efron":
        # based on https://bydmitry.github.io/efron-tensorflow.html
        # logsumexp of ties, duplicated within tie set
        tie_lse = scatter_logsumexp(log_h[event_ind], tie_inverses, dim=0)[tie_inverses]
        # multiply (add in log space) with corrective factor
        aux = torch.ones_like(tie_inverses)
        aux[tie_pos[:-1] + 1] -= tie_count[:-1]
        event_id_in_tie = torch.cumsum(aux, dim=0) - 1
        discounted_tie_lse = (tie_lse + torch.log(event_id_in_tie)
                              - torch.log(tie_count[tie_inverses]))

        # denominator
        log_den = log_substract(event_tie_lcse, discounted_tie_lse).mean()

    # loss is negative log likelihood
    return log_den - log_num
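# --- Added illustration (not part of the loss above): a tiny check of the core
# trick that, with durations sorted in descending order, torch.logcumsumexp at an
# event's position equals the log-sum of exp(log_h) over its risk set
# {j : t_j >= t_i}; ties are read off at the last position of the tie group, as
# the Breslow/Efron code does. The tensors below are made up for the demo.
import torch

durations = torch.tensor([5., 4., 4., 2., 1.])  # already sorted descending
log_h = torch.randn(5)
events = torch.tensor([1, 0, 1, 1, 0])

lcse = torch.logcumsumexp(log_h, dim=0)
for i in torch.nonzero(events).flatten():
    risk_set = durations >= durations[i]
    direct = torch.logsumexp(log_h[risk_set], dim=0)
    # with ties, index the last position of the tie group
    last_in_tie = torch.nonzero(durations == durations[i]).max()
    print(torch.allclose(lcse[last_in_tie], direct))  # expected: True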
def partial_ll_loss(lrisks, tb, eb, eps=1e-2):

    tb = tb + eps * np.random.random(len(tb))
    sindex = np.argsort(-tb)

    tb = tb[sindex]
    eb = eb[sindex]

    lrisks = lrisks[sindex]
    # lrisks = tf.gather(lrisks, sindex)

    # lrisksdenom = tf.math.cumulative_logsumexp(lrisks)
    lrisksdenom = torch.logcumsumexp(lrisks, dim=0)

    plls = lrisks - lrisksdenom
    pll = plls[eb == 1]

    pll = torch.sum(pll)
    # pll = tf.reduce_sum(pll)

    return -pll
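# --- Added usage sketch (hypothetical values, not from the original source):
# partial_ll_loss expects lrisks as a torch tensor of log-risk scores and
# tb/eb as NumPy arrays of times and event indicators, mirroring the
# numpy/torch mix inside the function.
import numpy as np
import torch

tb = np.array([2.0, 5.0, 3.0, 5.0, 1.0, 4.0])   # event/censoring times
eb = np.array([1, 0, 1, 1, 0, 1])               # 1 = event, 0 = censored
lrisks = torch.randn(6, requires_grad=True)     # model's log-risk scores

loss = partial_ll_loss(lrisks, tb, eb)
loss.backward()                                 # gradients flow back to lrisks
print(float(loss))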
def other_ops(self):
    a = torch.randn(4)
    b = torch.randn(4)
    c = torch.randint(0, 8, (5,), dtype=torch.int64)
    e = torch.randn(4, 3)
    f = torch.randn(4, 4, 4)
    size = [0, 1]
    dims = [0, 1]
    return (
        torch.atleast_1d(a),
        torch.atleast_2d(a),
        torch.atleast_3d(a),
        torch.bincount(c),
        torch.block_diag(a),
        torch.broadcast_tensors(a),
        torch.broadcast_to(a, (4)),
        # torch.broadcast_shapes(a),
        torch.bucketize(a, b),
        torch.cartesian_prod(a),
        torch.cdist(e, e),
        torch.clone(a),
        torch.combinations(a),
        torch.corrcoef(a),
        # torch.cov(a),
        torch.cross(e, e),
        torch.cummax(a, 0),
        torch.cummin(a, 0),
        torch.cumprod(a, 0),
        torch.cumsum(a, 0),
        torch.diag(a),
        torch.diag_embed(a),
        torch.diagflat(a),
        torch.diagonal(e),
        torch.diff(a),
        torch.einsum("iii", f),
        torch.flatten(a),
        torch.flip(e, dims),
        torch.fliplr(e),
        torch.flipud(e),
        torch.kron(a, b),
        torch.rot90(e),
        torch.gcd(c, c),
        torch.histc(a),
        torch.histogram(a),
        torch.meshgrid(a),
        torch.lcm(c, c),
        torch.logcumsumexp(a, 0),
        torch.ravel(a),
        torch.renorm(e, 1, 0, 5),
        torch.repeat_interleave(c),
        torch.roll(a, 1, 0),
        torch.searchsorted(a, b),
        torch.tensordot(e, e),
        torch.trace(e),
        torch.tril(e),
        torch.tril_indices(3, 3),
        torch.triu(e),
        torch.triu_indices(3, 3),
        torch.vander(a),
        torch.view_as_real(torch.randn(4, dtype=torch.cfloat)),
        torch.view_as_complex(torch.randn(4, 2)),
        torch.resolve_conj(a),
        torch.resolve_neg(a),
    )
print(torch.argmax(mat1, 1))   # row-wise
print(torch.amin(mat1, 0))     # column-wise
print(torch.amin(mat1, 1))     # row-wise
print(torch.argmin(mat1))      # over all elements
print(torch.argmin(mat1, 0))   # column-wise
print(torch.argmin(mat1, 1))   # row-wise
print(torch.argsort(mat1, 0))  # column-wise, returns the indices
print(torch.argsort(mat1, 1))  # row-wise
print(torch.topk(mat1, 2))
# print(torch.msort(mat1))     # sorts along dim 0
print(torch.kthvalue(mat1, 1, 0))
print(torch.kthvalue(mat1, 1, 1))
print(torch.logsumexp(mat1, 1))  # row-wise

"""cum"""
print("cum function:")
print(torch.logcumsumexp(x, dim=0))  # log(cumsum(exp(x_i)))
print(torch.cummax(x, dim=0))
print(torch.cummin(x, dim=0))
print(torch.cumprod(x, dim=0))
print(torch.cumsum(x, dim=0))

"""vec <> vec"""
a = torch.tensor([9.7, float('nan'), 3.1, float('nan')])
b = torch.tensor([-2.2, 0.5, float('nan'), float('nan')])
c = torch.tensor([9.7, 1, 3.1, 4])
d = torch.tensor([1.7, 1.2, 3.1, 2])
print(torch.maximum(a, b))
print(torch.minimum(a, b))
print(torch.fmod(a, 2))
print(torch.dist(c, d, 1))  # p-norm
print(torch.norm(c))
print(torch.div(c, d))
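# --- Added check (my own illustration): torch.logcumsumexp(x, 0) agrees with
# log(cumsum(exp(x))) but stays finite where the naive form overflows.
import torch

x_small = torch.tensor([0.1, 0.5, 1.0, 2.0])
print(torch.allclose(torch.logcumsumexp(x_small, dim=0),
                     torch.log(torch.cumsum(torch.exp(x_small), dim=0))))  # True

x_big = torch.tensor([1000.0, 1000.0])
print(torch.log(torch.cumsum(torch.exp(x_big), dim=0)))  # [inf, inf] (overflow)
print(torch.logcumsumexp(x_big, dim=0))                  # [1000.0000, 1000.6931]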