def test_logsoftmax():
    x = torch.tensor([[0.0, 1.0, 2.0], [0.0, 2.0, 4.0]])
    print('x: ', x)
    print('F.log_softmax(x, dim=0): ', F.log_softmax(x, dim=0))
    x_ibp = ibp.IntervalBoundedTensor(x, x, x)
    ls = ibp.log_softmax(x_ibp, dim=0)
    print('ibp.log_softmax(x, lb=x, ub=x): ', ls.val, ls.lb, ls.ub)
    lb = x - torch.tensor(0.1)
    ub = x + torch.tensor(0.1)
    print('lb: ', lb)
    print('ub: ', ub)
    x_ibp = ibp.IntervalBoundedTensor(x, lb, ub)
    ls = ibp.log_softmax(x_ibp, dim=0)
    print('ibp.log_softmax(x, lb, ub): ', ls.val, ls.lb, ls.ub)
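
# Hedged companion check (an addition, not part of the original tests): a minimal
# sketch verifying that the interval returned by ibp.log_softmax brackets the exact
# log-softmax of the center point. It assumes .val/.lb/.ub are plain tensors usable
# with torch comparison ops, as the prints in test_logsoftmax above suggest.
def test_logsoftmax_bounds():
    x = torch.tensor([[0.0, 1.0, 2.0], [0.0, 2.0, 4.0]])
    x_ibp = ibp.IntervalBoundedTensor(x, x - 0.1, x + 0.1)
    ls = ibp.log_softmax(x_ibp, dim=0)
    exact = F.log_softmax(x, dim=0)
    # A sound interval bound must contain the exact value (small tolerance for float error).
    assert torch.all(ls.lb <= exact + 1e-6)
    assert torch.all(exact <= ls.ub + 1e-6)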
def attend_on(self, source, target, attention):
    """
    Args:
      - source: (bXsXe)
      - target: (bXtXe)
      - attention: (bXtXs)
    """
    attention_logsoftmax = ibp.log_softmax(attention, 1)
    attention_normalized = ibp.activation(torch.exp, attention_logsoftmax)
    attended_target = ibp.matmul_nneg(attention_normalized, source)  # (bXtXe)
    return ibp.cat([target, attended_target], dim=-1)
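
# Hedged usage sketch (an addition, not part of the original model): exercise attend_on
# with degenerate intervals (lb == ub == val), built the same way test_logsoftmax builds
# IntervalBoundedTensors. The helper names and the shape sizes b, s, t, e are illustrative
# assumptions; `model` is any object defining attend_on, and out.val is assumed to exist
# because ibp.cat is assumed to return an IntervalBoundedTensor like ibp.log_softmax does.
def _as_point_interval(x):
    # Degenerate interval with lb == ub == val.
    return ibp.IntervalBoundedTensor(x, x, x)

def _attend_on_shape_check(model, b=2, s=4, t=3, e=8):
    # Non-negative random inputs, to stay on the safe side of matmul_nneg.
    source = _as_point_interval(torch.rand(b, s, e))
    target = _as_point_interval(torch.rand(b, t, e))
    attention = _as_point_interval(torch.rand(b, t, s))
    out = model.attend_on(source, target, attention)
    # target and the attended source are concatenated along the last dimension.
    assert out.val.shape == (b, t, 2 * e)
    return out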
def attention_pool(x, mask, layer):
    """Attention pooling.

    Args:
      x: batch of inputs, shape (B, n, h)
      mask: binary mask, shape (B, n)
      layer: Linear layer mapping h -> 1
    Returns:
      pooled version of x, shape (B, h)
    """
    attn_raw = layer(x).squeeze(2)  # (B, n, 1) -> (B, n)
    attn_raw = ibp.add(attn_raw, (1 - mask) * -1e20)  # push masked positions to -inf before softmax
    attn_logsoftmax = ibp.log_softmax(attn_raw, 1)
    attn_probs = ibp.activation(torch.exp, attn_logsoftmax)  # (B, n)
    return ibp.bmm(attn_probs.unsqueeze(1), x).squeeze(1)  # (B, 1, n) x (B, n, h) -> (B, h)
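
# Hedged usage sketch (an addition, illustrative names only): pool a padded batch with
# attention_pool using a plain torch.nn.Linear scorer. Passing ordinary tensors relies
# on the ibp ops used above (add, log_softmax, activation, bmm) also accepting plain
# torch tensors, which is an assumption about the ibp module.
def _attention_pool_example(B=2, n=5, h=16):
    layer = torch.nn.Linear(h, 1)
    x = torch.rand(B, n, h)
    mask = torch.ones(B, n)
    mask[:, -1] = 0  # treat the last position of each sequence as padding
    pooled = attention_pool(x, mask, layer)
    assert pooled.shape == (B, h)
    return pooled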