def train_forward_decode(net: EdgeGroupingOnTransform, image, instance_masking, instance_edge,
                         instance_num, edge, pool_edge, grouping_matrix, nearby_matrix,
                         adjacent_matrix, i, memory, mock_output):
    """Run one teacher-forced decode step ``i`` during training.

    Feeds the ground-truth prefix ``mock_output[:, :1+i]`` to the decoder,
    predicts position ``i``, and computes a balanced BCE loss against the
    matching column of ``grouping_matrix``.

    Returns:
        (gm, k, gm_l, gm_acc, metrics) where ``gm`` is the full grouping
        matrix only on the final step (i == 783), else None; ``k`` is a
        one-hot (batch, 9) instance-count tensor; ``gm_l`` is the loss; the
        accuracy tensors are zero placeholders (computation commented out).
    """
    # Teacher forcing: ground-truth prefix up to (and including) step i.
    mock_output = mock_output[:, 0:1 + i, :]
    out = net.decode(memory, None, mock_output,
                     subsequent_mask(mock_output.size(1)).type_as(edge.data))
    # Keep only the prediction for the newest position.
    out = out[:, -1:, :]
    out = net.generator(out)
    output = torch.cat([mock_output, out], dim=1)
    if i == 783:
        # Final decode step: assemble the full grouping matrix
        # (drop the start placeholder, move channels to dim 1).
        gm = output[:, 1:, :].permute(0, 2, 1).reshape(grouping_matrix.shape)
    else:
        gm = None
    # One-hot encode the per-sample instance count into (batch, 9).
    k = torch.zeros(instance_num.shape[0], 9).to(instance_num.device)
    for batch_idx, num in enumerate(instance_num):
        k[batch_idx, num] = 1
    # BUG FIX: the enumerate loop above previously reused the name `i`,
    # clobbering the decode-step parameter before this slice — the loss was
    # computed against the wrong grouping_matrix column.
    gm_l = balance_bce_loss(out, grouping_matrix[:, 1 + i:2 + i, :]) * 10
    k_l = torch.tensor(0).to(image.device)
    gm_acc = torch.tensor(0).to(image.device)  # topk_accuracy(gm, grouping_matrix, pool_edge, k=5)
    k_acc = torch.tensor(0).to(image.device)  # k_accuracy(k, instance_num)
    return gm, k, gm_l, gm_acc, {'k_loss': k_l.detach().cpu(),
                                 'gm_loss': gm_l.detach().cpu(),
                                 "top5_acc": gm_acc.detach().cpu(),
                                 'k_acc': k_acc.detach().cpu()}
def val_forward(net: EdgeGroupingOnTransform, image, instance_masking, instance_edge,
                instance_num, edge, pool_edge, grouping_matrix, nearby_matrix,
                adjacent_matrix):
    """Validation forward pass: autoregressively decode all 784 positions.

    Returns (gm, k, gm_l, gm_acc, metrics) — the predicted grouping matrix,
    a one-hot instance-count tensor, the masked BCE loss, the top-5 accuracy,
    and a dict of detached CPU metric tensors.
    """
    batch = pool_edge.shape[0]
    # Chunk the edge image into 8x8 patches and flatten each to 64 values.
    src = chunk_image(edge, 8).reshape((batch, -1, 8 * 8))
    # Prepend a zero start-placeholder token.
    start_token = torch.zeros(batch, 1, 64).to(src.device)
    src = torch.cat([start_token, src], dim=1)
    memory = net.encode(src, None)
    # Seed the output sequence with a zero placeholder, then decode greedily.
    output = torch.zeros(batch, 1, 784, dtype=src.dtype, device=src.device)
    for step in range(784):
        causal_mask = subsequent_mask(output.size(1)).type_as(src.data)
        decoded = net.decode(memory, None, output, causal_mask)
        # Only the newest position's prediction is kept and appended.
        next_token = net.generator(decoded[:, -1:, :])
        output = torch.cat([output, next_token], dim=1)
        print(f'val decode step {step + 1}/784', end='\r')
    # Drop the placeholder and move channels to dim 1 to match the target.
    gm = output[:, 1:, :].permute(0, 2, 1).reshape(grouping_matrix.shape)
    # One-hot encode the per-sample instance count into (batch, 9).
    k = torch.zeros(instance_num.shape[0], 9, dtype=src.dtype, device=src.device)
    for sample_idx, num in enumerate(instance_num):
        k[sample_idx, num] = 1
    gm_l = mask_bce_loss(gm, grouping_matrix, pool_edge)
    k_l = torch.tensor(0.).to(image.device)
    gm_acc = topk_accuracy(gm, grouping_matrix, pool_edge, k=5)
    k_acc = k_accuracy(k, instance_num)
    metrics = {'k_loss': k_l.detach().cpu(),
               'gm_loss': gm_l.detach().cpu(),
               "top5_acc": gm_acc.detach().cpu(),
               'k_acc': k_acc.detach().cpu()}
    return gm, k, gm_l, gm_acc, metrics
def greedy_decode(model, src, src_mask, max_len, start_symbol):
    """Greedy autoregressive decoding for a batch-size-1 sequence.

    Starts from ``start_symbol`` and repeatedly appends the argmax token
    until the output reaches ``max_len`` tokens.

    Returns:
        ys: a (1, max_len) tensor of decoded token ids (same type as src).
    """
    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type_as(src.data)
    for _ in range(max_len - 1):
        # Deprecated torch.autograd.Variable wrappers removed: tensors and
        # Variables were merged in PyTorch 0.4, so the wrapper was a no-op.
        out = model.decode(memory, src_mask, ys,
                           subsequent_mask(ys.size(1)).type_as(src.data))
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim=1)
        # .item() replaces the deprecated `.data[0]` scalar extraction.
        next_word = next_word.item()
        ys = torch.cat(
            [ys, torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=1)
    return ys
def run(model: EncoderDecoder, src, max_len):
    """Greedily decode ``max_len`` positions, seeding with a zero placeholder.

    Returns the full (1, max_len, 784) output sequence including the
    leading zero placeholder token.
    """
    memory = model.encode(src, None)
    print("memory size:", memory.shape)
    # Start token: a single all-zero 784-dim vector.
    output = torch.zeros(1, 1, 784).type_as(src.data)
    for step in range(max_len - 1):
        print("decode pos:", step)
        causal_mask = subsequent_mask(output.size(1)).type_as(src.data)
        decoded = model.decode(memory, None, output, causal_mask)
        # Keep only the newest position's prediction and append it.
        next_token = model.generator(decoded[:, -1:, :])
        output = torch.cat([output, next_token], dim=1)
    return output
def make_std_mask(tgt, pad):
    """Create a mask to hide both padding and future positions.

    Returns a (batch, tgt_len, tgt_len) boolean mask: True where attention
    is allowed.
    """
    # BUG FIX: was unsqueeze(-1), producing (batch, tgt_len, 1), which
    # broadcasts the padding mask over whole rows instead of masking padded
    # columns. unsqueeze(-2) gives (batch, 1, tgt_len), matching the
    # duplicate definition below and the canonical transformer mask.
    tgt_mask = (tgt != pad).unsqueeze(-2)
    tgt_mask = tgt_mask & \
        Variable(subsequent_mask(tgt.size(-1)).type_as(tgt_mask.data))
    return tgt_mask
def make_std_mask(tgt, pad):
    """Build the decoder target mask: hide padding and future words.

    Returns a (batch, tgt_len, tgt_len) boolean mask combining the padding
    mask with the causal (subsequent-position) mask.
    """
    # (batch, 1, tgt_len): True where the token is not padding.
    padding_mask = (tgt != pad).unsqueeze(-2)
    # (1, tgt_len, tgt_len): True at or before the current position.
    future_mask = Variable(
        subsequent_mask(tgt.size(-1)).type_as(padding_mask.data))
    return padding_mask & future_mask