def _add_bond(molecule):
    """Increase the order of the first eligible existing bond by one.

    Scans rows flagged by ``mask(molecule.expand_mat)`` (atoms with spare
    valence -- TODO confirm semantics of `mask`) and bumps the first bond
    whose both endpoints are maskable, then rebuilds the molecule from the
    modified adjacency matrix.

    Args:
        molecule: input Molecule; returned unchanged if it has fewer than
            two atoms or no bond can be upgraded.

    Returns:
        A new Molecule on success, otherwise the input molecule.
    """
    if molecule.num_atom < 2:
        return molecule
    # Work on a copy: the original code incremented molecule.adj in place,
    # mutating the caller's molecule even when construction failed.
    temp_adj = molecule.adj.copy()
    mask_row = mask(molecule.expand_mat)
    goal_mol = None
    goal_smiles = None
    for i in mask_row:
        row = temp_adj[i]
        for j in range(len(row)):
            # Upgrade the first existing bond whose endpoints are both maskable.
            if row[j] > 0 and j in mask_row:
                temp_adj[i][j] += 1
                temp_adj[j][i] += 1
                goal_mol = adj2mol(molecule.node_list, temp_adj.astype(int),
                                   deconfig.possible_bonds)
                goal_smiles = Chem.MolToSmiles(goal_mol)
                break
        if goal_mol is not None:
            break
    if goal_mol is not None:
        return Molecule(goal_smiles, deconfig)
    return molecule
def compute_loss(self, batch: DSTBatchData, ontology):
    """Accumulate BCE losses over every act-slot entry of the ontology.

    For each act-slot, the model scores every ontology value; the target
    marks which values appear in the batch's (padded) belief state.

    Args:
        batch: DST batch with sentence tensors, system acts and belief state.
        ontology: mapping act_slot -> (ont_idx tensor, ont tensor bundle).

    Returns:
        Per-example loss tensor summed across act-slots.

    Raises:
        ValueError: if ``self.loss`` is neither "mean" nor "sum".
    """
    total = None
    for act_slot, (ont_idx, ont) in ontology.items():
        slot_idx = torch.tensor(
            self.vocabs.speaker_state["user"].act_slot[act_slot]
        ).to(self.device)
        ont_idx = ont_idx.to(self.device)
        ont = ont.to(self.device)
        logit = self.model(
            slot_idx,
            *batch.sent.tensors,
            *self.prepare_system_acts(batch.system_acts),
            *ont.tensors
        )
        # ont_idx: [num_ont] -> [batch_size x num_ont x state_lat]
        # belief values: [batch_size x state_lat] -> broadcast against it;
        # target[b, o] is True iff ontology value o occurs among the valid
        # (unpadded) belief-state values of example b.
        state = batch.belief_state
        target = (
            (ont_idx.unsqueeze(0).unsqueeze(-1) == state.value.unsqueeze(1))
            .masked_fill(~utils.mask(state.lens).unsqueeze(1), 0)
            .any(-1)
        )
        slot_loss = self._bce(logit, target.float())
        if self.loss == "mean":
            slot_loss = slot_loss.mean(-1)
        elif self.loss == "sum":
            slot_loss = slot_loss.sum(-1)
        else:
            raise ValueError(f"unsupported loss method: {self.loss}")
        total = slot_loss if total is None else total + slot_loss
    return total
def _add_atom(molecule):
    """Attach one randomly chosen atom to a random maskable position.

    Extends the adjacency matrix by one row/column, connects the new atom
    with a single bond, and rebuilds the molecule from SMILES.

    Args:
        molecule: input Molecule; returned unchanged when it is empty or no
            attachment point is available.

    Returns:
        A new Molecule with one extra atom, or the input molecule.
    """
    if molecule.num_atom < 1:
        return molecule
    atom_index = np.random.choice(deconfig.length_elements, 1)[0]
    atom = deconfig.temp_elements[atom_index]
    mask_row = mask(molecule.expand_mat)
    if len(mask_row) < 1:
        return molecule
    mask_index = np.random.choice(mask_row, 1)[0]
    goal_length = molecule.num_atom + 1
    goal_adj = np.zeros([goal_length, goal_length])
    goal_adj[:goal_length - 1, :goal_length - 1] = molecule.adj
    # Single bond between the new atom and the chosen attachment point.
    goal_adj[goal_length - 1, mask_index] = goal_adj[mask_index, goal_length - 1] = 1
    # Copy the node list: the original code appended to molecule.node_list,
    # mutating the input molecule as a side effect.
    goal_node_list = list(molecule.node_list) + [atom]
    goal_mol = adj2mol(goal_node_list, goal_adj.astype(int),
                       deconfig.possible_bonds)
    goal_smiles = Chem.MolToSmiles(goal_mol)
    return Molecule(goal_smiles, deconfig)
def add_boxes(bboxes, ignore_matrix):
    """
    Creates tmp_score and tmp_detect arrays.
    bboxes: list of bounding boxes and scores [x1, y1, x2, y2, score]
    ignore_matrix: Boolean mask of region to ignore for boxes.
    """
    h, w = ignore_matrix.shape
    tmp_score = np.zeros((h, w))
    tmp_detect = np.zeros((h, w), dtype=bool)
    # Paint every box onto the score/detection maps; overlapping boxes keep
    # the maximum score per pixel.
    for x1, y1, x2, y2, score in bboxes:
        x1, y1, x2, y2 = (int(v) for v in (x1, y1, x2, y2))
        box = (slice(y1, y2), slice(x1, x2))
        tmp_score[box] = np.maximum(score, tmp_score[box])
        tmp_detect[box] = True
    # get rid of stuff in ignore regions
    tmp_score = utils.mask(tmp_score, ignore_matrix)
    tmp_detect &= ignore_matrix
    return tmp_score, tmp_detect
def test_turn_state_encoder_decoder():
    """Overfitting smoke test for the state encoder/decoder pair.

    Trains GenericStateEncoder + GenericStateDecoder for 100 steps on a
    random sparse binary state tensor and asserts that the round-trip
    reconstruction is exact afterwards.
    """
    dataset = create_dummy_dataset()
    vocabs = list(dataset.vocabs.turn.slot_values.values())
    encoder = GenericStateEncoder(
        vocabs=vocabs,
        output_dim=100,
        label_encoder=functools.partial(
            EmbeddingLabelEncoder
        ),
        label_layer=feedforward.MultiLayerFeedForward,
        label_pooling=pooling.SumPooling,
        state_pooling=pooling.MaxPooling,
        output_layer=feedforward.MultiLayerFeedForward
    )
    decoder = GenericStateDecoder(
        input_dim=100,
        vocabs=vocabs,
        input_layer=feedforward.MultiLayerFeedForward,
        output_layer=feedforward.MultiLayerFeedForward,
        label_emb=EmbeddingLabelEncoder
    )
    encoder.reset_parameters()
    decoder.reset_parameters()
    encoder.train(), decoder.train()
    # Optimize both modules jointly.
    params = [p for p in encoder.parameters() if p.requires_grad]
    params += [p for p in decoder.parameters() if p.requires_grad]
    optimizer = op.Adam(params)
    bce = nn.BCEWithLogitsLoss(reduction="none")
    vocab_lens = torch.LongTensor(list(map(len, vocabs)))
    # Random binary state: [batch=4 x num_vocabs x max_vocab_len], with
    # positions past each vocab's length zeroed out.
    x_sparse = torch.randint(0, 2, (4, len(vocab_lens),
                                    max(vocab_lens))).byte()
    x_sparse = x_sparse.masked_fill(~utils.mask(vocab_lens), 0)
    x, lens = utils.to_sparse(x_sparse)
    # Padding positions become -1 so they never compare equal to predictions
    # unless the prediction is also masked to -1 below.
    x_sparse = x_sparse.masked_fill(~utils.mask(vocab_lens), -1)
    lens = torch.randint(0, 3, (4, len(encoder.vocabs))) + 1
    for i in range(100):
        logits = decoder(encoder(x, lens))
        loss = bce(logits, x_sparse.float())
        # Ignore loss contributions from padded vocab positions.
        loss = loss.masked_fill(~utils.mask(vocab_lens), 0).sum()
        print(i, loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    encoder.eval(), decoder.eval()
    logits = decoder(encoder(x, lens))
    x_pred = torch.sigmoid(logits) > 0.5
    x_pred = x_pred.masked_fill(~utils.mask(vocab_lens), -1)
    assert (x_pred == x_sparse).all().item()
def compute(self, batch: BatchData, outputs, step: int = None
            ) -> Tuple[torch.Tensor, utils.TensorMap]:
    """Sentence-level VAE loss: token reconstruction + per-turn KL.

    Args:
        batch: padded conversation batch.
        outputs: (logit, posterior, prior) triple from the model.
        step: global step used to anneal the KL weight (defaults to 0).

    Returns:
        (scalar loss, dict of monitoring statistics).
    """
    step = step or 0
    logit, post, prior = outputs
    batch_size = batch.batch_size
    max_conv_len = batch.max_conv_len
    max_sent_len = batch.max_sent_len
    w_logit, zsent_post, zsent_prior = \
        logit["sent"], post["sent"], prior["sent"]
    conv_lens, sent_lens = batch.conv_lens, batch.sent.lens1
    conv_mask = utils.mask(conv_lens, max_conv_len)
    # Zero sentence lengths of padded turns so their tokens are ignored.
    sent_lens = sent_lens.masked_fill(~conv_mask, 0)
    sent_mask = utils.mask(sent_lens, max_sent_len)
    kld_sent = zsent_post.kl_div(zsent_prior).masked_fill(~conv_mask, 0)
    # Targets are the input tokens shifted left by one (next-token
    # prediction); padding becomes -1 (the CE ignore index -- TODO confirm).
    w_target = (batch.sent.value.masked_fill(~sent_mask, -1)
                .view(-1, max_sent_len))[..., 1:]
    sent_loss = self._ce(
        w_logit[:, :, :-1].contiguous().view(-1, len(self.vocabs.word)),
        w_target.contiguous().view(-1)).view(batch_size, max_conv_len,
                                             -1).sum(-1)
    kld_weight = self.kld_weight.get(step)
    loss_kld = kld_sent.sum(-1)
    loss_recon = sent_loss.sum(-1)
    nll = loss_recon + loss_kld
    if self.enable_kl:
        loss = loss_recon + kld_weight * loss_kld
    else:
        loss = loss_recon
    stats = {
        "nll": nll.mean(),
        "loss": loss.mean(),
        "loss-recon": loss_recon.mean(),
        "loss-sent": sent_loss.sum(-1).mean(),
        "loss-sent-turn": sent_loss.sum() / conv_lens.sum(),
        "loss-sent-word": sent_loss.sum() / sent_lens.sum(),
        "ppl-turn": (sent_loss.sum() / conv_lens.sum()).exp(),
        "ppl-word": (sent_loss.sum() / sent_lens.sum()).exp(),
        "kld-weight": torch.tensor(kld_weight),
        "kld-sent": kld_sent.sum(-1).mean(),
        "kld-sent-turn": kld_sent.sum() / conv_lens.sum(),
        "kld": loss_kld.mean()
    }
    return loss.mean(), stats
def forward(self, slot, x, lens):
    """Encode `x` conditioned on an act-slot embedding.

    The act-slot's tokens are run through a small RNN, max-pooled into a
    single vector, broadcast over batch and time, and concatenated onto the
    input before the base RNN.

    Returns:
        (h, c): the per-step hidden states and a max-pooled summary vector.
    """
    tokens = self.act_slot_tensor[slot, :self.act_slot_lens[slot]]
    emb, _, _ = self.act_slot_rnn(tokens.unsqueeze(0))
    # Max-pool over the token axis, then broadcast over batch and time.
    emb = emb.squeeze(0).max(0)[0]
    emb = emb.view(1, 1, -1).expand(x.size(0), x.size(1), -1)
    h, _, _ = self.base_rnn(torch.cat([x, emb], -1), lens)
    # -inf on padded steps so they never win the max-pool.
    pad = ~utils.mask(lens, h.size(1)).unsqueeze(-1)
    pooled = self.dropout_layer(h).masked_fill(pad, float("-inf")).max(1)[0]
    return h, pooled
def compute(self, batch: BatchData, outputs, step: int = None
            ) -> Tuple[torch.Tensor, utils.TensorMap]:
    """State-prediction loss: BCE over act-slot-values + annealed KL.

    Also estimates the mutual information of the state posterior and
    reports per-speaker breakdowns of the loss statistics.

    Args:
        batch: padded conversation batch.
        outputs: (logit, posterior, prior) triple from the model.
        step: global step used to anneal the KL weight (defaults to 0).

    Returns:
        (scalar loss, dict of monitoring statistics).
    """
    step = step or 0
    logit, post, prior = outputs
    batch_size = batch.batch_size
    max_conv_len = batch.max_conv_len
    s_logit, zstate_post, zstate_prior = \
        logit["state"], post["state"], prior["state"]
    conv_lens, sent_lens = batch.conv_lens, batch.sent.lens1
    conv_mask = utils.mask(conv_lens, max_conv_len)
    # Valid logits are the finite ones within unpadded turns; +/-inf marks
    # positions the model deliberately excluded -- TODO confirm.
    state_logit_mask = \
        (((s_logit != float("-inf")) & (s_logit != float("inf")))
         .masked_fill(~conv_mask.unsqueeze(-1), 0))
    kld_state = zstate_post.kl_div(zstate_prior).masked_fill(~conv_mask, 0)
    # Dense multi-hot target over all act-slot-values.
    s_target = utils.to_dense(idx=batch.state.value,
                              lens=batch.state.lens1,
                              max_size=self.num_asv)
    p_target = batch.speaker.value.masked_fill(~conv_mask, -1)
    state_loss = (self._bce(s_logit, s_target.float()).masked_fill(
        ~state_logit_mask, 0)).sum(-1)
    kld_weight = self.kld_weight.get(step)
    nll = state_loss + kld_state
    loss = state_loss + kld_weight * kld_state
    # MI estimated over all turns flattened, then padded turns zeroed.
    state_mi = \
        (estimate_mi(zstate_post.view(batch_size * max_conv_len, -1))
         .view(batch_size, max_conv_len).masked_fill(~conv_mask, 0).sum(-1))
    stats = {
        "nll": nll.mean(),
        "state-mi": state_mi.mean(),
        "loss-state": state_loss.sum(-1).mean(),
        "loss-state-turn": state_loss.sum() / conv_lens.sum(),
        "loss-state-asv": state_loss.sum() / state_logit_mask.sum(),
        "kld-weight": torch.tensor(kld_weight),
        "kld-state": kld_state.sum(-1).mean(),
        "kld-state-turn": kld_state.sum() / conv_lens.sum(),
        "kld": kld_state.sum(-1).mean()
    }
    # Per-speaker breakdown (skipping the <unk> speaker).
    for spkr_idx, spkr in self.vocabs.speaker.i2f.items():
        if spkr == "<unk>":
            continue
        spkr_mask = p_target == spkr_idx
        spkr_state_mask = \
            state_logit_mask.masked_fill(~spkr_mask.unsqueeze(-1), 0)
        spkr_state_loss = state_loss.masked_fill(~spkr_mask, 0).sum()
        spkr_kld_state = kld_state.masked_fill(~spkr_mask, 0).sum()
        spkr_stats = {
            "loss-state": spkr_state_loss / batch_size,
            "loss-state-turn": spkr_state_loss / spkr_mask.sum(),
            "loss-state-asv": spkr_state_loss / spkr_state_mask.sum(),
            "kld-state": spkr_kld_state / batch_size,
            "kld-state-turn": spkr_kld_state / spkr_mask.sum(),
        }
        stats.update({f"{k}-{spkr}": v for k, v in spkr_stats.items()})
    return loss.mean(), stats
def estimate_mi(z: MultiGaussian, z_samples=None, lens=None):
    """Monte-Carlo estimate of mutual information for a Gaussian posterior.

    Computes log q(z|x) - log q(z) per element; for 3-D inputs the result
    is masked by `lens` along the middle axis and summed over it.
    """
    size = z.mu.size()
    latent_dim = size[-1]
    z = z.view(-1, latent_dim)
    if z_samples is None:
        z_samples = z.sample()
    mi = z.log_prob(z_samples) - estimate_agg_posterior(z, z_samples)
    mi = mi.view(*size[:-1])
    if len(size) != 3:
        return mi
    # Zero padded positions before reducing over the sequence axis.
    return mi.masked_fill(~utils.mask(lens, size[1]), 0).sum(-1)
def forward(self, x, lens=None, h=None):
    """Run the wrapped RNN over a batch-first padded batch.

    Args:
        x: [batch_size x seq_len x dim] input tensor.
        lens: optional [batch_size] lengths; when given, the sequence is
            packed and padded outputs are zeroed.
        h: optional packed initial hidden state.

    Returns:
        (outputs [batch x seq x dim], final hidden of last layer,
         packed final state).
    """
    batch_size, seq_len = x.size(0), x.size(1)
    # Switch to time-major layout for packing / the underlying RNN.
    x = x.permute(1, 0, 2)
    if lens is not None:
        x = rnn.pack_padded_sequence(x, lens, enforce_sorted=False)
    if h is not None:
        h = self.unpack_state(h)
    o, h_prime = self.rnn(x, h)
    if lens is not None:
        o, _ = rnn.pad_packed_sequence(o, total_length=seq_len)
    # Back to batch-major.
    o = o.permute(1, 0, 2)
    if lens is not None:
        # Zero outputs of padded time steps.
        o = o.masked_fill(~utils.mask(lens, seq_len).unsqueeze(-1), 0)
    return o, h_prime[0][-1], self.pack_state(*h_prime)
def forward(self, h, lens=None):
    """
    Arguments:
        h: [batch_size x N x dim] Tensor
        lens (optional): [batch_size] LongTensor
    Returns:
        o: [batch_size x dim] Tensor
    """
    # Two-layer additive attention scores, heads-first: [batch x heads x N].
    scores = self.linear2(self.tanh(self.linear1(h))).permute(0, 2, 1)
    if lens is not None:
        pad = ~utils.mask(lens, h.size(1))
        # Keep fully-empty rows unmasked so softmax stays finite; their
        # outputs are zeroed explicitly below.
        pad[lens == 0] = 0
        scores = scores.masked_fill(pad.unsqueeze(1), float("-inf"))
    o = torch.bmm(torch.softmax(scores, 2), h)
    if lens is not None and (lens == 0).any().item():
        o[lens == 0] = 0
    return o
def reconstruction_loss(self, images, input, size_average=True): """ Implement section 4.1 'Reconstruction as a regularization method' in the paper. Implement Decoder structure in Figure 2 to reconstruct a digit from the DigitCaps layer representation. Based on naturomics's implementation. """ """ First, do masking. """ # Method 1: mask with y. # Note: we have not implement method 2 which is masking with true label. masked_caps = utils.mask(input, self.cuda_enabled) """ Second, reconstruct the images with 3 Fully Connected layers. """ vector_j = masked_caps.view(input.size(0), -1) # reshape the masked_caps tensor fc1_out = self.relu(self.fc1(vector_j)) fc2_out = self.relu(self.fc2(fc1_out)) decoded = self.sigmoid(self.fc3(fc2_out)) recon_img = decoded.view(-1, self.image_channel, self.image_height, self.image_width) """ Save reconstructed images. """ utils.save_image(recon_img, 'results/reconstructed_images.png') """ Calculate reconstruction loss. """ # Minimize the sum of squared differences between the # reconstructed image (outputs of the logistic units) and the input image (origin). error = (recon_img - images).view(recon_img.size(0), -1) squared = error**2 recon_error = torch.sum(squared, dim=1) # Mean squared error if size_average: recon_error = recon_error.mean() return recon_error
def attend(h, q, lens=None):
    """
    Arguments:
        h: [batch_size x N x dim] FloatTensor
        q: [batch_size x M x dim] FloatTensor
        lens (optional): [batch_size] LongTensor
    Returns:
        [batch_size x M x dim] FloatTensor
    """
    # Dot-product scores of every query against every key: [batch x M x N].
    scores = torch.bmm(h, q.permute(0, 2, 1)).permute(0, 2, 1)
    if lens is not None:
        pad = ~utils.mask(lens, h.size(1))
        # Leave empty rows unmasked (all -inf would NaN the softmax); their
        # outputs are zeroed at the end instead.
        pad[lens == 0] = 0
        scores = scores.masked_fill(pad.unsqueeze(1), float("-inf"))
    out = torch.bmm(torch.softmax(scores, -1), h)
    if lens is not None and (lens == 0).any().item():
        out[lens == 0] = 0
    return out
def forward(self, h, q, lens=None):
    """
    Arguments:
        h: [batch_size x N x hidden_dim] Tensor
        q: [batch_size x N x input_dim] Tensor
        lens (optional): [batch_size] LongTensor
    Returns:
        o: [batch_size x hidden_dim] Tensor
    """
    # One scalar attention score per position: [batch x N].
    scores = self.linear(q).squeeze(-1)
    if lens is not None:
        pad = ~utils.mask(lens, h.size(1))
        # Empty rows stay unmasked to keep softmax finite; zeroed below.
        pad[lens == 0] = 0
        scores = scores.masked_fill(pad, float("-inf"))
    out = torch.bmm(torch.softmax(scores, -1).unsqueeze(1), h).squeeze(1)
    if lens is not None and (lens == 0).any().item():
        out[lens == 0] = 0
    return out
def forward(self, x, target): """ We send the outputs of the `DigitCaps` layer, which is a [batch_size, 10, 16] size tensor into the Decoder network, and reconstruct a [batch_size, fc3_output_size] size tensor representing the image. Args: x: [batch_size, 10, 16] The output of the digit capsule. target: [batch_size, 10] One-hot MNIST dataset labels. Returns: reconstruction: [batch_size, fc3_output_size] Tensor of reconstructed images. """ batch_size = target.size(0) """ First, do masking. """ # Method 1: mask with y. # Note: we have not implement method 2 which is masking with true label. # masked_caps shape: [batch_size, 10, 16, 1] masked_caps = utils.mask(x, self.cuda_enabled) """ Second, reconstruct the images with 3 Fully Connected layers. """ # vector_j shape: [batch_size, 160=10*16] vector_j = masked_caps.view(x.size(0), -1) # reshape the masked_caps tensor # Forward pass of the network fc1_out = self.relu(self.fc1(vector_j)) fc2_out = self.relu(self.fc2(fc1_out)) # shape: [batch_size, 1024] reconstruction = self.sigmoid( self.fc3(fc2_out)) # shape: [batch_size, fc3_output_size] assert reconstruction.size() == torch.Size( [batch_size, self.fc3_output_size]) return reconstruction
def compute_accuracy(self, pred: DoublyStacked1DTensor,
                     gold: DoublyStacked1DTensor,
                     turn_mask=None) -> utils.TensorMap:
    """Exact-match accuracy of predicted goal states against gold.

    A turn is correct when its dense act-slot-value sets match exactly;
    a dialogue is correct when every considered turn is correct.

    Args:
        pred, gold: doubly-stacked state tensors of equal batch shape.
        turn_mask: optional boolean mask restricting which turns count.

    Returns:
        dict with dialogue-level "acc" and turn-level "acc-turn".
    """
    batch_size = pred.size(0)
    num_asv = len(self.vocabs.goal_state.asv)
    pred_dense = utils.to_dense(pred.value, pred.lens1, max_size=num_asv)
    gold_dense = utils.to_dense(gold.value, gold.lens1, max_size=num_asv)
    correct = (pred_dense == gold_dense).all(-1)
    conv_mask = utils.mask(pred.lens, pred.size(1))
    if turn_mask is None:
        turn_mask = torch.ones_like(conv_mask).bool()
    turn_mask = turn_mask & conv_mask
    correct = correct & turn_mask
    num_turns = turn_mask.sum()
    return {
        # A dialogue counts when all its unmasked turns are correct.
        "acc": (correct | ~turn_mask).all(-1).sum().float() / batch_size,
        "acc-turn": correct.sum().float() / num_turns,
    }
def forward(self, h, x, lens=None):
    """Decode sequence `x` with an LSTM initialized from encoded state `h`.

    Args:
        h: [batch_size x ...] encoder state, mapped to (h0, c0) by
            self.encode_hidden_state.
        x: [batch_size x seq_len x dim] input sequence.
        lens: optional [batch_size] lengths; rows with length 0 are fed a
            dummy length of 1 for packing and fully zeroed in the output.

    Returns:
        [batch_size x seq_len x hidden_dim] outputs, padded steps zeroed.
    """
    batch_size, seq_len = h.size(0), x.size(1)
    h, c = self.encode_hidden_state(h)
    h = h.view(batch_size, -1, self.hidden_dim)
    c = c.view(batch_size, -1, self.hidden_dim)
    # LSTM expects layer-major initial states.
    h, c = h.permute(1, 0, 2).contiguous(), c.permute(1, 0, 2).contiguous()
    x = x.permute(1, 0, 2)
    nil_mask = None
    if lens is not None:
        nil_mask = lens == 0
        if not nil_mask.any().item():
            nil_mask = None
        pack_lens = lens
        if nil_mask is not None:
            # pack_padded_sequence rejects zero lengths. Use a clone so the
            # caller's `lens` tensor is never mutated (the previous version
            # wrote 1s into it and restored them afterwards, which leaked
            # the temporary state on any exception in between).
            pack_lens = lens.clone()
            pack_lens[nil_mask] = 1
        x = rnn.pack_padded_sequence(x, pack_lens, enforce_sorted=False)
    o, _ = self.lstm(x, (h, c))
    if lens is not None:
        o, _ = rnn.pad_packed_sequence(o, total_length=seq_len)
    o = o.permute(1, 0, 2)
    if lens is not None:
        # Zero padded steps; zero-length rows are zeroed entirely.
        o = o.masked_fill(~utils.mask(lens).unsqueeze(-1), 0)
    return o
def get_tmp(self, tmp, size):
    """Return temporary `tmp`, truncated to `size` bits via utils.mask."""
    value = self.tmps[tmp]
    return utils.mask(value, size)
def get_reg(self, reg, size):
    """Return register `reg` truncated to `size` bits.

    Output registers shadow input registers: if `reg` has been written
    (present in out_regs) that value wins, otherwise fall back to in_regs.
    """
    if reg in self.out_regs:
        # The previous version computed this value into a throwaway local
        # and then called utils.mask a second time; one call suffices.
        return utils.mask(self.out_regs[reg], size)
    return utils.mask(self.in_regs[reg], size)
def test_jda(create_fn=create_vhda, gen_fn=vhda_gen):
    """Overfitting test for joint dialogue autoencoding.

    Trains the model for 300 epochs on a dummy dataset with an annealed KL
    weight, then asserts that posterior-conditioned generation reproduces
    every training dialogue exactly.
    """
    dataset = create_dummy_dataset()
    dataloader = create_dataloader(
        dataset,
        batch_size=2,
    )
    model = create_fn(dataset)
    optimizer = op.Adam(p for p in model.parameters() if p.requires_grad)
    ce = nn.CrossEntropyLoss(ignore_index=-1, reduction="none")
    bce = nn.BCEWithLogitsLoss(reduction="none")
    model.reset_parameters()
    for eidx in range(300):
        model.train()
        for i, batch in enumerate(dataloader):
            batch: BatchData = batch
            optimizer.zero_grad()
            model.inference()
            w, p = batch.word, batch.speaker
            g, g_lens = batch.goal, batch.goal_lens
            s, s_lens = batch.turn, batch.turn_lens
            sent_lens, conv_lens = batch.sent_lens, batch.conv_lens
            batch_size, max_conv_len, max_sent_len = w.size()
            w_logit, p_logit, g_logit, s_logit, info = model(batch.to_dict())
            # Word targets: -1 (= CE ignore index) on padded turns/tokens.
            w_target = w.masked_fill(~utils.mask(conv_lens).unsqueeze(-1), -1)
            w_target = w_target.view(-1, max_sent_len).masked_fill(
                ~utils.mask(sent_lens.view(-1)), -1
            ).view(batch_size, max_conv_len, -1)
            # Next-token reconstruction loss.
            recon_loss = ce(
                w_logit[:, :, :-1].contiguous().view(-1, w_logit.size(-1)),
                w_target[:, :, 1:].contiguous().view(-1)
            ).view(batch_size, max_conv_len, max_sent_len - 1).sum(-1).sum(-1)
            # Multi-hot goal prediction loss, padded turns zeroed.
            goal_loss = bce(
                g_logit,
                utils.to_dense(g, g_lens, g_logit.size(-1)).float()
            )
            goal_loss = (goal_loss.masked_fill(~utils.mask(conv_lens)
                                               .unsqueeze(-1).unsqueeze(-1), 0)
                         .sum(-1).sum(-1).sum(-1))
            # Multi-hot turn-state prediction loss, padded turns zeroed.
            turn_loss = bce(
                s_logit,
                utils.to_dense(s, s_lens, s_logit.size(-1)).float()
            )
            turn_loss = (turn_loss.masked_fill(~utils.mask(conv_lens)
                                               .unsqueeze(-1).unsqueeze(-1), 0)
                         .sum(-1).sum(-1).sum(-1))
            speaker_loss = ce(
                p_logit.view(-1, p_logit.size(-1)),
                p.masked_fill(~utils.mask(conv_lens), -1).view(-1)
            ).view(batch_size, max_conv_len).sum(-1)
            kld_loss = sum(v for k, v in info.items()
                           if k in {"sent", "conv", "speaker", "goal", "turn"})
            # Linearly annealed KL weight, clamped to [0.01, 0.3].
            loss = (recon_loss + goal_loss + turn_loss + speaker_loss
                    + kld_loss * min(0.3, max(0.01, i / 500)))
            print(f"[e{eidx + 1}] "
                  f"loss={loss.mean().item(): 4.4f} "
                  f"recon={recon_loss.mean().item(): 4.4f} "
                  f"goal={goal_loss.mean().item(): 4.4f} "
                  f"turn={turn_loss.mean().item(): 4.4f} "
                  f"speaker={speaker_loss.mean().item(): 4.4f} "
                  f"kld={kld_loss.mean().item(): 4.4f}")
            loss.mean().backward()
            optimizer.step()
        # Qualitative check: generate from the posterior of the last batch.
        model.eval()
        model.genconv_post()
        batch_gen, info = gen_fn(model)(batch.to_dict())
        print("Input: ")
        print(f"{dataset.processor.lexicalize(batch[0])}")
        print()
        print(f"Predicted (prob={info['logprob'][0].exp().item():.4f}): ")
        print(f"{dataset.processor.lexicalize(batch_gen[0])}")
    # Final check: generation must reproduce the data exactly.
    model.eval()
    model.genconv_post()
    for batch in dataloader:
        batch_gen, logprobs = gen_fn(model)(batch.to_dict())
        for x, y in zip(map(dataset.processor.lexicalize, batch),
                        map(dataset.processor.lexicalize, batch_gen)):
            assert x == y, f"{x}\n!=\n{y}"
def Iop_CmpNE64(self, left, right):
    """Return 1 when the low 64 bits of left and right differ, else 0."""
    return int(utils.mask(left, 64) != utils.mask(right, 64))
def compute(self, batch: BatchData, outputs, step: int = None
            ) -> Tuple[torch.Tensor, utils.TensorMap]:
    """Deterministic (no-KL) reconstruction loss over all heads.

    Sums word, goal, state and speaker losses, and reports global plus
    per-speaker statistics.

    Args:
        batch: padded conversation batch.
        outputs: (logit, posterior, prior) triple; only logits are used.
        step: unused here (kept for interface parity with VAE losses).

    Returns:
        (scalar loss, dict of monitoring statistics).
    """
    logit, post, prior = outputs
    batch_size = batch.batch_size
    max_conv_len = batch.max_conv_len
    max_sent_len = batch.max_sent_len
    w_logit, p_logit, g_logit, s_logit = \
        (logit[k] for k in ("sent", "speaker", "goal", "state"))
    conv_lens, sent_lens = batch.conv_lens, batch.sent.lens1
    conv_mask = utils.mask(conv_lens, max_conv_len)
    sent_lens = sent_lens.masked_fill(~conv_mask, 0)
    sent_mask = utils.mask(sent_lens, max_sent_len)
    # Finite logits within unpadded turns are the valid prediction slots.
    goal_logit_mask = (((g_logit != float("-inf"))
                        & (g_logit != float("inf"))).masked_fill(
        ~conv_mask.unsqueeze(-1), 0))
    state_logit_mask = \
        (((s_logit != float("-inf")) & (s_logit != float("inf")))
         .masked_fill(~conv_mask.unsqueeze(-1), 0))
    # Next-token targets; -1 marks padding (CE ignore index -- TODO confirm).
    w_target = (batch.sent.value.masked_fill(~sent_mask, -1)
                .view(-1, max_sent_len))[..., 1:]
    g_target = utils.to_dense(idx=batch.goal.value,
                              lens=batch.goal.lens1,
                              max_size=self.num_asv)
    s_target = utils.to_dense(idx=batch.state.value,
                              lens=batch.state.lens1,
                              max_size=self.num_asv)
    p_target = batch.speaker.value.masked_fill(~conv_mask, -1)
    goal_loss = (self._bce(g_logit, g_target.float()).masked_fill(
        ~goal_logit_mask, 0)).sum(-1)
    state_loss = (self._bce(s_logit, s_target.float()).masked_fill(
        ~state_logit_mask, 0)).sum(-1)
    spkr_loss = self._ce(p_logit.view(-1, self.vocabs.num_speakers),
                         p_target.view(-1)).view(batch_size, max_conv_len)
    sent_loss = self._ce(
        w_logit[:, :, :-1].contiguous().view(-1, len(self.vocabs.word)),
        w_target.contiguous().view(-1)).view(batch_size, max_conv_len,
                                             -1).sum(-1)
    loss_recon = (sent_loss.sum(-1) + state_loss.sum(-1)
                  + goal_loss.sum(-1) + spkr_loss.sum(-1))
    # No KL term in this objective: loss == nll == reconstruction loss.
    loss = nll = loss_recon
    stats = {
        "nll": nll.mean(),
        "loss": loss.mean(),
        "loss-recon": loss_recon.mean(),
        "loss-sent": sent_loss.sum(-1).mean(),
        "loss-sent-turn": sent_loss.sum() / conv_lens.sum(),
        "loss-sent-word": sent_loss.sum() / sent_lens.sum(),
        "ppl-turn": (sent_loss.sum() / conv_lens.sum()).exp(),
        "ppl-word": (sent_loss.sum() / sent_lens.sum()).exp(),
        "loss-goal": goal_loss.sum(-1).mean(),
        "loss-goal-turn": goal_loss.sum() / conv_lens.sum(),
        "loss-goal-asv": goal_loss.sum() / goal_logit_mask.sum(),
        "loss-state": state_loss.sum(-1).mean(),
        "loss-state-turn": state_loss.sum() / conv_lens.sum(),
        "loss-state-asv": state_loss.sum() / state_logit_mask.sum(),
        "loss-spkr": spkr_loss.sum(-1).mean(),
        "loss-spkr-turn": spkr_loss.sum() / conv_lens.sum()
    }
    # Per-speaker breakdown (skipping the <unk> speaker).
    for spkr_idx, spkr in self.vocabs.speaker.i2f.items():
        if spkr == "<unk>":
            continue
        spkr_mask = p_target == spkr_idx
        spkr_sent_lens = sent_lens.masked_fill(~spkr_mask, 0)
        spkr_goal_mask = \
            goal_logit_mask.masked_fill(~spkr_mask.unsqueeze(-1), 0)
        spkr_state_mask = \
            state_logit_mask.masked_fill(~spkr_mask.unsqueeze(-1), 0)
        spkr_sent_loss = sent_loss.masked_fill(~spkr_mask, 0).sum()
        spkr_goal_loss = goal_loss.masked_fill(~spkr_mask, 0).sum()
        spkr_state_loss = state_loss.masked_fill(~spkr_mask, 0).sum()
        spkr_spkr_loss = spkr_loss.masked_fill(~spkr_mask, 0).sum()
        spkr_stats = {
            "loss-sent": spkr_sent_loss / batch_size,
            "loss-sent-turn": spkr_sent_loss / spkr_mask.sum(),
            "loss-sent-word": spkr_sent_loss / spkr_sent_lens.sum(),
            "ppl-turn": (spkr_sent_loss / spkr_mask.sum()).exp(),
            "ppl-word": (spkr_sent_loss / spkr_sent_lens.sum()).exp(),
            "loss-goal": spkr_goal_loss / batch_size,
            "loss-goal-turn": spkr_goal_loss / spkr_mask.sum(),
            "loss-goal-asv": spkr_goal_loss / spkr_goal_mask.sum(),
            "loss-state": spkr_state_loss / batch_size,
            "loss-state-turn": spkr_state_loss / spkr_mask.sum(),
            "loss-state-asv": spkr_state_loss / spkr_state_mask.sum(),
            "loss-spkr": spkr_spkr_loss / batch_size,
            "loss-spkr-turn": spkr_spkr_loss / spkr_mask.sum()
        }
        stats.update({f"{k}-{spkr}": v for k, v in spkr_stats.items()})
    return loss.mean(), stats
def compute(self, batch: BatchData, outputs, step: int = None
            ) -> Tuple[torch.Tensor, utils.TensorMap]:
    """Hierarchical VAE loss: word reconstruction + sentence/conversation KL.

    Supports three KL modes ("kl", "kl-mi", "kl-mi+") that subtract
    estimated mutual information terms from the annealed KL penalty.

    Args:
        batch: padded conversation batch.
        outputs: (logit, posterior, prior) triple from the model.
        step: global step used to anneal the KL weight (defaults to 0).

    Returns:
        (scalar loss, dict of monitoring statistics).

    Raises:
        ValueError: on an unrecognized ``self.kl_mode``.
    """
    step = step or 0
    logit, post, prior = outputs
    batch_size = batch.batch_size
    max_conv_len = batch.max_conv_len
    max_sent_len = batch.max_sent_len
    w_logit, p_logit, g_logit, s_logit = \
        (logit[k] for k in ("sent", "speaker", "goal", "state"))
    zconv_post, zsent_post = (post[k] for k in ("conv", "sent"))
    zconv_prior, zsent_prior = (prior[k] for k in ("conv", "sent"))
    conv_lens, sent_lens = batch.conv_lens, batch.sent.lens1
    conv_mask = utils.mask(conv_lens, max_conv_len)
    sent_lens = sent_lens.masked_fill(~conv_mask, 0)
    sent_mask = utils.mask(sent_lens, max_sent_len)
    kld_conv = zconv_post.kl_div()
    kld_sent = zsent_post.kl_div(zsent_prior).masked_fill(~conv_mask, 0)
    # Next-token targets; -1 marks padding (CE ignore index -- TODO confirm).
    w_target = (batch.sent.value
                .masked_fill(~sent_mask, -1)
                .view(-1, max_sent_len))[..., 1:]
    sent_loss = self._ce(
        w_logit[:, :, :-1].contiguous().view(-1, len(self.vocabs.word)),
        w_target.contiguous().view(-1)
    ).view(batch_size, max_conv_len, -1).sum(-1)
    kld_weight = self.kld_weight.get(step)
    loss_kld = kld_sent.sum(-1) + kld_conv
    loss_recon = sent_loss.sum(-1)
    nll = loss_recon + loss_kld
    # Mutual-information estimates for the conversation- and
    # sentence-level latents (sentence MI summed over unpadded turns).
    conv_mi = estimate_mi(zconv_post)
    sent_mi = \
        (estimate_mi(zsent_post.view(batch_size * max_conv_len, -1))
         .view(batch_size, max_conv_len).masked_fill(~conv_mask, 0).sum(-1))
    if self.enable_kl:
        if self.kl_mode == "kl":
            loss = loss_recon + kld_weight * loss_kld
        elif self.kl_mode == "kl-mi":
            loss = loss_recon + kld_weight * (loss_kld - conv_mi)
        elif self.kl_mode == "kl-mi+":
            loss = loss_recon + kld_weight * (loss_kld - conv_mi - sent_mi)
        else:
            raise ValueError(f"unexpected kl mode: {self.kl_mode}")
    else:
        loss = loss_recon
    stats = {
        "nll": nll.mean(),
        "conv-mi": conv_mi.mean(),
        "sent-mi": sent_mi.mean(),
        "loss": loss.mean(),
        "loss-recon": loss_recon.mean(),
        "loss-sent": sent_loss.sum(-1).mean(),
        "loss-sent-turn": sent_loss.sum() / conv_lens.sum(),
        "loss-sent-word": sent_loss.sum() / sent_lens.sum(),
        "ppl-turn": (sent_loss.sum() / conv_lens.sum()).exp(),
        "ppl-word": (sent_loss.sum() / sent_lens.sum()).exp(),
        "kld-weight": torch.tensor(kld_weight),
        "kld-sent": kld_sent.sum(-1).mean(),
        "kld-sent-turn": kld_sent.sum() / conv_lens.sum(),
        "kld-conv": kld_conv.sum(-1).mean(),
        "kld": loss_kld.mean()
    }
    return loss.mean(), stats
def Iop_64to32(self, argument):
    """Narrow a 64-bit value to its low 32 bits."""
    narrowed = utils.mask(argument, 32)
    return narrowed
def Iop_Shr8(self, left, right):
    """Shift `left` right by `right` bits, truncated to 8 bits."""
    shifted = left >> right
    return utils.mask(shifted, 8)
def Iop_CmpNE64(self, left, right):
    """Return 1 when the low 64 bits of left and right differ, else 0."""
    return int(utils.mask(left, 64) != utils.mask(right, 64))

def Iop_CmpNE32(self, left, right):
    """Return 1 when the low 32 bits of left and right differ, else 0."""
    return int(utils.mask(left, 32) != utils.mask(right, 32))
def Ico_U32(self, constant):
    """Evaluate a 32-bit IR constant: its value truncated to 32 bits."""
    value = constant.value
    return utils.mask(value, 32)
def get_anomalies_sequential(video_reader, reid_model_path, fbf_results_dict,
                             static_results_dict, ignore_matrix_gen=None,
                             reid_model_name="resnet50", start_frame=1,
                             frame_interval=20, abnormal_duration_thresh=60,
                             detect_thresh=5, undetect_thresh=8,
                             score_thresh=0.3, light_thresh=0.8,
                             anomaly_score_thresh=0.7, similarity_thresh=0.95,
                             suspicious_time_thresh=18, verbose=False,
                             anomaly_nms_thresh=0.8):
    """
    Performs the anomaly detection. Sequential version

    video_reader: VideoReader object for raw video
    reid_model_path: path to re-ID model checkpoint
    fbf_results_dict: ResultsDict object for frame-by-frame/raw video
        detection results
    static_results_dict: ResultsDict object for static/background detection
        results
    ignore_matrix_gen: generator yielding ignore matrix, must have the same
        interval as frame_interval. Or single numpy array, or path to .npy
        file.
    reid_model_name: backbone used for reid model
    start_frame: video frame to start from
    frame_interval: interval between frames to do calculations on
    abnormal_duration_thresh: duration (in seconds) to consider an object
        abnormal
    detect_thresh: duration (in frames) to consider an object for tracking
    undetect_thresh: duration (in frames) to stop considering an object for
        tracking
    score_thresh: detection score threshold for bounding boxes
    light_thresh: brightness threshold (not sure what it does)
    anomaly_score_thresh: threshold to consider an object an anomaly
    similarity_thresh: threshold for object re-ID
    suspicious_time_thresh: duration (in seconds) for an object to be
        considered suspicious
    verbose: verbose printing
    anomaly_nms_thresh: IoU threshold for anomaly NMS.

    Returns a pandas DataFrame of NMS-filtered anomalies
    (x1, y1, x2, y2, score, start_time, end_time), or None if no anomaly
    was found.
    """

    def get_ignore_gen(ign_matrix):
        """
        Handles different inputs for ignore matrix
        :param ign_matrix:
        :return:
        """
        if isinstance(ign_matrix, types.GeneratorType):
            return ign_matrix
        # load/create matrix
        if ign_matrix is None:
            matrix = np.ones((h, w), dtype=bool)  # Dont ignore anything
        elif type(ign_matrix) == str:  # filename
            matrix = np.load(ign_matrix).astype(bool)
        else:
            # NOTE(review): despite the docstring, a raw numpy array input
            # lands here and raises -- confirm whether arrays should be
            # wrapped into a generator instead.
            raise TypeError("Invalid ignore matrix type:", type(ign_matrix))
        return (matrix for _ in iter(int, 1))  # infinite generator

    # Get video data
    num_frames, framerate, image_shape = video_reader.nframes, \
        video_reader.framerate, video_reader.img_shape
    # load model
    reid_model = ReidExtractor(reid_model_name, reid_model_path)

    # Set up information matrices; all are per-pixel accumulators.
    h, w, _ = image_shape
    ignore_matrix_gen = get_ignore_gen(ignore_matrix_gen)
    detect_count_matrix = np.zeros((h, w))
    undetect_count_matrix = np.zeros((h, w))
    start_time_matrix = np.zeros((h, w))
    end_time_matrix = np.zeros((h, w))
    score_matrix = np.zeros((h, w))
    state_matrix = np.zeros(
        (h, w), dtype=bool
    )  # State matrix, 0/1 distinguishes suspicious candidate states

    if verbose:
        print(
            f"total frames: {num_frames}, framerate: {framerate}, height: {h}, width: {w}"
        )
        print("-------------------------")

    ### Main loop
    start = False       # currently tracking a confirmed-anomaly candidate
    tmp_start = False   # currently tracking a "suspicious" candidate
    all_results = []
    anomaly_now = {}
    for frame in range(start_frame, num_frames, frame_interval):
        try:
            ignore_matrix = next(ignore_matrix_gen)
        except StopIteration:
            pass  # keep same ignore matrix

        # create tmp_score, tmp_detect from the static (background) detector
        static_results = static_results_dict[frame]
        if static_results is not None:
            boxes = static_results.loc[
                static_results["score"] > score_thresh,
                ["x1", "y1", "x2", "y2", "score"]].values
        else:
            boxes = []
        tmp_score, tmp_detect = add_boxes(boxes, ignore_matrix)
        # (debug plotting of boxes on the raw frame removed; see history)

        if verbose:
            print(f"frame: {frame}")
            if len(boxes) > 0:
                print("\tboxes:", len(boxes))

        score_matrix += tmp_score  # add running totals
        detect_count_matrix += tmp_detect

        # Update detection matrices: undetected pixels accumulate, any
        # detection resets the undetected counter.
        undetect_count_matrix += ~tmp_detect
        undetect_count_matrix[tmp_detect] = 0

        # Update time matrices
        start_time_matrix[
            detect_count_matrix == 1] = -600 if frame == 1 else frame  # why -600 for frame 1?
        end_time_matrix[detect_count_matrix > 0] = frame

        # Update state matrices
        state_matrix[detect_count_matrix > detect_thresh] = True

        # Detect anomaly: how long each suspicious pixel has been detected.
        time_delay = utils.mask(end_time_matrix - start_time_matrix,
                                state_matrix)
        delay_max_idx = np.unravel_index(time_delay.argmax(),
                                         time_delay.shape)

        if not start and time_delay.max(
        ) / framerate > abnormal_duration_thresh:
            # A pixel has been static long enough to be a full anomaly.
            delay_max_idx = np.unravel_index(time_delay.argmax(),
                                             time_delay.shape)

            # backtrack the start time
            time_frame = int(start_time_matrix[delay_max_idx] / 5) * 5  # + 1 # why 5s and 1?
            G = np.where(
                detect_count_matrix < detect_count_matrix[delay_max_idx] - 2,
                0, 1)  # What does G represent?, why -2?
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid: merge with the previous candidate if it looks
            # like the same object reappearing within 30 seconds.
            if 'start_time' in anomaly_now and (
                    time_frame / framerate - anomaly_now['end_time']) < 30:  # why 30?
                f1_frame_num = max(1, anomaly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)
                similarity = reid_model.similarity(
                    video_reader.get_frame(f1_frame_num),
                    video_reader.get_frame(f2_frame_num),
                    anomaly_now["region"], region)
                if similarity > similarity_thresh:
                    time_frame = int(anomaly_now['start_time'] * framerate
                                     / 5) * 5  # + 1 # why 5s and 1?
                else:
                    anomaly_now['region'] = region
            else:
                anomaly_now['region'] = region

            # IoU stuff: walk backwards through frame-by-frame detections
            # while the region still overlaps moving-object boxes.
            max_iou = 1
            count = 1
            start_time = time_frame
            tmp_len = 1
            raio = 1
            while (max_iou > 0.1 or tmp_len < 40
                   or raio > 0.6) and time_frame > 1:  # why 0.1, 40, 0.6?
                raio = count / tmp_len
                print("time frame:", time_frame)
                fbf_results = fbf_results_dict[time_frame]
                if fbf_results is not None:
                    bboxes = fbf_results[["x1", "y1", "x2", "y2",
                                          "score"]].values
                    max_iou = utils.compute_iou(anomaly_now['region'], bboxes)
                else:
                    max_iou = 0
                time_frame -= 5  # why 5?
                if max_iou > 0.3:  # why 0.3?
                    count += 1
                    if max_iou > 0.5:  # why 0.5? (paper mentions 0.5 IoU for NMS)
                        start_time = time_frame
                tmp_len += 1

            # back track start_time, until brightness at that spot falls
            # below a threshold
            for time_frame in range(start_time, 1, -5):
                tmp_im = video_reader.get_frame(time_frame)
                if utils.compute_brightness(
                        tmp_im[region[1]:region[3],
                               region[0]:region[2]]) <= light_thresh:
                    break
                start_time = time_frame

            anomaly_now['start_time'] = max(0, start_time / framerate)
            anomaly_now['end_time'] = max(
                0, end_time_matrix[delay_max_idx] / framerate)
            start = True

        elif not tmp_start and time_delay.max(
        ) > suspicious_time_thresh * framerate:
            # Candidate is only "suspicious" so far; record it without the
            # expensive backtracking above.
            time_frame = start_time_matrix[delay_max_idx]
            G = np.where(
                detect_count_matrix < detect_count_matrix[delay_max_idx] - 2,
                0, 1)  # what does G represent?
            region = utils.search_region(G, delay_max_idx)

            # vehicle reid
            if 'start_time' in anomaly_now and (
                    time_frame / framerate - anomaly_now['end_time']) < 30:  # why 30?
                f1_frame_num = max(1, anomaly_now['start_time'] * framerate)
                f2_frame_num = max(1, time_frame)
                similarity = reid_model.similarity(
                    video_reader.get_frame(f1_frame_num),
                    video_reader.get_frame(f2_frame_num),
                    anomaly_now["region"], region)
                if similarity > similarity_thresh:
                    time_frame = int(
                        anomaly_now['start_time'] * framerate / 5) * 5 + 1
                    region = anomaly_now['region']
            anomaly_now['region'] = region
            anomaly_now['start_time'] = max(0, time_frame / framerate)
            anomaly_now['end_time'] = max(
                0, end_time_matrix[delay_max_idx] / framerate)
            tmp_start = True

        if start and time_delay.max() / framerate > abnormal_duration_thresh:
            # Candidate disappeared long enough: finalize it as an anomaly
            # if its average detection score is high enough.
            delay_max_idx = np.unravel_index(time_delay.argmax(),
                                             time_delay.shape)
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:
                anomaly_score = score_matrix[
                    delay_max_idx] / detect_count_matrix[delay_max_idx]
                print("\t", anomaly_now, anomaly_score)
                if anomaly_score > anomaly_score_thresh:
                    anomaly_now['end_time'] = end_time_matrix[
                        delay_max_idx] / framerate
                    anomaly_now['score'] = anomaly_score
                    all_results.append(anomaly_now)
                    anomaly_now = {}
                start = False
        elif tmp_start and time_delay.max(
        ) > suspicious_time_thresh * framerate:
            if undetect_count_matrix[delay_max_idx] > undetect_thresh:
                anomaly_score = score_matrix[
                    delay_max_idx] / detect_count_matrix[delay_max_idx]
                if anomaly_score > anomaly_score_thresh:
                    anomaly_now['end_time'] = end_time_matrix[
                        delay_max_idx] / framerate
                    anomaly_now['score'] = anomaly_score
                tmp_start = False

        # undetect matrix change state_matrix
        state_matrix[undetect_count_matrix > undetect_thresh] = False
        undetect_count_matrix[undetect_count_matrix > undetect_thresh] = 0

        # update matrix: keep accumulators only where detection persists.
        tmp_detect |= state_matrix
        detect_count_matrix = utils.mask(detect_count_matrix, tmp_detect)
        score_matrix = utils.mask(score_matrix, tmp_detect)

    # Add all anomalies to the results list
    print("---", start, time_delay.max(), score_matrix[delay_max_idx],
          detect_count_matrix[delay_max_idx])
    if start and time_delay.max() > abnormal_duration_thresh * framerate:
        anomaly_score = score_matrix[delay_max_idx] / detect_count_matrix[
            delay_max_idx]
        if anomaly_score > anomaly_score_thresh:
            anomaly_now[
                'end_time'] = end_time_matrix[delay_max_idx] / framerate
            anomaly_now['score'] = anomaly_score
            all_results.append(anomaly_now)
            anomaly_now = {}
            start = False

    # Apply Non-Maximal Supression to the results
    if all_results:
        nms_out = utils.anomaly_nms(all_results, anomaly_nms_thresh)
        final_results = pd.DataFrame(nms_out, columns=[
            "x1", "y1", "x2", "y2", "score", "start_time", "end_time"
        ])
        return final_results
    return None
def reject(imfile, catfile, threshold):
    """Reject noisy detections.

    Loads the radio image and dendrogram catalog, measures a peak-flux /
    annulus-RMS SNR for every source, rejects sources below *threshold*,
    lets the user interactively accept/reject individual sources, and
    writes the filtered catalog and DS9 region files.

    Parameters
    ----------
    imfile : str
        The path to the radio image file
    catfile : str
        The path to the source catalog, as obtained from detect.py
    threshold : float
        The signal-to-noise threshold below which sources are rejected
    """
    # Extract information from filename
    outfile = os.path.basename(catfile).split('cat_')[1].split('.dat')[0]
    region = outfile.split('region')[1].split('_band')[0]
    band = outfile.split('band')[1].split('_val')[0]
    min_value = outfile.split('val')[1].split('_delt')[0]
    min_delta = outfile.split('delt')[1].split('_pix')[0]
    min_npix = outfile.split('pix')[1]
    print("\nSource rejection for region {} in band {}".format(region, band))

    print("Loading image file")
    contfile = fits.open(imfile)
    data = contfile[0].data.squeeze()
    mywcs = wcs.WCS(contfile[0].header).celestial
    catalog = Table(Table.read(catfile, format='ascii'), masked=True)

    beam = radio_beam.Beam.from_fits_header(contfile[0].header)
    pixel_scale = np.abs(
        mywcs.pixel_scale_matrix.diagonal().prod())**0.5 * u.deg
    ppbeam = (beam.sr / (pixel_scale**2)).decompose().value
    # Convert to per-pixel flux so aperture sums/peaks are beam-independent.
    data = data / ppbeam

    # Remove existing region files
    if os.path.isfile('./reg/reg_' + outfile + '_annulus.reg'):
        os.remove('./reg/reg_' + outfile + '_annulus.reg')
    if os.path.isfile('./reg/reg_' + outfile + '_filtered.reg'):
        os.remove('./reg/reg_' + outfile + '_filtered.reg')

    # Load in manually accepted and rejected sources
    override_accepted = []
    override_rejected = []
    if os.path.isfile('./.override/accept_' + outfile + '.txt'):
        override_accepted = np.loadtxt('./.override/accept_' + outfile +
                                       '.txt').astype('int')
    if os.path.isfile('./.override/reject_' + outfile + '.txt'):
        override_rejected = np.loadtxt('./.override/reject_' + outfile +
                                       '.txt').astype('int')
    print("\nManually accepted sources: ", set(override_accepted))
    print("Manually rejected sources: ", set(override_rejected))

    print('\nCalculating RMS values within aperture annuli')
    pb = ProgressBar(len(catalog))
    data_cube = []
    masks = []
    rejects = []
    snr_vals = []
    mean_backgrounds = []
    for i in range(len(catalog)):
        x_cen = catalog['x_cen'][i] * u.deg
        y_cen = catalog['y_cen'][i] * u.deg
        major_fwhm = catalog['major_fwhm'][i] * u.deg
        minor_fwhm = catalog['minor_fwhm'][i] * u.deg
        position_angle = catalog['position_angle'][i] * u.deg
        dend_flux = catalog['dend_flux_band{}'.format(band)][i]
        annulus_width = 1e-5 * u.deg
        center_distance = 1e-5 * u.deg

        # Define some ellipse properties in pixel coordinates
        position = coordinates.SkyCoord(x_cen, y_cen, frame='icrs',
                                        unit=(u.deg, u.deg))
        pix_position = np.array(position.to_pixel(mywcs))
        pix_major_fwhm = major_fwhm / pixel_scale
        pix_minor_fwhm = minor_fwhm / pixel_scale

        # Cutout section of the image we care about, to speed up computation time
        size = (center_distance + annulus_width + major_fwhm) * 2.2
        cutout = Cutout2D(data, position, size, mywcs, mode='partial')
        cutout_center = regions.PixCoord(cutout.center_cutout[0],
                                         cutout.center_cutout[1])

        # Define the aperture regions needed for SNR
        ellipse_reg = regions.EllipsePixelRegion(
            cutout_center, pix_major_fwhm * 2., pix_minor_fwhm * 2.,
            angle=position_angle
        )  # Make sure you're running the dev version of regions, otherwise the position angles will be in radians!
        innerann_reg = regions.CirclePixelRegion(
            cutout_center, center_distance / pixel_scale + pix_major_fwhm)
        outerann_reg = regions.CirclePixelRegion(
            cutout_center, center_distance / pixel_scale + pix_major_fwhm +
            annulus_width / pixel_scale)

        # Make masks from aperture regions
        ellipse_mask = mask(ellipse_reg, cutout)
        annulus_mask = mask(outerann_reg, cutout) - mask(innerann_reg, cutout)

        # Plot annulus and ellipse regions
        data_cube.append(cutout.data)
        masks.append([annulus_mask, ellipse_mask])

        # Calculate the SNR and aperture flux sums
        bg_rms = rms(cutout.data[annulus_mask.astype('bool')])
        peak_flux = np.max(cutout.data[ellipse_mask.astype('bool')])
        flux_rms_ratio = peak_flux / bg_rms
        snr_vals.append(flux_rms_ratio)

        # Reject bad sources below some SNR threshold
        rejected = False
        if flux_rms_ratio <= threshold:
            rejected = True

        # Process manual overrides (matched on the catalog '_idx' column)
        if catalog['_idx'][i] in override_accepted:
            rejected = False
        if catalog['_idx'][i] in override_rejected:
            rejected = True
        rejects.append(int(rejected))

        # Add non-rejected source ellipses to a new region file
        fname = './reg/reg_' + outfile + '_filtered.reg'
        with open(fname, 'a') as fh:
            if os.stat(fname).st_size == 0:
                fh.write("icrs\n")
            if not rejected:
                fh.write("ellipse({}, {}, {}, {}, {}) # text={{{}}}\n".format(
                    x_cen.value, y_cen.value, major_fwhm.value,
                    minor_fwhm.value, position_angle.value, i))
        pb.update()

    # Plot the grid of sources
    plot_grid(data_cube, masks, rejects, snr_vals, catalog['_idx'])
    plt.suptitle(
        'region={}, band={}, min_value={}, min_delta={}, min_npix={}, threshold={:.4f}'
        .format(region, band, min_value, min_delta, min_npix, threshold))
    plt.show(block=False)

    # Get overrides from user
    print(
        'Manual overrides example: type "r319, a605" to manually reject source #319 and accept source #605.'
    )
    overrides = input(
        "\nType manual override list, or press enter to continue:\n").split(
            ', ')
    accepted_list = [
        s[1:] for s in list(filter(lambda x: x.startswith('a'), overrides))
    ]
    rejected_list = [
        s[1:] for s in list(filter(lambda x: x.startswith('r'), overrides))
    ]

    # Save the manually accepted and rejected sources
    fname = './.override/accept_' + outfile + '.txt'
    with open(fname, 'a') as fh:
        for num in accepted_list:
            fh.write('\n' + str(num))
    fname = './.override/reject_' + outfile + '.txt'
    with open(fname, 'a') as fh:
        for num in rejected_list:
            fh.write('\n' + str(num))
    print(
        "Manual overrides written to './.override/' and saved to source catalog. New overrides will be displayed the next time the rejection script is run."
    )

    # Process the new overrides, to be saved into the catalog
    rejects = np.array(rejects)
    # BUG FIX: previously only the last two characters of each typed id were
    # kept ([a[-2:] for a in ...]), so e.g. "a319" updated row 19 instead of
    # row 319.  Parse the full id instead.
    acc = np.array(accepted_list, dtype=int)
    rej = np.array(rejected_list, dtype=int)
    # NOTE(review): these ids index `rejects` positionally, while saved
    # override files are matched against catalog['_idx'] above -- presumably
    # '_idx' equals the row position; confirm.
    rejects[acc] = 0
    rejects[rej] = 1

    # Save the catalog with new columns for SNR
    catalog.add_column(Column(snr_vals), name='snr_band' + band)
    catalog.add_column(np.invert(catalog.mask['snr_band' +
                                              band]).astype(int),
                       name='detected_band' + band)
    catalog.add_column(Column(rejects), name='rejected')
    catalog.write('./cat/cat_' + outfile + '_filtered.dat', format='ascii')
def predict(self, batch, ontology):
    """Predict belief states for a batch and return the batch loss.

    For every act-slot in *ontology* the model scores each candidate
    ontology value; the same BCE loss as ``compute_loss`` is accumulated,
    and candidates whose sigmoid score exceeds 0.5 are collected per
    sample.  The collected candidates are then merged into ``DialogState``
    objects and DSTC2-style 'this' references are resolved.

    Returns
    -------
    (loss, pred)
        ``loss`` is the accumulated tensor loss; ``pred`` is a list of
        dialog states, one per sample in the batch.
    """
    # One prediction bucket per sample in the batch.
    pred = [list() for _ in range(batch.batch_size)]
    loss = None
    for act_slot, (ont_idx, ont) in ontology.items():
        as_idx = torch.tensor(self.vocabs.speaker_state["user"]
                              .act_slot[act_slot]).to(self.device)
        ont_idx, ont = ont_idx.to(self.device), ont.to(self.device)
        logit = self.model(
            as_idx,
            *batch.sent.tensors,
            *self.prepare_system_acts(batch.system_acts),
            *ont.tensors
        )
        # ont_idx: [num_ont] -> [batch_size x num_ont x state_lat]
        # s: [batch_size x state_lat] ->
        # [batch_size x num_ont x state_lat]
        # target: [batch_size x num_ont]
        s = batch.belief_state
        # A candidate is a positive target when it occurs anywhere in the
        # gold belief state; padded positions are masked out first.
        target = \
            ((ont_idx.unsqueeze(0).unsqueeze(-1) == s.value.unsqueeze(1))
             .masked_fill(~utils.mask(s.lens).unsqueeze(1), 0).any(-1))
        current_loss = self._bce(logit, target.float())
        if self.loss == "mean":
            current_loss = current_loss.mean(-1)
        elif self.loss == "sum":
            current_loss = current_loss.sum(-1)
        else:
            raise ValueError(f"unsupported loss method: {self.loss}")
        if loss is None:
            loss = current_loss
        else:
            loss += current_loss
        # Keep candidates predicted positive (sigmoid > 0.5), storing the
        # raw logit as the confidence score.
        for batch_idx, val_idx in \
                (torch.sigmoid(logit) > 0.5).nonzero().tolist():
            pred[batch_idx].append(
                (ont_idx[val_idx].item(), logit[batch_idx, val_idx]))

    def to_dialog_state(data: Sequence[Tuple[ActSlotValue, float]]):
        # Merge scored act-slot-values into one DialogState: all
        # "request slot" values are kept, while for "inform" only the
        # highest-scoring value per act-slot survives.
        state = DialogState()
        as_map = collections.defaultdict(list)
        for asv, score in data:
            as_map[(asv.act, asv.slot)].append((asv, score))
        for (act, slt), data in as_map.items():
            if act == "request" and slt == "slot":
                state.update(asv for asv, _ in data)
            elif act == "inform":
                state.add(max(data, key=lambda x: x[1])[0])
        return state

    # Map ontology indices back to ActSlotValue objects.
    # NOTE(review): this path uses self.processor.vocabs while the loss path
    # above uses self.vocabs -- presumably the same vocab object; confirm.
    pred = [[(self.processor.vocabs.speaker_state["user"].asv[idx], score)
             for idx, score in v] for v in pred]
    pred = list(map(to_dialog_state, pred))
    pred_inform = [{sv.slot: sv.value for sv in p.get("inform")}
                   for p in pred]
    pred_request = [{sv.value for sv in p.get("request")} for p in pred]
    # DSTC2: 'this' resolution
    pred = [
        (DSTTurn(turn.wizard, turn.user.clone(inform=pi, request=pr))
         .resolve_this().user.state) for
        turn, pi, pr in zip(batch.raw, pred_inform, pred_request)
    ]
    return loss, pred
def get_mem(self, address, size):
    """Read memory at *address*, truncated to *size*; writes shadow reads."""
    # Values written during execution (out_mem) take precedence over the
    # initial memory image (in_mem).
    source = self.out_mem if address in self.out_mem else self.in_mem
    return utils.mask(source[address], size)
def Iop_Add32(self, left, right):
    # Addition with the result truncated to 32 bits.
    total = left + right
    return utils.mask(total, 32)

def Iop_Add8(self, left, right):
    # Addition with the result truncated to 8 bits.
    total = left + right
    return utils.mask(total, 8)
def Ico_U8(self, constant):
    # Materialize an 8-bit unsigned IR constant.
    value = constant.value
    return utils.mask(value, 8)
def Iop_Shl8(self, left, right):
    # Left shift with the result truncated to 8 bits.
    shifted = left << right
    return utils.mask(shifted, 8)
def Ico_U64(self, constant):
    # Materialize a 64-bit unsigned IR constant.
    value = constant.value
    return utils.mask(value, 64)
def Iop_Sub64(self, left, right):
    # Subtraction; utils.mask is called without an explicit width here,
    # relying on its default (presumably 64 bits, matching the op name).
    difference = left - right
    return utils.mask(difference)

def Iop_Sub32(self, left, right):
    # Subtraction with the result truncated to 32 bits.
    difference = left - right
    return utils.mask(difference, 32)
def Iop_8Uto64(self, argument):
    # Widen an 8-bit value to 64 bits; relies on utils.mask's default width
    # (presumably 64 bits -- confirm against utils.mask's signature).
    widened = utils.mask(argument)
    return widened
def Iop_Shl32(self, left, right):
    # Left shift with the result truncated to 32 bits.
    shifted = left << right
    return utils.mask(shifted, 32)

def Iop_CmpEQ64(self, left, right):
    # 64-bit equality comparison: 1 when equal, 0 otherwise.
    if utils.mask(left, 64) == utils.mask(right, 64):
        return 1
    return 0
def Iop_Add8(self, left, right):
    # Addition with the result truncated to 8 bits.
    total = left + right
    return utils.mask(total, 8)

def Iop_Sub64(self, left, right):
    # Subtraction; relies on utils.mask's default width (presumably 64 bits).
    difference = left - right
    return utils.mask(difference)
def compute(self, batch: BatchData, outputs, step: int = None
            ) -> Tuple[torch.Tensor, utils.TensorMap]:
    """Compute the training loss for one batch and a dict of statistics.

    *outputs* is a ``(logit, post, prior)`` triple: decoder logits for
    sentence/speaker/state, and posterior/prior latent distributions for
    the conv/state/sent/speaker variables.  The loss combines
    reconstruction terms with KL divergences (optionally weighted and with
    mutual-information corrections, depending on ``self.kl_mode``).

    Returns
    -------
    (loss, stats)
        ``loss`` is a scalar tensor; ``stats`` maps metric names (also
        broken down per speaker) to scalar tensors.
    """
    step = step or 0
    logit, post, prior = outputs
    batch_size = batch.batch_size
    max_conv_len = batch.max_conv_len
    max_sent_len = batch.max_sent_len
    # NOTE(review): max_goal_len / max_state_len are unused below.
    max_goal_len = batch.max_goal_len
    max_state_len = batch.max_state_len
    w_logit, p_logit, s_logit = \
        (logit[k] for k in ("sent", "speaker", "state"))
    zconv_post, zstate_post, zsent_post, zspkr_post = \
        (post[k] for k in ("conv", "state", "sent", "speaker"))
    zconv_prior, zstate_prior, zsent_prior, zspkr_prior = \
        (prior[k] for k in ("conv", "state", "sent", "speaker"))
    conv_lens, sent_lens = batch.conv_lens, batch.sent.lens1
    # Mask out padded turns, then padded words inside each turn.
    conv_mask = utils.mask(conv_lens, max_conv_len)
    sent_lens = sent_lens.masked_fill(~conv_mask, 0)
    sent_mask = utils.mask(sent_lens, max_sent_len)
    # Valid state logits: finite entries within non-padded turns.
    state_logit_mask = \
        (((s_logit != float("-inf")) & (s_logit != float("inf")))
         .masked_fill(~conv_mask.unsqueeze(-1), 0))
    # KL terms; per-turn terms are zeroed on padded turns.
    kld_conv = zconv_post.kl_div()
    kld_state = zstate_post.kl_div(zstate_prior).masked_fill(~conv_mask, 0)
    kld_sent = zsent_post.kl_div(zsent_prior).masked_fill(~conv_mask, 0)
    kld_spkr = zspkr_post.kl_div(zspkr_prior).masked_fill(~conv_mask, 0)
    # Word targets: drop the first token (decoder predicts the next word);
    # padded positions become -1 (the CE ignore index, presumably).
    w_target = (batch.sent.value.masked_fill(~sent_mask, -1)
                .view(-1, max_sent_len))[..., 1:]
    s_target = utils.to_dense(idx=batch.state.value,
                              lens=batch.state.lens1,
                              max_size=self.num_asv)
    p_target = batch.speaker.value.masked_fill(~conv_mask, -1)
    state_loss = (self._bce(s_logit, s_target.float()).masked_fill(
        ~state_logit_mask, 0)).sum(-1)
    spkr_loss = self._ce(p_logit.view(-1, self.vocabs.num_speakers),
                         p_target.view(-1)).view(batch_size, max_conv_len)
    sent_loss = self._ce(
        w_logit[:, :, :-1].contiguous().view(-1, len(self.vocabs.word)),
        w_target.contiguous().view(-1)).view(batch_size, max_conv_len,
                                             -1).sum(-1)
    kld_weight = self.kld_weight.get(step)
    loss_kld = (kld_conv + kld_sent.sum(-1) + kld_state.sum(-1) +
                kld_spkr.sum(-1))
    loss_recon = (sent_loss.sum(-1) + state_loss.sum(-1) +
                  spkr_loss.sum(-1))
    nll = loss_recon + loss_kld
    # Mutual-information estimates, masked to real turns.
    conv_mi = estimate_mi(zconv_post)
    sent_mi = \
        (estimate_mi(zsent_post.view(batch_size * max_conv_len, -1))
         .view(batch_size, max_conv_len).masked_fill(~conv_mask, 0).sum(-1))
    spkr_mi = \
        (estimate_mi(zspkr_post.view(batch_size * max_conv_len, -1))
         .view(batch_size, max_conv_len).masked_fill(~conv_mask, 0).sum(-1))
    state_mi = \
        (estimate_mi(zstate_post.view(batch_size * max_conv_len, -1))
         .view(batch_size, max_conv_len).masked_fill(~conv_mask, 0).sum(-1))
    # Assemble the objective according to the configured KL mode.
    if self.enable_kl:
        if self.kl_mode == "kl-mi":
            loss = loss_recon + kld_weight * (loss_kld - conv_mi)
        elif self.kl_mode == "kl-mi+":
            loss = loss_recon + kld_weight * (loss_kld - conv_mi - sent_mi -
                                              spkr_mi - state_mi)
        else:
            loss = loss_recon + kld_weight * loss_kld
    else:
        loss = loss_recon
    stats = {
        "nll": nll.mean(),
        "conv-mi": conv_mi.mean(),
        "sent-mi": sent_mi.mean(),
        "state-mi": state_mi.mean(),
        "spkr-mi": spkr_mi.mean(),
        "loss": loss.mean(),
        "loss-recon": loss_recon.mean(),
        "loss-sent": sent_loss.sum(-1).mean(),
        "loss-sent-turn": sent_loss.sum() / conv_lens.sum(),
        "loss-sent-word": sent_loss.sum() / sent_lens.sum(),
        "ppl-turn": (sent_loss.sum() / conv_lens.sum()).exp(),
        "ppl-word": (sent_loss.sum() / sent_lens.sum()).exp(),
        "loss-state": state_loss.sum(-1).mean(),
        "loss-state-turn": state_loss.sum() / conv_lens.sum(),
        "loss-state-asv": state_loss.sum() / state_logit_mask.sum(),
        "loss-spkr": spkr_loss.sum(-1).mean(),
        "loss-spkr-turn": spkr_loss.sum() / conv_lens.sum(),
        "kld-weight": torch.tensor(kld_weight),
        "kld-sent": kld_sent.sum(-1).mean(),
        "kld-sent-turn": kld_sent.sum() / conv_lens.sum(),
        "kld-conv": kld_conv.sum(-1).mean(),
        "kld-state": kld_state.sum(-1).mean(),
        "kld-state-turn": kld_state.sum() / conv_lens.sum(),
        "kld-spkr": kld_spkr.sum(-1).mean(),
        "kld-spkr-turn": kld_spkr.sum() / conv_lens.sum(),
        "kld": loss_kld.mean()
    }
    # Per-speaker breakdown of the same statistics.
    for spkr_idx, spkr in self.vocabs.speaker.i2f.items():
        if spkr == "<unk>":
            continue
        spkr_mask = p_target == spkr_idx
        spkr_sent_lens = sent_lens.masked_fill(~spkr_mask, 0)
        spkr_state_mask = \
            state_logit_mask.masked_fill(~spkr_mask.unsqueeze(-1), 0)
        spkr_sent_loss = sent_loss.masked_fill(~spkr_mask, 0).sum()
        spkr_state_loss = state_loss.masked_fill(~spkr_mask, 0).sum()
        spkr_spkr_loss = spkr_loss.masked_fill(~spkr_mask, 0).sum()
        spkr_kld_sent = kld_sent.masked_fill(~spkr_mask, 0).sum()
        spkr_kld_state = kld_state.masked_fill(~spkr_mask, 0).sum()
        spkr_kld_spkr = kld_spkr.masked_fill(~spkr_mask, 0).sum()
        spkr_stats = {
            "loss-sent": spkr_sent_loss / batch_size,
            "loss-sent-turn": spkr_sent_loss / spkr_mask.sum(),
            "loss-sent-word": spkr_sent_loss / spkr_sent_lens.sum(),
            "ppl-turn": (spkr_sent_loss / spkr_mask.sum()).exp(),
            "ppl-word": (spkr_sent_loss / spkr_sent_lens.sum()).exp(),
            "loss-state": spkr_state_loss / batch_size,
            "loss-state-turn": spkr_state_loss / spkr_mask.sum(),
            "loss-state-asv": spkr_state_loss / spkr_state_mask.sum(),
            "loss-spkr": spkr_spkr_loss / batch_size,
            "loss-spkr-turn": spkr_spkr_loss / spkr_mask.sum(),
            "kld-sent": spkr_kld_sent / batch_size,
            "kld-sent-turn": spkr_kld_sent / spkr_mask.sum(),
            "kld-state": spkr_kld_state / batch_size,
            "kld-state-turn": spkr_kld_state / spkr_mask.sum(),
            "kld-spkr": spkr_kld_spkr / batch_size,
            "kld-spkr-turn": spkr_kld_spkr / spkr_mask.sum(),
        }
        stats.update({f"{k}-{spkr}": v for k, v in spkr_stats.items()})
    return loss.mean(), stats
def Iop_Shl64(self, left, right):
    # Left shift; relies on utils.mask's default width (presumably 64 bits).
    shifted = left << right
    return utils.mask(shifted)

def Iop_Shl32(self, left, right):
    # Left shift with the result truncated to 32 bits.
    shifted = left << right
    return utils.mask(shifted, 32)
def forward(self, x, lens=None):
    """Embed token ids; positions beyond each length are replaced with the
    out-of-vocabulary index (len(self.vocab)) before lookup."""
    if lens is None:
        return self.embedding(x)
    pad_index = len(self.vocab)
    valid = utils.mask(lens, x.size(-1))
    return self.embedding(x.masked_fill(~valid, pad_index))
def Iop_CmpEQ64(self, left, right):
    # 64-bit equality comparison: 1 when equal, 0 otherwise.
    if utils.mask(left, 64) == utils.mask(right, 64):
        return 1
    return 0

def Iop_CmpEQ32(self, left, right):
    # 32-bit equality comparison: 1 when equal, 0 otherwise.
    if utils.mask(left, 32) == utils.mask(right, 32):
        return 1
    return 0
def Iop_CmpNE32(self, left, right):
    # 32-bit not-equal comparison: 1 when the operands differ, 0 otherwise.
    if utils.mask(left, 32) != utils.mask(right, 32):
        return 1
    return 0
def Iop_CmpNE32(self, left, right):
    # 32-bit not-equal comparison: both operands are truncated to 32 bits
    # before comparing; returns 1 when they differ, 0 otherwise.
    return 1 if utils.mask(left, 32) != utils.mask(right, 32) else 0


if __name__ == "__main__":
def _apply_mask(image, mask, output):
    """Mask the image data and save the result to *output*.

    ``image`` and ``mask`` are indexable bundles whose first element is the
    pixel data; ``image[1]`` and ``image[2]`` are forwarded to the saver
    (presumably header/metadata -- confirm against utils.save_file).
    """
    side = args.side
    masked_data = utils.mask(image[0], mask[0], side)
    utils.save_file(masked_data, image[1], image[2], output)