from typing import Iterable, Mapping, Tuple

import nltk
import numpy as np
import torch

# Project-local names referenced below and assumed importable in this module:
# utils, DSTBatchData, DSTTestBatchData, TensorMap.


def iter_asr(
        batch: DSTTestBatchData
) -> Iterable[Tuple[DSTBatchData, torch.Tensor]]:
    """Split a test batch into per-dialogue sub-batches of ASR hypotheses,
    each paired with its hypothesis scores."""
    asr, asr_score = batch.asr, batch.asr_score
    assert (utils.compare_tensors(asr.lens, asr_score.lens)
            and asr.size(0) == asr_score.size(0))
    for i in range(asr.size(0)):
        asr_item, asr_score_item = asr[i], asr_score[i]
        num_asr = asr_item.size(0)
        # Replicate the dialogue's context once per ASR hypothesis so the
        # sub-batch can be fed through a model like an ordinary batch.
        yield DSTBatchData(
            sent=asr_item,
            system_acts=utils.pad_stack([batch.system_acts[i]] * num_asr),
            belief_state=utils.pad_stack([batch.belief_state[i]] * num_asr),
            slot=utils.pad_stack([batch.slot[i]] * num_asr),
            raw=[batch.raw[i]] * num_asr
        ), asr_score_item
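
# Usage sketch, not part of the module: one way a caller might consume
# iter_asr. `model` and the score-weighted pooling are assumptions for
# illustration; only the (sub_batch, scores) contract comes from above.
def _demo_iter_asr(model, test_batch: DSTTestBatchData):
    for sub_batch, scores in iter_asr(test_batch):
        logits = model(sub_batch)  # hypothetical: one row per ASR hypothesis
        weights = torch.softmax(scores, 0)  # normalize ASR confidences
        yield (weights.unsqueeze(-1) * logits).sum(0)  # score-weighted pooling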
def get(self) -> TensorMap:
    """Aggregate the accumulated predictions into a flat metric dictionary,
    overall and broken down per speaker."""
    pred_goal = utils.stack_stacked1dtensors(self._pred_goal)
    pred_state = utils.stack_stacked1dtensors(self._pred_state)
    gold_goal = utils.stack_stacked1dtensors(self._gold_goal)
    gold_state = utils.stack_stacked1dtensors(self._gold_state)
    spkr_value = utils.pad_stack(self._spkr).value
    stats = dict()
    stats.update({
        f"goal-{k}": v
        for k, v in self.compute_accuracy(pred_goal, gold_goal).items()
    })
    stats.update({
        f"state-{k}": v
        for k, v in self.compute_accuracy(pred_state, gold_state).items()
    })
    # Per-speaker breakdown: mask the turns belonging to each speaker,
    # skipping the unknown-speaker placeholder.
    for spkr_idx, spkr in self.vocabs.speaker.i2f.items():
        if spkr == "<unk>":
            continue
        stats.update({
            f"goal-{k}-{spkr}": v
            for k, v in self.compute_accuracy(
                pred_goal, gold_goal, spkr_value == spkr_idx).items()
        })
        stats.update({
            f"state-{k}-{spkr}": v
            for k, v in self.compute_accuracy(
                pred_state, gold_state, spkr_value == spkr_idx).items()
        })
    return stats
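
# Usage sketch, not part of the module: dumping the flat metric names.
def _demo_get(evaluator) -> None:
    stats = evaluator.get()
    # e.g. {"goal-acc": 0.81, "state-acc": 0.74, "goal-acc-user": 0.83, ...}
    # (the "acc" key and speaker suffixes are assumptions; the real keys come
    # from compute_accuracy and the speaker vocabulary)
    for name in sorted(stats):
        print(name, stats[name])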
def create_act_slot_tensor(self, vocab):
    """Tensorize every ontology (act, slot) pair as a padded token-id
    sequence terminated by "<eos>", in ontology index order."""
    act_slots = []
    for as_idx, (act, slot) in self.ontology.act_slot.i2f.items():
        tokens = (list(nltk.casual_tokenize(act))
                  + list(nltk.casual_tokenize(slot))
                  + ["<eos>"])
        token_ids = [vocab[token] for token in tokens]
        act_slots.append((as_idx, torch.LongTensor(token_ids)))
    # Sort by ontology index so that row i corresponds to act_slot id i.
    act_slots.sort(key=lambda x: x[0])
    return utils.pad_stack([act_slot[1] for act_slot in act_slots])
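
# Illustrative call (assumes `vocab` maps tokens to ids, as used above):
# row i of the returned Stacked1DTensor is the padded
# "<act tokens> <slot tokens> <eos>" id sequence for act_slot id i.
def _demo_act_slot_tensor(processor, vocab):
    act_slots = processor.create_act_slot_tensor(vocab)
    # act_slots.value: LongTensor [num_act_slot_pairs, max_tokens]
    # act_slots.lens:  LongTensor [num_act_slot_pairs]
    return act_slots.value, act_slots.lens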
def __call__(self, batch, pad_override=False):
    """Recursively collate a list of samples, padding ragged tensors for
    whitelisted dict keys (or everywhere once `pad_override` is set)."""
    if not len(batch):
        raise ValueError("must provide at least one sample")
    sample = batch[0]
    sample_type = type(sample)
    if sample_type == dict:
        # Collate each key independently; keys in `pad_keys` force padding.
        return {
            k: self([s[k] for s in batch],
                    pad_override or (k in self.pad_keys))
            for k in sample
        }
    elif sample_type == tuple:
        return tuple(
            self([s[i] for s in batch], pad_override)
            for i in range(len(sample))
        )
    elif sample_type == int:
        return torch.LongTensor(batch)
    elif sample_type == float:
        return torch.FloatTensor(batch)
    elif sample_type == utils.Stacked1DTensor:
        # Stacking ragged 1-D tensors adds one level of nesting.
        x = utils.pad_stack([t.value for t in batch])
        s = utils.pad_stack([t.lens for t in batch])
        return utils.DoublyStacked1DTensor(
            value=x.value, lens=s.lens, lens1=s.value)
    elif sample_type == utils.DoublyStacked1DTensor:
        x = utils.pad_stack([t.value for t in batch])
        s1 = utils.pad_stack([t.lens for t in batch])
        s2 = utils.pad_stack([t.lens1 for t in batch])
        return utils.TriplyStacked1DTensor(
            value=x.value, lens=s1.value, lens1=s2.value, lens2=s2.lens)
    elif isinstance(sample, torch.Tensor):
        return self.stack(batch, pad_override)
    elif isinstance(sample, np.ndarray):
        return self.stack(list(map(torch.tensor, batch)), pad_override)
    else:
        # Fall back to returning the raw list (e.g. strings).
        return batch
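
# Usage sketch, not part of the module: plugged into a DataLoader as the
# collate_fn. The `Collator` class name and its constructor are assumptions
# for illustration; only the __call__ behavior above is from the source.
def _demo_collator(dataset):
    from torch.utils.data import DataLoader
    collate = Collator(pad_keys={"sent"})  # hypothetical constructor
    return DataLoader(dataset, batch_size=32, collate_fn=collate)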
def tensorize_state_vocab(self, mode="state", speaker=None,
                          tensorizer=None) -> utils.Stacked1DTensor:
    """Tensorize the act-slot-value vocabulary for the given mode
    ("state", "goal", or "goal_state"), optionally restricted to one
    speaker's sub-vocabulary."""
    if mode not in {"state", "goal", "goal_state"}:
        raise ValueError(f"unsupported mode: {mode}")
    if not self.is_initialized:
        raise RuntimeError("vocabulary unset")
    if speaker is None:
        vocab = getattr(self.vocabs, mode).asv
    else:
        vocab = getattr(self.vocabs, f"speaker_{mode}")[speaker].asv
    # Sort by vocabulary index so that row i of the result is entry i.
    state = sorted(vocab.i2f.items(), key=lambda x: x[0])
    tensors = list(map(tensorizer or self.tensorize_asv,
                       (v for k, v in state)))
    return utils.pad_stack(tensors)
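
# Illustrative calls ("user" is a hypothetical speaker key): row i of each
# result corresponds to vocabulary index i, since entries are sorted above.
def _demo_state_vocab(processor):
    all_state = processor.tensorize_state_vocab(mode="state")
    user_goal = processor.tensorize_state_vocab(mode="goal", speaker="user")
    return all_state.value.size(), user_goal.value.size()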
def tensorize_state_dict(self, tensor=None, speaker=None, tensorizer=None):
    """Group the state vocabulary by (act, slot), pairing each group's
    global vocabulary indices with its padded value-token tensors."""
    if tensor is None:
        tensor = self.tensorize_state_vocab("state", speaker, tensorizer)
    ont = dict()
    if speaker is None:
        vocab = self.vocabs.state.asv
    else:
        vocab = self.vocabs.speaker_state[speaker].asv
    for idx, asv in vocab.i2f.items():
        if asv == self.asv_pad:
            continue
        key = (asv.act, asv.slot)
        ont.setdefault(key, []).append((torch.tensor(idx), tensor[idx]))
    return {k: (torch.stack([x[0] for x in v]),
                utils.pad_stack([x[1] for x in v]))
            for k, v in ont.items()}
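
# Illustrative traversal, not part of the module: the mapping groups the
# state vocabulary by (act, slot), so a per-slot value scorer can look up
# candidate values and their global vocabulary indices together.
def _demo_state_dict(processor):
    for (act, slot), (asv_idx, values) in \
            processor.tensorize_state_dict().items():
        # asv_idx: LongTensor [num_values] of global vocabulary indices
        # values:  padded value-token tensors for this (act, slot) pair
        print(act, slot, asv_idx.size(0))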
def tensorize_asr(self, asr: Mapping[str, float]):
    sents, scores = zip(*asr.items())
    return (utils.pad_stack(list(map(self.tensorize_sent, sents))),
            torch.tensor(scores))
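
# Example input/output (hypothetical hypotheses): an ASR n-best list maps
# hypothesis strings to confidence scores; row i of the padded token tensor
# is aligned with scores[i].
def _demo_tensorize_asr(processor):
    nbest = {"book a cheap hotel": 0.7, "look a cheap hotel": 0.3}
    sents, scores = processor.tensorize_asr(nbest)
    assert sents.value.size(0) == scores.size(0) == len(nbest)
    return sents, scores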
def stack(tensors, pad=False):
    if pad:
        return utils.pad_stack(tensors)
    else:
        return torch.stack(tensors)
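
# Minimal demonstration: pad=False requires equal lengths (plain
# torch.stack), while pad=True right-pads to the longest tensor first via
# utils.pad_stack, whose result exposes .value and .lens as used above.
def _demo_stack():
    a, b = torch.tensor([1, 2, 3]), torch.tensor([4, 5])
    padded = stack([a, b], pad=True)  # e.g. value [[1, 2, 3], [4, 5, 0]],
                                      # assuming zero padding
    equal = stack([a, a], pad=False)  # plain stack, shape [2, 3]
    return padded, equal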