Example #1
File: run.py Project: kaniblu/vhda
 def iter_asr(batch: DSTTestBatchData) -> Iterable[DSTBatchData]:
     # For each item in the test batch, yield a sub-batch with one row per
     # ASR hypothesis, paired with the corresponding hypothesis scores.
     asr, asr_score = batch.asr, batch.asr_score
     assert (utils.compare_tensors(asr.lens, asr_score.lens)
             and asr.size(0) == asr_score.size(0))
     for i in range(asr.size(0)):
         asr_item = asr[i]
         asr_score_item = asr_score[i]
         num_asr = asr_item.size(0)
         yield DSTBatchData(
             sent=asr_item,
             system_acts=utils.pad_stack([batch.system_acts[i]] * num_asr),
             belief_state=utils.pad_stack([batch.belief_state[i]] * num_asr),
             slot=utils.pad_stack([batch.slot[i]] * num_asr),
             raw=[batch.raw[i]] * num_asr
         ), asr_score_item
Example #2
 def get(self) -> TensorMap:
     # Aggregate goal and state accuracy statistics, both overall and
     # restricted to each known speaker.
     pred_goal = utils.stack_stacked1dtensors(self._pred_goal)
     pred_state = utils.stack_stacked1dtensors(self._pred_state)
     gold_goal = utils.stack_stacked1dtensors(self._gold_goal)
     gold_state = utils.stack_stacked1dtensors(self._gold_state)
     spkr_value = utils.pad_stack(self._spkr).value
     stats = dict()
     stats.update({
         f"goal-{k}": v
         for k, v in self.compute_accuracy(pred_goal, gold_goal).items()
     })
     stats.update({
         f"state-{k}": v
         for k, v in self.compute_accuracy(pred_state, gold_state).items()
     })
     for spkr_idx, spkr in self.vocabs.speaker.i2f.items():
         if spkr == "<unk>":
             continue
         stats.update({
             f"goal-{k}-{spkr}": v
             for k, v in self.compute_accuracy(
                 pred_goal, gold_goal, spkr_value == spkr_idx).items()
         })
         stats.update({
             f"state-{k}-{spkr}": v
             for k, v in self.compute_accuracy(
                 pred_state, gold_state, spkr_value == spkr_idx).items()
         })
     return stats
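The returned stats are keyed by metric name, prefixed with the target ("goal"/"state") and, for the per-speaker entries, suffixed with the speaker token. The key-construction pattern in isolation, with a hypothetical compute_accuracy output and a hypothetical "user" speaker:
 acc = {"acc": 0.9, "joint-acc": 0.7}  # hypothetical compute_accuracy output
 stats = {}
 stats.update({f"goal-{k}": v for k, v in acc.items()})
 stats.update({f"goal-{k}-user": v for k, v in acc.items()})
 # stats: {'goal-acc': 0.9, 'goal-joint-acc': 0.7,
 #         'goal-acc-user': 0.9, 'goal-joint-acc-user': 0.7}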
Example #3
 def create_act_slot_tensor(self, vocab):
     # Build a padded tensor of tokenized (act, slot) name pairs,
     # ordered by their ontology index.
     act_slots = []
     for as_idx, (act, slot) in self.ontology.act_slot.i2f.items():
         tokens = (list(nltk.casual_tokenize(act)) +
                   list(nltk.casual_tokenize(slot)) + ["<eos>"])
         tokens = [vocab[token] for token in tokens]
         act_slots.append((as_idx, torch.LongTensor(tokens)))
     act_slots = sorted(act_slots, key=lambda x: x[0])
     act_slots = utils.pad_stack([act_slot[1] for act_slot in act_slots])
     return act_slots
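For context, nltk.casual_tokenize splits the act and slot names into word tokens before the <eos> marker is appended. A quick illustration with made-up act/slot names:
 import nltk
 tokens = (list(nltk.casual_tokenize("request")) +
           list(nltk.casual_tokenize("food")) + ["<eos>"])
 # tokens: ['request', 'food', '<eos>']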
Example #4
 def __call__(self, batch, pad_override=False):
     # Recursively collate a batch of samples, dispatching on the type of
     # the first element; pad_override forces padded stacking downstream.
     if not batch:
         raise ValueError("must provide at least one sample")
     sample = batch[0]
     sample_type = type(sample)
     if sample_type == dict:
         return {
             k: self([s[k] for s in batch], pad_override
                     or (k in self.pad_keys))
             for k in sample
         }
     elif sample_type == tuple:
         return tuple(
             self([s[i] for s in batch], pad_override)
             for i in range(len(sample)))
     elif sample_type == int:
         return torch.LongTensor(batch)
     elif sample_type == float:
         return torch.FloatTensor(batch)
     elif sample_type == utils.Stacked1DTensor:
         x = utils.pad_stack([t.value for t in batch])
         s = utils.pad_stack([t.lens for t in batch])
         return utils.DoublyStacked1DTensor(value=x.value,
                                            lens=s.lens,
                                            lens1=s.value)
     elif sample_type == utils.DoublyStacked1DTensor:
         x = utils.pad_stack([t.value for t in batch])
         s1 = utils.pad_stack([t.lens for t in batch])
         s2 = utils.pad_stack([t.lens1 for t in batch])
         return utils.TriplyStacked1DTensor(value=x.value,
                                            lens=s1.value,
                                            lens1=s2.value,
                                            lens2=s2.lens)
     elif isinstance(sample, torch.Tensor):
         return self.stack(batch, pad_override)
     elif isinstance(sample, np.ndarray):
         return self.stack(list(map(torch.tensor, batch)), pad_override)
     else:
         return batch
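To see the recursive dispatch in isolation, here is a stripped-down sketch covering only the dict, tuple, and scalar branches (the function name and the toy batch are made up; the project's class additionally handles the stacked-tensor types above):
 import torch
 def collate(batch):
     # Dispatch on the type of the first sample, recursing into containers.
     sample = batch[0]
     if isinstance(sample, dict):
         return {k: collate([s[k] for s in batch]) for k in sample}
     if isinstance(sample, tuple):
         return tuple(collate([s[i] for s in batch])
                      for i in range(len(sample)))
     if isinstance(sample, int):
         return torch.LongTensor(batch)
     if isinstance(sample, float):
         return torch.FloatTensor(batch)
     return batch
 out = collate([{"label": 1, "score": 0.5}, {"label": 0, "score": 0.25}])
 # out["label"]: tensor([1, 0]); out["score"]: tensor([0.5000, 0.2500])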
Example #5
 def tensorize_state_vocab(self,
                           mode="state",
                           speaker=None,
                           tensorizer=None) -> utils.Stacked1DTensor:
     if mode not in {"state", "goal", "goal_state"}:
         raise ValueError(f"unsupported mode: {mode}")
     if not self.is_initialized:
         raise RuntimeError("vocabulary unset")
     if speaker is None:
         vocab = getattr(self.vocabs, mode).asv
     else:
         vocab = getattr(self.vocabs, f"speaker_{mode}")[speaker].asv
     state = sorted(vocab.i2f.items(), key=lambda x: x[0])
     tensors = list(
         map(tensorizer or self.tensorize_asv, (v for k, v in state)))
     return utils.pad_stack(tensors)
Example #6
 def tensorize_state_dict(self, tensor=None, speaker=None, tensorizer=None):
     if tensor is None:
         tensor = self.tensorize_state_vocab("state", speaker, tensorizer)
     ont = dict()
     if speaker is None:
         vocab = self.vocabs.state.asv
     else:
         vocab = self.vocabs.speaker_state[speaker].asv
     for idx, asv in vocab.i2f.items():
         if asv == self.asv_pad:
             continue
         key = (asv.act, asv.slot)
         if key not in ont:
             ont[key] = []
         ont[key].append((torch.tensor(idx), tensor[idx]))
     return {k: (torch.stack([x[0] for x in v]),
                 utils.pad_stack([x[1] for x in v]))
             for k, v in ont.items()}
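The loop above groups, for each (act, slot) pair, the vocabulary indices and value tensors of all matching act-slot-value triples. The same grouping pattern on toy data (a sketch; setdefault replaces the explicit membership test):
 ont = {}
 triples = [("inform", "food", "thai"), ("inform", "area", "north"),
            ("inform", "food", "korean")]
 for idx, (act, slot, value) in enumerate(triples):
     ont.setdefault((act, slot), []).append(idx)
 # ont: {('inform', 'food'): [0, 2], ('inform', 'area'): [1]}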
Example #7
 def tensorize_asr(self, asr: Mapping[str, float]):
     # Tensorize an n-best ASR mapping of hypothesis sentence -> confidence.
     sents, scores = zip(*asr.items())
     return (utils.pad_stack(list(map(self.tensorize_sent, sents))),
             torch.tensor(scores))
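The zip(*asr.items()) idiom splits the mapping into parallel tuples of sentences and scores. With a hypothetical two-hypothesis input:
 import torch
 asr = {"find me a cheap restaurant": 0.8,
        "find me a cheap restroom": 0.2}
 sents, scores = zip(*asr.items())
 # sents: ('find me a cheap restaurant', 'find me a cheap restroom')
 # torch.tensor(scores): tensor([0.8000, 0.2000])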
Example #8
 def stack(tensors, pad=False):
     # Stack equal-length tensors directly, or pad-stack variable-length
     # ones when pad is set.
     if pad:
         return utils.pad_stack(tensors)
     else:
         return torch.stack(tensors)
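utils.pad_stack itself is not shown on this page. A minimal sketch of its presumed behavior, assuming it right-pads variable-length 1D tensors to a common length and records the original lengths; it is returned here as a plain pair rather than vhda's Stacked1DTensor, whose .value and .lens the examples above access:
 import torch
 def pad_stack_sketch(tensors, pad_value=0):
     # Right-pad 1D tensors to a common length and stack them,
     # keeping the original lengths alongside the padded matrix.
     lens = torch.tensor([t.size(0) for t in tensors])
     value = torch.full((len(tensors), int(lens.max())), pad_value,
                        dtype=tensors[0].dtype)
     for i, t in enumerate(tensors):
         value[i, :t.size(0)] = t
     return value, lens
 value, lens = pad_stack_sketch([torch.tensor([1, 2, 3]), torch.tensor([4])])
 # value: tensor([[1, 2, 3], [4, 0, 0]]); lens: tensor([3, 1])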