Esempio n. 1
0
    def aggregate_preds(self, new_batch, context=None):
        """Fold one batch of beam predictions into the reporter's running state.

        Pad, EOS and BOS ids are stripped from the predictions, and the
        following are handed to ``self.aggregate_data``:

        * the cleaned token ids (``all_preds``),
        * the annotation tree built from the first entry of each beam
          (``all_pred_trees``),
        * the length predictions (``all_target_length_preds``),
        * a metric node for every hypothesis of every beam
          (``all_beam_preds``),
        * the trees parsed from what ``get_top_non_invalid`` and
          ``get_top_extract`` select out of each beam's stringified
          hypotheses (``all_top_non_invalid`` / ``all_top_extract``).

        Args:
            new_batch: ``None`` (the call does nothing) or an indexable whose
                item 0 holds the predicted token ids
                (bsz X beam_size X seq_len) and whose item 1 holds the
                length predictions.
            context: Not used by this method.
        """
        if new_batch is None:
            return
        tree_preds = new_batch[0]  # bsz X beam_size X seq_len
        length_preds = new_batch[1]
        target_vocab = self.tensorizers["trg_seq_tokens"].vocab
        target_pad_token = target_vocab.get_pad_index()
        target_bos_token = target_vocab.get_bos_index()
        target_eos_token = target_vocab.get_eos_index()

        cleaned_preds = [
            self._remove_tokens(
                pred, [target_pad_token, target_eos_token, target_bos_token]
            )
            for pred in self._make_simple_list(tree_preds)
        ]
        self.aggregate_data(self.all_preds, cleaned_preds)

        # Only the first entry of each beam is turned into the "prediction" tree.
        pred_trees = [
            self.stringify_annotation_tree(beam[0], target_vocab)
            for beam in cleaned_preds
        ]

        beam_pred_trees = [
            [
                CompositionalMetricReporter.tree_to_metric_node(
                    self.stringify_annotation_tree(pred, target_vocab)
                )
                for pred in beam
            ]
            for beam in cleaned_preds
        ]

        # Stringify every hypothesis exactly once: both selections below
        # consume the same strings, so doing it per selection doubles the work.
        beam_strings = [
            [stringify(pred, target_vocab) for pred in beam]
            for beam in cleaned_preds
        ]

        # The selection helpers are not visible from here; hand the first one
        # a shallow copy so that, should it reorder or trim its argument in
        # place, the second helper still sees the untouched list.
        top_non_invalid_trees = [
            self.get_annotation_from_string(
                self.get_top_non_invalid(list(strings))
            )
            for strings in beam_strings
        ]

        top_extracted_trees = [
            self.get_annotation_from_string(self.get_top_extract(strings))
            for strings in beam_strings
        ]

        self.aggregate_data(self.all_pred_trees, pred_trees)
        self.aggregate_data(self.all_target_length_preds, length_preds)
        self.aggregate_data(self.all_beam_preds, beam_pred_trees)
        self.aggregate_data(self.all_top_non_invalid, top_non_invalid_trees)
        self.aggregate_data(self.all_top_extract, top_extracted_trees)
Esempio n. 2
0
    def aggregate_targets(self, new_batch, context=None):
        """Fold one batch of targets into the reporter's running state.

        Pad, EOS and BOS ids are stripped from the target sequences; the
        cleaned ids, the second item of the batch, and the stringified
        cleaned targets are then handed to ``self.aggregate_data`` for
        ``all_targets``, ``all_target_lens`` and ``all_target_trees``
        respectively.

        Args:
            new_batch: ``None`` (the call does nothing) or an indexable whose
                item 0 holds the target token ids and whose item 1 is
                accumulated into ``all_target_lens`` (presumably the target
                lengths -- confirm against the caller).
            context: Not used by this method.
        """
        if new_batch is None:
            return

        vocab = self.tensorizers["trg_seq_tokens"].vocab
        pad_idx = vocab.get_pad_index()
        bos_idx = vocab.get_bos_index()
        eos_idx = vocab.get_eos_index()

        # Drop the special ids from each target sequence in turn.
        stripped_targets = []
        for raw_target in self._make_simple_list(new_batch[0]):
            stripped_targets.append(
                self._remove_tokens(raw_target, [pad_idx, eos_idx, bos_idx])
            )

        self.aggregate_data(self.all_targets, stripped_targets)
        self.aggregate_data(self.all_target_lens, new_batch[1])

        target_strings = []
        for tokens in stripped_targets:
            target_strings.append(stringify(tokens, vocab))

        self.aggregate_data(self.all_target_trees, target_strings)
Esempio n. 3
0
    def aggregate_preds(self, new_batch, context=None):
        """Fold one batch of beam predictions into the reporter's running state.

        Pad, EOS and BOS ids are stripped from the predictions; the cleaned
        ids, the length predictions, and the per-beam stringified hypotheses
        are then handed to ``self.aggregate_data`` for ``all_preds``,
        ``all_target_length_preds`` and ``all_beam_preds`` respectively.

        Args:
            new_batch: ``None`` (the call does nothing) or an indexable whose
                item 0 holds the predicted token ids
                (bsz X beam_size X seq_len) and whose item 1 holds the
                length predictions.
            context: Not used by this method.
        """
        if new_batch is None:
            return

        # Item 0: bsz X beam_size X seq_len token ids; item 1: length predictions.
        beam_token_ids, predicted_lengths = new_batch[0], new_batch[1]

        vocab = self.tensorizers["trg_seq_tokens"].vocab
        pad_idx = vocab.get_pad_index()
        bos_idx = vocab.get_bos_index()
        eos_idx = vocab.get_eos_index()

        stripped_beams = []
        for raw_beam in self._make_simple_list(beam_token_ids):
            stripped_beams.append(
                self._remove_tokens(raw_beam, [pad_idx, eos_idx, bos_idx])
            )
        self.aggregate_data(self.all_preds, stripped_beams)

        # One list of strings per beam, one string per hypothesis.
        stringified_beams = []
        for beam in stripped_beams:
            hypotheses = []
            for hypothesis in beam:
                hypotheses.append(stringify(hypothesis, vocab))
            stringified_beams.append(hypotheses)

        self.aggregate_data(self.all_target_length_preds, predicted_lengths)
        self.aggregate_data(self.all_beam_preds, stringified_beams)