Example #1
0
    def forward(self, insts: Dict[str, List[Union[List[str], InternalParseNode]]], return_charts: bool = False)\
            -> Union[torch.Tensor, Tuple[List[InternalParseNode], List[np.ndarray]], List[np.ndarray]]:
        """Forward pass of the joint constituency/NER chart parser.

        Scores every candidate span under the joint, pure-parsing and NER
        label spaces, then — depending on mode — returns a training loss,
        decoded trees with scores, or the raw joint charts.

        Args:
            insts: batch dict with 'pos_tags' and 'snts'; during training it
                must also carry 'joint_gold_trees' and 'parsing_gold_trees'.
            return_charts: when True, return the per-sentence joint charts
                as numpy arrays (used for ensembling).

        Returns:
            A scalar loss tensor in training mode, a (trees, scores) tuple
            in eval mode, or a list of chart arrays when return_charts.
        """
        pos_tags, snts = insts['pos_tags'], insts['snts']
        snts_len = [len(pos_tag) for pos_tag in pos_tags]
        batch_size, seq_len = len(snts_len), max(snts_len)
        embeddings, mask = self.embeddings(pos_tags, snts)
        # The embedder adds start/stop positions, hence seq_len + 2.
        assert (batch_size,
                seq_len + 2) == embeddings.shape[0:2] == mask.shape[0:2]
        words_repr = self.encoder(embeddings, mask)
        assert (batch_size, seq_len + 1) == words_repr.shape[0:2]
        # Span (i, j) is represented by the difference of fencepost vectors.
        spans_repr = words_repr.unsqueeze(1) - words_repr.unsqueeze(
            2)  # [batch_size, seq_len+1, seq_len+1, dim]
        assert (batch_size, seq_len + 1, seq_len + 1) == spans_repr.shape[0:3]

        joint_repr = self.joint_label_classifier(spans_repr)
        parsing_repr = self.parsing_label_classifier(spans_repr)
        # NER-specific signal is modeled as the residual joint - parsing.
        ner_repr = joint_repr - parsing_repr

        joint_labels_score = self.joint_classifier(joint_repr)
        parsing_labels_score = self.parsing_classifier(parsing_repr)
        # Map fencepost span (i, j) to word-coordinate cell [i, j-1]:
        # drop the last row and the first column.
        ner_labels_score = self.ner_classifier(ner_repr[:, :-1, 1:, :])

        # Label index 0 is the implicit empty label with a fixed score of 0.
        empty_label_score = torch.zeros(
            (batch_size, seq_len + 1, seq_len + 1, 1), device=self.device)
        joint_charts = torch.cat([empty_label_score, joint_labels_score],
                                 dim=3)
        parsing_charts = torch.cat([empty_label_score, parsing_labels_score],
                                   dim=3)
        # For NER the zero-scored "no entity" class goes LAST, matching the
        # len(NER_LABELS) default index used for targets below.
        ner_labels_score = torch.cat(
            [ner_labels_score, empty_label_score[:, :-1, :-1, :]], dim=3)
        joint_charts_np = joint_charts.cpu().detach().numpy()
        parsing_charts_np = parsing_charts.cpu().detach().numpy()

        # compute loss and generate tree

        # Just return the charts, for ensembling
        if return_charts:
            ret_charts = []
            for i, snt_len in enumerate(snts_len):
                ret_charts.append(joint_charts[i, :snt_len + 1, :snt_len +
                                               1, :].cpu().numpy())
            return ret_charts

        # when model test, just return trees and scores
        if not self.training:
            trees = []
            scores = []
            for i, snt_len in enumerate(snts_len):
                chart_np = joint_charts_np[i, :snt_len + 1, :snt_len + 1, :]
                score, p_i, p_j, p_label, _ = self.parse_from_chart(
                    snt_len, chart_np, self.joint_labels_vocab)
                pos_tag, snt = pos_tags[i], snts[i]
                tree = self.generate_tree(p_i, p_j, p_label, pos_tag, snt)
                trees.append(tree)
                scores.append(score)
            return trees, scores

        # when model train, return loss
        # During training time, the forward pass needs to be computed for every
        # cell of the chart, but the backward pass only needs to be computed for
        # cells in either the predicted or the gold parse tree. It's slightly
        # faster to duplicate the forward pass for a subset of the chart than it
        # is to perform a backward pass that doesn't take advantage of sparsity.
        # Since this code is not undergoing algorithmic changes, it makes sense
        # to include the optimization even though it may only be a 10% speedup.
        # Note that no dropout occurs in the label portion of the network
        joint_golds = insts['joint_gold_trees']
        parsing_golds = insts['parsing_gold_trees']
        p_is, p_js, g_is, g_js, p_labels, g_labels, batch_ids, paugment_total_joint = [], [], [], [], [], [], [], 0.0
        p_is_parsing, p_js_parsing, g_is_parsing, g_js_parsing, p_labels_parsing, g_labels_parsing, batch_ids_parsing,\
            paugment_total_parsing = [], [], [], [], [], [], [], 0.0
        for i, snt_len in enumerate(snts_len):

            # joint parser: collect predicted and gold span indices
            chart_np = joint_charts_np[i, :snt_len + 1, :snt_len + 1, :]
            p_i, p_j, p_label, p_augment, g_i, g_j, g_label =\
                self.parse_from_chart(snt_len, chart_np, self.joint_labels_vocab, joint_golds[i])
            paugment_total_joint += p_augment
            p_is.extend(p_i.tolist())
            p_js.extend(p_j.tolist())
            p_labels.extend(p_label.tolist())
            g_is.extend(g_i.tolist())
            g_js.extend(g_j.tolist())
            g_labels.extend(g_label.tolist())
            batch_ids.extend([i] * len(p_i))

            # parsing parser: same bookkeeping on the pure-parsing chart
            chart_np = parsing_charts_np[i, :snt_len + 1, :snt_len + 1, :]
            p_i, p_j, p_label, p_augment, g_i, g_j, g_label =\
                self.parse_from_chart(snt_len, chart_np, self.parsing_labels_vocab, parsing_golds[i])
            paugment_total_parsing += p_augment
            p_is_parsing.extend(p_i.tolist())
            p_js_parsing.extend(p_j.tolist())
            p_labels_parsing.extend(p_label.tolist())
            g_is_parsing.extend(g_i.tolist())
            g_js_parsing.extend(g_j.tolist())
            g_labels_parsing.extend(g_label.tolist())
            batch_ids_parsing.extend([i] * len(p_i))

        # Margin losses: predicted score - gold score + augment penalty.
        # Advanced indexing gathers exactly the tree cells collected above.
        p_scores_joint = torch.sum(joint_charts[batch_ids, p_is, p_js,
                                                p_labels])
        g_scores_joint = torch.sum(joint_charts[batch_ids, g_is, g_js,
                                                g_labels])
        loss_joint = p_scores_joint - g_scores_joint + paugment_total_joint

        p_scores_parsing = torch.sum(parsing_charts[batch_ids_parsing,
                                                    p_is_parsing, p_js_parsing,
                                                    p_labels_parsing])
        g_scores_parsing = torch.sum(parsing_charts[batch_ids_parsing,
                                                    g_is_parsing, g_js_parsing,
                                                    g_labels_parsing])
        loss_parsing = p_scores_parsing - g_scores_parsing + paugment_total_parsing

        # NER loss: cross entropy over the masked set of valid word spans.
        # Row i of a sentence's mask enables columns i..snt_len-1, i.e. all
        # spans starting at word i.
        spans_mask = [[[0] * i + [1] * (snt_len - i) + [0] *
                       (seq_len - snt_len) if i < snt_len else [0] * seq_len
                       for i in range(seq_len)] for snt_len in snts_len]
        # np.bool / np.int were deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin bool/int are what those aliases resolved to.
        spans_mask = np.array(spans_mask, dtype=bool)
        spans_label_idx = []
        for idx, gold_tree in enumerate(joint_golds):
            # Default every span to the "no entity" index len(NER_LABELS).
            label_idx_np = np.full((snts_len[idx], snts_len[idx]),
                                   len(NER_LABELS),
                                   dtype=int)
            ner_i, ner_j, ner_label = self.generate_ner_spans(gold_tree)
            for label_idx, start_i, end_j in zip(ner_label, ner_i, ner_j):
                # end_j is an exclusive fencepost index, hence the -1 shift.
                label_idx_np[start_i, end_j - 1] = label_idx
                spans_mask[idx, start_i, end_j - 1] = True
            spans_label_idx.extend(label_idx_np[spans_mask[
                idx, :snts_len[idx], :snts_len[idx]]].tolist())
        assert np.sum(spans_mask) == len(spans_label_idx)

        target = torch.tensor(spans_label_idx,
                              dtype=torch.long,
                              device=self.device)
        spans_mask_tensor = torch.tensor(spans_mask,
                                         dtype=torch.bool,
                                         device=self.device).unsqueeze(3)
        ner_loss = self.criterion_ner(
            torch.masked_select(ner_labels_score, spans_mask_tensor).view(
                -1,
                len(NER_LABELS) + 1), target)

        # Combine: joint loss plus scaled parsing and NER terms.
        loss = loss_joint + self.lambda_scaler * (
            (self.alpha_scaler + 1.) * loss_parsing +
            (1. - self.alpha_scaler) * ner_loss)
        return loss
Example #2
0
    def forward(self, insts: Dict[str, List[Union[List[str], InternalParseNode]]], return_charts: bool = False)\
            -> Union[torch.Tensor, Tuple[List[InternalParseNode], List[np.ndarray]], List[np.ndarray]]:
        """Forward pass of the joint constituency/NER chart parser.

        Scores every candidate span under three label spaces (joint,
        pure parsing, NER) and, depending on the mode, returns a training
        loss, decoded trees with scores, or the raw joint charts.

        Args:
            insts: input insts, including 'pos_tags', 'snts' and, during
                training, 'joint_gold_trees' and 'parsing_gold_trees'.
            return_charts: when True, return per-sentence joint charts as
                numpy arrays (used for ensembling).

        Returns:
            A scalar loss tensor in training mode, a (trees, scores) tuple
            in eval mode, or a list of chart arrays when return_charts.
        """
        pos_tags, snts = insts['pos_tags'], insts['snts']
        snts_len = [len(pos_tag) for pos_tag in pos_tags]
        batch_size, seq_len = len(snts_len), max(snts_len)
        embeddings, mask = self.embeddings(pos_tags, snts)
        # The embedder adds start/stop positions, hence seq_len + 2.
        assert (batch_size,
                seq_len + 2) == embeddings.shape[0:2] == mask.shape[0:2]
        words_repr = self.encoder(embeddings, mask)
        assert (batch_size, seq_len + 1) == words_repr.shape[0:2]
        # Span (i, j) is represented by the difference of fencepost vectors.
        spans_repr = words_repr.unsqueeze(1) - words_repr.unsqueeze(
            2)  # [batch_size, seq_len+1, seq_len+1, dim]
        assert (batch_size, seq_len + 1, seq_len + 1) == spans_repr.shape[0:3]

        # Three independent heads over the shared span representations.
        joint_labels_score = self.joint_label_classifier(spans_repr)
        parsing_labels_score = self.parsing_label_classifier(spans_repr)
        ner_labels_score = self.ner_label_classifier(spans_repr)

        # Label index 0 is the implicit empty label with a fixed score of 0.
        empty_label_score = torch.zeros(
            (batch_size, seq_len + 1, seq_len + 1, 1), device=self.device)
        joint_charts = torch.cat([empty_label_score, joint_labels_score],
                                 dim=3)
        parsing_charts = torch.cat([empty_label_score, parsing_labels_score],
                                   dim=3)
        # For NER the zero-scored "no entity" class is appended LAST, matching
        # the len(NER_LABELS) convention (a separate constant-0 tensor is
        # rebuilt here; it is identical in value to empty_label_score above).
        empty_label_score = torch.full(
            (batch_size, seq_len + 1, seq_len + 1, 1), 0., device=self.device)
        ner_labels_score = torch.cat([ner_labels_score, empty_label_score],
                                     dim=3)
        # Detached numpy copies feed the CPU-side chart decoder.
        joint_charts_np = joint_charts.cpu().detach().numpy()
        parsing_charts_np = parsing_charts.cpu().detach().numpy()

        # compute loss and generate tree

        # Just return the charts, for ensembling
        if return_charts:
            ret_charts = []
            for i, snt_len in enumerate(snts_len):
                ret_charts.append(joint_charts[i, :snt_len + 1, :snt_len +
                                               1, :].cpu().numpy())
            return ret_charts

        # when model test, just return trees and scores
        if not self.training:
            trees = []
            scores = []
            for i, snt_len in enumerate(snts_len):
                # Trim the padded chart to this sentence's size before decoding.
                chart_np = joint_charts_np[i, :snt_len + 1, :snt_len + 1, :]
                score, p_i, p_j, p_label, _ = self.parse_from_chart(
                    snt_len, chart_np, self.joint_labels_vocab)
                pos_tag, snt = pos_tags[i], snts[i]
                tree = self.generate_tree(p_i, p_j, p_label, pos_tag, snt)
                trees.append(tree)
                scores.append(score)
            return trees, scores

        # when model train, return loss
        # During training time, the forward pass needs to be computed for every
        # cell of the chart, but the backward pass only needs to be computed for
        # cells in either the predicted or the gold parse tree. It's slightly
        # faster to duplicate the forward pass for a subset of the chart than it
        # is to perform a backward pass that doesn't take advantage of sparsity.
        # Since this code is not undergoing algorithmic changes, it makes sense
        # to include the optimization even though it may only be a 10% speedup.
        # Note that no dropout occurs in the label portion of the network
        # cross_loss = torch.tensor(0., device=self.device)
        joint_golds = insts['joint_gold_trees']
        parsing_golds = insts['parsing_gold_trees']
        # Flat index accumulators for batched advanced indexing below.
        p_is, p_js, g_is, g_js, p_labels, g_labels, batch_ids, paugment_total_joint = [], [], [], [], [], [], [], 0.0
        p_is_parsing, p_js_parsing, g_is_parsing, g_js_parsing, p_labels_parsing, g_labels_parsing, batch_ids_parsing,\
            paugment_total_parsing = [], [], [], [], [], [], [], 0.0
        ner_is, ner_js, ner_labels, ner_batch_ids = [], [], [], []
        for i, snt_len in enumerate(snts_len):

            # joint parser: collect predicted and gold span indices
            chart_np = joint_charts_np[i, :snt_len + 1, :snt_len + 1, :]
            p_i, p_j, p_label, p_augment, g_i, g_j, g_label =\
                self.parse_from_chart(snt_len, chart_np, self.joint_labels_vocab, joint_golds[i])
            paugment_total_joint += p_augment
            p_is.extend(p_i.tolist())
            p_js.extend(p_j.tolist())
            p_labels.extend(p_label.tolist())
            g_is.extend(g_i.tolist())
            g_js.extend(g_j.tolist())
            g_labels.extend(g_label.tolist())
            batch_ids.extend([i for _ in range(len(p_i))])

            # parsing parser: same bookkeeping on the pure-parsing chart
            chart_np = parsing_charts_np[i, :snt_len + 1, :snt_len + 1, :]
            p_i, p_j, p_label, p_augment, g_i, g_j, g_label =\
                self.parse_from_chart(snt_len, chart_np, self.parsing_labels_vocab, parsing_golds[i])
            paugment_total_parsing += p_augment
            p_is_parsing.extend(p_i.tolist())
            p_js_parsing.extend(p_j.tolist())
            p_labels_parsing.extend(p_label.tolist())
            g_is_parsing.extend(g_i.tolist())
            g_js_parsing.extend(g_j.tolist())
            g_labels_parsing.extend(g_label.tolist())
            batch_ids_parsing.extend([i for _ in range(len(p_i))])

            # cross loss
            # cross_spans = self.generate_cross_label_spans(golds[i])
            # for constit, constit_gold, ner, ner_gold, span_start, span_end in cross_spans:
            #     constit_idx = self.cross_label_idx[constit]
            #     ner_idx = self.cross_label_idx[ner]
            #     cross_constit_loss = self.log_softmax(charts[i, span_start, span_end, constit_idx])[constit_gold]
            #     cross_ner_loss = self.log_softmax(charts[i, span_start, span_end, ner_idx])[ner_gold]
            #     cross_loss = cross_loss - cross_constit_loss - cross_ner_loss

            # ner idx: gold NER spans extracted from the joint gold tree
            ner_i, ner_j, ner_label = self.generate_ner_spans(joint_golds[i])
            ner_is.extend(ner_i)
            ner_js.extend(ner_j)
            ner_labels.extend(ner_label)
            ner_batch_ids.extend([i for _ in range(len(ner_i))])

        # Margin losses: predicted score - gold score + augment penalty.
        p_scores_joint = torch.sum(joint_charts[batch_ids, p_is, p_js,
                                                p_labels])
        g_scores_joint = torch.sum(joint_charts[batch_ids, g_is, g_js,
                                                g_labels])
        loss_joint = p_scores_joint - g_scores_joint + paugment_total_joint

        p_scores_parsing = torch.sum(parsing_charts[batch_ids_parsing,
                                                    p_is_parsing, p_js_parsing,
                                                    p_labels_parsing])
        g_scores_parsing = torch.sum(parsing_charts[batch_ids_parsing,
                                                    g_is_parsing, g_js_parsing,
                                                    g_labels_parsing])
        loss_parsing = p_scores_parsing - g_scores_parsing + paugment_total_parsing

        # NER loss: cross entropy only on the gold entity spans gathered
        # above (advanced indexing picks one score vector per gold span).
        ner_score: torch.Tensor = ner_labels_score[ner_batch_ids, ner_is,
                                                   ner_js, :]
        assert ner_score.shape[0] == len(ner_labels)
        # ner_loss = torch.sum(torch.log2(self.softmax(ner_score)[[i for i in range(len(ner_labels))], ner_labels]))
        ner_loss = self.criterion_ner(
            ner_score,
            torch.tensor(ner_labels, dtype=torch.long, device=self.device))

        # Combine: joint loss plus an alpha-interpolated mix of the
        # parsing and NER objectives, scaled by lambda.
        loss = loss_joint + self.lambda_scaler * (
            self.alpha_scaler * loss_parsing +
            (1. - self.alpha_scaler) * ner_loss)
        return loss
Example #3
0
    def forward(self, insts: Dict[str, List[Union[List[str], InternalParseNode]]], return_charts: bool = False)\
            -> Union[torch.Tensor, Tuple[List[InternalParseNode], List[np.ndarray]], List[np.ndarray]]:
        """Run the chart parser on a batch of instances.

        Args:
            insts: batch dict with 'pos_tags', 'snts' and, during training,
                'gold_trees'.
            return_charts: if True, return the raw per-sentence label
                charts (numpy arrays) for ensembling.

        Returns:
            A scalar loss tensor in training mode, a (trees, scores) tuple
            in eval mode, or a list of chart arrays when return_charts.
        """
        pos_tags, snts = insts['pos_tags'], insts['snts']
        snts_len = [len(tags) for tags in pos_tags]
        batch_size = len(snts_len)
        seq_len = max(snts_len)

        embeddings, mask = self.embeddings(pos_tags, snts)
        assert (batch_size,
                seq_len + 2) == embeddings.shape[0:2] == mask.shape[0:2]
        words_repr = self.encoder(embeddings, mask)
        assert (batch_size, seq_len + 1) == words_repr.shape[0:2]
        # Each span (i, j) is scored from the difference of its fencepost
        # vectors; shape [batch_size, seq_len+1, seq_len+1, dim].
        spans_repr = words_repr.unsqueeze(1) - words_repr.unsqueeze(2)
        assert (batch_size, seq_len + 1, seq_len + 1) == spans_repr.shape[0:3]

        labels_score = self.label_classifier(spans_repr)
        # Prepend a constant zero channel for the implicit empty label.
        zero_scores = torch.zeros((batch_size, seq_len + 1, seq_len + 1, 1),
                                  device=self.device)
        charts = torch.cat([zero_scores, labels_score], dim=3)
        charts_np = charts.cpu().detach().numpy()

        # Charts-only mode, used for ensembling.
        if return_charts:
            return [
                charts[i, :length + 1, :length + 1, :].cpu().numpy()
                for i, length in enumerate(snts_len)
            ]

        # Evaluation mode: decode each sentence's trimmed chart.
        if not self.training:
            trees, scores = [], []
            for i, length in enumerate(snts_len):
                chart_np = charts_np[i, :length + 1, :length + 1, :]
                score, p_i, p_j, p_label, _ = self.parse_from_chart(
                    length, chart_np)
                trees.append(
                    self.generate_tree(p_i, p_j, p_label, pos_tags[i],
                                       snts[i]))
                scores.append(score)
            return trees, scores

        # Training mode: margin loss between predicted and gold trees.
        # The forward pass covers every chart cell, but gradients only need
        # to flow through cells on the predicted or gold tree, so the score
        # sums below gather exactly those cells via advanced indexing.
        golds = insts['gold_trees']
        p_is, p_js, p_labels = [], [], []
        g_is, g_js, g_labels = [], [], []
        batch_ids = []
        paugment_total = 0.0
        for i, length in enumerate(snts_len):
            chart_np = charts_np[i, :length + 1, :length + 1, :]
            p_i, p_j, p_label, p_augment, g_i, g_j, g_label = self.parse_from_chart(
                length, chart_np, golds[i])
            paugment_total += p_augment
            p_is.extend(p_i.tolist())
            p_js.extend(p_j.tolist())
            p_labels.extend(p_label.tolist())
            g_is.extend(g_i.tolist())
            g_js.extend(g_j.tolist())
            g_labels.extend(g_label.tolist())
            batch_ids.extend([i] * len(p_i))

        p_scores = torch.sum(charts[batch_ids, p_is, p_js, p_labels])
        g_scores = torch.sum(charts[batch_ids, g_is, g_js, g_labels])
        return p_scores - g_scores + paugment_total