Example #1
0
    def test_breakups_extraction_2(self):
        """Check BACKWARD breakups computed for the ``doc_with_1_sent`` fixture."""
        expected = [
            False, True, True, False, False,  # tokens 0-4
            False, False, False, True, True,  # tokens 5-9
            False, False, False, False, True,  # tokens 10-14
        ]

        actual = get_dt_breakups_feature(
            self.doc_with_1_sent, Direction.BACKWARD)

        self.assertEqual(actual, expected)
Example #2
0
    def test_breakups_extraction_3(self):
        """Check FORWARD breakups computed for the ``doc_with_2_sent`` fixture.

        The expected flags are written out per sentence and then concatenated
        into the single flat list the feature extractor is compared against.
        """
        first_sentence = [
            True, False, False, False, True,  # tokens 0-4
            True, True, True, False, False,  # tokens 5-9
            True, True, True, True, False,  # tokens 10-14
        ]
        second_sentence = [
            True, True, True, True, False,  # tokens 0-4
            True, False, True, True, True,  # tokens 5-9
            True, False, False,  # tokens 10-12
        ]
        expected = [*first_sentence, *second_sentence]

        actual = get_dt_breakups_feature(
            self.doc_with_2_sent, Direction.FORWARD)

        self.assertEqual(actual, expected)
Example #3
0
    def _get_features(self, doc: Document) -> dict:
        """Collect the token-level features for ``doc``.

        Dependency-tree features ("dt_depths" plus "dt_breakups_*" and
        "dt_deltas_*" for each direction) are added only when
        ``doc.token_features`` contains "dt_head_distances"; otherwise a
        warning is issued and they are left out. The "borders" feature and the
        morphological features selected by ``self.morph_features`` are always
        added.

        Returns:
            A dict mapping feature names to the values produced by the
            corresponding feature helpers.
        """
        features = {}

        if "dt_head_distances" in doc.token_features:
            # Depths are computed once and reused for the delta features.
            depths = get_dt_depths_feature(doc)
            features["dt_depths"] = depths

            for direction in (Direction.FORWARD, Direction.BACKWARD):
                suffix = direction.value
                features["dt_breakups_" + suffix] = get_dt_breakups_feature(
                    doc, direction=direction)
                features["dt_deltas_" + suffix] = get_dt_deltas_feature(
                    doc, direction=direction, precomputed_depths=depths)
        else:
            warn(
                "SyntacticFeatureComputer was called on doc without dependency tree, some features won't be included"
            )

        features["borders"] = get_sentence_borders_feature(doc)

        morph = get_morph_features(doc, self.morph_features)
        features.update(morph)

        return features