def test_breakups_extraction_2(self):
    # Breakups feature computed in the BACKWARD direction for the
    # single-sentence fixture must equal the flag list below.
    expected_flags = [
        False, True, True, False, False,
        False, False, False, True, True,
        False, False, False, False, True,
    ]
    actual_flags = get_dt_breakups_feature(
        self.doc_with_1_sent, Direction.BACKWARD)
    self.assertEqual(actual_flags, expected_flags)
def test_breakups_extraction_3(self):
    # Breakups feature computed in the FORWARD direction for the
    # two-sentence fixture: the expected value is the flags of the first
    # sentence followed by the flags of the second one.
    first_sentence_flags = [
        True, False, False, False, True,
        True, True, True, False, False,
        True, True, True, True, False,
    ]
    second_sentence_flags = [
        True, True, True, True, False,
        True, False, True, True, True,
        True, False, False,
    ]
    expected_flags = [*first_sentence_flags, *second_sentence_flags]
    actual_flags = get_dt_breakups_feature(
        self.doc_with_2_sent, Direction.FORWARD)
    self.assertEqual(actual_flags, expected_flags)
def _get_features(self, doc: Document) -> dict:
    """Collect the token features this computer adds for ``doc``.

    Dependency-tree features ("dt_depths", "dt_breakups_<direction>",
    "dt_deltas_<direction>") are produced only when ``doc.token_features``
    contains "dt_head_distances"; otherwise a warning is issued and they
    are skipped. The "borders" feature and the morphological features
    selected by ``self.morph_features`` are added in both cases.

    :param doc: document to compute the features for
    :return: dict mapping feature names to the computed values
    """
    features = {}

    if "dt_head_distances" in doc.token_features:
        # Depths are computed once and handed to the deltas helper for
        # both directions.
        depths = get_dt_depths_feature(doc)
        features["dt_depths"] = depths

        for direction in (Direction.FORWARD, Direction.BACKWARD):
            suffix = direction.value
            features["dt_breakups_" + suffix] = get_dt_breakups_feature(
                doc, direction=direction)
            features["dt_deltas_" + suffix] = get_dt_deltas_feature(
                doc, direction=direction, precomputed_depths=depths)
    else:
        warn(
            "SyntacticFeatureComputer was called on doc without dependency tree, some features won't be included"
        )

    features["borders"] = get_sentence_borders_feature(doc)

    morph = get_morph_features(doc, self.morph_features)
    features.update(morph)
    return features