def test_fallback_labeling(self): file = "res/TIGER/tiger21/tigertraindev_root_attach.export" corpus = np.sentence_names_to_hybridtrees( [str(x) for x in range(50) if x % 10 > 1], file, disconnect_punctuation=False) labeling = tl.FrequencyBiasedTerminalLabeling(tl.FormTerminals(), tl.PosTerminals(), corpus=corpus, threshold=2) print(labeling.fine_label_count) token1 = mt.ConstituentTerminal('Milliardär', 'NN') token2 = mt.ConstituentTerminal('Tisch', 'NN') label1 = labeling.token_label(token1) label2 = labeling.token_label(token2) f = io.StringIO() json.dump(labeling.serialize(), f) f.seek(0) print(f.getvalue()) instance2 = tl.deserialize_labeling(json.load(f)) self.assertTrue(isinstance(instance2, labeling.__class__)) self.assertEqual(label1, instance2.token_label(token1)) self.assertEqual(label2, instance2.token_label(token2))
def test_composition_labeling(self): complex = tl.CompositionalTerminalLabeling(tl.FormTerminals(), tl.PosTerminals(), binding_string='/') token = mt.ConstituentTerminal('Tisch', 'NN') label = complex.token_label(token) print(token, label) serialization = complex.serialize() print(serialization) instance2 = tl.deserialize_labeling(serialization) self.assertTrue(isinstance(instance2, complex.__class__)) self.assertEqual(label, instance2.token_label(token))