Exemple #1
0
 def setUp(self):
     """Prepare dev instances and a trained segmentation classifier.

     Training features are built lazily from ``train.json`` and handed to
     the classifier's ``train`` method; dev features from ``dev.json`` are
     materialized into a list stored on the test case.
     """
     extractor = BaselineSegmentationFeatureExtractor()
     train_path = os.path.join(SENTENCE_SPLIT_DIR, "train.json")
     dev_path = os.path.join(SENTENCE_SPLIT_DIR, "dev.json")

     # Generator expression: no features are extracted until it is iterated.
     train_features = (
         extractor.extract_features(raw)
         for raw in load_segmentation_instances(train_path)
     )
     self.segmentation_dev_instances = [
         extractor.extract_features(raw)
         for raw in load_segmentation_instances(dev_path)
     ]

     # NOTE(review): 2.0 is presumably a smoothing value -- confirm against
     # NaiveBayesClassifier's constructor.
     classifier = NaiveBayesClassifier(2.0)
     self.segmentation_classifier = classifier
     classifier.train(train_features)
Exemple #2
0
 def setUp(self) -> None:
     """Create an ``InstanceCounter`` and feed it the dev-set instances.

     Each instance loaded from ``dev.json`` is passed through a
     ``SegmentationTestFeatureExtractor`` before being counted.
     """
     counter = InstanceCounter()
     self.inst_counter = counter
     extractor = SegmentationTestFeatureExtractor()

     dev_path = os.path.join(SENTENCE_SPLIT_DIR, "dev.json")
     # Lazily extracted; the counter receives a generator, not a list.
     dev_features = (
         extractor.extract_features(raw)
         for raw in load_segmentation_instances(dev_path)
     )
     counter.count_instances(dev_features)
Exemple #3
0
    def test_tuned_segmentation(self):
        """Train with the tuned extractor's ``k`` and print the dev report.

        Asserts only that the tuned extractor exposes a non-``None`` ``k``;
        the classification report is printed rather than checked.
        """
        extractor = TunedSegmentationFeatureExtractor()
        self.assertIsNotNone(extractor.k)

        train_path = os.path.join(SENTENCE_SPLIT_DIR, "train.json")
        dev_path = os.path.join(SENTENCE_SPLIT_DIR, "dev.json")

        # Training features stay lazy; dev features are materialized.
        train_features = (
            extractor.extract_features(raw)
            for raw in load_segmentation_instances(train_path)
        )
        self.segmentation_dev_instances = [
            extractor.extract_features(raw)
            for raw in load_segmentation_instances(dev_path)
        ]

        classifier = NaiveBayesClassifier(extractor.k)
        self.segmentation_classifier = classifier
        classifier.train(train_features)

        predicted, expected = classifier.test(self.segmentation_dev_instances)
        # Only the report is used; the other four values are unpacked so the
        # five-element arity of the result is still enforced.
        _acc, _prec, _rec, _f1, report = classification_report(
            predicted, expected, "y")

        print(f"Tuned segmentation performance for k of {extractor.k}:")
        print(report)
Exemple #4
0
    def test_sentence_split_feature_extractor(self):
        """Check the baseline segmentation extractor on the dev instances.

        The first instance is inspected in detail (types, label, and exact
        feature set); every remaining instance only has its label checked
        against the allowed set.
        """
        allowed_labels = frozenset(("y", "n"))
        extractor = BaselineSegmentationFeatureExtractor()
        dev_path = os.path.join(SENTENCE_SPLIT_DIR, "dev.json")
        raw_instances = load_segmentation_instances(dev_path)

        # next() consumes the first instance, so the loop below starts at
        # the second one.
        first = extractor.extract_features(next(raw_instances))
        self.assertEqual(ClassificationInstance, type(first))
        self.assertEqual(list, type(first.features))
        # With this argument order the check is 3 >= len(features), i.e.
        # the instance has at most three features.
        self.assertGreaterEqual(3, len(first.features))
        self.assertEqual(str, type(first.features[0]))
        self.assertEqual("n", first.label)
        expected_features = {
            "split_tok=.",
            "left_tok=D",
            "right_tok=Forrester",
        }
        self.assertSetEqual(expected_features, set(first.features))

        for raw in raw_instances:
            extracted = extractor.extract_features(raw)
            self.assertIn(extracted.label, allowed_labels)