Example #1
0
    def test_all_and_no_boundaries(self):
        '''
        All segments versus no segments should produce 1.0, whichever
        segmentation is passed first.
        '''

        all_segments = list(range(1, 14))
        no_segments = [1] * 13
        expected = Decimal('1.0')
        self.assertEqual(pk(all_segments, no_segments, **self.kwargs),
                         expected)
        self.assertEqual(pk(no_segments, all_segments, **self.kwargs),
                         expected)
Example #2
0
    def test_all_and_no_boundaries(self):
        '''
        Check that all segments versus no segments yields 1.0 in both
        argument orders.
        '''

        segmented_everywhere = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
        segmented_nowhere = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        argument_orders = ((segmented_everywhere, segmented_nowhere),
                           (segmented_nowhere, segmented_everywhere))
        for first, second in argument_orders:
            self.assertEqual(pk(first, second, **self.kwargs),
                             Decimal('1.0'))
Example #3
0
    def test_no_boundaries(self):
        '''
        Compare no segments with some segments: 1.0 is expected when the
        unsegmented sequence is passed second, and 0.364 when it is
        passed first.
        '''

        unsegmented = [1] * 13
        segmented = [1] * 4 + [2] * 4 + [3] * 5
        unsegmented_second = pk(segmented, unsegmented, **self.kwargs)
        self.assertEqual(unsegmented_second, Decimal('1.0'))
        unsegmented_first = pk(unsegmented, segmented, **self.kwargs)
        self.assertEqual(unsegmented_first,
                         Decimal('0.3636363636363636363636363636'))
Example #4
0
    def test_extra_boundary(self):
        '''
        One segmentation contains a segment the other lacks (1 of 3 is
        non-existent); 0.091 is expected in both argument orders.
        '''

        three_segments = [1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]
        four_segments = [1, 1, 1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4]
        expected = Decimal('0.09090909090909090909090909091')
        self.assertEqual(pk(three_segments, four_segments, **self.kwargs),
                         expected)
        self.assertEqual(pk(four_segments, three_segments, **self.kwargs),
                         expected)
Example #5
0
    def test_extra_boundary(self):
        '''
        Check that an extra, non-existent segment (1 of 3) produces 0.091
        regardless of which segmentation is passed first.
        '''

        without_extra = [1] * 5 + [2] * 3 + [3] * 5
        with_extra = [1] * 5 + [2] + [3] * 2 + [4] * 5
        for first, second in ((without_extra, with_extra),
                              (with_extra, without_extra)):
            self.assertEqual(pk(first, second, **self.kwargs),
                             Decimal('0.09090909090909090909090909091'))
Example #6
0
    def test_identical(self):
        '''
        Identical segmentations should produce 0.0, and 1.0 once the
        one_minus option is switched on.
        '''

        segmentation = [1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]
        same_again = list(segmentation)
        # Copy of the class-level kwargs with one_minus enabled.
        inverted_kwargs = dict(TestPk.kwargs, one_minus=True)
        self.assertEqual(pk(segmentation, same_again, **self.kwargs),
                         Decimal('0.0'))
        self.assertEqual(pk(segmentation, same_again, **inverted_kwargs),
                         Decimal('1.0'))
Example #7
0
    def test_no_boundaries(self):
        '''
        No segments versus some segments: expect 1.0 with the segmented
        sequence passed first and 0.364 with it passed second.
        '''

        no_segments = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        some_segments = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3]
        expected_segmented_first = Decimal('1.0')
        expected_segmented_second = Decimal('0.3636363636363636363636363636')
        self.assertEqual(pk(some_segments, no_segments, **self.kwargs),
                         expected_segmented_first)
        self.assertEqual(pk(no_segments, some_segments, **self.kwargs),
                         expected_segmented_second)
Example #8
0
    def test_identical(self):
        '''
        Check that two identical segmentations give 0.0, or 1.0 when the
        one_minus option is enabled.
        '''

        first = [1] * 5 + [2] * 3 + [3] * 5
        second = [1] * 5 + [2] * 3 + [3] * 5
        # Start from the class-level kwargs and enable one_minus on a copy.
        complemented = dict(TestPk.kwargs)
        complemented.update(one_minus=True)
        plain_result = pk(first, second, **self.kwargs)
        self.assertEqual(plain_result, Decimal('0.0'))
        complemented_result = pk(first, second, **complemented)
        self.assertEqual(complemented_result, Decimal('1.0'))
Example #9
0
    def test_full_miss_and_misaligned(self):
        '''
        A full miss plus a translated boundary (out of 4) should produce
        0.273 in both argument orders.
        '''

        three_segments = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3]
        four_segments = [1, 1, 1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4]
        expected = Decimal('0.2727272727272727272727272727')
        self.assertEqual(pk(three_segments, four_segments, **self.kwargs),
                         expected)
        self.assertEqual(pk(four_segments, three_segments, **self.kwargs),
                         expected)
Example #10
0
    def test_all_boundaries(self):
        '''
        All segments versus some segments should produce 7/11 = 0.636
        erroneous windows, in either argument order.
        '''

        all_segments = list(range(1, 14))
        some_segments = [1] * 4 + [2] * 4 + [3] * 5
        expected = Decimal('0.6363636363636363636363636364')
        self.assertEqual(pk(all_segments, some_segments, **self.kwargs),
                         expected)
        self.assertEqual(pk(some_segments, all_segments, **self.kwargs),
                         expected)
Example #11
0
    def test_full_miss_and_misaligned(self):
        '''
        Check that a full miss combined with a translated boundary out of
        4 yields 0.273, whichever segmentation comes first.
        '''

        coarse = [1] * 4 + [2] * 4 + [3] * 5
        fine = [1] * 5 + [2] + [3] * 2 + [4] * 5
        for first, second in ((coarse, fine), (fine, coarse)):
            self.assertEqual(pk(first, second, **self.kwargs),
                             Decimal('0.2727272727272727272727272727'))
Example #12
0
    def test_all_boundaries(self):
        '''
        Check that all segments versus some segments gives 7/11 = 0.636
        erroneous windows in both argument orders.
        '''

        every_segment = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
        few_segments = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3]
        for first, second in ((every_segment, few_segments),
                              (few_segments, every_segment)):
            self.assertEqual(pk(first, second, **self.kwargs),
                             Decimal('0.6363636363636363636363636364'))
Example #13
0
    def test_translated_boundary(self):
        '''
        When 2/3 of the segments take part in a mis-alignment, 0.182 is
        expected in both argument orders.
        '''

        later_boundary = [1] * 5 + [2] * 3 + [3] * 5
        earlier_boundary = [1] * 4 + [2] * 4 + [3] * 5
        expected = Decimal('0.1818181818181818181818181818')
        self.assertEqual(
            pk(later_boundary, earlier_boundary, **self.kwargs), expected)
        self.assertEqual(
            pk(earlier_boundary, later_boundary, **self.kwargs), expected)
Example #14
0
    def test_translated_boundary(self):
        '''
        Check that a mis-alignment involving 2/3 of the segments produces
        0.182 regardless of argument order.
        '''

        first_variant = [1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]
        second_variant = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3]
        for left, right in ((first_variant, second_variant),
                            (second_variant, first_variant)):
            self.assertEqual(pk(left, right, **self.kwargs),
                             Decimal('0.1818181818181818181818181818'))
Example #15
0
 def test_long_format(self):
     '''
     Check pk on two longer segmentations, given as tuples, against a
     precomputed value.
     '''
     hyp = (2, 31, 4, 1, 1, 3, 11, 5, 21, 4, 2, 1, 17, 26, 16, 1, 17, 4, 3,
            7, 7, 6, 12, 1, 6, 25, 2, 4, 3, 16, 8)
     ref = (2, 36, 1, 3, 10, 1, 5, 21, 4, 3, 59, 8, 10, 4, 3, 7, 13, 12, 7,
            27, 4, 3, 24)
     expected = Decimal('0.1532567049808429118773946360')
     self.assertAlmostEqual(pk(hyp, ref), expected)
Example #16
0
 def test_large_disagreement(self):
     '''
     Calculate mean permuted pairwise Pk on a theoretical dataset
     containing large disagreement and compare its summary with the
     expected tuple.
     '''
     summary = summarize(pk(LARGE_DISAGREEMENT))
     # NOTE: assertAlmostEqual short-circuits on ==; tuples cannot be
     # subtracted, so this is effectively an exact comparison.
     self.assertAlmostEqual(summary, (1.0, 0.0, 0.0, 0.0, 8))
Example #17
0
 def test_dataset_kwargs(self):
     '''
     Calculate mean permuted pairwise Pk on a theoretical dataset
     containing complete agreement, passing the dataset by keyword.
     '''
     summary = summarize(pk(dataset=COMPLETE_AGREEMENT))
     # NOTE: assertAlmostEqual short-circuits on ==; tuples cannot be
     # subtracted, so this is effectively an exact comparison.
     self.assertAlmostEqual(summary, (0.0, 0.0, 0.0, 0.0, 48))
Example #18
0
 def test_long_format(self):
     '''
     Compare pk for a pair of long segmentations (tuples of integers)
     with a precomputed value.
     '''
     hypothesis = (
         2, 31, 4, 1, 1, 3, 11, 5, 21, 4, 2, 1, 17, 26, 16, 1,
         17, 4, 3, 7, 7, 6, 12, 1, 6, 25, 2, 4, 3, 16, 8,
     )
     reference = (
         2, 36, 1, 3, 10, 1, 5, 21, 4, 3, 59, 8,
         10, 4, 3, 7, 13, 12, 7, 27, 4, 3, 24,
     )
     score = pk(hypothesis, reference)
     self.assertAlmostEqual(
         score, Decimal('0.1532567049808429118773946360'))
Example #19
0
 def test_boundary_format_nltk(self):
     '''
     Check pk on two boundary strings using the nltk boundary format
     and a window size of 2.
     '''
     first, second = '0100100000', '0101000000'
     result = pk(first, second, window_size=2,
                 boundary_format=BoundaryFormat.nltk)
     self.assertAlmostEqual(Decimal('0.2222222'), result)
Example #20
0
    def test_parts(self):
        '''
        With return_parts enabled, all segments versus some segments
        should return the tuple (7, 11).
        '''

        all_segments = list(range(1, 14))
        some_segments = [1] * 4 + [2] * 4 + [3] * 5
        # Copy the shared kwargs so the class-level dict is not modified.
        parts_kwargs = dict(self.kwargs, return_parts=True)
        parts = pk(all_segments, some_segments, **parts_kwargs)
        self.assertEqual(parts, (7, 11))
Example #21
0
 def test_boundary_format_nltk(self):
     '''
     Test the nltk boundary format on two boundary strings with a
     window size of 2.
     '''
     options = {'window_size': 2, 'boundary_format': BoundaryFormat.nltk}
     expected = Decimal('0.2222222')
     self.assertAlmostEqual(expected,
                            pk('0100100000', '0101000000', **options))
Example #22
0
 def test_kazantseva2012_g5(self):
     '''
     Calculate permuted pairwise Pk on Group 5 from the dataset
     collected in [KazantsevaSzpakowicz2012]_ and compare its summary
     with precomputed values.
     '''
     expected = (Decimal('0.35530058282396693'),
                 Decimal('0.11001760846099215'),
                 Decimal('0.012103874171476172'),
                 Decimal('0.015879673965138168'),
                 48)
     # NOTE: assertAlmostEqual short-circuits on ==; tuples cannot be
     # subtracted, so this is effectively an exact comparison.
     self.assertAlmostEqual(summarize(pk(KAZANTSEVA2012_G5)), expected)
Example #23
0
    def test_all_kwargs_hyp_ref(self):
        '''
        Pass hypothesis and reference purely as keyword arguments; a full
        miss and a translated boundary out of 4 should produce 0.273.
        '''

        call_kwargs = dict(
            self.kwargs,
            hypothesis=[1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3],
            reference=[1, 1, 1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4])
        expected = Decimal('0.2727272727272727272727272727')
        self.assertEqual(pk(**call_kwargs), expected)
Example #24
0
 def test_kazantseva2012_g2(self):
     '''
     Calculate mean permuted pairwise Pk on Group 2 from the dataset
     collected in [KazantsevaSzpakowicz2012]_ and compare its summary
     with precomputed values.
     '''
     expected = (Decimal('0.2882256923776327507173609771'),
                 Decimal('0.1454395656787966169084191445'),
                 Decimal('0.02115266726483699483402909754'),
                 Decimal('0.01327675514600517730547602481'),
                 120)
     # NOTE: assertAlmostEqual short-circuits on ==; tuples cannot be
     # subtracted, so this is effectively an exact comparison.
     self.assertAlmostEqual(summarize(pk(KAZANTSEVA2012_G2)), expected)
Example #25
0
    def test_all_kwargs_hyp_ref(self):
        '''
        Supply both segmentations through the hypothesis and reference
        keyword arguments and expect 0.273 (a full miss and a translated
        boundary out of 4).
        '''

        keyword_only = dict(self.kwargs)
        keyword_only.update(
            hypothesis=[1] * 4 + [2] * 4 + [3] * 5,
            reference=[1] * 5 + [2] + [3] * 2 + [4] * 5)
        result = pk(**keyword_only)
        self.assertEqual(result, Decimal('0.2727272727272727272727272727'))
Example #26
0
    def test_parts(self):
        '''
        Check that return_parts makes pk return the tuple (7, 11) for all
        segments versus some segments.
        '''

        every_segment = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
        few_segments = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3]
        # Work on a copy so the shared kwargs stay untouched.
        with_parts = dict(self.kwargs)
        with_parts.update(return_parts=True)
        expected_parts = (7, 11)
        self.assertEqual(pk(every_segment, few_segments, **with_parts),
                         expected_parts)
Example #27
0
 def test_kazantseva2012_g5(self):
     '''
     Calculate permuted pairwise Pk on Group 5 from the dataset
     collected in [KazantsevaSzpakowicz2012]_ and check the summary
     against precomputed values.
     '''
     summary = summarize(pk(KAZANTSEVA2012_G5))
     # NOTE: assertAlmostEqual short-circuits on ==; tuples cannot be
     # subtracted, so this is effectively an exact comparison.
     self.assertAlmostEqual(summary,
                            (Decimal('0.35530058282396693'),
                             Decimal('0.11001760846099215'),
                             Decimal('0.012103874171476172'),
                             Decimal('0.015879673965138168'),
                             48))
Example #28
0
 def test_pk_datasets(self):
     '''
     Test pk upon two datasets, checking the value stored under each of
     four result keys.
     '''
     value = pk(HYPOTHESIS_STARGAZER, HEARST_1997_STARGAZER)
     expected_by_key = (
         ('stargazer,h1,1', 0.26315789),
         ('stargazer,h2,1', 0.36842105),
         ('stargazer,h1,2', 0.42105263),
         ('stargazer,h2,2', 0.42105263),
     )
     for key, expected in expected_by_key:
         # Results are converted to float before the approximate
         # comparison (default precision of assertAlmostEqual).
         self.assertAlmostEqual(float(value[key]), expected)
Example #29
0
 def test_kazantseva2012_g2(self):
     '''
     Calculate mean permuted pairwise Pk on Group 2 from the dataset
     collected in [KazantsevaSzpakowicz2012]_ and check the summary
     against precomputed values.
     '''
     summary = summarize(pk(KAZANTSEVA2012_G2))
     # NOTE: assertAlmostEqual short-circuits on ==; tuples cannot be
     # subtracted, so this is effectively an exact comparison.
     self.assertAlmostEqual(summary,
                            (Decimal('0.2882256923776327507173609771'),
                             Decimal('0.1454395656787966169084191445'),
                             Decimal('0.02115266726483699483402909754'),
                             Decimal('0.01327675514600517730547602481'),
                             120))
Example #30
0
 def test_dataset_kwargs(self):
     '''
     Calculate mean permuted pairwise Pk on a theoretical dataset
     containing complete agreement, with the dataset supplied through
     the ``dataset`` keyword argument.
     '''
     expected = (0.0, 0.0, 0.0, 0.0, 48)
     # NOTE: assertAlmostEqual short-circuits on ==; tuples cannot be
     # subtracted, so this is effectively an exact comparison.
     self.assertAlmostEqual(summarize(pk(dataset=COMPLETE_AGREEMENT)),
                            expected)
Example #31
0
 def test_large_disagreement(self):
     '''
     Calculate mean permuted pairwise Pk on a theoretical dataset
     containing large disagreement and check the resulting summary.
     '''
     expected = (1.0, 0.0, 0.0, 0.0, 8)
     # NOTE: assertAlmostEqual short-circuits on ==; tuples cannot be
     # subtracted, so this is effectively an exact comparison.
     self.assertAlmostEqual(summarize(pk(LARGE_DISAGREEMENT)), expected)
Example #32
0
 def test_pk_datasets(self):
     '''
     Test pk upon two datasets; each of the four keyed results is
     converted to float and compared approximately.
     '''
     hypothesis = HYPOTHESIS_STARGAZER
     reference = HEARST_1997_STARGAZER
     value = pk(hypothesis, reference)
     self.assertAlmostEqual(float(value['stargazer,h1,1']), 0.26315789)
     self.assertAlmostEqual(float(value['stargazer,h2,1']), 0.36842105)
     self.assertAlmostEqual(float(value['stargazer,h1,2']), 0.42105263)
     self.assertAlmostEqual(float(value['stargazer,h2,2']), 0.42105263)
Example #33
0
 def test_nltk(self):
     '''
     Runs Pk tests from https://github.com/nltk/nltk/blob/master/nltk/test/segmentation.doctest
     '''
     # Each row holds the two boundary strings, the window size and the
     # expected result; the trailing comment keeps the "originally"
     # value recorded for that case.
     cases = (
         ('1000100', '1000100', 3, Decimal('0.0')),  # Originally 0.0
         ('010', '100', 2, Decimal('0.5')),  # Originally 0.5
         ('111111', '100100', 2, Decimal('0.4')),  # Originally 0.64
         ('000000', '100100', 2, Decimal('0.6')),  # Originally 0.04
         ('111111', '100100', 3, Decimal('0')),  # Originally 0.25
         ('000000', '100100', 3, Decimal('1')),  # Originally 0.25
     )
     for first, second, window, expected in cases:
         self.assertAlmostEqual(
             pk(first, second, window_size=window,
                boundary_format=BoundaryFormat.nltk),
             expected)
Example #34
0
def example_in_paper_test():
    '''
    Print boundary-based and window-based scores for four hypothesis
    segmentations, each compared against the same gold segmentation.
    '''
    gold = [2, 3, 6]
    hypotheses = [[5, 6], [2, 2, 7], [2, 3, 3, 3], [1, 1, 3, 1, 5]]
    for trial, hyp in enumerate(hypotheses, start=1):
        cm = boundary_confusion_matrix(hyp, gold)
        print("第%d次实验" % int(trial))
        # P, R and F differ from the conventionally computed values
        # because near misses are corrected for.
        prf = (precision(cm), recall(cm), fmeasure(cm))
        print("P=%.4f, R=%.4f, F=%.4f" % prf)
        one_minus_pk = pk(hyp, gold, one_minus=True)
        one_minus_wd = WD(hyp, gold, one_minus=True, window_size=2)
        b_score = B(hyp, gold)
        s_score = S(hyp, gold)
        print("1-Pk=%.3f, 1-WD=%.3f, B=%.3f, S=%.3f" %
              (one_minus_pk, one_minus_wd, b_score, s_score))
Example #35
0
 def test_nltk(self):
     '''
     Runs Pk tests from https://github.com/nltk/nltk/blob/master/nltk/test/segmentation.doctest
     '''
     def nltk_pk(first, second, window):
         # Every case uses the nltk boundary format; only the two
         # boundary strings and the window size vary.
         return pk(first, second, window_size=window,
                   boundary_format=BoundaryFormat.nltk)

     # Originally 0.0
     self.assertAlmostEqual(nltk_pk('1000100', '1000100', 3),
                            Decimal('0.0'))
     # Originally 0.5
     self.assertAlmostEqual(nltk_pk('010', '100', 2), Decimal('0.5'))
     # Originally 0.64
     self.assertAlmostEqual(nltk_pk('111111', '100100', 2),
                            Decimal('0.4'))
     # Originally 0.04
     self.assertAlmostEqual(nltk_pk('000000', '100100', 2),
                            Decimal('0.6'))
     # Originally 0.25
     self.assertAlmostEqual(nltk_pk('111111', '100100', 3), Decimal('0'))
     # Originally 0.25
     self.assertAlmostEqual(nltk_pk('000000', '100100', 3), Decimal('1'))
Example #36
0
 def test_return_parts_dataset(self):
     '''
     Test return_parts: pk on two segmentations given as lists should
     return the tuple (2, 9).
     '''
     parts = pk([2, 3, 6], [2, 2, 7], return_parts=True)
     expected_parts = (2, 9)
     self.assertEqual(expected_parts, parts)
Example #37
0
 def test_return_parts(self):
     '''
     Test return_parts on KAZANTSEVA2012_G5: the result stored under
     'ch1,an3,an1' should be the tuple (3, 10).
     '''
     parts_by_key = pk(KAZANTSEVA2012_G5, return_parts=True)
     self.assertEqual((3, 10), parts_by_key['ch1,an3,an1'])
Example #38
0
 def test_return_parts_dataset(self):
     '''
     Check that pk with return_parts=True returns the tuple (2, 9) for
     the two list segmentations used here.
     '''
     first, second = [2, 3, 6], [2, 2, 7]
     self.assertEqual((2, 9), pk(first, second, return_parts=True))
Example #39
0
 def test_one_minus(self):
     '''
     Test the one_minus option on two segmentations given as lists.
     '''
     expected = Decimal('0.77777777')
     result = pk([2, 3, 6], [2, 2, 7], one_minus=True)
     self.assertAlmostEqual(expected, result)
Example #40
0
 def test_one_minus(self):
     '''
     Check the value pk returns when one_minus=True is passed.
     '''
     first, second = [2, 3, 6], [2, 2, 7]
     self.assertAlmostEqual(Decimal('0.77777777'),
                            pk(first, second, one_minus=True))
Example #41
0
 def test_window_size_specified(self):
     '''
     Test pk when the window size is given explicitly (window_size=2).
     '''
     expected = Decimal('0.2222222')
     result = pk([2, 3, 6], [2, 2, 7], window_size=2)
     self.assertAlmostEqual(expected, result)
Example #42
0
 def test_return_parts(self):
     '''
     Check return_parts on KAZANTSEVA2012_G5 by inspecting the entry
     keyed 'ch1,an3,an1', which should be (3, 10).
     '''
     key = 'ch1,an3,an1'
     results = pk(KAZANTSEVA2012_G5, return_parts=True)
     self.assertEqual((3, 10), results[key])
Example #43
0
 def test_window_size_specified(self):
     '''
     Check the value pk returns when window_size=2 is passed explicitly.
     '''
     first, second = [2, 3, 6], [2, 2, 7]
     self.assertAlmostEqual(Decimal('0.2222222'),
                            pk(first, second, window_size=2))