Ejemplo n.º 1
0
    def test_windiff(self):
        """Check accuracy.win_diff for window sizes 3 and 5.

        The hypothesis and the gold labelling use the same four sentences but
        disagree on the labels of the first and third one.
        """
        h = [
            ("5 words sentence of data.", 0),
            ("short.", 1),
            ("extra segmented sen.", 1),
            ("last and very very very very very long sen.", 1),
        ]
        gold = [
            ("5 words sentence of data.", 1),
            ("short.", 1),
            ("extra segmented sen.", 0),
            ("last and very very very very very long sen.", 1),
        ]

        gold = accuracy.get_seg_boundaries(gold)
        h = accuracy.get_seg_boundaries(h)

        # assertEquals is a deprecated alias that was removed in Python 3.12,
        # and floats are compared with a tolerance instead of exactly.
        acc = accuracy.win_diff(gold, h, window_size=3)
        self.assertAlmostEqual(float(acc), 0.6)

        expected = 1.0 - 8.0 / 13
        acc = accuracy.win_diff(gold, h, window_size=5)
        # Same 5-decimal precision the string-formatted comparison used.
        self.assertAlmostEqual(float(acc), expected, places=5)
Ejemplo n.º 2
0
    def test_pk_perefct_seg(self):
        """Check that accuracy.pk is 1 when hypothesis and gold are identical.

        Both boundary sequences are derived from the same labelled sentences,
        and the result is checked for explicit window sizes 1..14 as well as
        for the default window size.
        """
        sentences_class = [
            ("first sen is 5 words.", 0),
            ("sec sen.", 0),
            ("third sen is a very very very long sentence.", 1),
            ("the forth one is single segment.", 1),
        ]

        gold = accuracy.get_seg_boundaries(sentences_class)
        h = accuracy.get_seg_boundaries(sentences_class)

        # with specified window size
        for window_size in range(1, 15):
            acc = accuracy.pk(gold, h, window_size=window_size)
            # The message identifies which window size broke the expectation.
            self.assertEqual(acc, 1, "window_size=%d" % window_size)

        # with default window size
        acc = accuracy.pk(gold, h)
        self.assertEqual(acc, 1)
Ejemplo n.º 3
0
    def test_get_boundaries2(self):
        """Check accuracy.get_seg_boundaries on four labelled sentences.

        The expected result is [16, 6] -- presumably the word count of each
        segment, a segment being closed by a sentence labelled 1 (confirm
        against accuracy.get_seg_boundaries).
        """
        sentences_class = [
            ("first sen is 5 words.", 0),
            ("sec sen.", 0),
            ("third sen is a very very very long sentence.", 1),
            ("the forth one is single segment.", 1),
        ]

        expected = [16, 6]
        result = accuracy.get_seg_boundaries(sentences_class)

        # Compare the whole sequence at once: looping over `result` alone
        # would pass vacuously if it were empty or shorter than `expected`.
        self.assertEqual(list(result), expected)
Ejemplo n.º 4
0
    def test_pk_false_neg(self):
        """Check accuracy.pk when the hypothesis and gold labels differ.

        The hypothesis labels the first of the two sentences 0 where the
        gold labelling has 1. The expected value is expressed as
        window_size / comparison_count.
        """
        h = [
            ("5 words sentence of data.", 0),
            ("2 sentences same seg.", 1),
        ]
        gold = [
            ("5 words sentence of data.", 1),
            ("2 sentences same seg.", 1),
        ]

        gold = accuracy.get_seg_boundaries(gold)
        h = accuracy.get_seg_boundaries(h)

        window_size = 3
        comparison_count = 6

        # NOTE(review): `/` is integer division on Python 2 unless
        # `from __future__ import division` is active in this module, which
        # would make both expected values below 0 -- confirm the target
        # interpreter.

        # with default window size
        acc = accuracy.pk(gold, h)
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(acc, window_size / comparison_count)

        # NOTE(review): window_size is changed here but is not passed to
        # accuracy.pk, so this repeats the default-window call above with a
        # different expected value. Presumably `window_size=window_size` was
        # intended -- confirm the expected value before changing the call.
        window_size = 4
        acc = accuracy.pk(gold, h)
        self.assertEqual(acc, window_size / comparison_count)
Ejemplo n.º 5
0
    def test_get_boundaries(self):
        """Check accuracy.get_seg_boundaries on seven labelled sentences.

        The expected result is [2, 2, 4, 6] -- presumably the word count of
        each segment, a segment being closed by a sentence labelled 1
        (confirm against accuracy.get_seg_boundaries).
        """
        sentences_class = [
            ("first sen.", 1),
            ("sec sen.", 1),
            ("third sen.", 0),
            ("forth sen.", 1),
            ("fifth sen.", 0),
            ("sixth sen.", 0),
            ("seventh sen.", 1),
        ]

        expected = [2, 2, 4, 6]
        result = accuracy.get_seg_boundaries(sentences_class)

        # Compare the whole sequence at once: looping over `result` alone
        # would pass vacuously if it were empty or shorter than `expected`.
        self.assertEqual(list(result), expected)