コード例 #1
0
class TestNode(unittest.TestCase):
    """Checks Tree's string form and its sum / mean / median aggregates."""

    def setUp(self):
        """Build the shared fixture tree and the values expected from it."""
        # Assuming the first constructor argument is the node value, the
        # fixture holds ten values: 5, 3, 2, 5, 7, 1, 0, 2, 8, 5.
        self.my_tree = Tree(
            5, Tree(3, Tree(2), Tree(5)),
            Tree(7, Tree(1), Tree(0, Tree(2), Tree(8, None, Tree(5)))))
        # Sorted: 0 1 2 2 3 5 5 5 7 8 -> median is (3 + 5) / 2.
        self.my_median = 4.0
        # 38 / 10
        self.my_mean = 3.8
        self.my_sum = 38
        self.str_my_tree = "[5[3[2[][]][5[][]]][7[1[][]][0[2[][]][8[][5[][]]]]]]"

    def test_str_representation(self):
        """str() of the fixture matches the expected bracketed string."""
        self.assertEqual(str(self.my_tree), self.str_my_tree)

    def test_positive_median_calculation(self):
        """median_value() returns the expected median of the fixture."""
        # Float result: compare with a tolerance rather than bit-for-bit.
        self.assertAlmostEqual(self.my_tree.median_value(), self.my_median)

    def test_positive_sum_calculation(self):
        """sum_subtree_values() returns the expected total of the fixture."""
        self.assertEqual(self.my_tree.sum_subtree_values(), self.my_sum)

    def test_positive_mean_calculation(self):
        """mean_value() returns the expected mean of the fixture."""
        # 3.8 has no exact binary representation, so a mean computed by any
        # route other than a single division may differ in the last bit.
        self.assertAlmostEqual(self.my_tree.mean_value(), self.my_mean)
コード例 #2
0
ファイル: cyk.py プロジェクト: kaushalv274/NLP
def func(sen_size, time):
    """Parse every input line with a CYK chart and print the best tree.

    For each line supplied by ``fileinput.input()`` the line is tokenized,
    tokens missing from the reverse-rule table are replaced by ``'<unk>'``,
    the chart is filled bottom-up, and the tree rooted at the
    highest-scoring label that spans the whole sentence is printed. An empty
    line is printed when no label spans the sentence (including when the
    line has no tokens).

    Args:
        sen_size: list that receives the token count of each line.
        time: list that receives the chart-filling time of each line, in
            microseconds.

    Written to run unchanged on Python 2 and Python 3.
    """
    # d: production rules A -> B C and A -> literal with their log
    # probability. Nothing below reads it, but it is still loaded so that a
    # missing or malformed file fails exactly as before.
    with open("d.text") as rules_file:
        d = json.load(rules_file)
    # rev_d: reverse production rules, "B C" -> A and literal -> A, with
    # their probability.
    with open("rd.text") as rev_rules_file:
        rev_d = json.load(rev_rules_file)
    kTOKENIZER = TreebankWordTokenizer()

    for line in fileinput.input():
        tokens = kTOKENIZER.tokenize(line)
        n = len(tokens)

        # dd[i][j] maps label -> best score for the span tokens[i..j];
        # back[i][j] maps label -> how that best score was obtained.
        dd = [[defaultdict(float) for _ in range(n)] for _ in range(n)]
        back = [[defaultdict(float) for _ in range(n)] for _ in range(n)]
        for index, token in enumerate(tokens):
            if token not in rev_d:
                tokens[index] = '<unk>'

        start_time = timeit.default_timer()
        for j, token in enumerate(tokens):
            # Width-one span: labels that rewrite directly to the token.
            for label, score in rev_d[token].items():
                dd[j][j][label] = score
                back[j][j][label] = token
            # Wider spans ending at j, from narrowest to widest.
            for i in range(j - 1, -1, -1):
                cell = dd[i][j]
                cell_back = back[i][j]
                for k in range(i, j):
                    # k is the last index of the left part; neither part is
                    # the cell being written, so iterating them is safe.
                    left = dd[i][k]
                    right = dd[k + 1][j]
                    for B, left_score in left.items():
                        for C, right_score in right.items():
                            r_side = B + ' ' + C
                            if r_side not in rev_d:
                                continue
                            for key, value in rev_d[r_side].items():
                                candidate = value + left_score + right_score
                                # Keep the first derivation on ties.
                                if key not in cell or cell[key] < candidate:
                                    cell[key] = candidate
                                    cell_back[key] = (
                                        str(k) + ' ' + B + ' ' + C)

        elapsed = timeit.default_timer() - start_time
        time.append(elapsed * 1000000)
        sen_size.append(n)

        # Pick the best-scoring label over the full span. A line with no
        # tokens has no chart cells at all, so treat it as "no parse".
        maxi = -100000000
        top_word = ''
        root_cell = dd[0][n - 1] if n else {}
        for label, score in root_cell.items():
            if score > maxi:
                # Track the running maximum; without this the last label
                # iterated would win regardless of its score.
                maxi = score
                top_word = label

        if top_word:
            rr = Node(top_word, [])
            printpath(back, top_word, 0, n - 1, rr)
            t = Tree(rr)
            # NOTE(review): presumably these undo grammar transformations
            # applied before training -- confirm against Tree.
            t.restore_unit()
            t.unbinarize()
            print(t.__str__())
        else:
            print("")
コード例 #3
0
ファイル: test_tree.py プロジェクト: mkgilbert/GDriveMgr
 def test_string_empty_tree(self):
     """A Tree built from None renders as the single root line."""
     empty_tree = Tree(None)
     rendered = empty_tree.__str__()
     expected = '|---Google_Drive\n'
     self.assertEqual(rendered, expected)