Example #1
0
    def test_skipped_zero_or_more(self):
        # Negative test: the pattern is expected to match nowhere in the tree.
        haystack = Tree(" ((((a, a, b)), (c, d), (e, f)), (g, h, i)) ; ")
        needle = TreePattern(" ( a, b, d*) ;")

        hits = list(needle.find_match(haystack))
        self.assertTrue(len(hits) == 0)
Example #2
0
    def test_skipped_zero_or_more(self):
        # Negative test: searching the tree for this pattern must yield
        # no matches at all.
        searched_tree = Tree(" ((((a, a, b)), (c, d), (e, f)), (g, h, i)) ; ")
        query = TreePattern(" ( a, b, d*) ;")

        found_nodes = list(query.find_match(searched_tree))
        self.assertTrue(not found_nodes)
Example #3
0
    def test_constraintes_pattern(self):
        tree = Tree(" ((((a, b)), (c, d), (e, f)), (g, h, i)) ; ")

        # Set the ``dist`` attribute that the '@.dist' constraint below reads.
        dist_by_name = (('a', 0.2), ('b', 0.4), ('c', 0.5), ('d', 0.6),
                        ('e', 0.7), ('f', 0.8), ('g', 0.9))
        for node_name, dist in dist_by_name:
            (tree & node_name).dist = dist

        pattern = TreePattern(" ('@.dist > 0.5+'); ", quoted_node_names=True)

        result = pattern.find_match(tree)
        expected = [(tree & 'e').up, (tree & 'g').up]

        # Exactly two matches must come back, and every one of them has to be
        # one of the two parent nodes looked up above.
        hits = list(result)
        all_expected = all([node in expected for node in hits])

        self.assertTrue(all_expected and len(hits) == 2)
Example #4
0
    def test_constraintes_pattern(self):
        tree = Tree(" ((((a, b)), (c, d), (e, f)), (g, h, i)) ; ")

        # Nodes a..g get the ``dist`` values the pattern constraint tests.
        for name, value in zip('abcdefg', (0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)):
            (tree & name).dist = value

        pattern = TreePattern(" ('@.dist > 0.5+'); ", quoted_node_names=True)

        result = pattern.find_match(tree)
        expected = [(tree & 'e').up, (tree & 'g').up]

        # The search must return exactly two nodes, each of which is one of
        # the expected parents.
        returned = list(result)
        membership = [node in expected for node in returned]
        found = all(membership) and len(returned) == 2

        self.assertTrue(found)
Example #5
0
    def test_simple_plus(self):
        # The pattern has to be found at least once in the tree.
        # NOTE(review): only the existence of a match is checked; which node
        # matched is not verified.
        tree = Tree(" (((a, a, b, qq), (a, b, c, ww)), (b, b, a, ee));", format=8)
        pattern = TreePattern(" (qq, a+)^ ;")

        matches = list(pattern.find_match(tree))
        self.assertTrue(len(matches) > 0)
Example #6
0
    def test_double_match(self):
        tree = Tree(" (((a, a, b), (c, c, d) ), (e, e, f), (g, h, i)) ; ")
        pattern = TreePattern(" ((a+, b)^, (e+, f)^);")

        # The first node yielded by the search must be the tree itself.
        first_match = next(pattern.find_match(tree))
        self.assertEqual(first_match, tree)
Example #7
0
    def test_double_match(self):
        tree = Tree(" (((a, a, b), (c, c, d) ), (e, e, f), (g, h, i)) ; ")
        pattern = TreePattern( " ((a+, b)^, (e+, f)^);")

        # Only the first yielded match is inspected: it has to equal the
        # searched tree.
        matches = pattern.find_match(tree)
        head = next(matches)
        self.assertEqual(head, tree)
Example #8
0
    def test_simple_plus(self):
        # At least one match for the pattern is required.
        # NOTE(review): the identity of the matched node is not checked.
        tree = Tree(" (((a, a, b, qq), (a, b, c, ww)), (b, b, a, ee));",
                    format=8)
        pattern = TreePattern(" (qq, a+)^ ;")

        found_nodes = list(pattern.find_match(tree))
        self.assertTrue(len(found_nodes) > 0)
Example #9
0
    def test_simple_complete_topology(self):
        # Collect the 1-based positions of the trees in self.trees where the
        # pattern is found; they must be exactly 8 and 9.
        pattern = TreePattern("((e, i, f)d)^ ; ")
        true_match = [8, 9]

        match = [
            position
            for position, tree in enumerate(self.trees, start=1)
            if len(list(pattern.find_match(tree))) > 0
        ]

        self.assertTrue(true_match == match)
Example #10
0
    def test_simple_complete_topology(self):
        pattern = TreePattern("((e, i, f)d)^ ; ")
        true_match = [8, 9]

        # Walk self.trees in order and remember (1-based) which ones contain
        # at least one match for the pattern.
        match = []
        for index, tree in enumerate(self.trees):
            hits = list(pattern.find_match(tree))
            if hits:
                match.append(index + 1)

        self.assertTrue(true_match == match)
Example #11
0
    def test_star_and_logical_constraints(self):
        tree = Tree(" (((a, b), (c, d), (e, f)), (g, h, i)) ; ")

        # Set the ``dist`` attribute on nodes a..g before searching.
        for name, value in zip('abcdefg', (0.2, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)):
            (tree & name).dist = value

        pattern = TreePattern(""" ('g', '@.dist == 1+', 'fls_node*'); """, quoted_node_names=True)

        # The first yielded match must be the parent of node 'g'.
        first_match = next(pattern.find_match(tree))
        self.assertEqual(first_match, (tree & 'g').up)
Example #12
0
    def test_star_and_logical_constraints(self):
        tree = Tree(" (((a, b), (c, d), (e, f)), (g, h, i)) ; ")

        # ``dist`` values assigned to nodes a..g.
        dist_by_name = (('a', 0.2), ('b', 0.4), ('c', 0.5), ('d', 0.6),
                        ('e', 0.7), ('f', 0.8), ('g', 0.9))
        for node_name, dist in dist_by_name:
            (tree & node_name).dist = dist

        pattern = TreePattern(""" ('g', '@.dist == 1+', 'fls_node*'); """,
                              quoted_node_names=True)
        matches = pattern.find_match(tree)

        # Only the first match is examined; it has to be g's parent node.
        head = next(matches)
        self.assertEqual(head, (tree & 'g').up)
Example #13
0
    def test_constraints_and_loose(self):
        tree = Tree(" (((((a, b), (c, d), (e, f)), (g, h, i)))) ; ")

        # ``dist`` values for nodes a..g (e and f share 0.7).
        for name, value in zip('abcdefg', (0.2, 0.4, 0.5, 0.6, 0.7, 0.7, 0.9)):
            (tree & name).dist = value

        pattern = TreePattern(""" ('@.dist == 0.2', 'b')'^', ('@.dist > 0.5', '@.dist == 0.7+')'^' ; """, quoted_node_names=True)
        remaining = pattern.find_match(tree)

        # The first match is consumed and discarded here, so the assertion
        # below only passes when at least one further match follows it.
        next(remaining)
        self.assertTrue(len(list(remaining)) > 0)
Example #14
0
    def test_constraints_and_loose(self):
        tree = Tree(" (((((a, b), (c, d), (e, f)), (g, h, i)))) ; ")

        # Assign ``dist`` to nodes a..g; note that e and f both get 0.7.
        dist_by_name = (('a', 0.2), ('b', 0.4), ('c', 0.5), ('d', 0.6),
                        ('e', 0.7), ('f', 0.7), ('g', 0.9))
        for node_name, dist in dist_by_name:
            (tree & node_name).dist = dist

        pattern = TreePattern(
            """ ('@.dist == 0.2', 'b')'^', ('@.dist > 0.5', '@.dist == 0.7+')'^' ; """,
            quoted_node_names=True)
        matches = pattern.find_match(tree)

        # Drop the first match; what is asserted is that the iterator still
        # has at least one more match after it.
        next(matches)
        leftover = list(matches)
        self.assertTrue(len(leftover) > 0)
Example #15
0
    def test_exact_number_and_topology(self):
        tree = Tree(" ((a, a, b)p1, ((c, c, c, d)p2, (e, f, g)p3)p4)p5 ;",
                    format=1)

        # Patterns are built in the same order as their expected outcomes.
        patterns = [
            TreePattern(" ('a{2,2}', 'b')'p1' ;", quoted_node_names=True),
            TreePattern(" ('c{1,5}', 'd')'p2' ;", quoted_node_names=True),
            TreePattern(" ('c{2,3}', d, 'ww{0,3}')p2 ;"),
            TreePattern(" ('c{3,3}', 'd{0,5}', 'ww{0,3}')p2;"),
            TreePattern(" ('c{1,2}', 'd{0,1}', 'ww*')p2;"),
        ]
        true_match = [True, True, True, True, False]

        # One boolean per pattern: was it found at least once in the tree?
        outcomes = [len(list(pattern.find_match(tree))) > 0
                    for pattern in patterns]

        self.assertTrue(outcomes == true_match)
Example #16
0
    def test_two_terminal_nodes(self):
        # The presence of leaves e, f as sister nodes.
        pattern = TreePattern(" (e, f)^; ")
        true_match = [4, 5, 6, 7, 8, 9, 10, 11]
        matches = []
        for num, tree in enumerate(self.trees):
            # Search with a fresh copy of the pattern for every tree.
            one_use = deepcopy(pattern)

            # next() with a default yields None only when the search is
            # exhausted; any other error raised while matching now fails the
            # test instead of being silently counted as "no match".
            res = next(one_use.find_match(tree), None)

            if res:
                matches += [num + 1]

        self.assertTrue(matches == true_match)
Example #17
0
    def test_one_terminal_node(self):
        # The c node exists as leaf (not internal)
        pattern = TreePattern(" (c)^; ")
        true_match = [1, 2, 3, 4, 5, 6]
        matches = []
        for num, tree in enumerate(self.trees):
            # Search with a fresh copy of the pattern for every tree.
            one_use = deepcopy(pattern)

            # Only an exhausted search maps to None; unexpected errors raised
            # while matching propagate and fail the test rather than being
            # swallowed by a bare ``except``.
            res = next(one_use.find_match(tree), None)

            if res:
                matches += [num + 1]

        self.assertTrue(matches == true_match)
Example #18
0
    def test_exact_number_of_repeat(self):
        tree = Tree(
            "((a, a, a, b, c), (d, d, qq), (e, e, e, ww, e, e, e, e, e)); ")

        # Pattern texts, listed in the same order as their expected outcomes.
        pattern_texts = (
            " (b, c, 'a{1,3}') ;",
            " (b, c, 'a{2,3}') ;",
            " (b, c, 'a{3,3}') ;",
            " (b, c, 'a{4,5}') ;",
            " (ww, 'e{1,8}') ;",
            " (ww, 'e{7,9}') ;",
            " (ww, 'e{1,3}') ;",
        )
        patterns = [TreePattern(text) for text in pattern_texts]
        true_match = [True, True, True, False, True, True, False]

        # One boolean per pattern: did the search return at least one node?
        outcomes = [len(list(pattern.find_match(tree))) > 0
                    for pattern in patterns]

        self.assertTrue(outcomes == true_match)
Example #19
0
 def test_simple_parent_two_children_false(self):
     # Negative test: no node of the tree may match the pattern.
     searched_tree = Tree(" (((b, c)a, (b, c)a), (e, f)d) ;", format=1)
     query = TreePattern("(b,c)qq ;")
     hits = list(query.find_match(searched_tree))
     self.assertTrue(len(hits) == 0)
Example #20
0
    def test_all(self):
        # Each case is (tree newick, Tree keyword arguments, pattern text,
        # TreePattern keyword arguments, whether a match is expected).
        #
        # ^ after a ) means that the two children of that node can be
        # connected by any number of internal up/down nodes.
        cases = [
            (" ((a, a, b)p1, ((c, c, c, d)p2, (e, f, g)p3)p4)p5 ;",
             {'format': 1},
             " ('c+', 'd')'p2' ;", {'quoted_node_names': True}, True),
            (" (((F, G)E, (C, D)B), A);", {'format': 8},
             "('@.support > 0', '@.support > 0')'B' ;", {}, True),
            (" (((F, G)E, (C, D)B), A);", {'format': 8},
             "('@.support > 0', '@.support > 0{2,3}')'B' ;", {}, False),
            (" (((F, G)E, (C, D)B), A);", {'format': 8},
             "('C', '@.support > 0')'B' ;", {}, True),
            ("(((A, A, A), (B,C)), K);", {},
             "(((A, A+, A, A), (B,C)), K);", {}, False),
            ("(((A, A, A), (B,C)), K);", {},
             "(((A, A+, A), (B,C)), K);", {}, True),
            ("(((A, A, A), (B,C)), K);", {},
             "(((A, A+), (B,C)), K);", {}, True),
            ("(  ((B,Z), (D,F)), G);", {},
             "( (B,Z), G)^;", {}, True),
            ("(  ((G, ((B,Z),A)), (D,G)), C);", {},
             "(((B,Z)^,C), G)^;", {}, False),
            ("(  ((G, ((B,Z),A)), (D,G)), C);", {},
             "(((B,Z)^,G), C)^;", {}, True),
            ("(((A, (B,C,D)), ((B,C), A)), F);", {},
             "((C,B,D*), A);", {}, True),
            ("(((A, (B,C,D, D, D)), ((B,C), A)), F);", {},
             "((C,B,'D{2,3}'), A);", {}, True),
            ("(a, b, b, a);", {},
             "(a+, b+);", {}, True),
            ("((a, b), c);", {},
             "((a, b, d*), c);", {}, True),
            ("(  (((B,H), (B,B,H), C), A), (K, J));", {},
             "((C, (B+,H)+), A);", {}, True),
        ]

        test = True
        for newick, tree_kwargs, pattern_text, pattern_kwargs, expected in cases:
            t1 = Tree(newick, **tree_kwargs)
            p1 = TreePattern(pattern_text, **pattern_kwargs)
            found = len(list(p1.find_match(t1))) > 0
            # Every case is evaluated; a single wrong outcome fails the test.
            test &= found == expected

        self.assertTrue(test)
Example #21
0
    def test_all(self):
        verdicts = []

        def record(tree, pattern, should_match):
            # Store whether the search outcome (at least one match or none)
            # agrees with the expectation for this tree/pattern pair.
            found = len(list(pattern.find_match(tree))) > 0
            verdicts.append(found == should_match)

        record(Tree(" ((a, a, b)p1, ((c, c, c, d)p2, (e, f, g)p3)p4)p5 ;", format=1),
               TreePattern(" ('c+', 'd')'p2' ;", quoted_node_names=True),
               True)

        # Should match
        record(Tree(" (((F, G)E, (C, D)B), A);", format=8),
               TreePattern("('@.support > 0', '@.support > 0')'B' ;"),
               True)

        # Should NOT match
        record(Tree(" (((F, G)E, (C, D)B), A);", format=8),
               TreePattern("('@.support > 0', '@.support > 0{2,3}')'B' ;"),
               False)

        # Should match
        record(Tree(" (((F, G)E, (C, D)B), A);", format=8),
               TreePattern("('C', '@.support > 0')'B' ;"),
               True)

        # Should not match
        record(Tree("(((A, A, A), (B,C)), K);"),
               TreePattern("(((A, A+, A, A), (B,C)), K);"),
               False)

        # Should match
        record(Tree("(((A, A, A), (B,C)), K);"),
               TreePattern("(((A, A+, A), (B,C)), K);"),
               True)

        # Should match
        record(Tree("(((A, A, A), (B,C)), K);"),
               TreePattern("(((A, A+), (B,C)), K);"),
               True)

        # ^ after a ) means that the two children of that node can be
        # connected by any number of internal up/down nodes
        record(Tree("(  ((B,Z), (D,F)), G);"),
               TreePattern("( (B,Z), G)^;"),
               True)

        record(Tree("(  ((G, ((B,Z),A)), (D,G)), C);"),
               TreePattern("(((B,Z)^,C), G)^;"),
               False)

        record(Tree("(  ((G, ((B,Z),A)), (D,G)), C);"),
               TreePattern("(((B,Z)^,G), C)^;"),
               True)

        record(Tree("(((A, (B,C,D)), ((B,C), A)), F);"),
               TreePattern("((C,B,D*), A);"),
               True)

        record(Tree("(((A, (B,C,D, D, D)), ((B,C), A)), F);"),
               TreePattern("((C,B,'D{2,3}'), A);"),
               True)

        record(Tree("(a, b, b, a);"),
               TreePattern("(a+, b+);"),
               True)

        record(Tree("((a, b), c);"),
               TreePattern("((a, b, d*), c);"),
               True)

        record(Tree("(  (((B,H), (B,B,H), C), A), (K, J));"),
               TreePattern("((C, (B+,H)+), A);"),
               True)

        # A single disagreement anywhere above fails the whole test.
        self.assertTrue(all(verdicts))
Example #22
0
def _verbosity_level(args):
    """Return the verbosity requested in *args* as an int (0 when unset)."""
    verbosity = vars(args)["verbosity"]
    return int(verbosity[0]) if verbosity else 0


def _with_suffix(path, suffix, bare_suffix=None):
    """Return *path* with *suffix* inserted just before its file extension.

    When *path* has no extension, *bare_suffix* (defaulting to *suffix*) is
    appended to the end instead.
    """
    import os.path  # local import keeps this block self-contained

    root, ext = os.path.splitext(path)
    if ext:
        return root + suffix + ext
    return path + (suffix if bare_suffix is None else bare_suffix)


def _render_matches(args, tree, matches, pattern_num, tree_num,
                    multiple_patterns, verbosity):
    """Render *tree*, or each matched node, to files named after ``args.render``."""
    image = args.render

    if vars(args)["whole_tree"]:
        if multiple_patterns:
            image = _with_suffix(image, str(pattern_num))
        ts = TreeStyle()
        ts.show_leaf_name = True
        # Matched nodes are drawn green and larger, all others red.
        for node in tree.traverse():
            nstyle = NodeStyle()
            if node in matches:
                nstyle["fgcolor"] = "green"
                nstyle["size"] = 7
            else:
                nstyle["fgcolor"] = "red"
                nstyle["size"] = 5
            node.set_style(nstyle)
        tree.render(image, tree_style=ts, layout=lambda x: None)
        return

    if not matches:
        if verbosity > 1:
            if multiple_patterns:
                print("No matches for pattern {} tree {}".format(pattern_num, tree_num))
            else:
                print("No matches for tree {}".format(tree_num))
        return

    if len(matches) == 1:
        if multiple_patterns:
            image = _with_suffix(image, str(pattern_num))
        matches[0].render(image)
        return

    # One file per match. Each name is derived from the base name, never from
    # the previous match's name, so suffixes do not pile up.
    for m, match in enumerate(matches):
        if multiple_patterns:
            target = _with_suffix(image,
                                  str(pattern_num) + '_' + str(m),
                                  str(pattern_num) + str(m))
        else:
            target = _with_suffix(image, '_' + str(m), str(m))
        match.render(target)


def _write_matches(outputfile, args, tree, matches):
    """Write the whole tree or the individual matches to *outputfile*."""
    whole_tree = vars(args)["whole_tree"]
    if vars(args)["asciioutput"]:
        if whole_tree and matches:
            outputfile.write(str(tree))
        else:
            for match in matches:
                outputfile.write(str(match) + '\n')
    else:  # args.taboutput
        # The whole tree is written only when it matched, mirroring the
        # guard used when printing to stdout.
        if whole_tree and matches:
            outputfile.write(tree.write(features=[]))
        else:
            outputfile.write('\t'.join(match.write(features=[]) for match in matches))


def _print_matches(args, tree, matches):
    """Print the whole tree or the individual matches to stdout."""
    whole_tree = vars(args)["whole_tree"]
    if vars(args)["asciioutput"]:
        if whole_tree and matches:
            print(tree)
        else:
            for match in matches:
                print(match)
    else:
        if whole_tree and matches:
            print(tree.write(features=[]))
        else:
            for match in matches:
                print(match.write(features=[]))


def run(args):
    """Search every source tree for every pattern and report the matches.

    Options are read from *args* (``src_trees``, ``src_tree_list``,
    ``pattern_trees``, ``pattern_tree_list``, ``quoted_node_names``,
    ``output``, ``verbosity``, ``whole_tree``, ``asciioutput``, ``render``,
    ``tree_format``). For each pattern, matches are rendered to image files,
    written to an output file, or printed, and per-pattern statistics are
    collected; a summary is printed when verbosity is above 1.

    Exits the process with status -1 when no source tree or no pattern was
    specified.
    """
    options = vars(args)

    if options["src_trees"] is None and options["src_tree_list"] is None:
        logging.error('Please specify a tree to search (i.e. -t) ')
        sys.exit(-1)
    if not options["pattern_trees"] and not options["pattern_tree_list"]:
        logging.error('Please specify a pattern to search for. (i.e. -p)')
        sys.exit(-1)

    # Convert once so every threshold check compares ints.
    verbosity = _verbosity_level(args)

    # a list of stats objects. one for every pattern
    all_stats = []

    pattern_length = sum(1 for _ in pattern_tree_iterator(args))
    multiple_patterns = pattern_length > 1

    for pattern_num, p in enumerate(pattern_tree_iterator(args)):
        try:
            pattern = TreePattern(p, quoted_node_names=options["quoted_node_names"])
        except Exception:
            logging.error("Could not create pattern from newick.")
            continue

        stats = match_stats("pattern_" + str(pattern_num))

        # handle file creation: one output file per pattern
        outputfile = None
        if options["output"]:
            filename = options["output"]
            if multiple_patterns:
                filename = _with_suffix(filename, str(pattern_num))
            outputfile = open(filename, 'w')

        try:
            if verbosity > 2:
                print("pattern_{} is: ".format(pattern_num))
                print(pattern)
                if not options["output"]:
                    print("match(es) for pattern_{}:".format(pattern_num))

            # for every tree
            for tree_num, nw in enumerate(src_tree_iterator(args)):
                stats.total += 1
                try:
                    t = PhyloTree(nw, format=args.tree_format)
                except Exception:
                    logging.error("Could not creat tree from newick format.")
                    stats.errors += 1
                    continue

                matches = list(pattern.find_match(t))
                if matches:
                    stats.matched += 1
                else:
                    stats.not_matched += 1

                if args.render:
                    _render_matches(args, t, matches, pattern_num, tree_num,
                                    multiple_patterns, verbosity)

                if outputfile is not None:
                    _write_matches(outputfile, args, t, matches)
                elif not args.render:
                    _print_matches(args, t, matches)
        finally:
            # Close the file even when matching or rendering raises.
            if outputfile is not None:
                outputfile.close()

        all_stats.append(stats)
        if verbosity > 3:
            print("{}".format(stats))

    concentrated = match_stats("\nSummarize")
    concentrated.total = sum(stat.total for stat in all_stats)
    concentrated.num_of_patterns = len(all_stats)
    # No pattern could be built: avoid dividing by zero.
    concentrated.num_of_trees = (
        concentrated.total / concentrated.num_of_patterns if all_stats else 0)
    concentrated.matched = sum(stat.matched for stat in all_stats)
    concentrated.not_matched = sum(stat.not_matched for stat in all_stats)
    concentrated.errors = sum(stat.errors for stat in all_stats)

    if verbosity > 1:
        print("{}".format(concentrated))
Example #23
0
 def test_simple_parent_two_children_false(self):
     # Negative test: the search for this pattern must come back empty.
     tree = Tree(" (((b, c)a, (b, c)a), (e, f)d) ;", format=1)
     pattern = TreePattern("(b,c)qq ;")
     found_nodes = list(pattern.find_match(tree))
     self.assertTrue(not found_nodes)