Esempio n. 1
0
    def testReroot(self):
        """
        Move each leaf in turn to the outgroup position and check that the
        re-parsed tree still has a symmetric difference of 0 from the
        reference tree.
        """
        base_newick = "((t5,t6),((t4,(t2,t1)),t3));"
        dataset = dataio.trees_from_newick([base_newick])
        tree = dataset.trees_blocks[0][0]
        taxa_block = dataset.taxa_blocks[0]

        def parse_with_splits(newick_str):
            # Parse against the shared taxa block, then encode the splits so
            # the result can be handed to symmetric_difference().
            parsed = dataio.trees_from_newick(
                [newick_str], taxa_block=taxa_block)
            parsed_tree = parsed.trees_blocks[0][0]
            encode_splits(parsed_tree)
            return parsed_tree

        ref = parse_with_splits(base_newick)

        # Sanity check: a different topology is expected to be at distance 2.
        other = parse_with_splits("((t2, t1),((t4,(t5,t6)),t3));")
        self.assertEqual(symmetric_difference(other, ref), 2)

        for index in xrange(1, 7):
            leaf_name = "t%d" % index
            leaf = tree.find_taxon_node(lambda x: x.label == leaf_name)
            tree.to_outgroup_position(leaf)
            # Round-trip through the string form of the re-rooted tree.
            rerooted = parse_with_splits(str(tree))
            self.assertEqual(symmetric_difference(rerooted, ref), 0)
Esempio n. 2
0
    def testChangeTranslate(self):
        f = """#NEXUS
Begin taxa ;
    dimensions ntax = 4;
    taxlabels a b c d ;
end;
begin trees;
    translate 
        1 a,
        2 b,
        3 c,
        4 d;
    tree t = (1,2,(3,4));
end;
begin trees;
    translate 
        1 d,
        2 b,
        3 c,
        4 a;
    tree t = (4,2,(3,1));
end;
"""
        d = Dataset()
        d.read(StringIO(f), format="NEXUS")
        t = d.trees_blocks[0][0]
        s = d.trees_blocks[1][0]
        self.assertEqual(t.taxa_block, s.taxa_block)
        encode_splits(s)
        encode_splits(t)
        self.assertEqual(treedists.symmetric_difference(t, s), 0)
Esempio n. 3
0
 def kernelOfTest(self, trees):
     """
     Merge every tree but the last with strict_consensus_merge() and fail
     unless the result is at symmetric difference 0 from the last tree.
     """
     expected, source_trees = trees[-1], trees[:-1]
     merged = strict_consensus_merge(source_trees)
     for current in (merged, expected):
         encode_splits(current)
     if symmetric_difference(expected, merged) == 0:
         return
     self.fail("\n%s\n!=\n%s" % (str(merged), str(expected)))
Esempio n. 4
0
    def testSymmDiff(self):
        """
        Check that two fixed six-taxon trees parsed against the same taxa
        block are at a symmetric difference of 2.
        """
        ref_newick = "((t5,t6),((t4,(t2,t1)),t3));"
        ref_dataset = dataio.trees_from_newick([ref_newick])
        taxa_block = ref_dataset.taxa_blocks[0]
        ref = ref_dataset.trees_blocks[0][0]
        encode_splits(ref)

        # The second tree reuses the reference's taxa block.
        other_dataset = dataio.trees_from_newick(
            ["((t1,t2),((t4,(t5,t6)),t3));"], taxa_block=taxa_block)
        other = other_dataset.trees_blocks[0][0]
        encode_splits(other)

        distance = treedists.symmetric_difference(other, ref)
        self.assertEqual(distance, 2)
Esempio n. 5
0
 def check_tree(self, tree_str):
     """
     Convert the tree in `tree_str` to a parent array, pass it to the
     program at `self.prog_path`, and assert that the tree the program
     prints back is at symmetric difference 0 from the original.
     """
     dataset = datasets.Dataset()
     original = dataset.trees_from_string(tree_str, format="newick")[0]
     parent_array, _edge_lens = to_parent_array(original, True, False)
     _LOG.info('Original tree: %s' % tree_str)

     # The parent array entries become the program's command-line arguments.
     arg_string = " ".join(parent_array)
     command = self.prog_path + " " + arg_string
     stdout, stderr, returncode = run_program(command)
     assert returncode == 0, "Program exited with error:\n%s" % stderr
     _LOG.info('Returned tree: %s' % stdout)

     returned = dataset.trees_from_string(stdout, format="newick")[0]
     for current in (original, returned):
         splits.encode_splits(current)
     d = treedists.symmetric_difference(original, returned)
     assert d == 0, "Symmetric distance = %d:\n%s;\n%s;" % (d, tree_str, stdout)
Esempio n. 6
0
 def testRandomlyReorient(self):
     """
     Randomly reorient one copy of a tree 50 times; after every step its
     string form must differ from the source newick, the tree must pass
     debug_check_tree(), and its symmetric difference from the untouched
     copy must be 0.
     """
     n = '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));'
     dataset = dataio.trees_from_newick([n, n])
     # Take the first tree of each trees block: one reference, one to mutate.
     first_trees = [block[0] for block in dataset.trees_blocks]
     ref, changing = first_trees[0], first_trees[1]
     rng = DebuggingRandom()
     for current in (ref, changing):
         encode_splits(current)
     for _ in xrange(50):
         randomly_reorient_tree(changing, rng=rng, splits=True)
         self.assertNotEqual(str(changing), n)
         changing.debug_check_tree(logger_obj=_LOG, splits=True)
         if symmetric_difference(ref, changing) == 0:
             continue
         self.fail("\n%s\n!=\n%s" % (str(ref), str(changing)))
    def test3Feb2009MajRuleBug(self):
        """
        Build a tree from the splits of each of two tree files (splits with
        frequency of at least 0.5) and assert that the two resulting trees
        are at symmetric difference 0.

        The test returns early, without asserting anything, when
        is_test_enabled() reports that NORMAL-level tests are disabled.
        """
        if not is_test_enabled(TestLevel.NORMAL,
                               _LOG,
                               module_name=__name__,
                               message="skipping sumtree argument tests"):
            return
        fn1 = dendropy.tests.data_source_path("maj-rule-bug1.tre")
        fn2 = dendropy.tests.data_source_path("maj-rule-bug2.tre")
        d = Dataset()

        # Read both files into the same dataset.  The context managers make
        # sure the file handles are closed even if parsing raises.
        with open(fn1, "rU") as src1:
            tb1 = d.read_trees(src1,
                               format="NEXUS",
                               encode_splits=True,
                               rooted=RootingInterpretation.UNROOTED)
        with open(fn2, "rU") as src2:
            tb2 = d.read_trees(src2,
                               format="NEXUS",
                               encode_splits=True,
                               rooted=RootingInterpretation.UNROOTED)
        taxa1 = d.taxa_blocks[0]
        # Trees from the second file must use the first taxa block too.
        self.assertEqual(taxa1, tb2[0].taxa_block)

        firstSD = SplitDistribution(taxa_block=taxa1)
        secondSD = SplitDistribution(taxa_block=taxa1)

        # Splits were already encoded while reading (encode_splits=True).
        for o, t in itertools.izip(tb1, tb2):
            firstSD.count_splits_on_tree(o)
            secondSD.count_splits_on_tree(t)

        ts = TreeSummarizer()
        n_times = 1  # keep set to 1 except for benchmarking tree_from_splits
        for i in xrange(n_times):
            firstMR = ts.tree_from_splits(firstSD, min_freq=0.5)
            secondMR = ts.tree_from_splits(secondSD, min_freq=0.5)
        self.assertEqual(0, symmetric_difference(firstMR, secondMR))
Esempio n. 8
0
def _run_biosql_script(script, opts, target, action):
    """
    Run one of the BioSQL helper scripts and return its standard output.

    `script` is the script file name, `opts` the parsed command-line options
    (`db_uri`, `quiet` and `echo` are read), `target` the final positional
    argument handed to the script, and `action` the word used in the progress
    and error messages ("import" or "export").

    If the script exits with a non-zero status, its standard error is echoed
    and the process exits with status 1.
    """
    cmd = ["python", script,
           "-d", opts.db_uri,
           "-b", "%s" % TEST_BIODB]
    if opts.quiet:
        cmd.append("-q")
    if opts.echo:
        cmd.append("-e")
    cmd.append(target)
    if not opts.quiet:
        sys.stderr.write("Executing %s: %s\n" % (action, " ".join(cmd)))
    # Pass the argument list directly (no shell) so that URIs, names and
    # paths containing spaces or shell metacharacters reach the script intact.
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode:
        sys.stderr.write('*** %s ERROR ***\n' % action.upper())
        sys.stderr.write(stderr)
        sys.exit(1)
    return stdout


def main():
    """
    Main CLI handler.

    For every tree file given on the command line: parse it as NeXML, import
    it with "biosql-insert.py", assign the names printed by the import to the
    parsed trees, fetch each tree back with "biosql-gettree.py", and report
    SUCCESS or FAIL per tree depending on whether the weighted
    Robinson-Foulds distance between the parsed and the fetched tree is
    below 0.0001.

    Exits with status 1 if the database URI or the tree files are missing,
    if a given path is not an existing file, or if a helper script fails.
    """

    parser = OptionParser(usage=_prog_usage,
        add_help_option=True,
        version=_prog_version,
        description=_prog_description)

    parser.add_option('-d', '--database',
        action='store',
        dest='db_uri',
        type='string', # also 'float', 'string' etc.
        default=None,
        metavar='URI',
        help='[MANDATORY] database URI (e.g. "postgres://*****:*****@localhost/demodb")')

    parser.add_option('-q', '--quiet',
        action='store_true',
        dest='quiet',
        default=False,
        help='suppress progress messages')

    parser.add_option('-e', '--echo',
        action='store_true',
        dest='echo',
        default=False,
        help='echo database communications')

    (opts, args) = parser.parse_args()

    if opts.db_uri is None:
        sys.stderr.write('Database URI needs to be specified ("-d" flag; see "--help").\n')
        sys.exit(1)

    if not args:
        sys.stderr.write("Tree file(s) not specified.\n")
        sys.exit(1)

    # Validate every path up front so nothing is imported if any is bad.
    src_fpaths = []
    for a in args:
        f = os.path.expandvars(os.path.expanduser(a))
        if not os.path.exists(f):
            sys.stderr.write('File not found: "%s"\n' % f)
            sys.exit(1)
        elif not os.path.isfile(f):
            sys.stderr.write('Directory specified instead of file: "%s"\n' % f)
            sys.exit(1)
        src_fpaths.append(f)

    for f in src_fpaths:

        ## initial read ##
        if not opts.quiet:
            sys.stderr.write("Pre-import parse ...\n")
        ds1 = datasets.Dataset()
        # Close the source file as soon as it has been parsed.
        with open(f, "rU") as src:
            ds1.read(src, "nexml")
        tree_list = [tree
                     for trees_block in ds1.trees_blocks
                     for tree in trees_block]

        ## import ##
        stdout = _run_biosql_script("biosql-insert.py", opts, f, "import")

        # Each non-empty output line is taken as the name of the tree at
        # the same position in tree_list.
        for idx, name in enumerate(stdout.split("\n")):
            if name:
                tree_list[idx].name = name

        for idx, model_tree in enumerate(tree_list):
            ## export ##
            # Request the tree being compared in this iteration (not the
            # leftover loop variable from the pre-import parse).
            stdout = _run_biosql_script("biosql-gettree.py", opts,
                                        model_tree.name, "export")

            ds2 = datasets.Dataset()
            result_tree = ds2.trees_from_string(stdout, "nexml")[0]

            ## compare ##
            if not opts.quiet:
                sys.stderr.write("Comparing splits ...\n")
            taxa_block = model_tree.taxa_block
            # Both trees must reference the same taxa block to be comparable.
            result_tree.normalize_taxa(taxa_block)
            assert model_tree.taxa_block is result_tree.taxa_block
            splits.encode_splits(model_tree)
            splits.encode_splits(result_tree)
            sd = treedists.symmetric_difference(model_tree, result_tree)
            if not opts.quiet:
                sys.stderr.write("Symmetric distance = %d\n" % sd)
            rfd = treedists.robinson_foulds_distance(model_tree, result_tree)
            if not opts.quiet:
                # %f rather than %d: the value is compared against 0.0001
                # below, so truncating it to an integer would hide the
                # very differences that decide SUCCESS/FAIL.
                sys.stderr.write("Weighted Robinson-Fould's distance = %f\n" % rfd)
            if abs(rfd) < 0.0001:
                sys.stdout.write("%s (%d/%d): SUCCESS\n" % (f, idx+1, len(tree_list)))
            else:
                sys.stdout.write("%s (%d/%d): FAIL\n" % (f, idx+1, len(tree_list)))
    def _do_add_taxon_incremental_step(self, full_dataset, inp_trees):
        """
        Run one round of the incremental search over `inp_trees`.

        For each input tree, add_to_tree() proposes tree models.  For each
        proposal the neighborhood around the edge carrying the split
        `1 << (self.curr_n_taxa - 1)` is checked; neighbors with a different
        topology seed a second, wider neighborhood check, after which
        topological duplicates are dropped.

        Side effects: the contents of `full_dataset.trees_blocks` are
        replaced by a single list holding the selected trees, and the
        selected trees are written to "incrgarli.tre".

        Returns the list produced by select_trees_for_next_round().
        Raises AssertionError if the expected split edge cannot be found.
        """
        culled = self._write_garli_input(full_dataset)
        culled_taxa = culled.taxa_blocks[0]
        self.set_active_taxa(culled_taxa)
        next_round_trees = []

        for tree_ind, tree in enumerate(inp_trees):
            tree_model_list = self.add_to_tree(tree, culled, tree_ind, self.add_tree_stopgen)
            to_save = []
            for tm in tree_model_list:
                print("Tree %d for %d taxa: %f" % (tree_ind, self.curr_n_taxa, tm.score))
                step_add_tree = tm.tree
                encode_splits(step_add_tree)
                # Bit mask with only the highest of the curr_n_taxa bits set
                # (presumably the most recently added taxon -- confirm).
                split = 1 << (self.curr_n_taxa - 1)
                e = find_edge_from_split(step_add_tree.seed_node, split)
                assert e is not None, "Could not find split %s.  Root mask is %s" % (bin(split)[2:], bin(step_add_tree.seed_node.edge.clade_mask)[2:])

                nt_list = self.check_neighborhood_after_addition(tm, e.head_node, self.first_neighborhood, culled, tree_ind)
                deeper_search_start = []
                better_tm = tm
                for nt in nt_list:
                    encode_splits(nt.tree)
                    if symmetric_difference(nt.tree, step_add_tree) != 0:
                        # Different topology: explore around it further below.
                        deeper_search_start.append(nt)
                    elif nt.score > better_tm.score:
                        # Same topology with a better score: keep that one.
                        better_tm = nt

                if deeper_search_start:
                    entire_neighborhood = [better_tm] + deeper_search_start
                    for alt_tm in deeper_search_start:
                        e = find_edge_from_split(alt_tm.tree.seed_node, split)

                        assert e is not None, "Could not find split %s.  Root mask is %s" % (bin(split)[2:], bin(alt_tm.tree.seed_node.edge.clade_mask)[2:])

                        nt_list = self.check_neighborhood_after_addition(alt_tm, e.head_node, self.first_neighborhood + self.neighborhood_incr, culled, tree_ind)
                        for nt in nt_list:
                            encode_splits(nt.tree)
                            entire_neighborhood.append(nt)
                    # Relies on the ordering defined by the tree-model
                    # objects (presumably by score -- confirm).
                    entire_neighborhood.sort(reverse=True)
                    # Keep only the first model seen for every topology.
                    to_add = []
                    for nt in entire_neighborhood:
                        found = False
                        for x in to_add:
                            if symmetric_difference(x.tree, nt.tree) == 0:
                                found = True
                                break
                        if not found:
                            to_add.append(nt)
                    to_save.extend(to_add)
                else:
                    to_save.append(better_tm)

            # this is where we should evaluate which trees need to be maintained for the next round.
            next_round_trees.extend(to_save)

        next_round_trees = self.select_trees_for_next_round(culled, next_round_trees)

        del full_dataset.trees_blocks[:]
        full_dataset.trees_blocks.append([i.tree for i in next_round_trees])
        # The context manager closes the file even if writing raises.
        with open("incrgarli.tre", "w") as o:
            write_tree_file(o, next_round_trees, culled)
        return next_round_trees
Esempio n. 10
0
    def _do_add_taxon_incremental_step(self, full_dataset, inp_trees):
        """
        Run one round of the incremental search over `inp_trees`.

        Every tree model proposed by add_to_tree() has the neighborhood
        around the edge with split `1 << (self.curr_n_taxa - 1)` checked.
        Neighbors whose topology differs from the proposal trigger a second
        check with a neighborhood enlarged by `self.neighborhood_incr`; the
        combined results are sorted and reduced to one model per topology.

        Side effects: `full_dataset.trees_blocks` is emptied and refilled
        with one list of the selected trees, which are also written to
        "incrgarli.tre".

        Returns the list produced by select_trees_for_next_round().
        Raises AssertionError if the expected split edge cannot be found.
        """
        culled = self._write_garli_input(full_dataset)
        culled_taxa = culled.taxa_blocks[0]
        self.set_active_taxa(culled_taxa)
        next_round_trees = []

        for tree_ind, tree in enumerate(inp_trees):
            tree_model_list = self.add_to_tree(tree, culled, tree_ind,
                                               self.add_tree_stopgen)
            to_save = []
            for tm in tree_model_list:
                print("Tree %d for %d taxa: %f" % (tree_ind, self.curr_n_taxa,
                                                   tm.score))
                step_add_tree = tm.tree
                encode_splits(step_add_tree)
                # Mask with only the highest of the curr_n_taxa bits set.
                split = 1 << (self.curr_n_taxa - 1)
                e = find_edge_from_split(step_add_tree.seed_node, split)
                assert e is not None, "Could not find split %s.  Root mask is %s" % (
                    bin(split)[2:], bin(
                        step_add_tree.seed_node.edge.clade_mask)[2:])

                nt_list = self.check_neighborhood_after_addition(
                    tm, e.head_node, self.first_neighborhood, culled, tree_ind)
                deeper_search_start = []
                better_tm = tm
                for nt in nt_list:
                    encode_splits(nt.tree)
                    if symmetric_difference(nt.tree, step_add_tree) != 0:
                        # A new topology: starting point for the wider search.
                        deeper_search_start.append(nt)
                    elif nt.score > better_tm.score:
                        # Same topology, higher score: prefer this model.
                        better_tm = nt

                if not deeper_search_start:
                    to_save.append(better_tm)
                    continue

                entire_neighborhood = [better_tm] + deeper_search_start
                for alt_tm in deeper_search_start:
                    e = find_edge_from_split(alt_tm.tree.seed_node, split)

                    assert e is not None, "Could not find split %s.  Root mask is %s" % (
                        bin(split)[2:],
                        bin(alt_tm.tree.seed_node.edge.clade_mask)[2:])

                    nt_list = self.check_neighborhood_after_addition(
                        alt_tm, e.head_node,
                        self.first_neighborhood + self.neighborhood_incr,
                        culled, tree_ind)
                    for nt in nt_list:
                        encode_splits(nt.tree)
                        entire_neighborhood.append(nt)
                # Ordering comes from the tree-model objects themselves
                # (presumably by score -- confirm).
                entire_neighborhood.sort(reverse=True)
                # Keep the first model encountered for each topology.
                to_add = []
                for nt in entire_neighborhood:
                    if not any(symmetric_difference(x.tree, nt.tree) == 0
                               for x in to_add):
                        to_add.append(nt)
                to_save.extend(to_add)

            # this is where we should evaluate which trees need to be maintained for the next round.
            next_round_trees.extend(to_save)

        next_round_trees = self.select_trees_for_next_round(
            culled, next_round_trees)

        del full_dataset.trees_blocks[:]
        full_dataset.trees_blocks.append([i.tree for i in next_round_trees])
        # Guarantee the output file is closed even if writing fails.
        with open("incrgarli.tre", "w") as o:
            write_tree_file(o, next_round_trees, culled)
        return next_round_trees