Esempio n. 1
0
def ml4(aln, true_tree):
    """Check whether maximum likelihood recovers the true 4-taxon topology.

    Each of the three unrooted topologies on the tips a, b, c and d is
    fitted to ``aln`` under JC69 with a local optimiser.  The topology with
    the largest log-likelihood is then compared with ``true_tree``.

    Returns whatever ``sameTopology`` reports for the best-scoring tree
    against ``true_tree`` (True when the ML tree is the true tree).
    """
    # The newick strings carry no explicit branch lengths.
    candidates = [
        LoadTree(treestring=newick)
        for newick in ('((a,b),(c,d))', '((a,c),(b,d))', '((a,d),(b,c))')
    ]

    model = JC69()

    def fitted_log_likelihood(tree):
        # Fit one candidate topology and report its optimised lnL.
        lf = model.makeLikelihoodFunction(tree)
        lf.setAlignment(aln)
        lf.optimise(local=True)
        return lf.getLogLikelihood()

    scores = [fitted_log_likelihood(tree) for tree in candidates]

    # index() returns the first match, so ties go to the earliest candidate.
    best = scores.index(max(scores))
    return candidates[best].sameTopology(true_tree)
Esempio n. 2
0
 def test_balanced(self):
     """balanced() on an unrooted tree yields the expected arrangement"""
     start_newick = '((a,b),((c1,(c2,(c3,(c4,(c5,(c6,c7)))))),(d,e)),f)'
     expected_newick = '(c1,(c2,(c3,(c4,(c5,(c6,c7))))),((d,e),((a,b),f)))'
     start = LoadTree(treestring=start_newick)
     expected = LoadTree(treestring=expected_newick)
     # Compare the string forms of the two trees.
     self.assertEqual(str(start.balanced()), str(expected))
Esempio n. 3
0
 def test_sameShape(self):
     """sameTopology separates matching from differing topologies"""
     first, second, third = [
         LoadTree(treestring=newick)
         for newick in ("(((s1,s5),s3),s2,s4);",
                        "((s1,s5),(s2,s4),s3);",
                        "((s1,s4),(s2,s5),s3);")
     ]
     # The first two trees are expected to match each other ...
     assert first.sameTopology(second), (first, second)
     # ... and neither is expected to match the third.
     for other in (first, second):
         assert not other.sameTopology(third), (other, third)
Esempio n. 4
0
def inflate_likelihood_function(data, model=None):
    """Rebuild a likelihood function from its flattened description.

    ``data`` is read as a mapping with the keys 'with_rate', 'mprobs',
    'params', either 'tree' or 'tip_names', and optionally 'dependencies'.
    'name' is read only when ``model`` is None.

    ``model``, when given, is called with no arguments to obtain the
    substitution model.  Otherwise the model is chosen from
    ``data['name']``.

    Raises NotImplementedError for an unrecognised model name, and
    AssertionError when 'with_rate' is set for a model other than GTR.
    """
    supported_subs_models = ('GeneralStationary', 'General',
                             'DiscreteSubstitutionModel',
                             'General_with_gaps')

    if model is not None:
        model = model()
    else:
        name = data['name']
        if name == 'GTR':
            gtr_kwargs = dict(optimise_motif_probs=True)
            if data['with_rate']:
                gtr_kwargs.update(with_rate=True, distribution='gamma')
            model = GTR(**gtr_kwargs)
        elif name in supported_subs_models:
            assert not data['with_rate'], name + ' plus Gamma not supported'
            if name == 'General_with_gaps':
                model = General(DNA.Alphabet, optimise_motif_probs=True,
                                model_gaps=True, recode_gaps=False,
                                name='General_with_gaps')
            else:
                # NOTE(review): eval() is only reached for names in the
                # whitelist above, but an explicit name -> class table would
                # be safer than evaluating text that comes from ``data``.
                model_class = eval(name)
                model = model_class(DNA.Alphabet, optimise_motif_probs=True,
                                    model_gaps=False, recode_gaps=True,
                                    name=name)
        else:
            raise NotImplementedError(
                'inflate_likelihood_function: unsupported model ' + name)

    if 'tree' not in data:
        tips = [tip.encode('utf-8') for tip in data['tip_names']]
        tree = LoadTree(tip_names=tips)
    else:
        tree = LoadTree(treestring=data['tree'].encode('utf-8'))

    # Four bins are requested only when 'with_rate' is set.
    lf_kwargs = {'bins': 4} if data['with_rate'] else {}
    lf = model.makeLikelihoodFunction(tree, **lf_kwargs)

    with lf.updatesPostponed():
        lf.setMotifProbs(data['mprobs'])

        for param, value in data['params'].items():
            dimensions = lf.defn_for[param].valid_dimensions
            by_edge = 'edge' in dimensions
            by_bin = 'bin' in dimensions
            if len(dimensions) == 0:
                # No dimensions: a single value for the whole function.
                lf.setParamRule(param, init=value)
            elif by_edge and by_bin:
                for edge, per_bin in value.items():
                    for bin_name, init in per_bin.items():
                        lf.setParamRule(param, edge=edge, bin=bin_name,
                                        init=init)
            elif by_edge:
                for edge, init in value.items():
                    lf.setParamRule(param, edge=edge, init=init)
            elif by_bin:
                for bin_name, init in value.items():
                    lf.setParamRule(param, bin=bin_name, init=init)

        dependencies = data['dependencies'] if 'dependencies' in data else {}
        for param, scopes in dependencies.items():
            for scope in scopes:
                lf.setParamRule(param, is_independent=False, **scope)

    return lf
Esempio n. 5
0
 def test_limited_wls(self):
     """exercise wls when the search begins from supplied start tree(s)"""
     ac_start = LoadTree(treestring='((a,c),b,d)')
     tree = wls(self.dists, start=ac_start)
     self.assertEqual(len(tree.getTipNames()), 5)

     ad_start = LoadTree(treestring='((a,d),b,c)')
     tree = wls(self.dists, start=[ac_start, ad_start])
     self.assertEqual(len(tree.getTipNames()), 5)

     # start trees that name different tips are expected to be rejected
     mismatched = LoadTree(treestring='((a,d),b,e)')
     self.assertRaises(Exception, wls, self.dists,
                       start=[ac_start, mismatched])

     # if start tree has all seq names, should raise an error
     complete = LoadTree(treestring='((a,c),b,(d,e))')
     self.assertRaises(Exception, wls, self.dists, start=[complete])
Esempio n. 6
0
 def test_getEdgeNamesUseOutgroup(self):
     """getEdgeNames with an outgroup gives the same edges for both trees"""
     newicks = (
         "((A,B)ab,(F,(C,D)cd)cdf,E)root;",
         # a, e, ogroup f
         "((E,(A,B)ab)abe,F,(C,D)cd)root;",
     )
     trees = [LoadTree(treestring=newick) for newick in newicks]
     expected = ['A', 'B', 'E', 'ab']
     for tree in trees:
         found = tree.getEdgeNames('A', 'E', getstem=False, getclade=True,
                                   outgroup_name="F")
         # Order is not part of the check, so compare sorted names.
         self.assertEqual(expected, sorted(found))
Esempio n. 7
0
    def test_trees(self):
        """Draw one tree with each dendrogram class, then with coloured edges."""
        newick = "((A:.1,B:.22)ab:.3,((C:.4,D:.5)cd:.55,E:.6)cde:.7,F:.2)"
        # Lengthen every tip name: 'A' becomes 'A' followed by ten 'a's, etc.
        for tip in 'ABCDEF':
            newick = newick.replace(tip, tip + 10 * tip.lower())
        tree = LoadTree(treestring=newick)

        # StraightDendrogram and ContemporaneousStraightDendrogram are
        # deliberately left out of this list.
        dendrogram_classes = (
            UnrootedDendrogram,
            SquareDendrogram,
            ContemporaneousDendrogram,
            ShelvedDendrogram,
        )
        for dendrogram_class in dendrogram_classes:
            drawing = dendrogram_class(tree)
            connecting = drawing.getConnectingNode('Ccccccccccc',
                                                   'Eeeeeeeeeee')
            connecting.setCollapsed(color="green", label="C, D and E")
            do(dendrogram_class.__name__, drawing,
               shade_param="length", show_params=["length"])

        def colour_for(edge):
            # Edges whose name starts with "A" are red, all others blue.
            return "red" if edge.Name.startswith("A") else "blue"

        do("Highlight edge A", UnrootedDendrogram(tree),
           edge_color_callback=colour_for)
Esempio n. 8
0
 def test_making_from_list(self):
     """a tree built from tip names reports those same names back"""
     # Names include an underscore, a space and a quote character.
     names = sorted(['a_b', 'a b', "T'lk"])
     tree = LoadTree(tip_names=names)
     assert sorted(tree.getTipNames()) == names
Esempio n. 9
0
 def test_getsetParamValue(self):
     """test getting, setting of param values"""
     t = LoadTree(treestring='((((a:.2,b:.3)ab:.1,c:.3)abc:.4),d:.6)')
     # The third positional argument of assertEqual is the failure message,
     # not a precision.  The trailing 2 and 4 were evidently meant as a
     # number of decimal places, so use assertAlmostEqual with places=.
     self.assertAlmostEqual(t.getParamValue('length', 'ab'), 0.1, places=2)
     t.setParamValue('zz', 'ab', 4.321)
     node = t.getNodeMatchingName('ab')
     self.assertAlmostEqual(4.321, node.params['zz'], places=4)
 def setUp(self):
     """Create the alignment, tree and nucleotide model used by the tests."""
     # length all edges 1 except c=2.  b&d transitions all other transversions
     sequences = {
         'a': 'tata',
         'b': 'tgtc',
         'c': 'gcga',
         'd': 'gaac',
         'e': 'gagc',
     }
     self.al = LoadSeqs(data=sequences)
     self.tree = LoadTree(treestring='((a,b),(c,d),e);')
     nucleotide_model = cogent.evolve.substitution_model.Nucleotide
     self.model = nucleotide_model(do_scaling=True,
                                   equal_motif_probs=True,
                                   model_gaps=True)
Esempio n. 11
0
def get_tree(filename):
    """Load the tree stored in ``filename`` and describe it as a dict.

    Every edge is flagged with ``NameLoaded = True`` and has the dots in
    its name replaced by underscores.

    Returns a dict with 'treename' (the file's base name up to its last
    dot) and 'treestring' (``str`` of the modified tree).
    """
    tree = LoadTree(filename)
    for node in tree.getEdgeVector():
        node.NameLoaded = True
        node.Name = node.Name.replace('.', '_')

    base_name = os.path.basename(filename)
    stem = base_name.rsplit('.', 1)[0]
    return {'treename': stem, 'treestring': str(tree)}
Esempio n. 12
0
def MakeCachedObjects(model, tree, seq_length, opt_args):
    """simulates an alignment under F81, all models should be the same

    The alignment is simulated from ``model`` on ``tree`` with fixed motif
    probabilities (the F81 description above comes from the original
    notes; the model is whatever the caller passes in).

    Returns a function ``call(self, obj_name)``.  It fits the model named
    by ``obj_name`` ('general', 'gen_stat', 'discrete' or
    'constructed_gen') the first time it is asked for, and returns the
    cached result afterwards.  'aln' returns the simulated alignment.
    ``opt_args`` is passed as keyword arguments to each ``optimise`` call.
    """
    sim_lf = model.makeLikelihoodFunction(tree)
    sim_lf.setMotifProbs(dict(A=0.1, C=0.2, G=0.3, T=0.4))
    aln = sim_lf.simulateAlignment(seq_length)
    results = {'aln': aln}
    discrete_tree = LoadTree(tip_names=aln.Names)

    def _general():
        return _make_likelihood(General(DNA.Alphabet), tree, results)

    def _gen_stat():
        return _make_likelihood(GeneralStationary(DNA.Alphabet), tree,
                                results)

    def _constructed_gen():
        # Every directed nucleotide change except G -> A, in ACGT order.
        changes = [(a, b) for a in 'ACGT' for b in 'ACGT'
                   if a != b and (a, b) != ('G', 'A')]
        preds = [MotifChange(a, b, forward_only=True) for a, b in changes]
        return _make_likelihood(Nucleotide(predicates=preds), tree, results)

    def _discrete():
        return _make_likelihood(DiscreteSubstitutionModel(DNA.Alphabet),
                                discrete_tree,
                                results,
                                is_discrete=True)

    # Maps each object name to the function that builds its (unfitted)
    # likelihood function.
    builders = {
        'general': _general,
        'gen_stat': _gen_stat,
        'discrete': _discrete,
        'constructed_gen': _constructed_gen,
    }

    def call(self, obj_name):
        if obj_name not in results:
            fitted = builders[obj_name]()
            fitted.optimise(**opt_args)
            results[obj_name] = fitted
        return results[obj_name]

    return call
Esempio n. 13
0
File: ml.py Progetto: HuttleyLab/gnc
def rooted(doc, rooted_edges=None, gc=None, **kw):
    """Fit GNC with ``rooted_edges`` sharing parameters, seeded from MG94GTR.

    ``doc['aln']`` and ``doc['tree']`` supply the alignment and tree as
    text.  Terminal stop codons are removed, and only codon columns made
    up entirely of DNA characters are kept.  An MG94GTR fit provides the
    starting values for the GNC fit.

    Returns a dict with the flattened GNC likelihood function ('lf', which
    also carries a 'hard_up' entry), the genetic code name ('gc') and
    ``rooted_edges``.  Extra keyword arguments are accepted and ignored.
    """
    aln = LoadSeqs(data=doc['aln'].encode('utf-8'), moltype=DNA)
    tree = LoadTree(treestring=doc['tree'].encode('utf-8'))

    code = get_genetic_code(gc)

    def only_dna(column):
        return set(''.join(column)) <= set(DNA)

    trimmed = aln.withoutTerminalStopCodons(code)
    aln = trimmed.filtered(only_dna, motif_length=3)

    bounds = dict(upper=20., lower=0.05, is_independent=False)

    # Stage 1: fit MG94GTR to obtain starting values.
    start_lf = MG94GTR(optimise_motif_probs=True).makeLikelihoodFunction(tree)
    start_lf.setAlignment(aln)
    with start_lf.updatesPostponed():
        for name in start_lf.getParamNames():
            if '/' not in name:
                continue
            start_lf.setParamRule(name, **bounds)
    start_lf.setParamRule('length', edges=rooted_edges, is_independent=False)
    start_lf.optimise(local=True, show_progress=False, limit_action='raise')
    start_flat = nest.deflate_likelihood_function(start_lf, save_jsd=False)

    # Stage 2: fit GNC starting from the stage 1 estimates.
    lf = GNC(optimise_motif_probs=True).makeLikelihoodFunction(tree)
    lf.setAlignment(aln)
    _populate_parameters(lf, start_flat, **bounds)
    for name in lf.getParamNames():
        if name != 'omega' and '>' not in name:
            continue
        lf.setParamRule(name, edges=rooted_edges, is_independent=False)
    lf.optimise(local=True, show_progress=False, limit_action='raise')

    flat_lf = nest.deflate_likelihood_function(lf)
    flat_lf['hard_up'] = _is_hard_up(lf)

    return {'lf': flat_lf, 'gc': code.Name, 'rooted_edges': rooted_edges}
Esempio n. 14
0
def test_gapped_CNFGTR():
    """Fit gapped CNFGTR, then check the re-inflated likelihood function.

    Asserts that the root motif probabilities are unchanged by the Human
    edge's substitution matrix, that scaled rate-matrix cells equal the
    'A/C' and 'indel' parameter values, and that Q divided by the motif
    probabilities is symmetric.
    """
    fasta = os.path.join(get_data_dir(), 'ENSG00000100393.fasta.gz')
    aln = get_aln(fasta, codon_position=-1, filter_gaps=False)
    tree = LoadTree(treestring='(Human,Mouse,Opossum);')
    fit = gapped.ml({'aln': str(aln), 'tree': str(tree)},
                    model='CNFGTR',
                    model_gaps=True,
                    omega_indep=False,
                    indel_indep=False)

    def make_model():
        return gapped.CNFGTR(optimise_motif_probs=True, model_gaps=True)

    lf = gapped.inflate_likelihood_function(fit['lf'], make_model)

    root_probs = lf.getMotifProbsByNode()['root'].asarray()
    psub = lf.getPsubForEdge('Human')
    assert_almost_equal(root_probs.dot(psub), root_probs)

    # The value is not used below; the lookup itself is kept.
    omega = lf.getParamValue('omega')
    pi = lf.getMotifProbs()
    Q = lf.getRateMatrixForEdge('Human')

    def prob_given_cc_prefix(codon):
        # Probability of ``codon`` relative to all codons starting 'CC'.
        return pi[codon] / sum(pi['CC' + c] for c in 'ACGT')

    ref_cell = Q['CCT']['CCG'] / prob_given_cc_prefix('CCG')
    assert_almost_equal(
        Q['CCA']['CCC'] / prob_given_cc_prefix('CCC') / ref_cell,
        lf.getParamValue('A/C'))
    assert_almost_equal(Q['---']['CCC'] / pi['CCC'] / ref_cell,
                        lf.getParamValue('indel'))

    R = Q.asarray() / pi.asarray()
    assert_almost_equal(R.T, R)
Esempio n. 15
0
def ml(doc,
       model='NG',
       gc=None,
       omega_indep=True,
       model_gaps=False,
       indel_indep=True,
       **kw):
    """Fit ``model`` to the alignment and tree described by ``doc``.

    ``doc['aln']`` and ``doc['tree']`` supply the alignment and tree as
    text.  For every model except 'NG', terminal stop codons are removed
    and only codon columns made up of DNA characters are kept.  A '-' is
    also allowed in those columns when ``model_gaps`` is true.

    Returns a dict holding the flattened likelihood function ('lf'), the
    second value returned by ``_fit`` ('time') and the settings used.
    Extra keyword arguments are accepted and ignored.
    """
    aln = LoadSeqs(data=doc['aln'].encode('utf-8'), moltype=DNA)
    tree = LoadTree(treestring=doc['tree'].encode('utf-8'))

    code = get_genetic_code(gc)
    if model != 'NG':
        extra_states = {'-'} if model_gaps else set()

        def keep(column):
            return set(''.join(column)) <= set(DNA).union(extra_states)

        # Trim terminal stop codons, then drop columns with other states.
        trimmed = aln.withoutTerminalStopCodons(code)
        aln = trimmed.filtered(keep, motif_length=3)

    flat_lf, elapsed = _fit(aln, tree, model, code, omega_indep, model_gaps,
                            indel_indep)
    summary = {
        'lf': flat_lf,
        'time': elapsed,
        'model': model,
        'gc': code.Name,
        'omega_indep': omega_indep,
        'model_gaps': model_gaps,
        'indel_indep': indel_indep
    }
    return summary
Esempio n. 16
0
    def __init__(self, TreePath, NeedsToBeCogentModded):
        """Load and parse the tree at ``TreePath``.

        TreePath: path of the tree file handed to ``LoadTree`` (or to
            ``fixUpFileForCogent`` first, see below).
        NeedsToBeCogentModded: when true, the file is first passed through
            ``fixUpFileForCogent`` and the resulting file is loaded instead.

        Parsing is best-effort: no exception escapes the constructor.  On
        failure ``self.Parsed`` is False and ``self.ParseError`` holds the
        exception that stopped parsing.  On success ``self.ParseError``
        stays None.
        """
        self.Parsed = True  #used to determine if the full analysis can be conducted
        # Exception that aborted parsing, kept so failures can be diagnosed.
        self.ParseError = None

        try:
            self.TreePath = TreePath
            self.NeedsToBeCogentModded = NeedsToBeCogentModded

            self.CogentTree = None

            #if the internal nodes need to be renamed, then it is done according to the "FixUpFileForCogent" method
            if self.NeedsToBeCogentModded:
                cogentFixUp = fixUpFileForCogent(self.TreePath)
                self.CogentTreeFile = cogentFixUp[0]
                self.CogentInputTreeString = cogentFixUp[1]

                self.CogentTree = LoadTree(self.CogentTreeFile.name)

            else:

                self.CogentTree = LoadTree(self.TreePath)

            #prepares an input string for FastML
            self.FastMLInputTreeString = self.FixUpFileForFastML(
                self.CogentTree)

            #executes method to fully parse tree, then sets all returned variables as class variables
            CogentNodesLeavesBranches = completeNodesLeavesBranches(
                self.CogentTree)
            self.NodeKey_L = CogentNodesLeavesBranches['NodeKey_L']
            self.LeafKey_L = CogentNodesLeavesBranches['LeafKey_L']
            self.UpperKey_L = CogentNodesLeavesBranches['UpperKey_L']
            self.TopKey = CogentNodesLeavesBranches['TopKey']
            self.BranchKey_L = CogentNodesLeavesBranches['BranchKey_L']
            self.Nodes_D = CogentNodesLeavesBranches['Nodes_D']

            #executes quick run of FastML to get FastML's naming convention of internal nodes
            self.FastMLOutputTreeString = executeFastML(
                self.getTempFASTAFile(), self.FastMLInputTreeString, True)

            #prepares the FastMLToOriginalMatchedNodes_D
            self.MatchNodes()

        except Exception as e:
            # Deliberately not re-raised: callers check self.Parsed.  The
            # exception is recorded rather than discarded.
            self.Parsed = False
            self.ParseError = e
Esempio n. 17
0
 def setUp(self):
     """Set up the small-tree fixture: name, newick variants, tips, tree."""
     self.name = 'small tree - '
     self.newick = '(((Human,HowlerMon),Mouse),NineBande,DogFaced);'
     self.newick_sorted = '(DogFaced,((HowlerMon,Human),Mouse),NineBande);'
     self.newick_reduced = '((HowlerMon,Mouse),NineBande,DogFaced);'
     # Tip names are stored in sorted order.
     self.otu_names = sorted(
         ['NineBande', 'Mouse', 'HowlerMon', 'DogFaced'])
     self.tree = LoadTree(treestring=self.newick)
 def test_setConstantLengths(self):
     """lengths held constant keep the values written in the tree string"""
     tree = LoadTree(treestring='((a:1,b:2):3,(c:4,d:5):6,e:7);')
     lf = self.model.makeLikelihoodFunction(tree)
     lf.setParamRule('length', is_const=True)
     lf.setAlignment(self.al)
     # Check two of the tip lengths against the newick values.
     for edge, expected in (('b', 2), ('d', 5)):
         self.assertEqual(lf.getParamValue('length', edge), expected)
Esempio n. 19
0
def build_tree(tree_string, bl1, bl2, r):
    """build a PyCogent tree object from a string and branch lengths

    ``tree_string`` is a %-format template with six slots.  They are
    filled, in order, with bl1, bl2, r/2, bl1, bl2, r/2.
    """
    # we use r/2.0 because PyCogent defaults to adding a branch of
    # length 1 if you don't explicitly specify it
    # having 2 branches of r/2.0 keeps our internal branch at r
    half_r = r / 2.0
    lengths = (bl1, bl2, half_r) * 2
    return LoadTree(treestring=tree_string % lengths)
Esempio n. 20
0
 def test_simulateAlignment2(self):
     "Simulate alignment with dinucleotide model"
     observed = LoadSeqs(data={'a': 'ggaatt', 'c': 'cctaat'})
     tree = LoadTree(treestring="(a,c);")
     dinucleotide = substitution_model.Dinucleotide(mprob_model='tuple')
     controller = dinucleotide.makeParamController(tree)
     controller.setAlignment(observed)
     # The simulated alignment is expected to have length 6.
     self.assertEqual(len(controller.simulateAlignment()), 6)
Esempio n. 21
0
    def test_getsubtree(self):
        """testing getting a subtree"""
        observed = self.tree.unrooted().getSubTree(self.otu_names)
        expected = LoadTree(treestring=self.newick_reduced).unrooted()

        # Same number of children at the top node, and same string form.
        self.assertEqual(len(observed.Children), len(expected.Children))
        self.assertEqual(str(observed), str(expected))
Esempio n. 22
0
def different_tree_simulate_alignment(tree_information_list, all_trees):
    """Simulate one alignment per tree description and join them.

    input:

    tree_information_list -- list of six-item entries, for example
        [[p1,q1,r1,s1,t1,tree1],[p1,q1,r1,s1,t1,tree2]].  All six items
        are passed to ``simulate_alignment_treefixed``.  The last item is
        an index into ``all_trees``; per the original notes:
            (a,b),(c,d)-->0
            (a,c),(b,d)-->1
            (a,d),(b,c)-->2
    all_trees -- sequence of %-format tree string templates.

    output:

    (aln, true_tree).  ``aln`` is the simulated alignments added together
    in input order.  ``true_tree`` is built from the entry with the
    largest fourth item (the alignment length, per the original notes),
    with the first such entry winning ties.
    """
    # Simulate one alignment for every entry.
    alignments = [
        simulate_alignment_treefixed(all_trees, info[0], info[1], info[2],
                                     info[3], info[4], info[5])
        for info in tree_information_list
    ]

    # Put all the alignments together.
    aln = alignments[0]
    for extra in alignments[1:]:
        aln = aln + extra

    # Find the entry with the largest fourth item.  The strict comparison
    # keeps the first one when several are equal.
    longest = 0
    for position, info in enumerate(tree_information_list):
        if info[3] > tree_information_list[longest][3]:
            longest = position
    chosen = tree_information_list[longest]

    # Build the true tree from the chosen entry's template and lengths.
    template = all_trees[chosen[5]]
    true_tree_bl = template % (chosen[0], chosen[1], chosen[2] / 2.0,
                               chosen[0], chosen[1], chosen[2] / 2.0)
    true_tree = LoadTree(treestring=true_tree_bl)

    return (aln, true_tree)
Esempio n. 23
0
def test_distribution():
    """distribution should return empirical distribution for DNA sequence"""
    mouse_only = get_aln('General', 1031).takeSeqs(('Mouse', ))
    observed = jsd.distribution(mouse_only.getSeq('Mouse'))

    # Reference values: motif probabilities a GTR likelihood function
    # takes from the same single-sequence alignment.
    single_tip = LoadTree(tip_names=('Mouse', ))
    lf = GTR().makeLikelihoodFunction(single_tip)
    lf.setMotifProbsFromData(mouse_only)
    expected = lf.getMotifProbs()

    assert_array_almost_equal(array(expected), array(observed))
Esempio n. 24
0
 def use_root_seq(root_sequence):
     """Simulate from ``root_sequence`` and check it comes back as 'root'.

     ``self`` is not a parameter; it is taken from the enclosing scope.
     """
     observed = LoadSeqs(data={'a': 'ggaatt', 'c': 'cctaat'})
     tree = LoadTree(treestring="(a,c);")
     dinucleotide = substitution_model.Dinucleotide(mprob_model='tuple')
     controller = dinucleotide.makeParamController(tree)
     controller.setAlignment(observed)
     simulated = controller.simulateAlignment(exclude_internal=False,
                                              root_sequence=root_sequence)
     self.assertEqual(str(simulated.NamedSeqs['root']), str(root_sequence))
Esempio n. 25
0
    def setUp(self):
        """Create the substitution model and load the brca1_5 data and tree."""
        self.submodel = Nucleotide(predicates={'beta': 'transition'},
                                   equal_motif_probs=True,
                                   model_gaps=False,
                                   do_scaling=True)

        # The alignment is loaded with the model's own MolType.
        alignment_path = os.path.join(data_path, 'brca1_5.paml')
        self.data = LoadSeqs(filename=alignment_path,
                             moltype=self.submodel.MolType)

        tree_path = os.path.join(data_path, 'brca1_5.tree')
        self.tree = LoadTree(filename=tree_path)
Esempio n. 26
0
 def setUp(self):
     """Set up the big-tree fixture: name, newick strings, tips and tree."""
     self.name = 'big tree - '
     self.newick = '((((((((FlyingFox,DogFaced),((FreeTaile,LittleBro),(TombBat,RoundEare))),(FalseVamp,LeafNose)),(((Horse,Rhino),(Pangolin,(Cat,Dog))),(Llama,(Pig,(Cow,(Hippo,(SpermWhal,HumpbackW))))))),(Mole,Hedgehog)),(TreeShrew,(FlyingLem,((Jackrabbit,(FlyingSqu,(OldWorld,(Mouse,Rat)))),(Galago,(HowlerMon,(Rhesus,(Orangutan,(Gorilla,(Human,Chimpanzee)))))))))),(((NineBande,HairyArma),(Anteater,Sloth)),(((Dugong,Manatee),((AfricanEl,AsianElep),(RockHyrax,TreeHyrax))),(Aardvark,((GoldenMol,(Madagascar,Tenrec)),(LesserEle,GiantElep)))))),(caenolest,(phascogale,(wombat,bandicoot))));'
     self.newick_reduced = '(((((TombBat,(((Horse,Rhino),(Pangolin,Cat)),(Pig,SpermWhal))),Hedgehog),(Orangutan,Gorilla)),((HairyArma,Sloth),((Manatee,AsianElep),GoldenMol))),bandicoot);'
     # Tip names are stored in sorted order.
     self.otu_names = sorted([
         'Horse', 'TombBat', 'Rhino', 'Pig', 'AsianElep', 'SpermWhal',
         'Cat', 'Gorilla', 'Orangutan', 'bandicoot', 'Hedgehog', 'Sloth',
         'HairyArma', 'Manatee', 'GoldenMol', 'Pangolin'
     ])
     self.tree = LoadTree(treestring=self.newick)
Esempio n. 27
0
def ml(doc, model='GNC', gc=None, outgroup=None, neutral=None, **kw):
    """Fit ``model`` to the alignment and tree described by ``doc``.

    ``doc['aln']`` and ``doc['tree']`` supply the alignment and tree as
    text.  Terminal stop codons are removed, and only codon columns made
    up entirely of DNA characters are kept before fitting.

    Returns a dict with the flattened likelihood function ('lf'), the
    second value returned by ``_fit`` ('time'), the model name and the
    genetic code name.  Extra keyword arguments are accepted and ignored.
    """
    aln = LoadSeqs(data=doc['aln'].encode('utf-8'), moltype=DNA)
    tree = LoadTree(treestring=doc['tree'].encode('utf-8'))

    code = get_genetic_code(gc)

    def only_dna(column):
        return set(''.join(column)) <= set(DNA)

    # Trim terminal stop codons, then drop columns with non-DNA states.
    trimmed = aln.withoutTerminalStopCodons(code)
    aln = trimmed.filtered(only_dna, motif_length=3)

    flat_lf, elapsed = _fit(aln, tree, model, code, outgroup, neutral)
    return {'lf': flat_lf, 'time': elapsed, 'model': model, 'gc': code.Name}
Esempio n. 28
0
def test_distribution():
    """distribution should return empirical distribution for DNA sequence"""
    fasta_path = os.path.join(get_data_dir(), 'General_1031.fasta.gz')
    with GzipFile(fasta_path) as handle:
        raw = handle.read()
    mouse_only = Alignment(data=raw).takeSeqs(('Mouse', ))
    observed = jsd.distribution(mouse_only.getSeq('Mouse'))

    # Reference values: motif probabilities a GTR likelihood function
    # takes from the same single-sequence alignment.
    single_tip = LoadTree(tip_names=('Mouse', ))
    lf = GTR().makeLikelihoodFunction(single_tip)
    lf.setMotifProbsFromData(mouse_only)
    expected = lf.getMotifProbs()

    assert_array_almost_equal(array(expected), array(observed))
 def test_pairwise_clock(self):
     """a local clock over tips a and b gives them equal lengths"""
     pair = LoadSeqs(data={'a':'agct','b':'ggct'})
     two_tip_tree = LoadTree(treestring='(a,b);')
     dinucleotide = cogent.evolve.substitution_model.Dinucleotide(
             mprob_model='tuple', model_gaps=True,
             equal_motif_probs=True, do_scaling=True)
     lf = dinucleotide.makeLikelihoodFunction(two_tip_tree)
     lf.setLocalClock('a','b')
     lf.setAlignment(pair)
     lf.optimise(local=True)
     fitted = lf.getParamValueDict(['edge'], params=['length'])
     self.assertAlmostEqual(lf.getLogLikelihood(),-10.1774488956)
     lengths = fitted['length']
     self.assertEqual(lengths['a'],lengths['b'])
Esempio n. 30
0
def main():
    """Run BayesTraits on the input tree and write the reconstructions.

    Reads the tree named by ``opts.input_tree``, writes the generated
    BayesTraits commands to ./bayestraits_commands.txt, and runs
    BayesTraits on the matching .nexus file with ``opts.input_trait_data``.
    The parsed reconstruction output is written to
    ./bayestrait_reconstruction.trait_table.  Stderr, the exit status and
    timing information are printed.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    start_time = datetime.now()

    t = LoadTree(opts.input_tree)
    # Number the tips in iteration order; BayesTraits commands use these.
    translation_dict = {tip.Name: i for i, tip in enumerate(t.iterTips())}

    single_rate = False

    #Generate commands telling BayesTraits which nodes to reconstruct
    bayestraits_commands = make_bayestraits_script(t,
                                                   translation_dict,
                                                   comments=False,
                                                   single_rate=single_rate)

    #TODO: make this dynamic
    #Temporarily assuming there is a nexus file available
    nexus_fp = opts.input_tree.rsplit(".", 1)[0] + ".nexus"
    command_fp = "./bayestraits_commands.txt"
    outfile = "./bayestrait_reconstruction.trait_table"

    # The context manager closes the file before BayesTraits reads it.  The
    # original also re-opened it for reading and never used or closed that
    # handle, so that leaked handle is dropped.
    with open(command_fp, "w+") as command_file:
        command_file.writelines(bayestraits_commands)

    bayestraits = BayesTraits()
    bayestraits_result = bayestraits(data=(nexus_fp, opts.input_trait_data,
                                           command_fp))
    try:
        # Single-argument print() gives the same output under the Python 2
        # print statement and the Python 3 print function.
        print("StdErr: %s" % (bayestraits_result["StdErr"].read(),))
        print("Return code: %s" % (bayestraits_result["ExitStatus"],))

        results = parse_reconstruction_output(
            bayestraits_result['StdOut'].readlines())

        #Reconstruction results
        with open(outfile, "w+") as f:
            f.writelines(results)

        end_time = datetime.now()
        print("Start time: %s" % (start_time,))
        print("End time: %s" % (end_time,))
        print("Time to reconstruct: %s" % (end_time - start_time,))
    finally:
        # Clean up even when parsing or writing fails.
        bayestraits_result.cleanUp()