def test_calculation(self):
    """Check the summed 'geo' inter-tree distance matrix against a reference value."""
    data_dir = os.path.join(thisdir, 'data')
    cache_dir = os.path.join(data_dir, 'cache')
    collection = treeCl.Collection(
        input_dir=data_dir,
        param_dir=cache_dir,
        file_format='phylip',
        show_progress=False,
    )

    distances = collection.get_inter_tree_distances('geo', show_progress=False)

    # The expected total is a previously recorded reference value.
    total = distances.df.values.sum()
    self.assertAlmostEqual(total, 412.70677069540181)
def test_can_run_on_dna(self):
    """Check that calc_trees sets an ML tree on record 0 of the DNA alignments."""
    dna_dir = os.path.join(thisdir, 'data', 'dna_alignments')
    self.c = treeCl.Collection(
        input_dir=dna_dir,
        file_format='phylip',
        show_progress=False,
    )

    # Only the first alignment is processed.
    self.c.calc_trees(indices=[0], model='GTRGAMMA', show_progress=False)

    # assertIsNotNone states the intent directly and gives a clearer failure
    # message than assertFalse(x is None).
    self.assertIsNotNone(self.c[0].parameters.ml_tree)
def setUp(self):
    """Load the test collection with its trees and wrap the first two as treeCl.Tree."""
    data_dir = os.path.join(thisdir, 'data')
    self.c = treeCl.Collection(
        input_dir=data_dir,
        trees_dir=os.path.join(data_dir, 'trees'),
        file_format='phylip',
        show_progress=False,
    )

    # Two trees taken from the first two records, for use by the test methods.
    self.tree1 = treeCl.Tree(self.c[0].tree)
    self.tree2 = treeCl.Tree(self.c[1].tree)
def test_read_trees(self):
    """Check the ML tree read via trees_dir for record 0 against a known prefix."""
    data_dir = os.path.join(thisdir, 'data')
    self.c = treeCl.Collection(
        input_dir=data_dir,
        trees_dir=os.path.join(data_dir, 'trees'),
        file_format='phylip',
        show_progress=False,
    )

    # Only the first 72 characters of the tree string are compared.
    expected_prefix = '((((Sp1:1.48316688535948748573,(Sp4:1.16694627918414717271,((Sp8:0.00749'
    first_record = self.c[0]
    self.assertEqual(first_record.parameters.ml_tree[:72], expected_prefix)
def test_read_parameters(self):
    """Check the NJ tree read via param_dir for record 0 against a known prefix."""
    data_dir = os.path.join(thisdir, 'data')
    self.c = treeCl.Collection(
        input_dir=data_dir,
        param_dir=os.path.join(data_dir, 'cache'),
        file_format='phylip',
        show_progress=False,
    )

    # Only the first 72 characters of the tree string are compared.
    expected_prefix = '((((Sp1:1.47856,(Sp4:1.20999,((Sp8:0.00595845,Sp9:0.00469589):0.27853,Sp'
    first_record = self.c[0]
    self.assertEqual(first_record.parameters.nj_tree[:72], expected_prefix)
def test_scorer_can_write(self):
    """Check that Scorer.write_partition leaves at least one '.phy' file in workingdir."""
    import glob

    data_dir = os.path.join(thisdir, 'data')
    collection = treeCl.Collection(
        input_dir=data_dir,
        param_dir=os.path.join(data_dir, 'cache'),
        file_format='phylip',
        show_progress=False,
    )
    scorer = treeCl.Scorer(
        collection,
        cache_dir=self.workingdir,
        task_interface=treeCl.tasks.RaxmlTaskInterface(),
    )

    # Fifteen membership labels: three groups (0, 1, 2) of five each.
    membership = [0] * 5 + [1] * 5 + [2] * 5
    scorer.write_partition(treeCl.Partition(membership))

    # check files were written
    written = glob.glob(os.path.join(self.workingdir, '*.phy'))
    self.assertTrue(len(written) > 0)
def _is_nonempty_file(filename):
    """Return True when *filename* exists and is larger than zero bytes."""
    return os.path.exists(filename) and os.path.getsize(filename) > 0


def generate_npbs(path, i):
    """Write a permuted copy of the alignments in *path* into the working directory.

    The working directory is ``get_dirs(path, i)['wdir']``.  The work is
    skipped when every record of the collection already has a non-empty
    ``<name>.phy`` (or ``<name>.phy.bz2``) file there; each missing or empty
    file is logged as an error.

    Otherwise ``permuted_copy()`` of the collection is taken and each of its
    records is written as ``<name>.phy``, then rewritten in place through
    ``AlignIO.convert`` using the 'phylip-relaxed' format.

    :param path: directory of phylip alignments to load.
    :param i: index forwarded to ``get_dirs`` to select the working directory.
    :return: None
    """
    c = treeCl.Collection(input_dir=path, file_format='phylip')
    working_dir = get_dirs(path, i)['wdir']

    # Check if work already done.  Every record is inspected (no early exit)
    # so that all missing files get reported.
    work_done = True
    for rec in c:
        looking_for = '{}.phy'.format(os.path.join(working_dir, rec.name))
        if not (_is_nonempty_file(looking_for)
                or _is_nonempty_file(looking_for + '.bz2')):
            logger.error("File not found or is empty: {}".format(looking_for))
            work_done = False

    if work_done:
        return

    npbs = c.permuted_copy()
    if not os.path.exists(working_dir):
        os.mkdir(working_dir)

    for rec in npbs:
        target = '{}.phy'.format(os.path.join(working_dir, rec.name))
        staging = '{}.phy_'.format(os.path.join(working_dir, rec.name))

        # NOTE(review): meaning of the third positional argument (True) is
        # defined by write_alignment -- confirm against its signature.
        rec.write_alignment(target, 'phylip', True)
        AlignIO.convert(target, 'phylip-relaxed', staging, 'phylip-relaxed')

        # Replace the original with the converted file.  os.rename avoids the
        # shell entirely, so paths containing spaces or shell metacharacters
        # are handled correctly and failures raise instead of being ignored.
        os.rename(staging, target)
#!/usr/bin/env python
# Script: wait briefly, load a phylip collection and run calc_pll_trees on it.
import time

import treeCl

# Directory of phylip alignments to load (machine-specific path).
INPUT_DIR = '/homes/kgori/scratch/simtest4'

# Fixed 15 second delay before any work starts.
time.sleep(15)

c = treeCl.Collection(
    input_dir=INPUT_DIR,
    file_format='phylip',
)
c.calc_pll_trees()
# Tutorial script: loading alignments and calculating trees with treeCl.
# The bare triple-quoted strings below are the tutorial's running commentary.
import treeCl

""" The first point of call is the treeCl.Collection class. This handles loading your data, and calculating the trees and distances that will be used later. This is how to load your data. This should be a directory full of sequence alignments in fasta '*.fas' or phylip '*.phy' formats. These can also be zipped using gzip or bzip2, treeCl will load them directly. """

c = treeCl.Collection(
    input_dir='input_dir',
    file_format='phylip',
)

""" Now it's time to calculate some trees. The simplest way to do this is """

c.calc_trees()

""" This uses RAxML to infer a tree for each alignment. We can pass arguments to RAxML using keywords. """

c.calc_trees(
    executable='raxmlHPC-PTHREADS-AVX',  # specify raxml binary to use
    threads=8,  # use multithreaded raxml
    model='PROTGAMMAWAGX',  # this model of evolution
    fast_tree=True,  # use raxml's experimental fast tree search option
)

""" We can use PhyML instead of RAxML. Switching programs is done using a TaskInterface """
def setUp(self):
    """Load the test collection with param_dir pointing at data/cache."""
    data_dir = os.path.join(thisdir, 'data')
    cache_dir = os.path.join(data_dir, 'cache')
    self.c = treeCl.Collection(
        input_dir=data_dir,
        param_dir=cache_dir,
        file_format='phylip',
        show_progress=False,
    )
# Exploratory script: load a phylip collection and print a few results
# computed for the record at index 5.
import treeCl

c = treeCl.Collection(input_dir="/Users/kgori/scratch/simtest4",
                      file_format="phylip")

# Single-argument print(...) calls produce the same output on Python 2 and
# Python 3; the former print statements were a SyntaxError on Python 3.
print(c[5].get_distances())
print(c[5].chkdst())
print(c[5].get_bionj_tree())

# Disabled experiment, kept for reference:
#conc = treeCl.Concatenation(c,[5,6,7,8,9])
#part = conc.qfile(protein_model='LGX')
#p = conc.alignment.pll_get_instance(part, c[5].tree, 6)
def get_collection(path, i):
    """Return a treeCl.Collection of the phylip files in the working directory.

    The working directory is the 'wdir' entry of ``get_dirs(path, i)``.
    """
    dirs = get_dirs(path, i)
    return treeCl.Collection(input_dir=dirs['wdir'], file_format='phylip')