def analyse(model, alignment_path, tree_path, branchlengths, cmdline_extras):
    """Run one RAxML analysis of `model` on the alignment with a fixed tree.

    The command line is assembled from the branch-length flag, the
    alignment and tree paths, the model's RAxML arguments, an analysis ID
    and any extra options, and is then handed to run_raxml(). RAxML is told
    (via -w) to write into the directory that contains the alignment. The
    original author warns that this will overwrite existing output.

    branchlengths must be either 'linked' or 'unlinked'; any other value is
    logged as an error and util.PartitionFinderError is raised.
    """
    model_args = models.get_model_commandline(model)

    if branchlengths not in ('linked', 'unlinked'):
        log.error("Unknown option for branchlengths: %s", branchlengths)
        raise util.PartitionFinderError

    # 'linked'   -> constrain all branch lengths to be equal
    # 'unlinked' -> let branch lengths vary among subsets
    bl_flag = ' -f B ' if branchlengths == 'linked' else ' -f e '

    extras = check_defaults(cmdline_extras)

    # RAxML does not append alignment names to its output automatically
    # (unlike PhyML), so an explicit run name is generated here.
    run_name = raxml_analysis_ID(alignment_path, model)

    # Force RAxML to write to the directory with the alignment in it.
    output_dir = os.path.abspath(os.path.dirname(alignment_path))

    command = " %s -s '%s' -t '%s' %s -n %s -w '%s' %s" % (
        bl_flag,
        alignment_path,
        tree_path,
        model_args,
        run_name,
        output_dir,
        extras,
    )
    run_raxml(command)
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Estimate branch lengths on a fixed topology with RAxML (-f e).

    The topology file is first copied to 'topology_tree.phy' in the same
    directory and the original file is removed. RAxML is then run on the
    (unpartitioned) alignment with a model chosen by `datatype`:

    * "DNA"        -> -m GTRGAMMA
    * "protein"    -> -m PROTGAMMALG
    * "morphology" -> the arguments for the first model in the_config.models

    Parameters:
        alignment_path: path to the alignment file passed to RAxML (-s).
        topology_path: path to the starting topology; this file is deleted
            after being copied.
        datatype: one of "DNA", "protein" or "morphology".
        cmdline_extras: extra RAxML options, passed through check_defaults().

    Returns:
        The path of 'RAxML_result.BLTREE' in the alignment's directory.

    Raises:
        util.PartitionFinderError: if `datatype` is not recognised, or if
            the expected RAxML result file does not exist after the run.
    """
    cmdline_extras = check_defaults(cmdline_extras)
    dir_path, _ = os.path.split(topology_path)
    tree_path = os.path.join(dir_path, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, tree_path)
    util.dupfile(topology_path, tree_path)
    os.remove(topology_path)  # saves headaches later...

    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s " % (
            alignment_path, tree_path, os.path.abspath(dir_path), cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s " % (
            alignment_path, tree_path, os.path.abspath(dir_path), cmdline_extras)
    elif datatype == "morphology":
        # LOOK OUT: this relies on the assumption that we can only specify a
        # single model for morphology analyses...
        # Choose a model for the data - necessary for RAxML to load the data
        # properly.
        model = models.get_model_commandline(the_config.models[0])
        log.info("Estimating %s branch lengths on tree using RAxML",
                 the_config.models[0])
        # Paths are quoted exactly as in the DNA/protein branches so that
        # paths containing spaces work for morphology data too.
        command = "-f e -s '%s' -t '%s' %s -K MK -n BLTREE -w '%s' %s" % (
            alignment_path, tree_path, model, os.path.abspath(dir_path),
            cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    run_raxml(command)

    # NOTE(review): RAxML is told to write into the topology's directory
    # (-w above) but the result is looked up in the alignment's directory;
    # presumably these are always the same directory - confirm with callers.
    aln_dir, _ = os.path.split(alignment_path)
    tree_path = os.path.join(aln_dir, "RAxML_result.BLTREE")

    if not os.path.exists(tree_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'" % (tree_path))
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", tree_path)
    with open(tree_path, 'r') as fin:
        log.debug('%s', fin.read())

    log.info("Branchlength estimation finished")

    # Now return the path of the final tree with branch lengths
    return tree_path
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Re-estimate branch lengths on an existing topology using RAxML.

    A working copy of the topology named 'topology_tree.phy' is created next
    to the original, and the original file is then deleted. RAxML is run
    with '-f e' on the (unpartitioned) alignment using GTRGAMMA for "DNA",
    PROTGAMMALG for "protein", or the first configured model for
    "morphology".

    Returns the path to 'RAxML_result.BLTREE' in the alignment's directory.
    Raises util.PartitionFinderError for an unrecognised datatype or when
    that result file is missing after the run.
    """
    extras = check_defaults(cmdline_extras)

    # Work on a renamed copy of the topology and drop the original
    # (the original author notes this "saves headaches later").
    topo_dir = os.path.dirname(topology_path)
    working_tree = os.path.join(topo_dir, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, working_tree)
    util.dupfile(topology_path, working_tree)
    os.remove(topology_path)

    # Pick the command template and its fields according to the data type.
    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        template = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s "
        fields = (alignment_path, working_tree,
                  os.path.abspath(topo_dir), extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        template = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s "
        fields = (alignment_path, working_tree,
                  os.path.abspath(topo_dir), extras)
    elif datatype == "morphology":
        # This relies on only a single model being specified for morphology
        # analyses; RAxML needs a model to load the data properly.
        model_args = models.get_model_commandline(the_config.models[0])
        log.info("Estimating %s branch lengths on tree using RAxML", the_config.models[0])
        template = "-f e -s '%s' -t '%s' %s -K MK -n BLTREE -w '%s' %s"
        fields = (alignment_path, working_tree, model_args,
                  os.path.abspath(topo_dir), extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    run_raxml(template % fields)

    # The finished tree is expected alongside the alignment.
    result_path = os.path.join(os.path.dirname(alignment_path),
                               "RAxML_result.BLTREE")
    if not os.path.exists(result_path):
        log.error("RAxML tree topology should be here but can't be be found: '%s'" % (result_path))
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", result_path)
    with open(result_path, 'r') as handle:
        log.debug('%s', handle.read())

    log.info("Branchlength estimation finished")
    return result_path
def make_topology(alignment_path, datatype, cmdline_extras):
    """Make a maximum-parsimony (MP) starting tree with RAxML (-y).

    Parameters:
        alignment_path: path to the alignment file passed to RAxML (-s).
        datatype: one of "DNA", "protein" or "morphology".
        cmdline_extras: extra RAxML options, passed through check_defaults().

    Returns:
        The path of 'RAxML_parsimonyTree.MPTREE' in the alignment's
        directory.

    Raises:
        util.PartitionFinderError: if `datatype` is not recognised.
        RaxmlError: if the expected tree file does not exist after the run.
    """
    log.info("Making MP tree for %s", alignment_path)

    cmdline_extras = check_defaults(cmdline_extras)

    # First get the MP topology like this (-p is a hard-coded random number
    # seed):
    if datatype == "DNA":
        command = "-y -s '%s' -m GTRGAMMA -n MPTREE -p 123456789 %s" % (
            alignment_path, cmdline_extras)
    elif datatype == "protein":
        command = "-y -s '%s' -m PROTGAMMALG -n MPTREE -p 123456789 %s" % (
            alignment_path, cmdline_extras)
    elif datatype == "morphology":
        # LOOK OUT: this relies on the assumption that we can only specify a
        # single model for morphology analyses...
        # Choose a model for the data - necessary for RAxML to load the data
        # properly.
        model = models.get_model_commandline(the_config.models[0])
        # The alignment path is quoted exactly as in the DNA/protein
        # branches so that paths containing spaces work here too.
        command = "-y -s '%s' %s -K MK -n MPTREE -p 123456789 %s" % (
            alignment_path, model, cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    # Force raxml to write to the dir with the alignment in it
    aln_dir, _ = os.path.split(alignment_path)
    command = ''.join([command, " -w '%s'" % os.path.abspath(aln_dir)])

    run_raxml(command)

    tree_path = os.path.join(aln_dir, "RAxML_parsimonyTree.MPTREE")

    if not os.path.exists(tree_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'" % (tree_path))
        raise RaxmlError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", tree_path)
    with open(tree_path, 'r') as fin:
        log.debug('%s', fin.read())

    log.info("Topology estimation finished")

    return tree_path
def make_topology(alignment_path, datatype, cmdline_extras):
    """Build a maximum-parsimony tree with RAxML to start the analysis.

    RAxML is run with '-y' on the alignment, using GTRGAMMA for "DNA",
    PROTGAMMALG for "protein", or the first configured model for
    "morphology", and is told (-w) to write into the alignment's directory.

    Returns the path to 'RAxML_parsimonyTree.MPTREE' in that directory.
    Raises util.PartitionFinderError for an unrecognised datatype and
    RaxmlError when the tree file is missing after the run.
    """
    log.info("Making MP tree for %s", alignment_path)
    extras = check_defaults(cmdline_extras)

    # -p is a hard-coded random number seed.
    if datatype == "DNA":
        command = "-y -s '%s' -m GTRGAMMA -n MPTREE -p 123456789 %s" % (
            alignment_path, extras)
    elif datatype == "protein":
        command = "-y -s '%s' -m PROTGAMMALG -n MPTREE -p 123456789 %s" % (
            alignment_path, extras)
    elif datatype == "morphology":
        # This relies on only a single model being specified for morphology
        # analyses; RAxML needs a model to load the data properly.
        model_args = models.get_model_commandline(the_config.models[0])
        command = "-y -s '%s' %s -K MK -n MPTREE -p 123456789 %s" % (
            alignment_path, model_args, extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    # Make RAxML write into the directory that holds the alignment.
    output_dir = os.path.dirname(alignment_path)
    command += " -w '%s'" % os.path.abspath(output_dir)

    run_raxml(command)

    mp_tree = os.path.join(output_dir, "RAxML_parsimonyTree.MPTREE")
    if not os.path.exists(mp_tree):
        log.error("RAxML tree topology should be here but can't be be found: '%s'" % (mp_tree))
        raise RaxmlError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", mp_tree)
    with open(mp_tree, 'r') as handle:
        log.debug('%s', handle.read())

    log.info("Topology estimation finished")
    return mp_tree
def make_ml_topology(alignment_path, datatype, cmdline_extras, scheme, cpus):
    """Make an ML tree from a given partitioning scheme using RAxML.

    Two RAxML runs are made through run_raxml_pthreads():

    1. a fast tree search (-f E) that is expected to produce
       'RAxML_fastTree.fastTREE' in the alignment's directory;
    2. a branch-length estimation (-f e) on that tree, expected to produce
       'RAxML_result.BLTREE'.

    For "DNA" and "protein" data a partition file written by
    write_partition_file() is passed with -q; for "morphology" the first
    model in the_config.models is used instead.

    Parameters:
        alignment_path: path to the alignment file passed to RAxML (-s).
        datatype: one of "DNA", "protein" or "morphology".
        cmdline_extras: extra RAxML options; only used by the morphology
            fast-tree command.
        scheme: partitioning scheme handed to write_partition_file().
        cpus: passed through to run_raxml_pthreads().

    Returns:
        The path of 'RAxML_result.BLTREE' in the alignment's directory.

    Raises:
        util.PartitionFinderError: if `datatype` is not recognised, or if
            the final tree file does not exist after the second run.
    """
    log.info(
        "Estimating Maximum Likelihood tree with RAxML fast experimental tree search for %s",
        alignment_path)

    # NOTE(review): this checks the global the_config.datatype while the
    # rest of the function branches on the `datatype` argument; presumably
    # the two always agree - confirm with callers.
    if the_config.datatype != "morphology":
        partition_file = write_partition_file(scheme, alignment_path)

    # First get the ML topology like this (-p is a hard-coded random number
    # seed): we do this to an accuracy of 10 log likelihood units with -e 10
    # and use the rapid ML option in RAxML, -f E.
    if datatype == "DNA":
        log.info("Using a separate GTR+G model for each data block")
        command = " -f E -s '%s' -m GTRGAMMA -O -n fastTREE -# 1 -p 123456789 -q '%s' -e 10 " % (
            alignment_path, partition_file)
    elif datatype == "protein":
        log.info("Using a separate LG+G model for each data block")
        command = " -f E -s '%s' -m PROTGAMMALG -O -n fastTREE -# 1 -p 123456789 -q '%s' -e 10 " % (
            alignment_path, partition_file)
    elif datatype == "morphology":
        model = models.get_model_commandline(the_config.models[0])
        log.info("Using the model specified in the .cfg file")
        # The alignment path is quoted like every other command built in
        # this function so that paths containing spaces work here too.
        command = "-f E -s '%s' %s -n fastTREE -p 123456789 %s" % (
            alignment_path, model, cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    # Force raxml to write to the dir with the alignment in it
    alndir, _ = os.path.split(alignment_path)
    command = ''.join([command, " -w '%s'" % os.path.abspath(alndir)])

    run_raxml_pthreads(command, cpus)

    fast_tree_path = os.path.join(alndir, "RAxML_fastTree.fastTREE")

    # Now we make the branch lengths with a partitioned model without rate
    # multipliers.
    if datatype == "DNA":
        log.info(
            "Estimating GTR+G branch lengths on ML tree using all partitions")
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -O -n BLTREE -p 123456789 -q '%s' -w '%s' -e 1 " % (
            alignment_path, fast_tree_path, partition_file, os.path.abspath(alndir))
    elif datatype == "protein":
        log.info(
            "Estimating LG+G branch lengths on ML tree using all partitions")
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -O -n BLTREE -p 123456789 -q '%s' -w '%s' -e 1 " % (
            alignment_path, fast_tree_path, partition_file, os.path.abspath(alndir))
    elif datatype == "morphology":
        log.info("Estimating branch lengths on ML tree")
        command = "-f e -s '%s' -t '%s' %s -O -n BLTREE -p 123456789 -w '%s' -e 1 " % (
            alignment_path, fast_tree_path, model, os.path.abspath(alndir))
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    run_raxml_pthreads(command, cpus)
    tree_path = os.path.join(alndir, "RAxML_result.BLTREE")

    if not os.path.exists(tree_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'" % (tree_path))
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", tree_path)
    with open(tree_path, 'r') as fin:
        log.debug('%s', fin.read())

    log.info("ML topology estimation finished")

    return tree_path
def make_ml_topology(alignment_path, datatype, cmdline_extras, scheme, cpus):
    """Estimate an ML tree with branch lengths for a partitioning scheme.

    RAxML is run twice via run_raxml_pthreads(): first a fast tree search
    ('-f E'), then a branch-length estimation ('-f e') on the resulting
    'RAxML_fastTree.fastTREE'. Unless the_config.datatype is "morphology",
    a partition file is written with write_partition_file() and passed to
    both runs for "DNA" and "protein" data; "morphology" data uses the
    first configured model instead.

    Returns the path to 'RAxML_result.BLTREE' in the alignment's directory.
    Raises util.PartitionFinderError for an unrecognised datatype or when
    that result file is missing after the second run.
    """
    log.info("Estimating Maximum Likelihood tree with RAxML fast experimental tree search for %s", alignment_path)

    if the_config.datatype != "morphology":
        partition_file = write_partition_file(scheme, alignment_path)

    # Stage 1: fast ML topology search. -p is a hard-coded random number
    # seed; -e 10 asks for an accuracy of 10 log likelihood units.
    if datatype == "DNA":
        log.info("Using a separate GTR+G model for each data block")
        search_cmd = " -f E -s '%s' -m GTRGAMMA -O -n fastTREE -# 1 -p 123456789 -q '%s' -e 10 " % (
            alignment_path, partition_file)
    elif datatype == "protein":
        log.info("Using a separate LG+G model for each data block")
        search_cmd = " -f E -s '%s' -m PROTGAMMALG -O -n fastTREE -# 1 -p 123456789 -q '%s' -e 10 " % (
            alignment_path, partition_file)
    elif datatype == "morphology":
        model = models.get_model_commandline(the_config.models[0])
        log.info("Using the model specified in the .cfg file")
        search_cmd = "-f E -s '%s' %s -n fastTREE -p 123456789 %s" % (
            alignment_path, model, cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    # Make RAxML write into the directory that holds the alignment.
    output_dir = os.path.dirname(alignment_path)
    search_cmd += " -w '%s'" % os.path.abspath(output_dir)

    run_raxml_pthreads(search_cmd, cpus)

    fast_tree = os.path.join(output_dir, "RAxML_fastTree.fastTREE")

    # Stage 2: branch lengths on the fast tree, using a partitioned model
    # without rate multipliers.
    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on ML tree using all partitions")
        bl_cmd = "-f e -s '%s' -t '%s' -m GTRGAMMA -O -n BLTREE -p 123456789 -q '%s' -w '%s' -e 1 " % (
            alignment_path, fast_tree, partition_file, os.path.abspath(output_dir))
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on ML tree using all partitions")
        bl_cmd = "-f e -s '%s' -t '%s' -m PROTGAMMALG -O -n BLTREE -p 123456789 -q '%s' -w '%s' -e 1 " % (
            alignment_path, fast_tree, partition_file, os.path.abspath(output_dir))
    elif datatype == "morphology":
        log.info("Estimating branch lengths on ML tree")
        bl_cmd = "-f e -s '%s' -t '%s' %s -O -n BLTREE -p 123456789 -w '%s' -e 1 " % (
            alignment_path, fast_tree, model, os.path.abspath(output_dir))
    else:
        log.error("Unrecognised datatype: '%s'" % (datatype))
        raise util.PartitionFinderError

    run_raxml_pthreads(bl_cmd, cpus)

    result_path = os.path.join(output_dir, "RAxML_result.BLTREE")
    if not os.path.exists(result_path):
        log.error("RAxML tree topology should be here but can't be be found: '%s'" % (result_path))
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", result_path)
    with open(result_path, 'r') as handle:
        log.debug('%s', handle.read())

    log.info("ML topology estimation finished")
    return result_path