Esempio n. 1
0
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Re-estimate branch lengths on an existing topology using PhyML.

    The topology file is duplicated as ``topology_tree.phy`` in its own
    directory, and PhyML is run with that copy as the input tree (``-u``)
    and the (unpartitioned) alignment as the input data (``-i``).

    Args:
        alignment_path: Path of the alignment handed to PhyML.
        topology_path: Path of the tree file holding the topology.
        datatype: ``"DNA"`` or ``"protein"``; selects the PhyML options.
        cmdline_extras: Extra PhyML command-line text. It is passed through
            ``check_defaults`` before being appended to the command.

    Returns:
        The path produced by ``make_tree_path(alignment_path)``.

    Raises:
        util.PartitionFinderError: If ``datatype`` is not recognised.
    """
    cmdline_extras = check_defaults(cmdline_extras)
    dir_path = os.path.dirname(topology_path)
    tree_path = os.path.join(dir_path, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, tree_path)
    util.dupfile(topology_path, tree_path)

    if datatype == "DNA":
        log.info("Estimating GTR+I+G branch lengths on tree")
        command = "-i '%s' -u '%s' -m GTR -c 4 -a e -v e -o lr -b 0 %s" % (
            alignment_path, tree_path, cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+F branch lengths on tree")
        command = "-i '%s' -u '%s' -m LG -c 1 -v 0 -f m -d aa -o lr -b 0 %s" % (
            alignment_path, tree_path, cmdline_extras)
    else:
        # Fail loudly: with an unknown datatype no PhyML run would happen,
        # yet a tree path would still be handed back to the caller.
        log.error("Unrecognised datatype: '%s'", datatype)
        raise util.PartitionFinderError

    run_phyml(command)

    tree_path = make_tree_path(alignment_path)
    log.info("Branchlength estimation finished")

    # Now return the path of the final tree
    return tree_path
Esempio n. 2
0
def make_branch_lengths(alignment_path, topology_path, datatype,
                        cmdline_extras):
    """Run PhyML to re-estimate branch lengths on a given topology.

    A copy of the topology named ``topology_tree.phy`` is created next to
    the original file and supplied to PhyML as the input tree (``-u``); the
    (unpartitioned) alignment is supplied as the input data (``-i``).

    Args:
        alignment_path: Path of the alignment handed to PhyML.
        topology_path: Path of the tree file holding the topology.
        datatype: ``"DNA"`` or ``"protein"``; selects the PhyML options.
        cmdline_extras: Extra PhyML command-line text, normalised with
            ``check_defaults`` before use.

    Returns:
        The path produced by ``make_tree_path(alignment_path)``.

    Raises:
        util.PartitionFinderError: If ``datatype`` is not recognised.
    """
    cmdline_extras = check_defaults(cmdline_extras)
    dir_path = os.path.dirname(topology_path)
    tree_path = os.path.join(dir_path, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, tree_path)
    util.dupfile(topology_path, tree_path)

    if datatype == "DNA":
        log.info("Estimating GTR+I+G branch lengths on tree")
        command = "-i '%s' -u '%s' -m GTR -c 4 -a e -v e -o lr -b 0 %s" % (
            alignment_path, tree_path, cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+F branch lengths on tree")
        command = "-i '%s' -u '%s' -m LG -c 1 -v 0 -f m -d aa -o lr -b 0 %s" % (
            alignment_path, tree_path, cmdline_extras)
    else:
        # An unknown datatype must not fall through: PhyML would never be
        # run, but the caller would still receive a tree path.
        log.error("Unrecognised datatype: '%s'", datatype)
        raise util.PartitionFinderError

    run_phyml(command)

    tree_path = make_tree_path(alignment_path)
    log.info("Branchlength estimation finished")

    # Now return the path of the final tree
    return tree_path
Esempio n. 3
0
def make_branch_lengths(alignment_path, topology_path, datatype,
                        cmdline_extras):
    """Re-estimate branch lengths on an existing topology using RAxML.

    The topology is copied to ``topology_tree.phy`` in its own directory and
    the original topology file is then removed. RAxML is run on the
    (unpartitioned) alignment with that copy as the input tree (``-t``) and
    the run name ``BLTREE``.

    Args:
        alignment_path: Path of the alignment handed to RAxML (``-s``).
        topology_path: Path of the tree file holding the topology. This
            file is deleted once it has been copied.
        datatype: ``"DNA"``, ``"protein"`` or ``"morphology"``.
        cmdline_extras: Extra RAxML command-line text, normalised with
            ``check_defaults`` before use.

    Returns:
        Path of ``RAxML_result.BLTREE`` in the alignment's directory.

    Raises:
        util.PartitionFinderError: If ``datatype`` is not recognised, or if
            the expected result file does not exist after the run.
    """
    cmdline_extras = check_defaults(cmdline_extras)
    dir_path = os.path.dirname(topology_path)
    tree_path = os.path.join(dir_path, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, tree_path)
    util.dupfile(topology_path, tree_path)
    os.remove(topology_path)  # saves headaches later...

    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s  " % (
            alignment_path, tree_path, os.path.abspath(dir_path),
            cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s " % (
            alignment_path, tree_path, os.path.abspath(dir_path),
            cmdline_extras)
    elif datatype == "morphology":
        # LOOK OUT: this relies on the assumption that we can only specify a
        # single model for morphology analyses...
        # choose a model for the data - necessary for RAxML to load the data
        # properly
        model = models.get_model_commandline(the_config.models[0])
        log.info("Estimating %s branch lengths on tree using RAxML",
                 the_config.models[0])
        # Paths are quoted exactly as in the DNA/protein commands so that
        # directories containing spaces do not split into several arguments.
        # The model text is left unquoted because it is inserted as
        # command-line options, not as a single path.
        command = "-f e -s '%s' -t '%s' %s -K MK -n BLTREE -w '%s' %s" % (
            alignment_path, tree_path, model, os.path.abspath(dir_path),
            cmdline_extras)
    else:
        log.error("Unrecognised datatype: '%s'", datatype)
        raise util.PartitionFinderError

    run_raxml(command)

    # NOTE(review): the result is looked up beside the alignment, while -w
    # points at the topology's directory - this presumably relies on both
    # files living in the same folder; confirm against the callers.
    result_dir = os.path.dirname(alignment_path)
    tree_path = os.path.join(result_dir, "RAxML_result.BLTREE")

    if not os.path.exists(tree_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'",
            tree_path)
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
              tree_path)
    with open(tree_path, 'r') as fin:
        log.debug('%s', fin.read())

    log.info("Branchlength estimation finished")

    # Now return the path of the final tree with branch lengths
    return tree_path
Esempio n. 4
0
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Use RAxML to put branch lengths on an existing topology.

    The topology is duplicated as ``topology_tree.phy`` beside the original,
    the original file is deleted, and RAxML is run on the (unpartitioned)
    alignment under the run name ``BLTREE``.

    Args:
        alignment_path: Alignment file passed to RAxML with ``-s``.
        topology_path: Tree file holding the topology; removed after copying.
        datatype: One of ``"DNA"``, ``"protein"`` or ``"morphology"``.
        cmdline_extras: Extra RAxML command-line text, run through
            ``check_defaults`` first.

    Returns:
        Path of ``RAxML_result.BLTREE`` in the alignment's directory.

    Raises:
        util.PartitionFinderError: For an unrecognised ``datatype`` or when
            the result file is missing after the run.
    """
    cmdline_extras = check_defaults(cmdline_extras)
    topology_dir = os.path.dirname(topology_path)
    fixed_topology = os.path.join(topology_dir, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, fixed_topology)
    util.dupfile(topology_path, fixed_topology)
    os.remove(topology_path)  # saves headaches later...

    # Reject anything we do not know how to build a command for.
    if datatype not in ("DNA", "protein", "morphology"):
        log.error("Unrecognised datatype: '%s'" % datatype)
        raise util.PartitionFinderError

    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        arguments = (alignment_path, fixed_topology,
                     os.path.abspath(topology_dir), cmdline_extras)
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s  " % arguments
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        arguments = (alignment_path, fixed_topology,
                     os.path.abspath(topology_dir), cmdline_extras)
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s " % arguments
    else:
        # Morphology. LOOK OUT: this relies on the assumption that only a
        # single model can be specified for morphology analyses. A model has
        # to be chosen for RAxML to load the data properly.
        model = models.get_model_commandline(the_config.models[0])
        log.info("Estimating %s branch lengths on tree using RAxML", the_config.models[0])
        arguments = (alignment_path, fixed_topology, model,
                     os.path.abspath(topology_dir), cmdline_extras)
        command = "-f e -s '%s' -t '%s' %s -K MK -n BLTREE -w '%s' %s" % arguments

    run_raxml(command)

    # The result is expected next to the alignment.
    result_path = os.path.join(os.path.dirname(alignment_path), "RAxML_result.BLTREE")

    if not os.path.exists(result_path):
        log.error("RAxML tree topology should be here but can't be be found: '%s'" % result_path)
        raise util.PartitionFinderError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", result_path)
    with open(result_path, 'r') as tree_file:
        log.debug('%s', tree_file.read())

    log.info("Branchlength estimation finished")

    # Hand back the path of the tree that now carries branch lengths.
    return result_path
Esempio n. 5
0
def make_branch_lengths(alignment_path, topology_path, datatype,
                        cmdline_extras):
    """Use RAxML to put branch lengths on an existing topology.

    The topology is duplicated as ``topology_tree.phy`` beside the original,
    the original file is deleted, and RAxML is run on the (unpartitioned)
    alignment under the run name ``BLTREE``.

    Args:
        alignment_path: Alignment file passed to RAxML with ``-s``.
        topology_path: Tree file holding the topology; removed after copying.
        datatype: ``"DNA"`` or ``"protein"``. Any other value starts no
            RAxML run, so the result check below is what reports it.
        cmdline_extras: Extra RAxML command-line text, run through
            ``check_defaults`` first.

    Returns:
        Path of ``RAxML_result.BLTREE`` in the alignment's directory.

    Raises:
        RaxmlError: When the result file does not exist after the run.
    """
    cmdline_extras = check_defaults(cmdline_extras)
    topology_dir = os.path.dirname(topology_path)
    fixed_topology = os.path.join(topology_dir, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, fixed_topology)
    util.dupfile(topology_path, fixed_topology)
    os.remove(topology_path)  # saves headaches later...

    # (datatype, progress message, RAxML command template)
    known_runs = (
        ("DNA",
         "Estimating GTR+G branch lengths on tree using RAxML",
         "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s  "),
        ("protein",
         "Estimating LG+G branch lengths on tree using RAxML",
         "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s "),
    )
    for kind, message, template in known_runs:
        if datatype == kind:
            log.info(message)
            run_raxml(template % (alignment_path, fixed_topology,
                                  os.path.abspath(topology_dir),
                                  cmdline_extras))

    # The result is expected next to the alignment.
    result_path = os.path.join(
        os.path.dirname(alignment_path), "RAxML_result.BLTREE")

    if not os.path.exists(result_path):
        log.error(
            "RAxML tree topology should be here but can't be be found: '%s'" %
            result_path)
        raise RaxmlError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ",
              result_path)
    with open(result_path, 'r') as tree_file:
        log.debug('%s', tree_file.read())

    log.info("Branchlength estimation finished")

    # Hand back the path of the tree that now carries branch lengths.
    return result_path
Esempio n. 6
0
def make_branch_lengths(alignment_path, topology_path, datatype, cmdline_extras):
    """Estimate branch lengths for a fixed topology by running RAxML.

    A copy of the topology called ``topology_tree.phy`` is made in the
    topology's directory and the original file is removed. RAxML is then run
    on the (unpartitioned) alignment with the run name ``BLTREE``.

    Args:
        alignment_path: Alignment file passed to RAxML with ``-s``.
        topology_path: Tree file holding the topology; removed after copying.
        datatype: ``"DNA"`` or ``"protein"``. For any other value RAxML is
            not run at all and only the result check below can complain.
        cmdline_extras: Extra RAxML command-line text, run through
            ``check_defaults`` first.

    Returns:
        Path of ``RAxML_result.BLTREE`` in the alignment's directory.

    Raises:
        RaxmlError: When the result file does not exist after the run.
    """
    cmdline_extras = check_defaults(cmdline_extras)
    work_dir = os.path.dirname(topology_path)
    start_tree = os.path.join(work_dir, 'topology_tree.phy')
    log.debug("Copying %s to %s", topology_path, start_tree)
    util.dupfile(topology_path, start_tree)
    os.remove(topology_path)  # saves headaches later...

    command = None
    if datatype == "DNA":
        log.info("Estimating GTR+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m GTRGAMMA -n BLTREE -w '%s' %s  " % (
            alignment_path, start_tree, os.path.abspath(work_dir), cmdline_extras)
    elif datatype == "protein":
        log.info("Estimating LG+G branch lengths on tree using RAxML")
        command = "-f e -s '%s' -t '%s' -m PROTGAMMALG -n BLTREE -w '%s' %s " % (
            alignment_path, start_tree, os.path.abspath(work_dir), cmdline_extras)

    # Only recognised datatypes produce a command to run.
    if command is not None:
        run_raxml(command)

    # The result is expected next to the alignment.
    alignment_dir = os.path.dirname(alignment_path)
    final_tree = os.path.join(alignment_dir, "RAxML_result.BLTREE")

    if not os.path.exists(final_tree):
        log.error("RAxML tree topology should be here but can't be be found: '%s'" % final_tree)
        raise RaxmlError

    log.debug("RAxML tree with branch lengths ('%s') looks like this: ", final_tree)
    with open(final_tree, 'r') as handle:
        log.debug('%s', handle.read())

    log.info("Branchlength estimation finished")

    # Give the caller the path of the tree with branch lengths.
    return final_tree
Esempio n. 7
0
    def make_tree(self, user_path):
        """Locate or build the starting tree and store it in ``self.tree_path``.

        A filtered alignment, restricted to the merged user subsets, is
        written into the start-tree folder. When ``need_new_tree`` asks for
        a new tree, the topology comes from one of three places:

        * ``user_path``, if it is neither ``None`` nor empty;
        * the configured processor's ``make_topology``, if
          ``the_config.no_ml_tree`` equals ``True``;
        * ``raxml.make_ml_topology``, if ``the_config.no_ml_tree`` equals
          ``False``.

        Branch lengths are estimated afterwards in the first two cases only.
        """
        cfg = the_config

        # Write the alignment restricted to the columns named in the
        # user-defined subsets.
        merged_subset = subset_ops.merge_subsets(cfg.user_subsets)
        self.filtered_alignment = SubsetAlignment(self.alignment,
                                                  merged_subset)
        self.filtered_alignment_path = os.path.join(cfg.start_tree_path,
                                                    'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Validate the merged subset against the full alignment.
        subset_ops.check_against_alignment(merged_subset, self.alignment,
                                           cfg)

        # Only the path of the alignment copy is recorded here.
        self.alignment_path = os.path.join(cfg.start_tree_path, 'source.phy')

        # Where the processor would put a tree for the filtered alignment.
        tree_path = cfg.processor.make_tree_path(self.filtered_alignment_path)

        if self.need_new_tree(tree_path):
            log.debug("Estimating new starting tree, no old tree found")

            # Clean the folder, asking for the two alignment files to be kept.
            kept_files = ["filtered_source.phy", "source.phy"]
            util.clean_out_folder(cfg.start_tree_path, keep=kept_files)

            user_tree_given = user_path is not None and user_path != ""

            # The explicit == True / == False tests are deliberate: a value
            # equal to neither matches no branch at all.
            if user_tree_given:
                # Bring the user's topology into the start tree folder.
                log.info("Using user supplied topology at %s" % user_path)
                topology_path = os.path.join(cfg.start_tree_path,
                                             'user_topology.phy')
                util.dupfile(user_path, topology_path)
                estimate_lengths = True
            elif cfg.no_ml_tree == True:
                log.debug("didn't find tree at %s, making a new one" %
                          tree_path)
                topology_path = cfg.processor.make_topology(
                    self.filtered_alignment_path, cfg.datatype,
                    cfg.cmdline_extras)
                estimate_lengths = True
            elif cfg.no_ml_tree == False:
                log.debug(
                    "didn't find tree at %s, making an ML tree with RAxML" %
                    tree_path)

                subset_indices = range(len(cfg.user_subsets))
                tree_scheme = scheme.create_scheme(cfg, "tree_scheme",
                                                   subset_indices)

                topology_path = raxml.make_ml_topology(
                    self.filtered_alignment_path, cfg.datatype,
                    cfg.cmdline_extras, tree_scheme, self.threads)

                # Copy the ML topology under the PhyML tree name so it can
                # be used with PhyML too.
                # TODO: this is a hack, and it would be better to decide on
                # a universal name for the different types of tree we might
                # have.
                topology_dir = os.path.dirname(topology_path)
                phyml_tree = os.path.join(
                    topology_dir, "filtered_source.phy_phyml_tree.txt")
                copyfile(topology_path, phyml_tree)

                estimate_lengths = False

            if estimate_lengths:
                # Estimate branch lengths on the chosen topology.
                tree_path = cfg.processor.make_branch_lengths(
                    self.filtered_alignment_path, topology_path,
                    cfg.datatype, cfg.cmdline_extras)

        self.tree_path = tree_path
        log.debug("Starting tree with branch lengths is here: %s" %
                  self.tree_path)
Esempio n. 8
0
    def make_tree(self, user_path):
        """Prepare the starting tree and record its path on the instance.

        The alignment is first filtered down to the merged user subsets and
        written into the start-tree folder. If ``need_new_tree`` reports that
        a tree is required, a topology is taken from ``user_path`` (when it
        is neither ``None`` nor empty), from the processor's
        ``make_topology`` (``the_config.no_ml_tree`` equal to ``True``) or
        from ``raxml.make_ml_topology`` (``the_config.no_ml_tree`` equal to
        ``False``). Only the first two are followed by a separate
        branch-length estimation. The final path ends up in
        ``self.tree_path``.
        """
        config = the_config

        # Filtered alignment: ONLY the columns defined in the subsets.
        everything = subset_ops.merge_subsets(config.user_subsets)
        self.filtered_alignment = SubsetAlignment(self.alignment, everything)
        self.filtered_alignment_path = os.path.join(config.start_tree_path, 'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # The full subset has to agree with the alignment.
        subset_ops.check_against_alignment(everything, self.alignment, config)

        # Record where the alignment copy belongs.
        self.alignment_path = os.path.join(config.start_tree_path, 'source.phy')

        # Ask the processor where the tree should be.
        tree_path = config.processor.make_tree_path(self.filtered_alignment_path)

        if self.need_new_tree(tree_path):
            log.debug("Estimating new starting tree, no old tree found")

            # Clean the folder, naming the two alignment files to keep.
            util.clean_out_folder(
                config.start_tree_path,
                keep=["filtered_source.phy", "source.phy"])

            # Comparisons against True / False are kept explicit: a value
            # equal to neither selects none of the branches below.
            if not (user_path is None or user_path == ""):
                # A user topology was supplied: copy it into the folder.
                log.info("Using user supplied topology at %s" % user_path)
                topology_path = os.path.join(config.start_tree_path, 'user_topology.phy')
                util.dupfile(user_path, topology_path)
                want_branch_lengths = True
            elif config.no_ml_tree == True:
                log.debug("didn't find tree at %s, making a new one" % tree_path)
                topology_path = config.processor.make_topology(
                    self.filtered_alignment_path,
                    config.datatype,
                    config.cmdline_extras)
                want_branch_lengths = True
            elif config.no_ml_tree == False:
                log.debug("didn't find tree at %s, making an ML tree with RAxML" % tree_path)

                n_subsets = len(config.user_subsets)
                tree_scheme = scheme.create_scheme(config, "tree_scheme", range(n_subsets))

                topology_path = raxml.make_ml_topology(
                    self.filtered_alignment_path,
                    config.datatype,
                    config.cmdline_extras,
                    tree_scheme,
                    self.threads)

                # Duplicate the ML topology under the PhyML tree name so
                # that it can be used with PhyML as well.
                # TODO: this is a hack, and it would be better to decide on a
                # universal name for the different types of tree we might have.
                phyml_tree = os.path.join(
                    os.path.dirname(topology_path),
                    "filtered_source.phy_phyml_tree.txt")
                copyfile(topology_path, phyml_tree)

                want_branch_lengths = False

            if want_branch_lengths:
                # The topology still needs branch lengths.
                tree_path = config.processor.make_branch_lengths(
                    self.filtered_alignment_path, topology_path,
                    config.datatype, config.cmdline_extras)

        self.tree_path = tree_path
        log.debug("Starting tree with branch lengths is here: %s" % self.tree_path)