Example #1
0
def get_per_site_stats(alignment, cfg, a_subset):
    """Return per-site statistics for ``a_subset``, chosen by ``cfg.kmeans``.

    Supported values of ``cfg.kmeans``:

    * ``'entropy'``    -- result of ``entropy.sitewise_entropies`` on the
      subset alignment.
    * ``'fast_tiger'`` -- the subset alignment is materialised through
      ``a_subset.make_alignment`` and its path (as a string) is handed to
      ``sitewise_tiger_rates``.
    * ``'tiger'``      -- when ``cfg.datatype`` is ``'morphology'`` the rates
      come from ``mt.calculate_rates``; otherwise they come from a
      ``TigerDNA`` object and are reshaped in place to ``(n, 1)``.

    Any other value is logged as an error and PartitionFinderError is raised.
    """
    method = cfg.kmeans

    if method == 'entropy':
        return entropy.sitewise_entropies(SubsetAlignment(alignment, a_subset))

    if method == 'fast_tiger':
        # make_alignment records the path on the subset as a side effect
        a_subset.make_alignment(cfg, alignment)
        return sitewise_tiger_rates(cfg, str(a_subset.alignment_path))

    if method == 'tiger':
        if cfg.datatype == 'morphology':
            partitions = mt.create_set_parts(
                SubsetAlignment(alignment, a_subset))
            return mt.calculate_rates(partitions)

        dna_tiger = the_config.TigerDNA()
        dna_tiger.build_bitsets(SubsetAlignment(alignment, a_subset))
        site_rates = dna_tiger.calc_rates()
        # Turn the result into a single column, modifying it in place
        site_rates.shape = (site_rates.shape[0], 1)
        return site_rates

    # None of the known methods matched
    log.error(
        "Unkown option passed to 'kmeans'. Please check and try again")
    raise PartitionFinderError
Example #2
0
def get_per_site_stats(alignment, cfg, a_subset):
    """Return per-site statistics for ``a_subset``, chosen by ``cfg.kmeans``.

    ``'entropy'`` yields ``entropy.sitewise_entropies`` of the subset
    alignment. ``'tiger'`` combined with a ``cfg.datatype`` of
    ``'morphology'`` yields the rates from ``mt.calculate_rates``. Every
    other combination is logged as an error and raises PartitionFinderError.
    """
    method = cfg.kmeans

    if method == 'entropy':
        return entropy.sitewise_entropies(SubsetAlignment(alignment, a_subset))

    if method == 'tiger' and cfg.datatype == 'morphology':
        partitions = mt.create_set_parts(SubsetAlignment(alignment, a_subset))
        return mt.calculate_rates(partitions)

    # None of the known methods matched
    log.error(
        "Unkown option passed to 'kmeans'. Please check and try again")
    raise PartitionFinderError
Example #3
0
    def make_alignment(self, cfg, alignment):
        """Ensure the alignment file for this subset exists and is current.

        The file lives at ``<cfg.phylofiles_path>/<self.name>.phy`` and that
        path is stored on ``self.alignment_path``. If no file is present the
        subset alignment is written. If one is present it is read back and
        compared with the freshly built subset alignment; a mismatch is
        logged and SubsetError is raised.
        """
        fresh = SubsetAlignment(alignment, self)
        target = os.path.join(cfg.phylofiles_path, self.name + '.phy')
        # Remember the location on the subset for later steps
        self.alignment_path = target

        if not os.path.exists(target):
            # Nothing stored yet, so write it out and we're done
            fresh.write(target)
            return

        log.debug("Found existing alignment file %s", target)
        stored = Alignment()
        stored.read(target)

        if stored.same_as(fresh):
            return

        # The stored file belongs to a different set of data blocks
        log.error(
            "It looks like you have changed one or more of the "
            "data_blocks in the configuration file, "
            "so the new subset alignments "
            "don't match the ones stored for this analysis. "
            "You'll need to run the program with --force-restart")
        raise SubsetError
Example #4
0
    def make_tree(self, user_path):
        """Set up the starting tree, estimating a new one when needed.

        Steps, in order:

        1. Build an alignment restricted to the columns of all configured
           partitions and write it to ``filtered_source.phy`` in
           ``cfg.start_tree_path``.
        2. Check the partitions against the alignment and finalise them.
        3. Ask the processor for the expected tree path; if
           ``self.need_new_tree`` says a new tree is required, clean the
           start-tree folder, obtain a topology (the user's, or a newly made
           one) and estimate branch lengths on it.

        Args:
            user_path: path to a user-supplied topology. ``None`` or ``""``
                means a topology is made by the processor.

        Side effects: sets ``self.filtered_alignment``,
        ``self.filtered_alignment_path``, ``self.alignment_path`` and
        ``self.tree_path``.
        """
        # Begin by making a filtered alignment, containing ONLY those columns
        # that are defined in the subsets
        subset_with_everything = subset.Subset(*list(self.cfg.partitions))
        self.filtered_alignment = SubsetAlignment(self.alignment,
                                                  subset_with_everything)
        self.filtered_alignment_path = os.path.join(self.cfg.start_tree_path,
                                                    'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Now we've written this alignment, we need to lock everything in
        # place, no more adding partitions, or changing them from now on.
        self.cfg.partitions.check_against_alignment(self.alignment)
        self.cfg.partitions.finalise()

        # Record where the copy of the source alignment belongs. Only the
        # path is set here; nothing is written by this method.
        self.alignment_path = os.path.join(self.cfg.start_tree_path,
                                           'source.phy')

        # Now check for the tree
        tree_path = self.cfg.processor.make_tree_path(
            self.filtered_alignment_path)

        if self.need_new_tree(tree_path):
            log.debug("Estimating new starting tree, no old tree found")

            # Start from a clean folder, keeping only the alignments
            util.clean_out_folder(self.cfg.start_tree_path,
                                  keep=["filtered_source.phy", "source.phy"])

            # If we have a user tree, then use that, otherwise, create a
            # topology
            if user_path is not None and user_path != "":
                # Copy it into the start tree folder
                log.info("Using user supplied topology at %s", user_path)
                topology_path = os.path.join(self.cfg.start_tree_path,
                                             'user_topology.phy')
                self.cfg.processor.dupfile(user_path, topology_path)
            else:
                log.debug("didn't find tree at %s, making a new one",
                          tree_path)
                topology_path = self.cfg.processor.make_topology(
                    self.filtered_alignment_path, self.cfg.datatype,
                    self.cfg.cmdline_extras)

            # Now estimate branch lengths
            tree_path = self.cfg.processor.make_branch_lengths(
                self.filtered_alignment_path, topology_path, self.cfg.datatype,
                self.cfg.cmdline_extras)

        self.tree_path = tree_path
        log.info("Starting tree with branch lengths is here: %s",
                 self.tree_path)
Example #5
0
    def make_alignment(self, cfg, alignment):
        """Ensure the alignment file for this subset exists and is current.

        The file lives at ``<cfg.phylofiles_path>/<self.subset_id>.phy`` and
        that path is stored on ``self.alignment_path``. If no file is present
        the subset alignment is written. If one is present it is read back
        and compared with the freshly built subset alignment; on a mismatch
        ``self.FORCE_RESTART_MESSAGE`` is logged and SubsetError is raised.
        """
        fresh = SubsetAlignment(alignment, self)

        target = os.path.join(cfg.phylofiles_path, self.subset_id + '.phy')
        # Remember the location on the subset for later steps
        self.alignment_path = target

        if not os.path.exists(target):
            # Nothing stored yet, so write it out and we're done
            fresh.write(target)
            return

        log.debug("Found existing alignment file %s" % target)
        stored = Alignment()
        stored.read(target)

        if stored.same_as(fresh):
            return

        # The stored file does not match what this subset now describes
        log.error(self.FORCE_RESTART_MESSAGE)
        raise SubsetError
Example #6
0
    def reassign_invariant_sites(self, subsets):
        """Rebuild ``subsets`` with invariant sites moved next to a neighbour.

        Site entropies are computed over the merge of all ``subsets``.
        ``entropy.get_replacement_sites`` then supplies, for each invariant
        column, a replacement column; the invariant column is placed in
        whichever subset currently holds its replacement.

        Args:
            subsets: the current subsets; each must expose ``columns``.

        Returns:
            A list of newly constructed Subset objects, one per group of
            sites that remains non-empty after reassignment.
        """
        # TODO add a skip:
        # if(len(subsets)==1):
        #    return(subsets)

        # get entropies for whole alignment for this subset
        onesub = subset_ops.merge_subsets(subsets)
        entropies = entropy.sitewise_entropies(
            SubsetAlignment(self.alignment, onesub))

        # find nearest site for each invariant site
        # replacements is a dict of: key: invariant col; value: replacement
        # col, e.g.
        # {512: 513, 514: 513, 515: 513, 516: 517}
        replacements = entropy.get_replacement_sites(entropies, onesub.columns)

        # now make a dict of the CURRENT subsets: key: site; value: index of
        # the subset that holds it
        site_to_subset = {}
        for index, sub in enumerate(subsets):
            for site in sub.columns:
                site_to_subset[site] = index

        # then reassign the sites as necessary based on replacements
        for invariant_site in replacements:
            site_to_subset[invariant_site] = site_to_subset[
                replacements[invariant_site]]

        # now group the sites by their (possibly new) subset index.
        # .items() rather than .iteritems() so this runs on Python 2 and 3.
        sites_by_subset = {}
        for site, index in site_to_subset.items():
            sites_by_subset.setdefault(index, []).append(site)

        return [Subset(the_config, set(sites))
                for sites in sites_by_subset.values()]
Example #7
0
    def make_tree(self, user_path):
        """Set up the starting tree, estimating a new one when needed.

        Steps, in order:

        1. Merge all user subsets, build the alignment restricted to them and
           write it to ``filtered_source.phy`` in
           ``the_config.start_tree_path``.
        2. Check the merged subset against the alignment.
        3. Ask the processor for the expected tree path; if
           ``self.need_new_tree`` says a new tree is required, clean the
           start-tree folder and obtain a topology from one of three sources:

           * the user-supplied file (branch lengths are then estimated);
           * ``processor.make_topology`` when ``the_config.no_ml_tree`` is
             truthy (branch lengths are then estimated);
           * ``raxml.make_ml_topology`` otherwise (no separate branch-length
             step is run).

        Args:
            user_path: path to a user-supplied topology. ``None`` or ``""``
                means a topology is made by the program.

        Side effects: sets ``self.filtered_alignment``,
        ``self.filtered_alignment_path``, ``self.alignment_path`` and
        ``self.tree_path``.
        """
        # Begin by making a filtered alignment, containing ONLY those columns
        # that are defined in the subsets
        subset_with_everything = subset_ops.merge_subsets(
            the_config.user_subsets)
        self.filtered_alignment = SubsetAlignment(self.alignment,
                                                  subset_with_everything)
        self.filtered_alignment_path = os.path.join(the_config.start_tree_path,
                                                    'filtered_source.phy')
        self.filtered_alignment.write(self.filtered_alignment_path)

        # Check the full subset against the alignment
        subset_ops.check_against_alignment(subset_with_everything,
                                           self.alignment, the_config)

        # Record where the copy of the source alignment belongs. Only the
        # path is set here; nothing is written by this method.
        self.alignment_path = os.path.join(the_config.start_tree_path,
                                           'source.phy')

        # Now check for the tree
        tree_path = the_config.processor.make_tree_path(
            self.filtered_alignment_path)

        if self.need_new_tree(tree_path):
            log.debug("Estimating new starting tree, no old tree found")

            # Start from a clean folder, keeping only the alignments
            util.clean_out_folder(the_config.start_tree_path,
                                  keep=["filtered_source.phy", "source.phy"])

            # If we have a user tree, then use that, otherwise, create a
            # topology. The final branch is a plain ``else`` so that
            # topology_path and need_bl are always bound, whatever value
            # no_ml_tree holds.
            if user_path is not None and user_path != "":
                # Copy it into the start tree folder
                log.info("Using user supplied topology at %s", user_path)
                topology_path = os.path.join(the_config.start_tree_path,
                                             'user_topology.phy')
                util.dupfile(user_path, topology_path)
                need_bl = True
            elif the_config.no_ml_tree:
                log.debug("didn't find tree at %s, making a new one",
                          tree_path)
                topology_path = the_config.processor.make_topology(
                    self.filtered_alignment_path, the_config.datatype,
                    the_config.cmdline_extras)
                need_bl = True
            else:
                log.debug(
                    "didn't find tree at %s, making an ML tree with RAxML",
                    tree_path)

                tree_scheme = scheme.create_scheme(
                    the_config, "tree_scheme",
                    range(len(the_config.user_subsets)))

                topology_path = raxml.make_ml_topology(
                    self.filtered_alignment_path, the_config.datatype,
                    the_config.cmdline_extras, tree_scheme, self.threads)

                # here we copy the ML tree topology so it can be used with
                # PhyML too
                # TODO: this is a hack, and it would be better to decide on a
                # universal name for the different types of tree we might
                # have.
                phyml_tree = os.path.join(
                    os.path.dirname(topology_path),
                    "filtered_source.phy_phyml_tree.txt")
                copyfile(topology_path, phyml_tree)

                # NOTE(review): tree_path keeps the value from make_tree_path
                # on this branch; presumably the ML run leaves a tree there --
                # confirm against the processor implementations.
                need_bl = False

            if need_bl:
                # Now estimate branch lengths
                tree_path = the_config.processor.make_branch_lengths(
                    self.filtered_alignment_path, topology_path,
                    the_config.datatype, the_config.cmdline_extras)

        self.tree_path = tree_path
        log.debug("Starting tree with branch lengths is here: %s",
                  self.tree_path)