Esempio n. 1
0
def get_per_site_stats(alignment, cfg, a_subset):
    """Return the per-site statistic selected by ``cfg.kmeans``.

    Supported values of ``cfg.kmeans``:

    * ``'entropy'`` -- result of ``entropy.sitewise_entropies`` on the
      sub-alignment of ``a_subset``.
    * ``'fast_tiger'`` -- result of ``sitewise_tiger_rates`` on the file at
      ``a_subset.alignment_path``.
    * ``'tiger'`` with ``cfg.datatype == 'morphology'`` -- result of
      ``mt.calculate_rates``.
    * ``'tiger'`` with any other datatype -- rates from
      ``the_config.TigerDNA``, reshaped to a single column.

    Args:
        alignment: the full alignment the subset is drawn from.
        cfg: configuration object; ``kmeans`` and ``datatype`` are read.
        a_subset: the subset whose sites are to be scored.

    Raises:
        PartitionFinderError: if ``cfg.kmeans`` is none of the values above.
    """
    method = cfg.kmeans

    if method == 'entropy':
        sub_align = SubsetAlignment(alignment, a_subset)
        return entropy.sitewise_entropies(sub_align)

    if method == 'fast_tiger':
        # make_alignment() has to run before alignment_path is read;
        # presumably it writes the file that path points to -- confirm.
        a_subset.make_alignment(cfg, alignment)
        phylip_file = a_subset.alignment_path
        return sitewise_tiger_rates(cfg, str(phylip_file))

    if method == 'tiger':
        sub_align = SubsetAlignment(alignment, a_subset)

        if cfg.datatype == 'morphology':
            set_parts = mt.create_set_parts(sub_align)
            return mt.calculate_rates(set_parts)

        tiger = the_config.TigerDNA()
        tiger.build_bitsets(sub_align)
        rate_array = tiger.calc_rates()
        # Turn the rates into an (n, 1) column, in place.
        rate_array.shape = rate_array.shape[0], 1
        return rate_array

    # Anything else is a configuration error.
    log.error(
        "Unknown option passed to 'kmeans'. Please check and try again")
    raise PartitionFinderError
Esempio n. 2
0
def get_per_site_stats(alignment, cfg, a_subset):
    """Return the per-site statistic selected by ``cfg.kmeans``.

    Supported configurations:

    * ``cfg.kmeans == 'entropy'`` -- result of
      ``entropy.sitewise_entropies`` on the sub-alignment of ``a_subset``.
    * ``cfg.kmeans == 'tiger'`` together with
      ``cfg.datatype == 'morphology'`` -- result of ``mt.calculate_rates``.

    Every other combination is rejected, including ``'tiger'`` with a
    non-morphology datatype.

    Args:
        alignment: the full alignment the subset is drawn from.
        cfg: configuration object; ``kmeans`` and ``datatype`` are read.
        a_subset: the subset whose sites are to be scored.

    Raises:
        PartitionFinderError: for any unsupported configuration.
    """
    if cfg.kmeans == 'entropy':
        return entropy.sitewise_entropies(SubsetAlignment(alignment, a_subset))

    if cfg.kmeans == 'tiger' and cfg.datatype == 'morphology':
        sub_align = SubsetAlignment(alignment, a_subset)
        set_parts = mt.create_set_parts(sub_align)
        return mt.calculate_rates(set_parts)

    # No supported statistic matched the configuration.
    log.error("Unknown option passed to 'kmeans'. Please check and try again")
    raise PartitionFinderError
Esempio n. 3
0
def get_per_site_stats(alignment, cfg, a_subset):
    """Compute the per-site statistic that ``cfg.kmeans`` asks for.

    * ``'entropy'``: returns ``entropy.sitewise_entropies`` of the
      sub-alignment built from ``alignment`` and ``a_subset``.
    * ``'tiger'`` when ``cfg.datatype`` is ``'morphology'``: returns the
      rates computed by ``mt.calculate_rates``.

    Any other setting (``'tiger'`` with a different datatype included) is
    logged as an error and raises ``PartitionFinderError``.
    """
    wants_entropy = cfg.kmeans == 'entropy'
    if wants_entropy:
        sub_align = SubsetAlignment(alignment, a_subset)
        return entropy.sitewise_entropies(sub_align)

    wants_morph_tiger = (cfg.kmeans == 'tiger'
                         and cfg.datatype == 'morphology')
    if wants_morph_tiger:
        sub_align = SubsetAlignment(alignment, a_subset)
        rates = mt.calculate_rates(mt.create_set_parts(sub_align))
        return rates

    # Unsupported configuration: report it and abort.
    log.error(
        "Unknown option passed to 'kmeans'. Please check and try again")
    raise PartitionFinderError
Esempio n. 4
0
    def reassign_invariant_sites(self, subsets):
        """Move every invariant site into the subset of its replacement site.

        Site-wise entropies are computed once over the merge of all
        ``subsets``; ``entropy.get_replacement_sites`` then maps each
        invariant column to a replacement column, and the invariant column
        is reassigned to whichever subset currently holds that replacement.

        Args:
            subsets: the current subsets; each exposes ``columns``.

        Returns:
            A list of newly built ``Subset`` objects, one for each original
            subset index that still owns at least one site after the
            reassignment.
        """
        # TODO: add a skip -- with a single subset there is nothing to move,
        # so ``subsets`` could be returned unchanged.

        # Entropies are taken over all sites of all subsets together.
        onesub = subset_ops.merge_subsets(subsets)
        entropies = entropy.sitewise_entropies(
            SubsetAlignment(self.alignment, onesub))

        # replacements maps invariant col -> replacement col, e.g.
        # {512: 513, 514: 513, 515: 513, 516: 517}
        replacements = entropy.get_replacement_sites(entropies, onesub.columns)

        # Current assignment: site -> index of the subset that holds it.
        sch_dict = {}
        for i, sub in enumerate(subsets):
            for site in sub.columns:
                sch_dict[site] = i

        # Each invariant site adopts the subset of its replacement site.
        for r in replacements:
            sch_dict[r] = sch_dict[replacements[r]]

        # Invert the assignment: subset index -> list of its sites.
        # items() rather than the Python-2-only iteritems(), so this runs
        # on both Python 2 and Python 3.
        sub_dict = {}
        for site, subset_index in sch_dict.items():
            sub_dict.setdefault(subset_index, []).append(site)

        return [Subset(the_config, set(sites)) for sites in sub_dict.values()]
Esempio n. 5
0
    def reassign_invariant_sites(self, subsets):
        """Rebuild ``subsets`` with invariant sites moved next to their
        replacement sites.

        The subsets are merged, site-wise entropies are computed for the
        merged subset, and ``entropy.get_replacement_sites`` supplies a
        mapping of invariant column -> replacement column. Every invariant
        column is then placed in the subset that holds its replacement.

        Args:
            subsets: the current subsets; each exposes ``columns``.

        Returns:
            A list of new ``Subset`` objects built from the updated site
            assignment. An original subset that ends up with no sites
            produces no entry.
        """
        # TODO: add a skip -- when len(subsets) == 1 the input could be
        # returned as-is.

        # get entropies for the whole alignment covered by these subsets
        onesub = subset_ops.merge_subsets(subsets)
        sub_alignment = SubsetAlignment(self.alignment, onesub)
        entropies = entropy.sitewise_entropies(sub_alignment)

        # find the nearest site for each invariant site; replacements is a
        # dict of invariant col -> replacement col, e.g.
        # {512: 513, 514: 513, 515: 513, 516: 517}
        replacements = entropy.get_replacement_sites(entropies, onesub.columns)

        # dict of the CURRENT subsets: key: site; value: subset index
        sch_dict = {}
        for index, sub in enumerate(subsets):
            for site in sub.columns:
                sch_dict[site] = index

        # reassign the invariant sites based on replacements
        for invariant in replacements:
            sch_dict[invariant] = sch_dict[replacements[invariant]]

        # group the sites by their (possibly new) subset index; items()
        # replaces iteritems(), which does not exist on Python 3
        sub_dict = {}
        for site, index in sch_dict.items():
            sub_dict.setdefault(index, []).append(site)

        new_subsets = []
        for index in sub_dict:
            new_subsets.append(Subset(the_config, set(sub_dict[index])))

        return new_subsets