def get_per_site_stats(alignment, cfg, a_subset):
    """Return per-site statistics for `a_subset`, computed as `cfg.kmeans` dictates.

    Supported values of `cfg.kmeans`:

    * ``'entropy'``    -- site-wise entropies of the subset's alignment.
    * ``'fast_tiger'`` -- rates from `sitewise_tiger_rates`, which is given
      the path of the subset's alignment file.
    * ``'tiger'``      -- rates from the morphology routines in `mt` when
      `cfg.datatype` is ``'morphology'``, otherwise from
      `the_config.TigerDNA`; in the latter case the rate array is reshaped
      in place to a single column before it is returned.

    Args:
        alignment: the full alignment the subset is drawn from.
        cfg: configuration object; `kmeans` and `datatype` are read.
        a_subset: the subset whose sites are to be scored.

    Raises:
        PartitionFinderError: if `cfg.kmeans` is none of the values above
            (an error is logged first).
    """
    method = cfg.kmeans

    if method == 'entropy':
        return entropy.sitewise_entropies(SubsetAlignment(alignment, a_subset))

    if method == 'fast_tiger':
        # make_alignment is called before alignment_path is read; the path
        # is then handed over as a plain string.
        a_subset.make_alignment(cfg, alignment)
        return sitewise_tiger_rates(cfg, str(a_subset.alignment_path))

    if method == 'tiger':
        if cfg.datatype == 'morphology':
            parts = mt.create_set_parts(SubsetAlignment(alignment, a_subset))
            return mt.calculate_rates(parts)

        subset_alignment = SubsetAlignment(alignment, a_subset)
        calculator = the_config.TigerDNA()
        calculator.build_bitsets(subset_alignment)
        site_rates = calculator.calc_rates()
        # Reshape in place (no copy) so there is one row per rate.
        n_rates = site_rates.shape[0]
        site_rates.shape = (n_rates, 1)
        return site_rates

    # Anything else is not a recognised 'kmeans' setting.
    log.error("Unkown option passed to 'kmeans'. Please check and try again")
    raise PartitionFinderError
def get_per_site_stats(alignment, cfg, a_subset):
    """Return per-site statistics for `a_subset`, computed as `cfg.kmeans` dictates.

    Two configurations are handled:

    * ``cfg.kmeans == 'entropy'`` -- site-wise entropies of the subset's
      alignment.
    * ``cfg.kmeans == 'tiger'`` with ``cfg.datatype == 'morphology'`` --
      rates computed by the morphology routines in `mt`.

    Every other combination (including ``'tiger'`` with a non-morphology
    datatype) is reported as an unknown option.

    Args:
        alignment: the full alignment the subset is drawn from.
        cfg: configuration object; `kmeans` and `datatype` are read.
        a_subset: the subset whose sites are to be scored.

    Raises:
        PartitionFinderError: for any unhandled configuration (an error is
            logged first).
    """
    method = cfg.kmeans

    if method == 'entropy':
        return entropy.sitewise_entropies(SubsetAlignment(alignment, a_subset))

    if method == 'tiger' and cfg.datatype == 'morphology':
        parts = mt.create_set_parts(SubsetAlignment(alignment, a_subset))
        return mt.calculate_rates(parts)

    # Nothing matched: log and abort.
    log.error("Unkown option passed to 'kmeans'. Please check and try again")
    raise PartitionFinderError
def get_per_site_stats(alignment, cfg, a_subset):
    """Score every site of `a_subset` using the method selected in `cfg`.

    ``cfg.kmeans == 'entropy'`` yields the site-wise entropies of the
    subset's alignment.  ``cfg.kmeans == 'tiger'`` combined with
    ``cfg.datatype == 'morphology'`` yields the rates produced by the
    morphology routines in `mt`.  No other combination is handled here.

    Args:
        alignment: the full alignment the subset is drawn from.
        cfg: configuration object; `kmeans` and `datatype` are read.
        a_subset: the subset whose sites are to be scored.

    Raises:
        PartitionFinderError: when the configuration matches neither case
            (an error is logged before raising).
    """
    if cfg.kmeans == 'entropy':
        subset_alignment = SubsetAlignment(alignment, a_subset)
        return entropy.sitewise_entropies(subset_alignment)

    # `and` short-circuits, so datatype is only consulted for 'tiger'.
    morphology_tiger = cfg.kmeans == 'tiger' and cfg.datatype == 'morphology'
    if not morphology_tiger:
        log.error(
            "Unkown option passed to 'kmeans'. Please check and try again")
        raise PartitionFinderError

    subset_alignment = SubsetAlignment(alignment, a_subset)
    return mt.calculate_rates(mt.create_set_parts(subset_alignment))
def reassign_invariant_sites(self, subsets):
    """Rebuild `subsets` so each invariant site sits with its replacement site.

    The subsets are merged, site-wise entropies are computed over the
    merged alignment, and `entropy.get_replacement_sites` supplies a
    mapping from each invariant column to the column that stands in for
    it, e.g. ``{512: 513, 514: 513, 515: 513, 516: 517}``.  Every
    invariant column is then moved into whichever subset currently holds
    its replacement column, and fresh `Subset` objects are built from the
    resulting groups.

    Args:
        subsets: the current subsets; each must expose `columns`.

    Returns:
        A list of newly constructed `Subset` objects.  Only groups that
        still contain at least one site are rebuilt, so the list may be
        shorter than `subsets`.
    """
    # TODO: add a skip for the single-subset case, i.e.
    #   if len(subsets) == 1: return subsets

    # Entropies are computed over all sites of the given subsets at once.
    merged = subset_ops.merge_subsets(subsets)
    entropies = entropy.sitewise_entropies(
        SubsetAlignment(self.alignment, merged))

    # key: invariant column; value: the column that replaces it.
    replacements = entropy.get_replacement_sites(entropies, merged.columns)

    # Current assignment -- key: site; value: index of its subset.
    site_to_subset = {}
    for index, sub in enumerate(subsets):
        for site in sub.columns:
            site_to_subset[site] = index

    # Send each invariant site to the subset of its replacement site.
    for site in replacements:
        site_to_subset[site] = site_to_subset[replacements[site]]

    # Invert the mapping -- key: subset index; value: list of its sites.
    # .items() (not the Python-2-only .iteritems()) keeps this runnable on
    # both Python 2 and Python 3.
    sites_by_subset = {}
    for site, index in site_to_subset.items():
        sites_by_subset.setdefault(index, []).append(site)

    return [Subset(the_config, set(sites))
            for sites in sites_by_subset.values()]
def reassign_invariant_sites(self, subsets):
    """Return new subsets in which invariant sites follow their replacement sites.

    Steps:

    1. Merge `subsets` and compute site-wise entropies for the merged
       alignment.
    2. Ask `entropy.get_replacement_sites` for a mapping of invariant
       column -> replacement column, e.g.
       ``{512: 513, 514: 513, 515: 513, 516: 517}``.
    3. Record which subset each site currently belongs to, then move each
       invariant column to the subset of its replacement column.
    4. Build a new `Subset` for every group of sites that results.

    Args:
        subsets: the current subsets; each must expose `columns`.

    Returns:
        A list of newly constructed `Subset` objects, one per non-empty
        group of sites.
    """
    # TODO: add a skip for the single-subset case, i.e.
    #   if len(subsets) == 1: return subsets

    onesub = subset_ops.merge_subsets(subsets)
    whole_alignment = SubsetAlignment(self.alignment, onesub)
    entropies = entropy.sitewise_entropies(whole_alignment)

    # key: invariant column; value: the column that replaces it.
    replacements = entropy.get_replacement_sites(entropies, onesub.columns)

    # key: site; value: index of the subset that currently owns it.
    owner = {}
    for subset_index, sub in enumerate(subsets):
        for site in sub.columns:
            owner[site] = subset_index

    # Reassign invariant sites to the owner of their replacement site.
    for invariant_site in replacements:
        owner[invariant_site] = owner[replacements[invariant_site]]

    # key: subset index; value: sites that now belong to that subset.
    # dict.items() is used because dict.iteritems() does not exist on
    # Python 3; on Python 2 the pairs come back in the same order.
    grouped = {}
    for site, subset_index in owner.items():
        grouped.setdefault(subset_index, []).append(site)

    new_subsets = []
    for subset_index in grouped:
        new_subsets.append(Subset(the_config, set(grouped[subset_index])))
    return new_subsets