def generate_sequences(self, species_name, samples_per_pop=10, seq_len=2000, use_seq_gen=True):
    """
    Simulate character sequences for ``species_name`` and return the result.

    Calls ``self.generate_pop_tree`` and then ``self.generate_gene_tree``
    (both with ``species_name`` and ``samples_per_pop``), creates a
    ``dendropy.DataSet`` on ``self.mutation_tree.taxon_set``, and evolves
    ``seq_len`` characters along ``self.mutation_tree`` under HKY.

    Parameters:
        species_name: forwarded to the two tree-generation methods.
        samples_per_pop: forwarded to the two tree-generation methods.
        seq_len: number of characters to simulate.
        use_seq_gen: when true *and* the module-level ``SEQGEN`` flag is
            truthy, Seq-Gen is used; otherwise the ``seqsim`` fallback runs.

    Returns:
        Whatever the selected generator returns when given the freshly
        created data set (``SeqGen.generate_dataset`` or
        ``seqsim.generate_hky_dataset``).
    """
    # Presumably these two calls are what (re)build self.mutation_tree --
    # confirm against their definitions.
    self.generate_pop_tree(species_name=species_name, samples_per_pop=samples_per_pop)
    self.generate_gene_tree(species_name=species_name, samples_per_pop=samples_per_pop)
    dataset = dendropy.DataSet(self.mutation_tree.taxon_set)

    if not (SEQGEN and use_seq_gen):
        # NOTE(review): this fallback uses a fixed kappa of 1.0 and equal base
        # frequencies, whereas the Seq-Gen path below uses self.kappa and
        # self.base_freqs -- confirm that the difference is intentional.
        return seqsim.generate_hky_dataset(
            seq_len=seq_len,
            tree_model=self.mutation_tree,
            mutation_rate=1.0,
            kappa=1.0,
            base_freqs=[0.25, 0.25, 0.25, 0.25],
            root_states=None,
            dataset=dataset,
            rng=self.rng,
        )

    simulator = seqgen.SeqGen()
    simulator.seqgen_path = self.seqgen_path
    simulator.num_replicates = 1
    simulator.quiet = True
    simulator.rng = self.rng
    simulator.seq_len = seq_len
    simulator.char_model = 'HKY'
    # kappa is halved before being handed over as ti_tv; presumably the
    # kappa -> ti/tv conversion for equal base frequencies -- confirm.
    simulator.ti_tv = float(self.kappa) / 2
    simulator.state_freqs = self.base_freqs
    simulator.trees = [self.mutation_tree]
    return simulator.generate_dataset(dataset=dataset)
def estimate_params(self, seq_len=10000, kappa=1.0, base_freqs=None, unequal_base_freqs=True, gamma_rates=False, prop_invar=False):
    """
    Simulate an HKY data set on ``self.tree_model`` and fit a model to it
    with PAUP*.

    Sequences are generated by ``seqsim.generate_hky_dataset``,
    ``self.tree_model`` is re-indexed onto the taxon set of the first
    simulated character matrix, and ``paup.estimate_model`` is run on that
    matrix with the tree supplied as a user tree (likelihood criterion,
    user branch lengths, ``num_states=2``).

    Parameters:
        seq_len: number of characters to simulate.
        kappa: kappa value passed to the simulator.
        base_freqs: four base frequencies passed to the simulator.
            ``None`` (the default) means equal frequencies
            ``[0.25, 0.25, 0.25, 0.25]``; a fresh list is built on every
            call so no list object is shared between calls.
        unequal_base_freqs: forwarded to ``paup.estimate_model``.
        gamma_rates: forwarded to ``paup.estimate_model``.
        prop_invar: forwarded to ``paup.estimate_model``.

    Returns:
        The second item of the pair returned by ``paup.estimate_model``
        (presumably the maximum-likelihood estimates -- confirm against
        the ``paup`` module).
    """
    if base_freqs is None:
        # Built per call: a list literal in the signature would be a single
        # object shared by every invocation and by the code it is passed to.
        base_freqs = [0.25, 0.25, 0.25, 0.25]

    output_ds = seqsim.generate_hky_dataset(
        seq_len,
        tree_model=self.tree_model,
        kappa=kappa,
        base_freqs=base_freqs,
    )
    char_matrix = output_ds.char_matrices[0]

    # The tree must share the simulated matrix's taxon set before both are
    # handed to PAUP* together.
    self.tree_model.reindex_taxa(char_matrix.taxon_set)

    # The estimated tree (first item) is not needed here; only the second
    # item is returned.
    _, mle = paup.estimate_model(
        char_matrix=char_matrix,
        tree_model=self.tree_model,
        num_states=2,
        unequal_base_freqs=unequal_base_freqs,
        gamma_rates=gamma_rates,
        prop_invar=prop_invar,
        tree_est_criterion="likelihood",
        tree_user_brlens=True,
        paup_path='paup',
    )
    return mle