def seqgen_to_file(files, seqgen_vals):
    """
    Simulate sequences with Seq-Gen for each tree file and write them to disk.

    For every path in ``files`` the tree is read with DendroPy, using the
    file extension as the schema name. Sequences are simulated on it with
    branch lengths scaled by 0.1, and the first resulting character matrix
    is written, in the same schema, to a sibling file whose base name is
    prefixed with ``seq_``.

    Parameters
    ----------
    files : iterable of str
        Paths to tree files; the extension (e.g. ``.nexus``) must be a
        schema name that DendroPy accepts.
    seqgen_vals : dict
        Every value in this mapping is replaced in place by
        ``seqgen.SeqGen(value)``.
    """
    import os

    s = seqgen.SeqGen()
    s.scale_branch_lens = 0.1
    # NOTE(review): this overwrites the caller's dict values with new SeqGen
    # objects that are never used below; presumably the intent was to apply
    # the settings to `s` -- confirm against the caller before changing.
    for k, v in seqgen_vals.items():
        seqgen_vals[k] = seqgen.SeqGen(v)
    for tree_path in files:
        # Split on the real extension so that directories and dotted base
        # names (e.g. "./runs/a.v2.nexus") resolve to the right schema and
        # the "seq_" prefix lands on the file name, not on the directory.
        directory, basename = os.path.split(tree_path)
        stem, ext = os.path.splitext(basename)
        schema = ext.lstrip('.')
        trees = dendropy.Tree.get(path=tree_path, schema=schema)
        filename = os.path.join(directory, "seq_{}.{}".format(stem, schema))
        d1 = s.generate(trees)
        with open(filename, "w") as f:
            f.write(d1.char_matrices[0].as_string(schema))
def generate_sequences(self, species_name, samples_per_pop=10, seq_len=2000, use_seq_gen=True):
    """
    Build the population and gene trees for a species, then simulate
    sequences on ``self.mutation_tree``.

    The simulator is chosen by the instance attribute ``self.use_seq_gen``;
    the ``use_seq_gen`` argument of this method is accepted but not
    consulted. When Seq-Gen is used, an HKY model is configured from
    ``self.kappa`` and ``self.base_freqs``; otherwise
    ``discrete.hky85_chars`` is called with kappa 1.0 and equal base
    frequencies.

    Returns the dataset holding the simulated characters.
    """
    tree_kwargs = dict(species_name=species_name, samples_per_pop=samples_per_pop)
    self.generate_pop_tree(**tree_kwargs)
    self.generate_gene_tree(**tree_kwargs)
    dataset = dendropy.DataSet(self.mutation_tree.taxon_namespace)

    if self.use_seq_gen is not True:
        # Pure-Python fallback simulator.
        simulated = discrete.hky85_chars(
            seq_len=seq_len,
            tree_model=self.mutation_tree,
            mutation_rate=1.0,
            kappa=1.0,
            base_freqs=[0.25, 0.25, 0.25, 0.25],
            root_states=None,
            rng=self.rng)
        dataset.add_char_matrix(simulated)
        return dataset

    # External Seq-Gen run, configured from this instance.
    generator = seqgen.SeqGen()
    generator.seqgen_path = self.seqgen_path
    generator.num_replicates = 1
    generator.quiet = True
    generator.rng = self.rng
    generator.seq_len = seq_len
    generator.char_model = 'HKY'
    generator.ti_tv = float(self.kappa) / 2
    generator.state_freqs = self.base_freqs
    generator.trees = [self.mutation_tree]
    return generator.generate_dataset(dataset=dataset)
def simulate_gtr_matrix(tree, seq_length, frequencies, rates, branch_scale):
    """
    Simulate an alignment on ``tree`` with Seq-Gen under a GTR model and
    return it as a FASTA-formatted string.

    ``frequencies`` and ``rates`` are assigned to the generator's
    ``state_freqs`` and ``general_rates``, ``branch_scale`` to
    ``scale_branch_lens`` and ``seq_length`` to ``seq_len``.
    """
    simulator = seqgen.SeqGen()
    settings = (
        ("char_model", seqgen.SeqGen.GTR),
        ("state_freqs", frequencies),
        ("general_rates", rates),
        ("scale_branch_lens", branch_scale),
        ("seq_len", seq_length),
    )
    for attribute, value in settings:
        setattr(simulator, attribute, value)
    first_matrix = simulator.generate(tree).char_matrices[0]
    return first_matrix.as_string('fasta')
#! /usr/bin/env python
# -*- coding: utf-8 -*-

# Example: simulating alignments with Seq-Gen through DendroPy, first with
# the default settings and then with a scaled GTR model.

import dendropy
from dendropy.interop import seqgen

trees = dendropy.TreeList.get(path="pythonidae.mcmc.nex", schema="nexus")
s = seqgen.SeqGen()

# generate one alignment per tree
# as substitution model is not specified, defaults to a JC model
# will result in a DataSet object with one DnaCharacterMatrix per input tree
d0 = s.generate(trees)
print(len(d0.char_matrices))
print(d0.char_matrices[0].as_string("nexus"))

# instruct Seq-Gen to scale branch lengths by factor of 0.1
# note that this does not modify the input trees
s.scale_branch_lens = 0.1

# more complex model
s.char_model = seqgen.SeqGen.GTR
s.state_freqs = [0.4, 0.4, 0.1, 0.1]
s.general_rates = [0.8, 0.4, 0.4, 0.2, 0.2, 0.1]
d1 = s.generate(trees)
# report the GTR run (d1), not the first run again
print(len(d1.char_matrices))
print(d1.char_matrices[0].as_string("nexus"))
def main():
    """
    Build BPP analysis inputs for each source tree and write a run manifest.

    For every source tree file (given on the command line and/or listed one
    per line on standard input when '-' is specified) this:

    - reads the tree and copies its simulation annotations into a manifest
      entry;
    - generates contained gene trees and simulates one alignment per gene
      tree with Seq-Gen;
    - writes ``<job>.input.imap.txt``, ``<job>.input.chars.txt``,
      ``<job>.input.bpp.ctl`` and ``<job>.job.sge``.

    Finally all manifest entries are written to ``<title>_manifest.csv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("source_trees",
            metavar="SOURCE_TREEFILE [SOURCE_TREEFILE [SOURCE_TREEFILE]]",
            nargs="+",
            help="Path to containing tree files. Specify '-' to read from standard input.")
    parser.add_argument("-f", "--input-format",
            default="nexus",
            dest="schema",
            help="Input trees format (default: %(default)s).")
    parser.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    parser.add_argument("-t", "--title",
            default="bpprun",
            help="Run title (default: '%(default)s')")
    data_options = parser.add_argument_group("Data Options")
    # NOTE(review): the default is a float but user-supplied values are
    # parsed as int -- confirm which is intended.
    data_options.add_argument("--population-size",
            type=int,
            default=1.0,
            help="Population size (default: %(default)s).")
    data_options.add_argument("--num-individuals-per-population",
            type=int,
            default=4,
            help="Number of individuals sampled per incipient species lineage (default: %(default)s).")
    data_options.add_argument("--num-loci-per-individual",
            type=int,
            default=10,
            help="Number of loci sampled per individual (default: %(default)s).")
    data_options.add_argument("--num-characters-per-locus",
            type=int,
            default=1000,
            help="Number of characters sampled per locus (default: %(default)s).")
    data_options.add_argument("--mutation-rate-per-site",
            type=float,
            default=1e-8,
            help="Per-site mutation rate (default: %(default)s).")
    parser.add_argument("--no-scale-tree-by-mutation-rate",
            action="store_true",
            help="Do not scale tree by mutation rate.")
    args = parser.parse_args()

    if args.random_seed is None:
        random_seed = random.randint(0, sys.maxsize-1)
    else:
        random_seed = args.random_seed
    rng = random.Random(random_seed)
    _log("Random seed: {}".format(random_seed))

    sg = seqgen.SeqGen()
    sg.seq_len = args.num_characters_per_locus
    sg.scale_branch_lens = args.mutation_rate_per_site

    if "-" in args.source_trees:
        # Paths from standard input come first; blank lines (including the
        # one produced by a trailing newline) are not paths and are dropped.
        filepaths = [line.strip() for line in sys.stdin.read().splitlines() if line.strip()]
        filepaths.extend(p for p in args.source_trees if p != "-")
    else:
        filepaths = list(args.source_trees)

    # Tree annotations copied into the manifest as floats where possible.
    float_annotation_keys = (
        "speciation_initiation_from_orthospecies_rate",
        "speciation_initiation_from_incipient_species_rate",
        "speciation_completion_rate",
        "orthospecies_extinction_rate",
        "incipient_species_extinction_rate",
        "max_time",
        "max_extant_orthospecies",
        "num_extant_lineages",
        "num_extant_orthospecies",
    )

    manifest_entries = []
    for idx, filepath in enumerate(filepaths):
        job_title = "{}_{:05d}".format(args.title, idx+1)
        manifest_entry = collections.OrderedDict()
        _log("{} of {}: {}: {}".format(idx+1, len(filepaths), job_title, filepath))
        source_tree = dendropy.Tree.get(
                path=filepath,
                schema=args.schema,
                extract_comment_metadata=True,
                preserve_underscores=True,
                )
        for key in float_annotation_keys:
            manifest_entry[key] = try_to_coerce_to_float(source_tree.annotations[key].value)
        manifest_entry["source_tree_type"] = source_tree.annotations["tree_type"].value
        manifest_entry["population_size"] = args.population_size
        manifest_entry["num_individuals_per_population"] = args.num_individuals_per_population
        manifest_entry["num_loci_per_individual"] = args.num_loci_per_individual
        manifest_entry["mutation_rate_per_site"] = args.mutation_rate_per_site

        source_tree.calc_node_ages()
        gene_trees = generate_contained_trees(
                containing_tree=source_tree,
                num_individuals_per_population=args.num_individuals_per_population,
                num_gene_trees=args.num_loci_per_individual,
                population_size=args.population_size,
                rng=rng,
                )

        # Individual-to-population map.
        imap_filepath = "{}.input.imap.txt".format(job_title)
        with open(imap_filepath, "w") as f:
            for taxon in gene_trees.taxon_namespace:
                f.write("{} {}\n".format(taxon.label.split("^")[1], taxon.population_label))
            f.write("\n")

        # One PHYLIP alignment per gene tree, all in a single file.
        d0 = sg.generate(gene_trees)
        chars_filepath = "{}.input.chars.txt".format(job_title)
        with open(chars_filepath, "w") as f:
            for cm_idx, cm in enumerate(d0.char_matrices):
                sys.stderr.write("Locus {}: pi = {}, Tajima's D = {}\n".format(
                    cm_idx+1,
                    popgenstat.nucleotide_diversity(cm),
                    popgenstat.tajimas_d(cm)))
                cm.write(file=f, schema="phylip")
                f.write("\n")

        out_filepath = "{}.results.out.txt".format(job_title)
        mcmc_filepath = "{}.results.mcmc.txt".format(job_title)
        num_species = len(source_tree.taxon_namespace)
        species_labels = " ".join(t.label for t in source_tree.taxon_namespace)
        num_individuals_per_species = " ".join(
                [str(args.num_individuals_per_population)] * num_species)

        # Inverse Gamma Prior
        #   IG(a,b), with mean given by b/(a-1)
        # So,
        #   thetaprior 3 0.002
        # has a mean of
        #   0.002/(3-1) = 0.001
        theta_prior_mean = args.population_size * 4 * args.mutation_rate_per_site
        theta_prior_a = 3.0
        theta_prior_b = theta_prior_mean * (theta_prior_a - 1)
        if args.no_scale_tree_by_mutation_rate:
            tau_prior_mean = source_tree.seed_node.age
        else:
            tau_prior_mean = source_tree.seed_node.age * args.mutation_rate_per_site * (1.0 / (args.num_loci_per_individual * args.num_characters_per_locus))
        tau_prior_a = 3.0
        tau_prior_b = tau_prior_mean * (tau_prior_a - 1)

        # Count of species, not the character length of the joined labels.
        manifest_entry["num_input_lineages"] = num_species
        manifest_entry["theta"] = theta_prior_mean
        manifest_entry["theta_prior_a"] = theta_prior_a
        manifest_entry["theta_prior_b"] = theta_prior_b
        manifest_entry["root_age"] = source_tree.seed_node.age
        manifest_entry["tau_prior_a"] = tau_prior_a
        manifest_entry["tau_prior_b"] = tau_prior_b

        species_tree = source_tree.as_string(
                schema="newick",
                suppress_leaf_taxon_labels=False,
                suppress_leaf_node_labels=True,
                suppress_internal_taxon_labels=True,
                suppress_internal_node_labels=True,
                suppress_rooting=True,
                suppress_edge_lengths=True,
                unquoted_underscores=True,
                preserve_spaces=True,
                store_tree_weights=False,
                suppress_annotations=True,
                suppress_item_comments=True,
                )
        bpp_config = BPP_TEMPLATE.format(
                chars_filepath=chars_filepath,
                imap_filepath=imap_filepath,
                out_filepath=out_filepath,
                mcmc_filepath=mcmc_filepath,
                num_species=num_species,
                species_labels=species_labels,
                num_individuals_per_species=num_individuals_per_species,
                species_tree=species_tree,
                theta_prior_mean=theta_prior_mean,
                theta_prior_a=theta_prior_a,
                theta_prior_b=theta_prior_b,
                tau_prior_mean=tau_prior_mean,
                tau_prior_a=tau_prior_a,
                tau_prior_b=tau_prior_b,
                num_loci=args.num_loci_per_individual,
                root_age=source_tree.seed_node.age
                )
        bpp_ctl_filepath = "{}.input.bpp.ctl".format(job_title)
        with open(bpp_ctl_filepath, "w") as f:
            f.write(bpp_config)
            f.write("\n")

        # Grid Engine submission script for this job.
        with open("{}.job.sge".format(job_title), "w") as jobf:
            jobf.write("#! /bin/bash\n")
            jobf.write("#$ -cwd\n")
            jobf.write("#$ -V\n")
            jobf.write("#$ -S /bin/bash\n")
            jobf.write("#$ -l h_vmem=12G\n")
            jobf.write("#$ -l virtual_free=12G\n")
            jobf.write("bpp --cfile {}\n".format(bpp_ctl_filepath))

        manifest_entry["source_tree_path"] = filepath
        manifest_entry["results_filepath"] = out_filepath
        manifest_entry["mcmc_filepath"] = mcmc_filepath
        manifest_entries.append(manifest_entry)

    if not manifest_entries:
        # Nothing was processed (e.g. only '-' given and stdin was empty):
        # there are no column names to build a manifest from.
        _log("No source trees processed; manifest not written")
        return

    out = _open_output_file_for_csv_writer(
            filepath="{}_manifest.csv".format(args.title),
            append=False)
    with out:
        writer = csv.DictWriter(
                out,
                fieldnames=list(manifest_entries[0].keys()),
                restval="NA",
                delimiter=",",
                lineterminator=os.linesep,
                )
        writer.writeheader()
        writer.writerows(manifest_entries)
def main():
    """
    Simulate replicate trees under the protracted speciation model and write
    a setup log for each replicate.

    At least one termination condition (``--max-time``,
    ``--max-extant-orthospecies`` or ``--max-extant-lineages``) must be
    given; otherwise the program exits with an error message. For each
    replicate a lineage tree and an orthospecies tree are sampled, labeled,
    written to ``x1.nexus`` / ``x2.nexus``, and described in
    ``<job>.setup.log`` together with the model and data parameters.
    """
    parser = argparse.ArgumentParser()
    parameter_options = parser.add_argument_group("Model Parameters")
    parameter_options.add_argument("--b1", "--speciation_initiation_from_orthospecies_rate",
            type=float,
            dest="speciation_initiation_from_orthospecies_rate",
            default=1.0,
            help="Rate at which orthospecies give rise to new incipient species [default: %(default)s].")
    parameter_options.add_argument("--b2", "--speciation_initiation_from_incipient_species_rate",
            type=float,
            dest="speciation_initiation_from_incipient_species_rate",
            default=1.0,
            help="Rate at which incipient species give rise to new incipient species [default: %(default)s].")
    parameter_options.add_argument("--c1", "--speciation-completion-rate",
            type=float,
            dest="speciation_completion_rate",
            default=1.0,
            help="Rate at which incipient species become orthospecies [default: %(default)s].")
    parameter_options.add_argument("--e1", "--orthospecies-extinction-rate",
            type=float,
            dest="orthospecies_extinction_rate",
            default=1.0,
            help="Rate at which orthospecies go extinct [default: %(default)s].")
    parameter_options.add_argument("--e2", "--incipient-species-extinction-rate",
            type=float,
            dest="incipient_species_extinction_rate",
            default=1.0,
            help="Rate at which incipient species go extinct [default: %(default)s].")
    termination_options = parser.add_argument_group("Simulation Termination Conditions")
    termination_options.add_argument("--max-time",
            type=float,
            default=None,
            help="Maximum length of time to run (default: %(default)s).")
    termination_options.add_argument("--max-extant-orthospecies",
            type=int,
            default=None,
            help="Maximum number of orthospecies to generate (default: %(default)s).")
    termination_options.add_argument("--max-extant-lineages",
            type=int,
            default=None,
            help="Maximum number of lineages to generate (default: %(default)s).")
    data_options = parser.add_argument_group("Data Options")
    data_options.add_argument("--population-size",
            type=int,
            default=10000,
            help="Population size (default: %(default)s).")
    data_options.add_argument("--num-individuals-per-population",
            type=int,
            default=4,
            help="Number of individuals sampled per incipient species lineage (default: %(default)s).")
    data_options.add_argument("--num-loci-per-individual",
            type=int,
            default=10,
            help="Number of loci sampled per individual (default: %(default)s).")
    data_options.add_argument("--num-characters-per-locus",
            type=int,
            default=10,
            help="Number of characters sampled per locus (default: %(default)s).")
    data_options.add_argument("--mutation-rate-per-site",
            type=float,
            default=0.00001,
            help="Per-site mutation rate (default: %(default)s).")
    run_options = parser.add_argument_group("Run Options")
    run_options.add_argument("-t", "--run-title",
            default="run",
            help="Run title (default: '%(default)s')")
    run_options.add_argument("-n", "--nreps",
            type=int,
            default=10,
            help="Number of replicates (default: %(default)s).")
    run_options.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    args = parser.parse_args()

    if not args.max_time and not args.max_extant_orthospecies and not args.max_extant_lineages:
        sys.exit("Need to specify termination condition, at least one of: '--max-time', '--max-extant-orthospecies', '--max-extant-lineages'")
    if args.random_seed is None:
        # sys.maxsize exists on both Python 2 and 3; sys.maxint is Python 2 only.
        random_seed = random.randint(0, sys.maxsize-1)
    else:
        random_seed = args.random_seed
    _log("Random seed: {}".format(random_seed))
    rng = random.Random(random_seed)

    psm = protractedspeciation.ProtractedSpeciationProcess(
            speciation_initiation_from_orthospecies_rate=args.speciation_initiation_from_orthospecies_rate,
            orthospecies_extinction_rate=args.orthospecies_extinction_rate,
            speciation_initiation_from_incipient_species_rate=args.speciation_initiation_from_incipient_species_rate,
            speciation_completion_rate=args.speciation_completion_rate,
            incipient_species_extinction_rate=args.incipient_species_extinction_rate,
            rng=rng,)

    # SeqGen's branch-length scaling attribute is `scale_branch_lens`; any
    # other name would just create an attribute that SeqGen ignores.
    # NOTE(review): `sg` is configured here but not used further in this
    # function.
    sg = seqgen.SeqGen()
    sg.scale_branch_lens = args.mutation_rate_per_site

    for rep in range(args.nreps):
        job_title = "{}_{:03d}".format(args.run_title, rep+1)
        _log("Replicate {} of {}: {}".format(rep+1, args.nreps, job_title))
        lineage_tree, orthospecies_tree = psm.generate_sample(
                max_time=args.max_time,
                max_extant_orthospecies=args.max_extant_orthospecies,
                max_extant_lineages=args.max_extant_lineages,
                is_initial_lineage_orthospecies=True,
                )
        label_lineage_tree(lineage_tree)
        label_orthospecies_tree(orthospecies_tree)
        # These fixed paths are overwritten on every replicate.
        lineage_tree.write(path="x1.nexus", schema="nexus")
        orthospecies_tree.write(path="x2.nexus", schema="nexus")

        with open("{}.setup.log".format(job_title), "w") as logf:
            logf.write("- Replicate {} of {} generated by command: {}\n".format(rep+1, args.nreps, " ".join(sys.argv)))
            logf.write("\n")
            logf.write("- Random seed used: {}\n".format(random_seed))
            logf.write("\n")
            describe_tree(logf, lineage_tree, "- Lineage Tree Profile")
            logf.write("\n")
            describe_tree(logf, orthospecies_tree, "- Orthospecies Tree Profile")
            logf.write("\n")
            logf.write("- Protracted Speciation Model Parameters\n")
            logf.write(" - Speciation initiation from orthospecies rate: {}\n".format(args.speciation_initiation_from_orthospecies_rate))
            logf.write(" - Speciation initiation from incipient species rate: {}\n".format(args.speciation_initiation_from_incipient_species_rate))
            logf.write(" - Speciation completion rate: {}\n".format(args.speciation_completion_rate))
            logf.write(" - Orthospecies extinction rate: {}\n".format(args.orthospecies_extinction_rate))
            logf.write(" - Incipient species extinction rate: {}\n".format(args.incipient_species_extinction_rate))
            logf.write(" - Termination: Maximum simulation time: {}\n".format(args.max_time))
            logf.write(" - Termination: Maximum orthospecies: {}\n".format(args.max_extant_orthospecies))
            logf.write(" - Termination: Maximum lineages: {}\n".format(args.max_extant_lineages))
            logf.write("\n")
            logf.write("- Data Generation Parameters\n")
            logf.write(" - Population size: {}\n".format(args.population_size))
            logf.write(" - Individuals per species/population: {}\n".format(args.num_individuals_per_population))
            logf.write(" - Number of loci per individual: {}\n".format(args.num_loci_per_individual))
            logf.write(" - Per-site mutation rate: {}\n".format(args.mutation_rate_per_site))
            logf.write("\n")
def main():
    """
    Build BPP analysis inputs for each source tree, sampling a fixed total
    number of individuals across all populations.

    For every source tree file (given on the command line and/or listed one
    per line on standard input when '-' is specified) this reads the tree,
    skipping files that fail to load. It then generates a containing tree
    and contained gene trees, simulates one alignment per gene tree with
    Seq-Gen, and writes ``<job>.input.imap.txt``, ``<job>.input.chars.txt``,
    ``<job>.input.bpp.ctl`` and ``<job>.job.sge``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("source_trees",
            metavar="SOURCE_TREEFILE [SOURCE_TREEFILE [SOURCE_TREEFILE]]",
            nargs="+",
            help="Path to source of tree files. Specify '-' to read from standard input.")
    parser.add_argument("-f", "--input-format",
            default="nexus",
            dest="schema",
            help="Input trees format (default: %(default)s).")
    parser.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    parser.add_argument("-t", "--title",
            default="bpprun",
            help="Run title (default: '%(default)s')")
    data_options = parser.add_argument_group("Data Options")
    data_options.add_argument("--population-size",
            type=int,
            default=10000,
            help="Population size (default: %(default)s).")
    data_options.add_argument("--total-number-of-individuals",
            type=int,
            default=200,
            help="Number of individuals sampled across all populations (default: %(default)s).")
    data_options.add_argument("--num-loci-per-individual",
            type=int,
            default=10,
            help="Number of loci sampled per individual (default: %(default)s).")
    data_options.add_argument("--num-characters-per-locus",
            type=int,
            default=1000,
            help="Number of characters sampled per locus (default: %(default)s).")
    data_options.add_argument("--mutation-rate-per-site",
            type=float,
            default=0.00001,
            help="Per-site mutation rate (default: %(default)s).")
    args = parser.parse_args()

    if args.random_seed is None:
        # sys.maxsize exists on both Python 2 and 3; sys.maxint is Python 2 only.
        random_seed = random.randint(0, sys.maxsize-1)
    else:
        random_seed = args.random_seed
    rng = random.Random(random_seed)
    s00.log("Random seed: {}".format(random_seed))

    sg = seqgen.SeqGen()
    # Apply the requested alignment length and use SeqGen's actual
    # branch-length scaling attribute (`scale_branch_lens`).
    sg.seq_len = args.num_characters_per_locus
    sg.scale_branch_lens = args.mutation_rate_per_site

    if "-" in args.source_trees:
        # Paths from standard input come first; blank lines (including the
        # one produced by a trailing newline) are not paths and are dropped.
        filepaths = [line.strip() for line in sys.stdin.read().splitlines() if line.strip()]
        filepaths.extend(p for p in args.source_trees if p != "-")
    else:
        filepaths = list(args.source_trees)

    # Tree annotations copied into the manifest as floats where possible.
    float_annotation_keys = (
        "speciation_initiation_from_orthospecies_rate",
        "speciation_initiation_from_incipient_species_rate",
        "speciation_completion_rate",
        "orthospecies_extinction_rate",
        "incipient_species_extinction_rate",
        "max_time",
        "max_extant_orthospecies",
        "num_extant_lineages",
        "num_extant_orthospecies",
    )

    # NOTE(review): entries are accumulated here but this function never
    # writes them out -- confirm whether a manifest file is expected.
    manifest_entries = []
    for idx, filepath in enumerate(filepaths):
        job_title = "{}_{:05d}".format(args.title, idx+1)
        manifest_entry = collections.OrderedDict()
        s00.log("{} of {}: {}: {}".format(idx+1, len(filepaths), job_title, filepath))
        try:
            source_tree = dendropy.Tree.get(
                    path=filepath,
                    schema=args.schema,
                    extract_comment_metadata=True,
                    preserve_underscores=True,
                    )
        except (OSError, dendropy.DataError):
            # Both exception types must be in a tuple; the comma form only
            # catches the first and is a syntax error on Python 3.
            s00.log("Skipping failed file: {}".format(filepath))
            continue
        for key in float_annotation_keys:
            manifest_entry[key] = try_to_coerce_to_float(source_tree.annotations[key].value)
        manifest_entry["source_tree_type"] = source_tree.annotations["tree_type"].value
        manifest_entry["population_size"] = args.population_size
        manifest_entry["total_number_of_individuals"] = args.total_number_of_individuals
        manifest_entry["num_loci_per_individual"] = args.num_loci_per_individual
        manifest_entry["mutation_rate_per_site"] = args.mutation_rate_per_site

        source_tree.calc_node_ages()
        containing_tree, gene_trees = generate_contained_trees(
                containing_tree=source_tree,
                total_number_of_individuals=args.total_number_of_individuals,
                num_gene_trees=args.num_loci_per_individual,
                population_size=args.population_size,
                rng=rng,
                )

        # Individual-to-population map.
        imap_filepath = "{}.input.imap.txt".format(job_title)
        with open(imap_filepath, "w") as f:
            for taxon in gene_trees.taxon_namespace:
                f.write("{} {}\n".format(taxon.label.split("^")[1], taxon.population_label))
            f.write("\n//end of file")

        # One PHYLIP alignment per gene tree: write each matrix itself, not
        # the whole dataset once per matrix.
        d0 = sg.generate(gene_trees)
        chars_filepath = "{}.input.chars.txt".format(job_title)
        with open(chars_filepath, "w") as f:
            for cm in d0.char_matrices:
                cm.write(file=f, schema="phylip")
                f.write("\n")

        out_filepath = "{}.results.out.txt".format(job_title)
        mcmc_filepath = "{}.results.mcmc.txt".format(job_title)

        # Species labels and per-species sample sizes, in leaf order.
        leaf_labels = []
        leaf_sample_sizes = []
        for nd in containing_tree.leaf_node_iter():
            leaf_labels.append(nd.taxon.label)
            leaf_sample_sizes.append(nd.num_individuals_sampled)
        final_containing_tree_num_species = len(leaf_labels)
        final_containing_tree_species_labels = " ".join(leaf_labels)
        final_containing_tree_num_individuals_per_species = " ".join([str(i) for i in leaf_sample_sizes])

        theta_prior_mean = args.population_size * 4 * args.mutation_rate_per_site
        theta_prior_a = 2.0
        theta_prior_b = theta_prior_a/theta_prior_mean
        tau_prior_mean = containing_tree.seed_node.age
        tau_prior_a = 2.0
        tau_prior_b = tau_prior_a/tau_prior_mean

        manifest_entry["num_input_lineages"] = final_containing_tree_num_species
        manifest_entry["theta"] = theta_prior_mean
        manifest_entry["theta_prior_a"] = theta_prior_a
        manifest_entry["theta_prior_b"] = theta_prior_b
        manifest_entry["root_age"] = containing_tree.seed_node.age
        manifest_entry["tau_prior_a"] = tau_prior_a
        manifest_entry["tau_prior_b"] = tau_prior_b

        species_tree = containing_tree.as_string(
                schema="newick",
                suppress_leaf_taxon_labels=False,
                suppress_leaf_node_labels=True,
                suppress_internal_taxon_labels=True,
                suppress_internal_node_labels=True,
                suppress_rooting=True,
                suppress_edge_lengths=True,
                unquoted_underscores=True,
                preserve_spaces=True,
                store_tree_weights=False,
                suppress_annotations=True,
                suppress_item_comments=True,
                )
        bpp_config = BPP_TEMPLATE.format(
                chars_filepath=chars_filepath,
                imap_filepath=imap_filepath,
                out_filepath=out_filepath,
                mcmc_filepath=mcmc_filepath,
                num_species=final_containing_tree_num_species,
                species_labels=final_containing_tree_species_labels,
                num_individuals_per_species=final_containing_tree_num_individuals_per_species,
                species_tree=species_tree,
                theta_prior_a=theta_prior_a,
                theta_prior_b=theta_prior_b,
                tau_prior_a=tau_prior_a,
                tau_prior_b=tau_prior_b,
                num_loci=args.num_loci_per_individual,
                )
        bpp_ctl_filepath = "{}.input.bpp.ctl".format(job_title)
        with open(bpp_ctl_filepath, "w") as f:
            f.write(bpp_config)
            f.write("\n")

        # Grid Engine submission script for this job.
        with open("{}.job.sge".format(job_title), "w") as jobf:
            jobf.write("#! /bin/bash\n")
            jobf.write("#$ -cwd\n")
            jobf.write("#$ -V\n")
            jobf.write("#$ -S /bin/bash\n")
            jobf.write("#$ -l h_vmem=12G\n")
            jobf.write("#$ -l virtual_free=12G\n")
            jobf.write("bpp {}\n".format(bpp_ctl_filepath))

        manifest_entry["source_tree_path"] = filepath
        manifest_entry["results_filepath"] = out_filepath
        manifest_entry["mcmc_filepath"] = mcmc_filepath
        manifest_entries.append(manifest_entry)
def main():
    """
    Main CLI handler.

    Reads genealogies from the given tree files ('-' means standard input),
    simulates one alignment per genealogy with Seq-Gen for each replicate,
    and writes each replicate to ``<output_prefix>.<NNN>.chars`` plus a
    format-specific extension (``.nex``, ``.phylip`` or ``.txt`` for BPP).
    For BPP output every taxon label is prefixed with ``^`` and the
    alignments are written as consecutive PHYLIP blocks in one file.
    """
    parser = argparse.ArgumentParser(description=__description__)
    parser.add_argument("--version", action="version", version="%(prog)s " + __version__)
    parser.add_argument("output_prefix")
    parser.add_argument(
            "-t", "--tree-files",
            action="append",
            type=str,
            metavar="TREEFILE",
            help="Path to tree files (default: read from standard input).")
    parser.add_argument("-f", "--input-format",
            type=str,
            default="newick",
            choices=["nexus", "newick"],
            help="Input data format (default='%(default)s')")
    parser.add_argument(
            "-n", "--num-characters-per-locus",
            type=int,
            default=1000,
            help="Number of characters sampled per locus (default: %(default)s).")
    parser.add_argument("--mutation-rate-per-site",
            type=float,
            default=0.00001,
            help="Per-site mutation rate (default: %(default)s).")
    parser.add_argument(
            "-s", "--scale-branch-lengths",
            action="store",
            type=float,
            default=1.0,
            help="Scale branch lengths by this factor [default=%(default)s].")
    parser.add_argument("--num-replicates",
            type=int,
            default=1,
            help="Number of replicates (default: %(default)s).")
    parser.add_argument("-F", "--output-format",
            type=str,
            default="bpp",
            choices=["bpp", "nexus", "phylip"],
            help="Output data format (default='%(default)s')")
    parser.add_argument(
            "--concatenate",
            action="store_true",
            default=False,
            help="Concatenate the alignments across all genealogies")
    parser.add_argument("-z", "--random-seed",
            type=int,
            default=None,
            help="Seed for random number generator engine.")
    args = parser.parse_args()
    # NOTE(review): --scale-branch-lengths, --concatenate and --random-seed
    # are parsed but not used anywhere below; the --tree-files help text
    # also promises a stdin default that the check below rejects.
    if not args.tree_files:
        sys.exit("Please specify path(s) to genealogy tree file(s)")

    sg = seqgen.SeqGen()
    sg.seq_len = args.num_characters_per_locus
    sg.scale_branch_lens = args.mutation_rate_per_site

    # Pool the genealogies from every source into one tree list.
    gene_trees = dendropy.TreeList()
    for src_path in args.tree_files:
        if src_path == "-":
            src = sys.stdin
        else:
            src = open(src_path)
        with src:
            gene_trees.read(file=src,
                    schema=args.input_format,
                    rooting="force-rooted")

    if args.output_format == "bpp":
        for t in gene_trees.taxon_namespace:
            t.label = "^{}".format(t.label)

    for rep_idx in range(args.num_replicates):
        d0 = sg.generate(gene_trees)
        chars_filepath = "{}.{:03d}.chars".format(args.output_prefix, rep_idx + 1)
        if args.output_format == "nexus":
            chars_filepath += ".nex"
            d0.write(path=chars_filepath, schema="nexus")
        elif args.output_format == "phylip":
            chars_filepath += ".phylip"
            d0.write(path=chars_filepath, schema="phylip")
        elif args.output_format == "bpp":
            chars_filepath += ".txt"
            # Close the file deterministically so every block is flushed.
            with open(chars_filepath, "w") as f:
                for cm in d0.char_matrices:
                    cm.write(file=f, schema="phylip")
                    f.write("\n")
        else:
            raise NotImplementedError